From 4b3ab9372ffa569827c8f7b7ffc7b69ba544a3bd Mon Sep 17 00:00:00 2001
From: Vignesh R <vigneshr@ti.com>
Date: Mon, 3 Dec 2018 13:31:18 +0530
Subject: iio: adc: ti_am335x_tscadc: Improve accuracy of measurement

When performing single ended measurements with TSCADC, its recommended
to set negative input (SEL_INM_SWC_3_0) of ADC step to ADC's VREFN in the
corresponding STEP_CONFIGx register.

Also, the positive(SEL_RFP_SWC_2_0) and negative(SEL_RFM_SWC_1_0)
reference voltage for ADC step needs to be set to VREFP and VREFN
respectively in STEP_CONFIGx register.
Without these changes, there may be variation of as much as ~2% in the
ADC's digital output which is bad for precise measurement.

Signed-off-by: Vignesh R <vigneshr@ti.com>
Acked-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/ti_am335x_tscadc.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h
index b9a53e013bff..483168403ae5 100644
--- a/include/linux/mfd/ti_am335x_tscadc.h
+++ b/include/linux/mfd/ti_am335x_tscadc.h
@@ -78,6 +78,8 @@
 #define STEPCONFIG_YNN		BIT(8)
 #define STEPCONFIG_XNP		BIT(9)
 #define STEPCONFIG_YPN		BIT(10)
+#define STEPCONFIG_RFP(val)	((val) << 12)
+#define STEPCONFIG_RFP_VREFP	(0x3 << 12)
 #define STEPCONFIG_INM_MASK	(0xF << 15)
 #define STEPCONFIG_INM(val)	((val) << 15)
 #define STEPCONFIG_INM_ADCREFM	STEPCONFIG_INM(8)
@@ -86,6 +88,8 @@
 #define STEPCONFIG_INP_AN4	STEPCONFIG_INP(4)
 #define STEPCONFIG_INP_ADCREFM	STEPCONFIG_INP(8)
 #define STEPCONFIG_FIFO1	BIT(26)
+#define STEPCONFIG_RFM(val)	((val) << 23)
+#define STEPCONFIG_RFM_VREFN	(0x3 << 23)
 
 /* Delay register */
 #define STEPDELAY_OPEN_MASK	(0x3FFFF << 0)
-- 
cgit v1.2.3-71-gd317


From a54e950fdec3cde98caa04bc601cbdc95d0d319c Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Wed, 7 Nov 2018 14:50:01 +0100
Subject: mfd: tmio: Typo s/use use/use/

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Reviewed-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/tmio.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h
index 1e70060c92ce..aa696bcb1d12 100644
--- a/include/linux/mfd/tmio.h
+++ b/include/linux/mfd/tmio.h
@@ -83,7 +83,7 @@
 /* Some controllers have a CBSY bit */
 #define TMIO_MMC_HAVE_CBSY		BIT(11)
 
-/* Some controllers that support HS400 use use 4 taps while others use 8. */
+/* Some controllers that support HS400 use 4 taps while others use 8. */
 #define TMIO_MMC_HAVE_4TAP_HS400	BIT(13)
 
 int tmio_core_mmc_enable(void __iomem *cnf, int shift, unsigned long base);
-- 
cgit v1.2.3-71-gd317


From 7f9472134a5af31bad191f074a5d416146da26f7 Mon Sep 17 00:00:00 2001
From: Richard Fitzgerald <rf@opensource.cirrus.com>
Date: Mon, 12 Nov 2018 15:28:37 +0000
Subject: mfd: madera: Add shared data for accessory detection

Add variables to struct madera that will be shared by the
extcon and audio codec drivers to synchronize output state
during accessory detection. Also add a mutex to protect
the DAPM pointer.

Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/madera-core.c       | 3 +++
 include/linux/mfd/madera/core.h | 7 +++++++
 2 files changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mfd/madera-core.c b/drivers/mfd/madera-core.c
index 440030cecbbd..5b58a8aea902 100644
--- a/drivers/mfd/madera-core.c
+++ b/drivers/mfd/madera-core.c
@@ -15,6 +15,7 @@
 #include <linux/gpio.h>
 #include <linux/mfd/core.h>
 #include <linux/module.h>
+#include <linux/mutex.h>
 #include <linux/notifier.h>
 #include <linux/of.h>
 #include <linux/of_gpio.h>
@@ -357,6 +358,8 @@ int madera_dev_init(struct madera *madera)
 
 	dev_set_drvdata(madera->dev, madera);
 	BLOCKING_INIT_NOTIFIER_HEAD(&madera->notifier);
+	mutex_init(&madera->dapm_ptr_lock);
+
 	madera_set_micbias_info(madera);
 
 	/*
diff --git a/include/linux/mfd/madera/core.h b/include/linux/mfd/madera/core.h
index fe69c0f4398f..4d5d51a9c8a6 100644
--- a/include/linux/mfd/madera/core.h
+++ b/include/linux/mfd/madera/core.h
@@ -15,6 +15,7 @@
 #include <linux/gpio/consumer.h>
 #include <linux/interrupt.h>
 #include <linux/mfd/madera/pdata.h>
+#include <linux/mutex.h>
 #include <linux/notifier.h>
 #include <linux/regmap.h>
 #include <linux/regulator/consumer.h>
@@ -37,6 +38,8 @@ enum madera_type {
 
 #define MADERA_MAX_MICBIAS		4
 
+#define MADERA_MAX_HP_OUTPUT		3
+
 /* Notifier events */
 #define MADERA_NOTIFY_VOICE_TRIGGER	0x1
 #define MADERA_NOTIFY_HPDET		0x2
@@ -183,6 +186,10 @@ struct madera {
 	unsigned int num_childbias[MADERA_MAX_MICBIAS];
 
 	struct snd_soc_dapm_context *dapm;
+	struct mutex dapm_ptr_lock;
+	unsigned int hp_ena;
+	bool out_clamp[MADERA_MAX_HP_OUTPUT];
+	bool out_shorted[MADERA_MAX_HP_OUTPUT];
 
 	struct blocking_notifier_head notifier;
 };
-- 
cgit v1.2.3-71-gd317


From ddf5aaa8eecb6ccf51f311a513c3a5011fbe0d54 Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul@crapouillou.net>
Date: Sun, 16 Dec 2018 15:10:44 +0100
Subject: mfd: ingenic-tcu: Fix bit field description in header

The description of the bit was inverted.

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/ingenic-tcu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/ingenic-tcu.h b/include/linux/mfd/ingenic-tcu.h
index ab16ad283def..2083fa20821d 100644
--- a/include/linux/mfd/ingenic-tcu.h
+++ b/include/linux/mfd/ingenic-tcu.h
@@ -41,7 +41,7 @@
 #define TCU_TCSR_PRESCALE_LSB		3
 #define TCU_TCSR_PRESCALE_MASK		0x38
 
-#define TCU_TCSR_PWM_SD		BIT(9)	/* 0: Shutdown abruptly 1: gracefully */
+#define TCU_TCSR_PWM_SD		BIT(9)	/* 0: Shutdown gracefully 1: abruptly */
 #define TCU_TCSR_PWM_INITL_HIGH	BIT(8)	/* Sets the initial output level */
 #define TCU_TCSR_PWM_EN		BIT(7)	/* PWM pin output enable */
 
-- 
cgit v1.2.3-71-gd317


From c1f3375be60c562e24460d41b75e564c0a429835 Mon Sep 17 00:00:00 2001
From: Cheng-Yi Chiang <cychiang@chromium.org>
Date: Tue, 18 Dec 2018 17:06:26 +0800
Subject: mfd: cros_ec: Add commands to control codec

Add EC host commands to control codec on EC.

Signed-off-by: Cheng-Yi Chiang <cychiang@chromium.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/cros_ec_commands.h | 94 ++++++++++++++++++++++++++++++++++++
 1 file changed, 94 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h
index 9a9631f0559e..fc91082d4c35 100644
--- a/include/linux/mfd/cros_ec_commands.h
+++ b/include/linux/mfd/cros_ec_commands.h
@@ -2790,6 +2790,100 @@ struct ec_response_battery_vendor_param {
 	uint32_t value;
 } __packed;
 
+/*****************************************************************************/
+/* Commands for I2S recording on audio codec. */
+
+#define EC_CMD_CODEC_I2S 0x00BC
+
+enum ec_codec_i2s_subcmd {
+	EC_CODEC_SET_SAMPLE_DEPTH = 0x0,
+	EC_CODEC_SET_GAIN = 0x1,
+	EC_CODEC_GET_GAIN = 0x2,
+	EC_CODEC_I2S_ENABLE = 0x3,
+	EC_CODEC_I2S_SET_CONFIG = 0x4,
+	EC_CODEC_I2S_SET_TDM_CONFIG = 0x5,
+	EC_CODEC_I2S_SET_BCLK = 0x6,
+};
+
+enum ec_sample_depth_value {
+	EC_CODEC_SAMPLE_DEPTH_16 = 0,
+	EC_CODEC_SAMPLE_DEPTH_24 = 1,
+};
+
+enum ec_i2s_config {
+	EC_DAI_FMT_I2S = 0,
+	EC_DAI_FMT_RIGHT_J = 1,
+	EC_DAI_FMT_LEFT_J = 2,
+	EC_DAI_FMT_PCM_A = 3,
+	EC_DAI_FMT_PCM_B = 4,
+	EC_DAI_FMT_PCM_TDM = 5,
+};
+
+struct ec_param_codec_i2s {
+	/*
+	 * enum ec_codec_i2s_subcmd
+	 */
+	uint8_t cmd;
+	union {
+		/*
+		 * EC_CODEC_SET_SAMPLE_DEPTH
+		 * Value should be one of ec_sample_depth_value.
+		 */
+		uint8_t depth;
+
+		/*
+		 * EC_CODEC_SET_GAIN
+		 * Value should be 0~43 for both channels.
+		 */
+		struct ec_param_codec_i2s_set_gain {
+			uint8_t left;
+			uint8_t right;
+		} __packed gain;
+
+		/*
+		 * EC_CODEC_I2S_ENABLE
+		 * 1 to enable, 0 to disable.
+		 */
+		uint8_t i2s_enable;
+
+		/*
+		 * EC_CODEC_I2S_SET_COFNIG
+		 * Value should be one of ec_i2s_config.
+		 */
+		uint8_t i2s_config;
+
+		/*
+		 * EC_CODEC_I2S_SET_TDM_CONFIG
+		 * Value should be one of ec_i2s_config.
+		 */
+		struct ec_param_codec_i2s_tdm {
+			/*
+			 * 0 to 496
+			 */
+			int16_t ch0_delay;
+			/*
+			 * -1 to 496
+			 */
+			int16_t ch1_delay;
+			uint8_t adjacent_to_ch0;
+			uint8_t adjacent_to_ch1;
+		} __packed tdm_param;
+
+		/*
+		 * EC_CODEC_I2S_SET_BCLK
+		 */
+		uint32_t bclk;
+	};
+} __packed;
+
+/*
+ * For subcommand EC_CODEC_GET_GAIN.
+ */
+struct ec_response_codec_gain {
+	uint8_t left;
+	uint8_t right;
+} __packed;
+
 /*****************************************************************************/
 /* System commands */
 
-- 
cgit v1.2.3-71-gd317


From 2d533a9287f2011632977e87ce2783f4c689c984 Mon Sep 17 00:00:00 2001
From: Denis Bolotin <dbolotin@marvell.com>
Date: Thu, 3 Jan 2019 12:02:39 +0200
Subject: qed: Fix qed_chain_set_prod() for PBL chains with non power of 2 page
 count

In PBL chains with non power of 2 page count, the producer is not at the
beginning of the chain when index is 0 after a wrap. Therefore, after the
producer index wrap around, page index should be calculated more carefully.

Signed-off-by: Denis Bolotin <dbolotin@marvell.com>
Signed-off-by: Ariel Elior <aelior@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/qed/qed_chain.h | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h
index 59ddf9af909e..2dd0a9ed5b36 100644
--- a/include/linux/qed/qed_chain.h
+++ b/include/linux/qed/qed_chain.h
@@ -663,6 +663,37 @@ out:
 static inline void qed_chain_set_prod(struct qed_chain *p_chain,
 				      u32 prod_idx, void *p_prod_elem)
 {
+	if (p_chain->mode == QED_CHAIN_MODE_PBL) {
+		u32 cur_prod, page_mask, page_cnt, page_diff;
+
+		cur_prod = is_chain_u16(p_chain) ? p_chain->u.chain16.prod_idx :
+			   p_chain->u.chain32.prod_idx;
+
+		/* Assume that number of elements in a page is power of 2 */
+		page_mask = ~p_chain->elem_per_page_mask;
+
+		/* Use "cur_prod - 1" and "prod_idx - 1" since producer index
+		 * reaches the first element of next page before the page index
+		 * is incremented. See qed_chain_produce().
+		 * Index wrap around is not a problem because the difference
+		 * between current and given producer indices is always
+		 * positive and lower than the chain's capacity.
+		 */
+		page_diff = (((cur_prod - 1) & page_mask) -
+			     ((prod_idx - 1) & page_mask)) /
+			    p_chain->elem_per_page;
+
+		page_cnt = qed_chain_get_page_cnt(p_chain);
+		if (is_chain_u16(p_chain))
+			p_chain->pbl.c.u16.prod_page_idx =
+				(p_chain->pbl.c.u16.prod_page_idx -
+				 page_diff + page_cnt) % page_cnt;
+		else
+			p_chain->pbl.c.u32.prod_page_idx =
+				(p_chain->pbl.c.u32.prod_page_idx -
+				 page_diff + page_cnt) % page_cnt;
+	}
+
 	if (is_chain_u16(p_chain))
 		p_chain->u.chain16.prod_idx = (u16) prod_idx;
 	else
-- 
cgit v1.2.3-71-gd317


From d3bd7413e0ca40b60cf60d4003246d067cafdeda Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sun, 6 Jan 2019 00:54:37 +0100
Subject: bpf: fix sanitation of alu op with pointer / scalar type from
 different paths

While 979d63d50c0c ("bpf: prevent out of bounds speculation on pointer
arithmetic") took care of rejecting alu op on pointer when e.g. pointer
came from two different map values with different map properties such as
value size, Jann reported that a case was not covered yet when a given
alu op is used in both "ptr_reg += reg" and "numeric_reg += reg" from
different branches where we would incorrectly try to sanitize based
on the pointer's limit. Catch this corner case and reject the program
instead.

Fixes: 979d63d50c0c ("bpf: prevent out of bounds speculation on pointer arithmetic")
Reported-by: Jann Horn <jannh@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_verifier.h |  1 +
 kernel/bpf/verifier.c        | 61 ++++++++++++++++++++++++++++++++++----------
 2 files changed, 49 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 27b74947cd2b..573cca00a0e6 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -172,6 +172,7 @@ struct bpf_verifier_state_list {
 #define BPF_ALU_SANITIZE_SRC		1U
 #define BPF_ALU_SANITIZE_DST		2U
 #define BPF_ALU_NEG_VALUE		(1U << 2)
+#define BPF_ALU_NON_POINTER		(1U << 3)
 #define BPF_ALU_SANITIZE		(BPF_ALU_SANITIZE_SRC | \
 					 BPF_ALU_SANITIZE_DST)
 
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index f6bc62a9ee8e..56674a7c3778 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3103,6 +3103,40 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
 	}
 }
 
+static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
+				    const struct bpf_insn *insn)
+{
+	return env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K;
+}
+
+static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
+				       u32 alu_state, u32 alu_limit)
+{
+	/* If we arrived here from different branches with different
+	 * state or limits to sanitize, then this won't work.
+	 */
+	if (aux->alu_state &&
+	    (aux->alu_state != alu_state ||
+	     aux->alu_limit != alu_limit))
+		return -EACCES;
+
+	/* Corresponding fixup done in fixup_bpf_calls(). */
+	aux->alu_state = alu_state;
+	aux->alu_limit = alu_limit;
+	return 0;
+}
+
+static int sanitize_val_alu(struct bpf_verifier_env *env,
+			    struct bpf_insn *insn)
+{
+	struct bpf_insn_aux_data *aux = cur_aux(env);
+
+	if (can_skip_alu_sanitation(env, insn))
+		return 0;
+
+	return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
+}
+
 static int sanitize_ptr_alu(struct bpf_verifier_env *env,
 			    struct bpf_insn *insn,
 			    const struct bpf_reg_state *ptr_reg,
@@ -3117,7 +3151,7 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env,
 	struct bpf_reg_state tmp;
 	bool ret;
 
-	if (env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K)
+	if (can_skip_alu_sanitation(env, insn))
 		return 0;
 
 	/* We already marked aux for masking from non-speculative
@@ -3133,19 +3167,8 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env,
 
 	if (retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg))
 		return 0;
-
-	/* If we arrived here from different branches with different
-	 * limits to sanitize, then this won't work.
-	 */
-	if (aux->alu_state &&
-	    (aux->alu_state != alu_state ||
-	     aux->alu_limit != alu_limit))
+	if (update_alu_sanitation_state(aux, alu_state, alu_limit))
 		return -EACCES;
-
-	/* Corresponding fixup done in fixup_bpf_calls(). */
-	aux->alu_state = alu_state;
-	aux->alu_limit = alu_limit;
-
 do_sim:
 	/* Simulate and find potential out-of-bounds access under
 	 * speculative execution from truncation as a result of
@@ -3418,6 +3441,8 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
 	s64 smin_val, smax_val;
 	u64 umin_val, umax_val;
 	u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
+	u32 dst = insn->dst_reg;
+	int ret;
 
 	if (insn_bitness == 32) {
 		/* Relevant for 32-bit RSH: Information can propagate towards
@@ -3452,6 +3477,11 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
 
 	switch (opcode) {
 	case BPF_ADD:
+		ret = sanitize_val_alu(env, insn);
+		if (ret < 0) {
+			verbose(env, "R%d tried to add from different pointers or scalars\n", dst);
+			return ret;
+		}
 		if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
 		    signed_add_overflows(dst_reg->smax_value, smax_val)) {
 			dst_reg->smin_value = S64_MIN;
@@ -3471,6 +3501,11 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
 		dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
 		break;
 	case BPF_SUB:
+		ret = sanitize_val_alu(env, insn);
+		if (ret < 0) {
+			verbose(env, "R%d tried to sub from different pointers or scalars\n", dst);
+			return ret;
+		}
 		if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
 		    signed_sub_overflows(dst_reg->smax_value, smin_val)) {
 			/* Overflow possible, we know nothing */
-- 
cgit v1.2.3-71-gd317


From 02669b17a433c242a40f01f14b691c9c9d1f8a13 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Wed, 5 Dec 2018 16:37:03 -0500
Subject: XArray: Turn xa_init_flags into a static inline

A regular xa_init_flags() put all dynamically-initialised XArrays into
the same locking class.  That leads to lockdep believing that taking
one XArray lock while holding another is a deadlock.  It's possible to
work around some of these situations with separate locking classes for
irq/bh/regular XArrays, and SINGLE_DEPTH_NESTING, but that's ugly, and
it doesn't work for all situations (where we have completely unrelated
XArrays).

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/linux/xarray.h | 19 ++++++++++++++++++-
 lib/xarray.c           | 29 -----------------------------
 2 files changed, 18 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index f492e21c4aa2..4cf3cd128689 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -286,7 +286,6 @@ struct xarray {
  */
 #define DEFINE_XARRAY_ALLOC(name) DEFINE_XARRAY_FLAGS(name, XA_FLAGS_ALLOC)
 
-void xa_init_flags(struct xarray *, gfp_t flags);
 void *xa_load(struct xarray *, unsigned long index);
 void *xa_store(struct xarray *, unsigned long index, void *entry, gfp_t);
 void *xa_erase(struct xarray *, unsigned long index);
@@ -303,6 +302,24 @@ unsigned int xa_extract(struct xarray *, void **dst, unsigned long start,
 		unsigned long max, unsigned int n, xa_mark_t);
 void xa_destroy(struct xarray *);
 
+/**
+ * xa_init_flags() - Initialise an empty XArray with flags.
+ * @xa: XArray.
+ * @flags: XA_FLAG values.
+ *
+ * If you need to initialise an XArray with special flags (eg you need
+ * to take the lock from interrupt context), use this function instead
+ * of xa_init().
+ *
+ * Context: Any context.
+ */
+static inline void xa_init_flags(struct xarray *xa, gfp_t flags)
+{
+	spin_lock_init(&xa->xa_lock);
+	xa->xa_flags = flags;
+	xa->xa_head = NULL;
+}
+
 /**
  * xa_init() - Initialise an empty XArray.
  * @xa: XArray.
diff --git a/lib/xarray.c b/lib/xarray.c
index 5f3f9311de89..dda6026d202e 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -1250,35 +1250,6 @@ void *xas_find_conflict(struct xa_state *xas)
 }
 EXPORT_SYMBOL_GPL(xas_find_conflict);
 
-/**
- * xa_init_flags() - Initialise an empty XArray with flags.
- * @xa: XArray.
- * @flags: XA_FLAG values.
- *
- * If you need to initialise an XArray with special flags (eg you need
- * to take the lock from interrupt context), use this function instead
- * of xa_init().
- *
- * Context: Any context.
- */
-void xa_init_flags(struct xarray *xa, gfp_t flags)
-{
-	unsigned int lock_type;
-	static struct lock_class_key xa_lock_irq;
-	static struct lock_class_key xa_lock_bh;
-
-	spin_lock_init(&xa->xa_lock);
-	xa->xa_flags = flags;
-	xa->xa_head = NULL;
-
-	lock_type = xa_lock_type(xa);
-	if (lock_type == XA_LOCK_IRQ)
-		lockdep_set_class(&xa->xa_lock, &xa_lock_irq);
-	else if (lock_type == XA_LOCK_BH)
-		lockdep_set_class(&xa->xa_lock, &xa_lock_bh);
-}
-EXPORT_SYMBOL(xa_init_flags);
-
 /**
  * xa_load() - Load an entry from an XArray.
  * @xa: XArray.
-- 
cgit v1.2.3-71-gd317


From 4a31896c5b5a2715ecf4033426aa0a35066d92d6 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Mon, 17 Dec 2018 14:45:36 -0500
Subject: XArray: Change xa_for_each iterator

There were three problems with this API:
1. It took too many arguments; almost all users wanted to iterate over
every element in the array rather than a subset.
2. It required that 'index' be initialised before use, and there's no
realistic way to make GCC catch that.
3. 'index' and 'entry' were the opposite way round from every other
member of the XArray APIs.

So split it into three different APIs:

xa_for_each(xa, index, entry)
xa_for_each_start(xa, index, entry, start)
xa_for_each_marked(xa, index, entry, filter)

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/linux/xarray.h | 78 +++++++++++++++++++++++++++++++++++++++++---------
 lib/test_xarray.c      | 11 ++++---
 2 files changed, 70 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 4cf3cd128689..3d0ce8b267e3 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -359,20 +359,45 @@ static inline bool xa_marked(const struct xarray *xa, xa_mark_t mark)
 }
 
 /**
- * xa_for_each() - Iterate over a portion of an XArray.
+ * xa_for_each_start() - Iterate over a portion of an XArray.
  * @xa: XArray.
+ * @index: Index of @entry.
  * @entry: Entry retrieved from array.
+ * @start: First index to retrieve from array.
+ *
+ * During the iteration, @entry will have the value of the entry stored
+ * in @xa at @index.  You may modify @index during the iteration if you
+ * want to skip or reprocess indices.  It is safe to modify the array
+ * during the iteration.  At the end of the iteration, @entry will be set
+ * to NULL and @index will have a value less than or equal to max.
+ *
+ * xa_for_each_start() is O(n.log(n)) while xas_for_each() is O(n).  You have
+ * to handle your own locking with xas_for_each(), and if you have to unlock
+ * after each iteration, it will also end up being O(n.log(n)).
+ * xa_for_each_start() will spin if it hits a retry entry; if you intend to
+ * see retry entries, you should use the xas_for_each() iterator instead.
+ * The xas_for_each() iterator will expand into more inline code than
+ * xa_for_each_start().
+ *
+ * Context: Any context.  Takes and releases the RCU lock.
+ */
+#define xa_for_each_start(xa, index, entry, start)			\
+	for (index = start,						\
+	     entry = xa_find(xa, &index, ULONG_MAX, XA_PRESENT);	\
+	     entry;							\
+	     entry = xa_find_after(xa, &index, ULONG_MAX, XA_PRESENT))
+
+/**
+ * xa_for_each() - Iterate over present entries in an XArray.
+ * @xa: XArray.
  * @index: Index of @entry.
- * @max: Maximum index to retrieve from array.
- * @filter: Selection criterion.
+ * @entry: Entry retrieved from array.
  *
- * Initialise @index to the lowest index you want to retrieve from the
- * array.  During the iteration, @entry will have the value of the entry
- * stored in @xa at @index.  The iteration will skip all entries in the
- * array which do not match @filter.  You may modify @index during the
- * iteration if you want to skip or reprocess indices.  It is safe to modify
- * the array during the iteration.  At the end of the iteration, @entry will
- * be set to NULL and @index will have a value less than or equal to max.
+ * During the iteration, @entry will have the value of the entry stored
+ * in @xa at @index.  You may modify @index during the iteration if you want
+ * to skip or reprocess indices.  It is safe to modify the array during the
+ * iteration.  At the end of the iteration, @entry will be set to NULL and
+ * @index will have a value less than or equal to max.
  *
  * xa_for_each() is O(n.log(n)) while xas_for_each() is O(n).  You have
  * to handle your own locking with xas_for_each(), and if you have to unlock
@@ -383,9 +408,36 @@ static inline bool xa_marked(const struct xarray *xa, xa_mark_t mark)
  *
  * Context: Any context.  Takes and releases the RCU lock.
  */
-#define xa_for_each(xa, entry, index, max, filter) \
-	for (entry = xa_find(xa, &index, max, filter); entry; \
-	     entry = xa_find_after(xa, &index, max, filter))
+#define xa_for_each(xa, index, entry) \
+	xa_for_each_start(xa, index, entry, 0)
+
+/**
+ * xa_for_each_marked() - Iterate over marked entries in an XArray.
+ * @xa: XArray.
+ * @index: Index of @entry.
+ * @entry: Entry retrieved from array.
+ * @filter: Selection criterion.
+ *
+ * During the iteration, @entry will have the value of the entry stored
+ * in @xa at @index.  The iteration will skip all entries in the array
+ * which do not match @filter.  You may modify @index during the iteration
+ * if you want to skip or reprocess indices.  It is safe to modify the array
+ * during the iteration.  At the end of the iteration, @entry will be set to
+ * NULL and @index will have a value less than or equal to max.
+ *
+ * xa_for_each_marked() is O(n.log(n)) while xas_for_each_marked() is O(n).
+ * You have to handle your own locking with xas_for_each(), and if you have
+ * to unlock after each iteration, it will also end up being O(n.log(n)).
+ * xa_for_each_marked() will spin if it hits a retry entry; if you intend to
+ * see retry entries, you should use the xas_for_each_marked() iterator
+ * instead.  The xas_for_each_marked() iterator will expand into more inline
+ * code than xa_for_each_marked().
+ *
+ * Context: Any context.  Takes and releases the RCU lock.
+ */
+#define xa_for_each_marked(xa, index, entry, filter) \
+	for (index = 0, entry = xa_find(xa, &index, ULONG_MAX, filter); \
+	     entry; entry = xa_find_after(xa, &index, ULONG_MAX, filter))
 
 #define xa_trylock(xa)		spin_trylock(&(xa)->xa_lock)
 #define xa_lock(xa)		spin_lock(&(xa)->xa_lock)
diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index a885afde0aef..dc02eff562b8 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -357,7 +357,7 @@ static noinline void check_cmpxchg(struct xarray *xa)
 static noinline void check_reserve(struct xarray *xa)
 {
 	void *entry;
-	unsigned long index = 0;
+	unsigned long index;
 
 	/* An array with a reserved entry is not empty */
 	XA_BUG_ON(xa, !xa_empty(xa));
@@ -393,7 +393,7 @@ static noinline void check_reserve(struct xarray *xa)
 	xa_reserve(xa, 6, GFP_KERNEL);
 	xa_store_index(xa, 7, GFP_KERNEL);
 
-	xa_for_each(xa, entry, index, ULONG_MAX, XA_PRESENT) {
+	xa_for_each(xa, index, entry) {
 		XA_BUG_ON(xa, index != 5 && index != 7);
 	}
 	xa_destroy(xa);
@@ -812,17 +812,16 @@ static noinline void check_find_1(struct xarray *xa)
 static noinline void check_find_2(struct xarray *xa)
 {
 	void *entry;
-	unsigned long i, j, index = 0;
+	unsigned long i, j, index;
 
-	xa_for_each(xa, entry, index, ULONG_MAX, XA_PRESENT) {
+	xa_for_each(xa, index, entry) {
 		XA_BUG_ON(xa, true);
 	}
 
 	for (i = 0; i < 1024; i++) {
 		xa_store_index(xa, index, GFP_KERNEL);
 		j = 0;
-		index = 0;
-		xa_for_each(xa, entry, index, ULONG_MAX, XA_PRESENT) {
+		xa_for_each(xa, index, entry) {
 			XA_BUG_ON(xa, xa_mk_index(index) != entry);
 			XA_BUG_ON(xa, index != j++);
 		}
-- 
cgit v1.2.3-71-gd317


From 76b4e52995654af260f14558e0e07b5b039ae202 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Fri, 28 Dec 2018 23:20:44 -0500
Subject: XArray: Permit storing 2-byte-aligned pointers

On m68k, statically allocated pointers may only be two-byte aligned.
This clashes with the XArray's method for tagging internal pointers.
Permit storing these pointers in single slots (ie not in multislots).

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/linux/xarray.h | 18 +++++++++++++++---
 lib/test_xarray.c      | 30 ++++++++++++++++++++++++++++++
 lib/xarray.c           | 22 +++++++++++++---------
 3 files changed, 58 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 3d0ce8b267e3..435c25b29079 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -176,7 +176,8 @@ static inline bool xa_is_internal(const void *entry)
  */
 static inline bool xa_is_err(const void *entry)
 {
-	return unlikely(xa_is_internal(entry));
+	return unlikely(xa_is_internal(entry) &&
+			(unsigned long)entry >= -((MAX_ERRNO << 2) + 2));
 }
 
 /**
@@ -1039,8 +1040,8 @@ static inline bool xa_is_sibling(const void *entry)
 		(entry < xa_mk_sibling(XA_CHUNK_SIZE - 1));
 }
 
-#define XA_ZERO_ENTRY		xa_mk_internal(256)
-#define XA_RETRY_ENTRY		xa_mk_internal(257)
+#define XA_RETRY_ENTRY		xa_mk_internal(256)
+#define XA_ZERO_ENTRY		xa_mk_internal(257)
 
 /**
  * xa_is_zero() - Is the entry a zero entry?
@@ -1064,6 +1065,17 @@ static inline bool xa_is_retry(const void *entry)
 	return unlikely(entry == XA_RETRY_ENTRY);
 }
 
+/**
+ * xa_is_advanced() - Is the entry only permitted for the advanced API?
+ * @entry: Entry to be stored in the XArray.
+ *
+ * Return: %true if the entry cannot be stored by the normal API.
+ */
+static inline bool xa_is_advanced(const void *entry)
+{
+	return xa_is_internal(entry) && (entry <= XA_RETRY_ENTRY);
+}
+
 /**
  * typedef xa_update_node_t - A callback function from the XArray.
  * @node: The node which is being processed
diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index dc02eff562b8..6e0212a60b08 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -1184,6 +1184,35 @@ static noinline void check_store_range(struct xarray *xa)
 	}
 }
 
+static void check_align_1(struct xarray *xa, char *name)
+{
+	int i;
+	unsigned int id;
+	unsigned long index;
+	void *entry;
+
+	for (i = 0; i < 8; i++) {
+		id = 0;
+		XA_BUG_ON(xa, xa_alloc(xa, &id, UINT_MAX, name + i, GFP_KERNEL)
+				!= 0);
+		XA_BUG_ON(xa, id != i);
+	}
+	xa_for_each(xa, index, entry)
+		XA_BUG_ON(xa, xa_is_err(entry));
+	xa_destroy(xa);
+}
+
+static noinline void check_align(struct xarray *xa)
+{
+	char name[] = "Motorola 68000";
+
+	check_align_1(xa, name);
+	check_align_1(xa, name + 1);
+	check_align_1(xa, name + 2);
+	check_align_1(xa, name + 3);
+//	check_align_2(xa, name);
+}
+
 static LIST_HEAD(shadow_nodes);
 
 static void test_update_node(struct xa_node *node)
@@ -1333,6 +1362,7 @@ static int xarray_checks(void)
 	check_create_range(&array);
 	check_store_range(&array);
 	check_store_iter(&array);
+	check_align(&xa0);
 
 	check_workingset(&array, 0);
 	check_workingset(&array, 64);
diff --git a/lib/xarray.c b/lib/xarray.c
index dda6026d202e..bffa26b1f0d6 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -232,6 +232,8 @@ void *xas_load(struct xa_state *xas)
 		if (xas->xa_shift > node->shift)
 			break;
 		entry = xas_descend(xas, node);
+		if (node->shift == 0)
+			break;
 	}
 	return entry;
 }
@@ -506,7 +508,7 @@ static void xas_free_nodes(struct xa_state *xas, struct xa_node *top)
 	for (;;) {
 		void *entry = xa_entry_locked(xas->xa, node, offset);
 
-		if (xa_is_node(entry)) {
+		if (node->shift && xa_is_node(entry)) {
 			node = xa_to_node(entry);
 			offset = 0;
 			continue;
@@ -604,6 +606,7 @@ static int xas_expand(struct xa_state *xas, void *head)
 /*
  * xas_create() - Create a slot to store an entry in.
  * @xas: XArray operation state.
+ * @allow_root: %true if we can store the entry in the root directly
  *
  * Most users will not need to call this function directly, as it is called
  * by xas_store().  It is useful for doing conditional store operations
@@ -613,7 +616,7 @@ static int xas_expand(struct xa_state *xas, void *head)
  * If the slot was newly created, returns %NULL.  If it failed to create the
  * slot, returns %NULL and indicates the error in @xas.
  */
-static void *xas_create(struct xa_state *xas)
+static void *xas_create(struct xa_state *xas, bool allow_root)
 {
 	struct xarray *xa = xas->xa;
 	void *entry;
@@ -628,6 +631,8 @@ static void *xas_create(struct xa_state *xas)
 		shift = xas_expand(xas, entry);
 		if (shift < 0)
 			return NULL;
+		if (!shift && !allow_root)
+			shift = XA_CHUNK_SHIFT;
 		entry = xa_head_locked(xa);
 		slot = &xa->xa_head;
 	} else if (xas_error(xas)) {
@@ -687,7 +692,7 @@ void xas_create_range(struct xa_state *xas)
 	xas->xa_sibs = 0;
 
 	for (;;) {
-		xas_create(xas);
+		xas_create(xas, true);
 		if (xas_error(xas))
 			goto restore;
 		if (xas->xa_index <= (index | XA_CHUNK_MASK))
@@ -754,7 +759,7 @@ void *xas_store(struct xa_state *xas, void *entry)
 	bool value = xa_is_value(entry);
 
 	if (entry)
-		first = xas_create(xas);
+		first = xas_create(xas, !xa_is_node(entry));
 	else
 		first = xas_load(xas);
 
@@ -1279,7 +1284,6 @@ static void *xas_result(struct xa_state *xas, void *curr)
 {
 	if (xa_is_zero(curr))
 		return NULL;
-	XA_NODE_BUG_ON(xas->xa_node, xa_is_internal(curr));
 	if (xas_error(xas))
 		curr = xas->xa_node;
 	return curr;
@@ -1349,7 +1353,7 @@ void *__xa_store(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp)
 	XA_STATE(xas, xa, index);
 	void *curr;
 
-	if (WARN_ON_ONCE(xa_is_internal(entry)))
+	if (WARN_ON_ONCE(xa_is_advanced(entry)))
 		return XA_ERROR(-EINVAL);
 	if (xa_track_free(xa) && !entry)
 		entry = XA_ZERO_ENTRY;
@@ -1415,7 +1419,7 @@ void *__xa_cmpxchg(struct xarray *xa, unsigned long index,
 	XA_STATE(xas, xa, index);
 	void *curr;
 
-	if (WARN_ON_ONCE(xa_is_internal(entry)))
+	if (WARN_ON_ONCE(xa_is_advanced(entry)))
 		return XA_ERROR(-EINVAL);
 	if (xa_track_free(xa) && !entry)
 		entry = XA_ZERO_ENTRY;
@@ -1538,7 +1542,7 @@ void *xa_store_range(struct xarray *xa, unsigned long first,
 			if (last + 1)
 				order = __ffs(last + 1);
 			xas_set_order(&xas, last, order);
-			xas_create(&xas);
+			xas_create(&xas, true);
 			if (xas_error(&xas))
 				goto unlock;
 		}
@@ -1580,7 +1584,7 @@ int __xa_alloc(struct xarray *xa, u32 *id, u32 max, void *entry, gfp_t gfp)
 	XA_STATE(xas, xa, 0);
 	int err;
 
-	if (WARN_ON_ONCE(xa_is_internal(entry)))
+	if (WARN_ON_ONCE(xa_is_advanced(entry)))
 		return -EINVAL;
 	if (WARN_ON_ONCE(!xa_track_free(xa)))
 		return -EINVAL;
-- 
cgit v1.2.3-71-gd317


From b0606fed6eece16a421034eca0bbea9a08b90e91 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Wed, 2 Jan 2019 13:57:03 -0500
Subject: XArray: Honour reserved entries in xa_insert

xa_insert() should treat reserved entries as occupied, not as available.
Also, it should treat requests to insert a NULL pointer as a request
to reserve the slot.  Add xa_insert_bh() and xa_insert_irq() for
completeness.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 Documentation/core-api/xarray.rst |  15 +++---
 include/linux/xarray.h            | 110 ++++++++++++++++++++++++--------------
 lib/test_xarray.c                 |   8 +--
 lib/xarray.c                      |  41 ++++++++++++++
 4 files changed, 126 insertions(+), 48 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/core-api/xarray.rst b/Documentation/core-api/xarray.rst
index 6a6d67acaf69..5d54b27c6eba 100644
--- a/Documentation/core-api/xarray.rst
+++ b/Documentation/core-api/xarray.rst
@@ -108,12 +108,13 @@ some, but not all of the other indices changing.
 
 Sometimes you need to ensure that a subsequent call to :c:func:`xa_store`
 will not need to allocate memory.  The :c:func:`xa_reserve` function
-will store a reserved entry at the indicated index.  Users of the normal
-API will see this entry as containing ``NULL``.  If you do not need to
-use the reserved entry, you can call :c:func:`xa_release` to remove the
-unused entry.  If another user has stored to the entry in the meantime,
-:c:func:`xa_release` will do nothing; if instead you want the entry to
-become ``NULL``, you should use :c:func:`xa_erase`.
+will store a reserved entry at the indicated index.  Users of the
+normal API will see this entry as containing ``NULL``.  If you do
+not need to use the reserved entry, you can call :c:func:`xa_release`
+to remove the unused entry.  If another user has stored to the entry
+in the meantime, :c:func:`xa_release` will do nothing; if instead you
+want the entry to become ``NULL``, you should use :c:func:`xa_erase`.
+Using :c:func:`xa_insert` on a reserved entry will fail.
 
 If all entries in the array are ``NULL``, the :c:func:`xa_empty` function
 will return ``true``.
@@ -183,6 +184,8 @@ Takes xa_lock internally:
  * :c:func:`xa_store_bh`
  * :c:func:`xa_store_irq`
  * :c:func:`xa_insert`
+ * :c:func:`xa_insert_bh`
+ * :c:func:`xa_insert_irq`
  * :c:func:`xa_erase`
  * :c:func:`xa_erase_bh`
  * :c:func:`xa_erase_irq`
diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 435c25b29079..12244aa98a69 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -463,39 +463,12 @@ void *__xa_erase(struct xarray *, unsigned long index);
 void *__xa_store(struct xarray *, unsigned long index, void *entry, gfp_t);
 void *__xa_cmpxchg(struct xarray *, unsigned long index, void *old,
 		void *entry, gfp_t);
+int __xa_insert(struct xarray *, unsigned long index, void *entry, gfp_t);
 int __xa_alloc(struct xarray *, u32 *id, u32 max, void *entry, gfp_t);
 int __xa_reserve(struct xarray *, unsigned long index, gfp_t);
 void __xa_set_mark(struct xarray *, unsigned long index, xa_mark_t);
 void __xa_clear_mark(struct xarray *, unsigned long index, xa_mark_t);
 
-/**
- * __xa_insert() - Store this entry in the XArray unless another entry is
- *			already present.
- * @xa: XArray.
- * @index: Index into array.
- * @entry: New entry.
- * @gfp: Memory allocation flags.
- *
- * If you would rather see the existing entry in the array, use __xa_cmpxchg().
- * This function is for users who don't care what the entry is, only that
- * one is present.
- *
- * Context: Any context.  Expects xa_lock to be held on entry.  May
- *	    release and reacquire xa_lock if the @gfp flags permit.
- * Return: 0 if the store succeeded.  -EEXIST if another entry was present.
- * -ENOMEM if memory could not be allocated.
- */
-static inline int __xa_insert(struct xarray *xa, unsigned long index,
-		void *entry, gfp_t gfp)
-{
-	void *curr = __xa_cmpxchg(xa, index, NULL, entry, gfp);
-	if (!curr)
-		return 0;
-	if (xa_is_err(curr))
-		return xa_err(curr);
-	return -EEXIST;
-}
-
 /**
  * xa_store_bh() - Store this entry in the XArray.
  * @xa: XArray.
@@ -685,24 +658,83 @@ static inline void *xa_cmpxchg_irq(struct xarray *xa, unsigned long index,
  * @entry: New entry.
  * @gfp: Memory allocation flags.
  *
- * If you would rather see the existing entry in the array, use xa_cmpxchg().
- * This function is for users who don't care what the entry is, only that
- * one is present.
+ * Inserting a NULL entry will store a reserved entry (like xa_reserve())
+ * if no entry is present.  Inserting will fail if a reserved entry is
+ * present, even though loading from this index will return NULL.
  *
- * Context: Process context.  Takes and releases the xa_lock.
- *	    May sleep if the @gfp flags permit.
+ * Context: Any context.  Takes and releases the xa_lock.  May sleep if
+ * the @gfp flags permit.
  * Return: 0 if the store succeeded.  -EEXIST if another entry was present.
  * -ENOMEM if memory could not be allocated.
  */
 static inline int xa_insert(struct xarray *xa, unsigned long index,
 		void *entry, gfp_t gfp)
 {
-	void *curr = xa_cmpxchg(xa, index, NULL, entry, gfp);
-	if (!curr)
-		return 0;
-	if (xa_is_err(curr))
-		return xa_err(curr);
-	return -EEXIST;
+	int err;
+
+	xa_lock(xa);
+	err = __xa_insert(xa, index, entry, gfp);
+	xa_unlock(xa);
+
+	return err;
+}
+
+/**
+ * xa_insert_bh() - Store this entry in the XArray unless another entry is
+ *			already present.
+ * @xa: XArray.
+ * @index: Index into array.
+ * @entry: New entry.
+ * @gfp: Memory allocation flags.
+ *
+ * Inserting a NULL entry will store a reserved entry (like xa_reserve())
+ * if no entry is present.  Inserting will fail if a reserved entry is
+ * present, even though loading from this index will return NULL.
+ *
+ * Context: Any context.  Takes and releases the xa_lock while
+ * disabling softirqs.  May sleep if the @gfp flags permit.
+ * Return: 0 if the store succeeded.  -EEXIST if another entry was present.
+ * -ENOMEM if memory could not be allocated.
+ */
+static inline int xa_insert_bh(struct xarray *xa, unsigned long index,
+		void *entry, gfp_t gfp)
+{
+	int err;
+
+	xa_lock_bh(xa);
+	err = __xa_insert(xa, index, entry, gfp);
+	xa_unlock_bh(xa);
+
+	return err;
+}
+
+/**
+ * xa_insert_irq() - Store this entry in the XArray unless another entry is
+ *			already present.
+ * @xa: XArray.
+ * @index: Index into array.
+ * @entry: New entry.
+ * @gfp: Memory allocation flags.
+ *
+ * Inserting a NULL entry will store a reserved entry (like xa_reserve())
+ * if no entry is present.  Inserting will fail if a reserved entry is
+ * present, even though loading from this index will return NULL.
+ *
+ * Context: Process context.  Takes and releases the xa_lock while
+ * disabling interrupts.  May sleep if the @gfp flags permit.
+ * Return: 0 if the store succeeded.  -EEXIST if another entry was present.
+ * -ENOMEM if memory could not be allocated.
+ */
+static inline int xa_insert_irq(struct xarray *xa, unsigned long index,
+		void *entry, gfp_t gfp)
+{
+	int err;
+
+	xa_lock_irq(xa);
+	err = __xa_insert(xa, index, entry, gfp);
+	xa_unlock_irq(xa);
+
+	return err;
 }
 
 /**
diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index 6e0212a60b08..3cf17338b0a4 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -382,10 +382,12 @@ static noinline void check_reserve(struct xarray *xa)
 	xa_erase_index(xa, 12345678);
 	XA_BUG_ON(xa, !xa_empty(xa));
 
-	/* And so does xa_insert */
+	/* But xa_insert does not */
 	xa_reserve(xa, 12345678, GFP_KERNEL);
-	XA_BUG_ON(xa, xa_insert(xa, 12345678, xa_mk_value(12345678), 0) != 0);
-	xa_erase_index(xa, 12345678);
+	XA_BUG_ON(xa, xa_insert(xa, 12345678, xa_mk_value(12345678), 0) !=
+			-EEXIST);
+	XA_BUG_ON(xa, xa_empty(xa));
+	XA_BUG_ON(xa, xa_erase(xa, 12345678) != NULL);
 	XA_BUG_ON(xa, !xa_empty(xa));
 
 	/* Can iterate through a reserved entry */
diff --git a/lib/xarray.c b/lib/xarray.c
index bffa26b1f0d6..81c3171ddde9 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -1439,6 +1439,47 @@ void *__xa_cmpxchg(struct xarray *xa, unsigned long index,
 }
 EXPORT_SYMBOL(__xa_cmpxchg);
 
+/**
+ * __xa_insert() - Store this entry in the XArray if no entry is present.
+ * @xa: XArray.
+ * @index: Index into array.
+ * @entry: New entry.
+ * @gfp: Memory allocation flags.
+ *
+ * Inserting a NULL entry will store a reserved entry (like xa_reserve())
+ * if no entry is present.  Inserting will fail if a reserved entry is
+ * present, even though loading from this index will return NULL.
+ *
+ * Context: Any context.  Expects xa_lock to be held on entry.  May
+ * release and reacquire xa_lock if @gfp flags permit.
+ * Return: 0 if the store succeeded.  -EEXIST if another entry was present.
+ * -ENOMEM if memory could not be allocated.
+ */
+int __xa_insert(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp)
+{
+	XA_STATE(xas, xa, index);
+	void *curr;
+
+	if (WARN_ON_ONCE(xa_is_advanced(entry)))
+		return -EINVAL;
+	if (!entry)
+		entry = XA_ZERO_ENTRY;
+
+	do {
+		curr = xas_load(&xas);
+		if (!curr) {
+			xas_store(&xas, entry);
+			if (xa_track_free(xa))
+				xas_clear_mark(&xas, XA_FREE_MARK);
+		} else {
+			xas_set_err(&xas, -EEXIST);
+		}
+	} while (__xas_nomem(&xas, gfp));
+
+	return xas_error(&xas);
+}
+EXPORT_SYMBOL(__xa_insert);
+
 /**
  * __xa_reserve() - Reserve this index in the XArray.
  * @xa: XArray.
-- 
cgit v1.2.3-71-gd317


From 1cb95e072ede5e3d6a54eefd520db21b45985896 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 8 Jan 2019 15:34:52 -0800
Subject: libnvdimm/dimm: Fix security capability detection for non-Intel
 NVDIMMs

Kees reports a crash with the following signature...

 RIP: 0010:nvdimm_visible+0x79/0x80
 [..]
 Call Trace:
  internal_create_group+0xf4/0x380
  sysfs_create_groups+0x46/0xb0
  device_add+0x331/0x680
  nd_async_device_register+0x15/0x60
  async_run_entry_fn+0x38/0x100

...when starting a QEMU environment with "label-less" DIMM. Without
labels QEMU does not publish any DSM methods. Without defined methods
the NVDIMM_FAMILY type is not established and the nfit driver will skip
registering security operations.

In that case the security state should be initialized to a negative
value in __nvdimm_create() and nvdimm_visible() should skip
interrogating the specific ops. However, since 'enum
nvdimm_security_state' was only defined to contain positive values the
"if (nvdimm->sec.state < 0)" check always fails.

Define a negative error state to allow negative state values to be
handled as expected.

Fixes: f2989396553a ("acpi/nfit, libnvdimm: Introduce nvdimm_security_ops")
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reported-by: Kees Cook <keescook@chromium.org>
Tested-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/libnvdimm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index 5440f11b0907..7315977b64da 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -160,6 +160,7 @@ static inline struct nd_blk_region_desc *to_blk_region_desc(
 }
 
 enum nvdimm_security_state {
+	NVDIMM_SECURITY_ERROR = -1,
 	NVDIMM_SECURITY_DISABLED,
 	NVDIMM_SECURITY_UNLOCKED,
 	NVDIMM_SECURITY_LOCKED,
-- 
cgit v1.2.3-71-gd317


From 3e2ffd655cc6a694608d997738989ff5572a8266 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Wed, 2 Jan 2019 15:57:49 -0500
Subject: include/linux/compiler*.h: fix OPTIMIZER_HIDE_VAR

Since commit 815f0ddb346c ("include/linux/compiler*.h: make compiler-*.h
mutually exclusive") clang no longer reuses the OPTIMIZER_HIDE_VAR macro
from compiler-gcc - instead it gets the version in
include/linux/compiler.h.  Unfortunately that version doesn't actually
prevent compiler from optimizing out the variable.

Fix up by moving the macro out from compiler-gcc.h to compiler.h.
Compilers without incline asm support will keep working
since it's protected by an ifdef.

Also fix up comments to match reality since we are no longer overriding
any macros.

Build-tested with gcc and clang.

Fixes: 815f0ddb346c ("include/linux/compiler*.h: make compiler-*.h mutually exclusive")
Cc: Eli Friedman <efriedma@codeaurora.org>
Cc: Joe Perches <joe@perches.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
---
 include/linux/compiler-clang.h | 5 ++---
 include/linux/compiler-gcc.h   | 4 ----
 include/linux/compiler-intel.h | 4 +---
 include/linux/compiler.h       | 4 +++-
 4 files changed, 6 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index 39f668d5066b..333a6695a918 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -3,9 +3,8 @@
 #error "Please don't include <linux/compiler-clang.h> directly, include <linux/compiler.h> instead."
 #endif
 
-/* Some compiler specific definitions are overwritten here
- * for Clang compiler
- */
+/* Compiler specific definitions for Clang compiler */
+
 #define uninitialized_var(x) x = *(&(x))
 
 /* same as gcc, this was present in clang-2.6 so we can assume it works
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 5776da43da97..7b834e37d0c0 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -58,10 +58,6 @@
 	(typeof(ptr)) (__ptr + (off));					\
 })
 
-/* Make the optimizer believe the variable can be manipulated arbitrarily. */
-#define OPTIMIZER_HIDE_VAR(var)						\
-	__asm__ ("" : "=r" (var) : "0" (var))
-
 /*
  * A trick to suppress uninitialized variable warning without generating any
  * code
diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h
index 517bd14e1222..b17f3cd18334 100644
--- a/include/linux/compiler-intel.h
+++ b/include/linux/compiler-intel.h
@@ -5,9 +5,7 @@
 
 #ifdef __ECC
 
-/* Some compiler specific definitions are overwritten here
- * for Intel ECC compiler
- */
+/* Compiler specific definitions for Intel ECC compiler */
 
 #include <asm/intrinsics.h>
 
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index fc5004a4b07d..445348facea9 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -161,7 +161,9 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
 #endif
 
 #ifndef OPTIMIZER_HIDE_VAR
-#define OPTIMIZER_HIDE_VAR(var) barrier()
+/* Make the optimizer believe the variable can be manipulated arbitrarily. */
+#define OPTIMIZER_HIDE_VAR(var)						\
+	__asm__ ("" : "=r" (var) : "0" (var))
 #endif
 
 /* Not-quite-unique ID. */
-- 
cgit v1.2.3-71-gd317


From ba50bf1ce9a51fc97db58b96d01306aa70bc3979 Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui@microsoft.com>
Date: Mon, 17 Dec 2018 20:16:09 +0000
Subject: Drivers: hv: vmbus: Check for ring when getting debug info

fc96df16a1ce is good and can already fix the "return stack garbage" issue,
but let's also improve hv_ringbuffer_get_debuginfo(), which would silently
return stack garbage, if people forget to check channel->state or
ring_info->ring_buffer, when using the function in the future.

Having an error check in the function would eliminate the potential risk.

Add a Fixes tag to indicate the patch depdendency.

Fixes: fc96df16a1ce ("Drivers: hv: vmbus: Return -EINVAL for the sys files for unopened channels")
Cc: stable@vger.kernel.org
Cc: K. Y. Srinivasan <kys@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/hv/ring_buffer.c | 31 ++++++++---------
 drivers/hv/vmbus_drv.c   | 91 ++++++++++++++++++++++++++++++++----------------
 include/linux/hyperv.h   |  5 +--
 3 files changed, 79 insertions(+), 48 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index 64d0c85d5161..1f1a55e07733 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -164,26 +164,25 @@ hv_get_ringbuffer_availbytes(const struct hv_ring_buffer_info *rbi,
 }
 
 /* Get various debug metrics for the specified ring buffer. */
-void hv_ringbuffer_get_debuginfo(const struct hv_ring_buffer_info *ring_info,
-				 struct hv_ring_buffer_debug_info *debug_info)
+int hv_ringbuffer_get_debuginfo(const struct hv_ring_buffer_info *ring_info,
+				struct hv_ring_buffer_debug_info *debug_info)
 {
 	u32 bytes_avail_towrite;
 	u32 bytes_avail_toread;
 
-	if (ring_info->ring_buffer) {
-		hv_get_ringbuffer_availbytes(ring_info,
-					&bytes_avail_toread,
-					&bytes_avail_towrite);
-
-		debug_info->bytes_avail_toread = bytes_avail_toread;
-		debug_info->bytes_avail_towrite = bytes_avail_towrite;
-		debug_info->current_read_index =
-			ring_info->ring_buffer->read_index;
-		debug_info->current_write_index =
-			ring_info->ring_buffer->write_index;
-		debug_info->current_interrupt_mask =
-			ring_info->ring_buffer->interrupt_mask;
-	}
+	if (!ring_info->ring_buffer)
+		return -EINVAL;
+
+	hv_get_ringbuffer_availbytes(ring_info,
+				     &bytes_avail_toread,
+				     &bytes_avail_towrite);
+	debug_info->bytes_avail_toread = bytes_avail_toread;
+	debug_info->bytes_avail_towrite = bytes_avail_towrite;
+	debug_info->current_read_index = ring_info->ring_buffer->read_index;
+	debug_info->current_write_index = ring_info->ring_buffer->write_index;
+	debug_info->current_interrupt_mask
+		= ring_info->ring_buffer->interrupt_mask;
+	return 0;
 }
 EXPORT_SYMBOL_GPL(hv_ringbuffer_get_debuginfo);
 
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index d0ff65675292..403fee01572c 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -313,12 +313,16 @@ static ssize_t out_intr_mask_show(struct device *dev,
 {
 	struct hv_device *hv_dev = device_to_hv_device(dev);
 	struct hv_ring_buffer_debug_info outbound;
+	int ret;
 
 	if (!hv_dev->channel)
 		return -ENODEV;
-	if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
-		return -EINVAL;
-	hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
+
+	ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound,
+					  &outbound);
+	if (ret < 0)
+		return ret;
+
 	return sprintf(buf, "%d\n", outbound.current_interrupt_mask);
 }
 static DEVICE_ATTR_RO(out_intr_mask);
@@ -328,12 +332,15 @@ static ssize_t out_read_index_show(struct device *dev,
 {
 	struct hv_device *hv_dev = device_to_hv_device(dev);
 	struct hv_ring_buffer_debug_info outbound;
+	int ret;
 
 	if (!hv_dev->channel)
 		return -ENODEV;
-	if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
-		return -EINVAL;
-	hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
+
+	ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound,
+					  &outbound);
+	if (ret < 0)
+		return ret;
 	return sprintf(buf, "%d\n", outbound.current_read_index);
 }
 static DEVICE_ATTR_RO(out_read_index);
@@ -344,12 +351,15 @@ static ssize_t out_write_index_show(struct device *dev,
 {
 	struct hv_device *hv_dev = device_to_hv_device(dev);
 	struct hv_ring_buffer_debug_info outbound;
+	int ret;
 
 	if (!hv_dev->channel)
 		return -ENODEV;
-	if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
-		return -EINVAL;
-	hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
+
+	ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound,
+					  &outbound);
+	if (ret < 0)
+		return ret;
 	return sprintf(buf, "%d\n", outbound.current_write_index);
 }
 static DEVICE_ATTR_RO(out_write_index);
@@ -360,12 +370,15 @@ static ssize_t out_read_bytes_avail_show(struct device *dev,
 {
 	struct hv_device *hv_dev = device_to_hv_device(dev);
 	struct hv_ring_buffer_debug_info outbound;
+	int ret;
 
 	if (!hv_dev->channel)
 		return -ENODEV;
-	if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
-		return -EINVAL;
-	hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
+
+	ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound,
+					  &outbound);
+	if (ret < 0)
+		return ret;
 	return sprintf(buf, "%d\n", outbound.bytes_avail_toread);
 }
 static DEVICE_ATTR_RO(out_read_bytes_avail);
@@ -376,12 +389,15 @@ static ssize_t out_write_bytes_avail_show(struct device *dev,
 {
 	struct hv_device *hv_dev = device_to_hv_device(dev);
 	struct hv_ring_buffer_debug_info outbound;
+	int ret;
 
 	if (!hv_dev->channel)
 		return -ENODEV;
-	if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
-		return -EINVAL;
-	hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
+
+	ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound,
+					  &outbound);
+	if (ret < 0)
+		return ret;
 	return sprintf(buf, "%d\n", outbound.bytes_avail_towrite);
 }
 static DEVICE_ATTR_RO(out_write_bytes_avail);
@@ -391,12 +407,15 @@ static ssize_t in_intr_mask_show(struct device *dev,
 {
 	struct hv_device *hv_dev = device_to_hv_device(dev);
 	struct hv_ring_buffer_debug_info inbound;
+	int ret;
 
 	if (!hv_dev->channel)
 		return -ENODEV;
-	if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
-		return -EINVAL;
-	hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
+
+	ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
+	if (ret < 0)
+		return ret;
+
 	return sprintf(buf, "%d\n", inbound.current_interrupt_mask);
 }
 static DEVICE_ATTR_RO(in_intr_mask);
@@ -406,12 +425,15 @@ static ssize_t in_read_index_show(struct device *dev,
 {
 	struct hv_device *hv_dev = device_to_hv_device(dev);
 	struct hv_ring_buffer_debug_info inbound;
+	int ret;
 
 	if (!hv_dev->channel)
 		return -ENODEV;
-	if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
-		return -EINVAL;
-	hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
+
+	ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
+	if (ret < 0)
+		return ret;
+
 	return sprintf(buf, "%d\n", inbound.current_read_index);
 }
 static DEVICE_ATTR_RO(in_read_index);
@@ -421,12 +443,15 @@ static ssize_t in_write_index_show(struct device *dev,
 {
 	struct hv_device *hv_dev = device_to_hv_device(dev);
 	struct hv_ring_buffer_debug_info inbound;
+	int ret;
 
 	if (!hv_dev->channel)
 		return -ENODEV;
-	if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
-		return -EINVAL;
-	hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
+
+	ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
+	if (ret < 0)
+		return ret;
+
 	return sprintf(buf, "%d\n", inbound.current_write_index);
 }
 static DEVICE_ATTR_RO(in_write_index);
@@ -437,12 +462,15 @@ static ssize_t in_read_bytes_avail_show(struct device *dev,
 {
 	struct hv_device *hv_dev = device_to_hv_device(dev);
 	struct hv_ring_buffer_debug_info inbound;
+	int ret;
 
 	if (!hv_dev->channel)
 		return -ENODEV;
-	if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
-		return -EINVAL;
-	hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
+
+	ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
+	if (ret < 0)
+		return ret;
+
 	return sprintf(buf, "%d\n", inbound.bytes_avail_toread);
 }
 static DEVICE_ATTR_RO(in_read_bytes_avail);
@@ -453,12 +481,15 @@ static ssize_t in_write_bytes_avail_show(struct device *dev,
 {
 	struct hv_device *hv_dev = device_to_hv_device(dev);
 	struct hv_ring_buffer_debug_info inbound;
+	int ret;
 
 	if (!hv_dev->channel)
 		return -ENODEV;
-	if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
-		return -EINVAL;
-	hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
+
+	ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
+	if (ret < 0)
+		return ret;
+
 	return sprintf(buf, "%d\n", inbound.bytes_avail_towrite);
 }
 static DEVICE_ATTR_RO(in_write_bytes_avail);
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index f0885cc01db6..dcb6977afce9 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1159,8 +1159,9 @@ struct hv_ring_buffer_debug_info {
 	u32 bytes_avail_towrite;
 };
 
-void hv_ringbuffer_get_debuginfo(const struct hv_ring_buffer_info *ring_info,
-			    struct hv_ring_buffer_debug_info *debug_info);
+
+int hv_ringbuffer_get_debuginfo(const struct hv_ring_buffer_info *ring_info,
+				struct hv_ring_buffer_debug_info *debug_info);
 
 /* Vmbus interface */
 #define vmbus_driver_register(driver)	\
-- 
cgit v1.2.3-71-gd317


From 321c46b91550adc03054125fa7a1639390608e1a Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <rafal@milecki.pl>
Date: Thu, 3 Jan 2019 08:34:17 +0100
Subject: MIPS: BCM47XX: Setup struct device for the SoC
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

So far we never had any device registered for the SoC. This resulted in
some small issues that we kept ignoring like:
1) Not working GPIOLIB_IRQCHIP (gpiochip_irqchip_add_key() failing)
2) Lack of proper tree in the /sys/devices/
3) mips_dma_alloc_coherent() silently handling empty coherent_dma_mask

Kernel 4.19 came with a lot of DMA changes and caused a regression on
bcm47xx. Starting with the commit f8c55dc6e828 ("MIPS: use generic dma
noncoherent ops for simple noncoherent platforms") DMA coherent
allocations just fail. Example:
[    1.114914] bgmac_bcma bcma0:2: Allocation of TX ring 0x200 failed
[    1.121215] bgmac_bcma bcma0:2: Unable to alloc memory for DMA
[    1.127626] bgmac_bcma: probe of bcma0:2 failed with error -12
[    1.133838] bgmac_bcma: Broadcom 47xx GBit MAC driver loaded

The bgmac driver also triggers a WARNING:
[    0.959486] ------------[ cut here ]------------
[    0.964387] WARNING: CPU: 0 PID: 1 at ./include/linux/dma-mapping.h:516 bgmac_enet_probe+0x1b4/0x5c4
[    0.973751] Modules linked in:
[    0.976913] CPU: 0 PID: 1 Comm: swapper Not tainted 4.19.9 #0
[    0.982750] Stack : 804a0000 804597c4 00000000 00000000 80458fd8 8381bc2c 838282d4 80481a47
[    0.991367]         8042e3ec 00000001 804d38f0 00000204 83980000 00000065 8381bbe0 6f55b24f
[    0.999975]         00000000 00000000 80520000 00002018 00000000 00000075 00000007 00000000
[    1.008583]         00000000 80480000 000ee811 00000000 00000000 00000000 80432c00 80248db8
[    1.017196]         00000009 00000204 83980000 803ad7b0 00000000 801feeec 00000000 804d0000
[    1.025804]         ...
[    1.028325] Call Trace:
[    1.030875] [<8000aef8>] show_stack+0x58/0x100
[    1.035513] [<8001f8b4>] __warn+0xe4/0x118
[    1.039708] [<8001f9a4>] warn_slowpath_null+0x48/0x64
[    1.044935] [<80248db8>] bgmac_enet_probe+0x1b4/0x5c4
[    1.050101] [<802498e0>] bgmac_probe+0x558/0x590
[    1.054906] [<80252fd0>] bcma_device_probe+0x38/0x70
[    1.060017] [<8020e1e8>] really_probe+0x170/0x2e8
[    1.064891] [<8020e714>] __driver_attach+0xa4/0xec
[    1.069784] [<8020c1e0>] bus_for_each_dev+0x58/0xb0
[    1.074833] [<8020d590>] bus_add_driver+0xf8/0x218
[    1.079731] [<8020ef24>] driver_register+0xcc/0x11c
[    1.084804] [<804b54cc>] bgmac_init+0x1c/0x44
[    1.089258] [<8000121c>] do_one_initcall+0x7c/0x1a0
[    1.094343] [<804a1d34>] kernel_init_freeable+0x150/0x218
[    1.099886] [<803a082c>] kernel_init+0x10/0x104
[    1.104583] [<80005878>] ret_from_kernel_thread+0x14/0x1c
[    1.110107] ---[ end trace f441c0d873d1fb5b ]---

This patch setups a "struct device" (and passes it to the bcma) which
allows fixing all the mentioned problems. It'll also require a tiny bcma
patch which will follow through the wireless tree & its maintainer.

Fixes: f8c55dc6e828 ("MIPS: use generic dma noncoherent ops for simple noncoherent platforms")
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Signed-off-by: Paul Burton <paul.burton@mips.com>
Acked-by: Hauke Mehrtens <hauke@hauke-m.de>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: linux-wireless@vger.kernel.org
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: James Hogan <jhogan@kernel.org>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Cc: stable@vger.kernel.org # v4.19+
---
 arch/mips/bcm47xx/setup.c     | 31 +++++++++++++++++++++++++++++++
 include/linux/bcma/bcma_soc.h |  1 +
 2 files changed, 32 insertions(+)

(limited to 'include/linux')

diff --git a/arch/mips/bcm47xx/setup.c b/arch/mips/bcm47xx/setup.c
index 6054d49e608e..fe3773539eff 100644
--- a/arch/mips/bcm47xx/setup.c
+++ b/arch/mips/bcm47xx/setup.c
@@ -173,6 +173,31 @@ void __init plat_mem_setup(void)
 	pm_power_off = bcm47xx_machine_halt;
 }
 
+#ifdef CONFIG_BCM47XX_BCMA
+static struct device * __init bcm47xx_setup_device(void)
+{
+	struct device *dev;
+	int err;
+
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev)
+		return NULL;
+
+	err = dev_set_name(dev, "bcm47xx_soc");
+	if (err) {
+		pr_err("Failed to set SoC device name: %d\n", err);
+		kfree(dev);
+		return NULL;
+	}
+
+	err = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(32));
+	if (err)
+		pr_err("Failed to set SoC DMA mask: %d\n", err);
+
+	return dev;
+}
+#endif
+
 /*
  * This finishes bus initialization doing things that were not possible without
  * kmalloc. Make sure to call it late enough (after mm_init).
@@ -183,6 +208,10 @@ void __init bcm47xx_bus_setup(void)
 	if (bcm47xx_bus_type == BCM47XX_BUS_TYPE_BCMA) {
 		int err;
 
+		bcm47xx_bus.bcma.dev = bcm47xx_setup_device();
+		if (!bcm47xx_bus.bcma.dev)
+			panic("Failed to setup SoC device\n");
+
 		err = bcma_host_soc_init(&bcm47xx_bus.bcma);
 		if (err)
 			panic("Failed to initialize BCMA bus (err %d)", err);
@@ -235,6 +264,8 @@ static int __init bcm47xx_register_bus_complete(void)
 #endif
 #ifdef CONFIG_BCM47XX_BCMA
 	case BCM47XX_BUS_TYPE_BCMA:
+		if (device_register(bcm47xx_bus.bcma.dev))
+			pr_err("Failed to register SoC device\n");
 		bcma_bus_register(&bcm47xx_bus.bcma.bus);
 		break;
 #endif
diff --git a/include/linux/bcma/bcma_soc.h b/include/linux/bcma/bcma_soc.h
index 7cca5f859a90..f3c43519baa7 100644
--- a/include/linux/bcma/bcma_soc.h
+++ b/include/linux/bcma/bcma_soc.h
@@ -6,6 +6,7 @@
 
 struct bcma_soc {
 	struct bcma_bus bus;
+	struct device *dev;
 };
 
 int __init bcma_host_soc_register(struct bcma_soc *soc);
-- 
cgit v1.2.3-71-gd317


From ee46967fc6e74d412fe1ec15f77fdb8624bde2b0 Mon Sep 17 00:00:00 2001
From: Peter Hutterer <peter.hutterer@who-t.net>
Date: Wed, 9 Jan 2019 13:50:18 +1000
Subject: HID: core: replace the collection tree pointers with indices

Previously, the pointer to the parent collection was stored. If a device
exceeds 16 collections (HID_DEFAULT_NUM_COLLECTIONS), the array to store
the collections is reallocated, the pointer to the parent collection becomes
invalid.

Replace the pointers with an index-based lookup into the collections array.

Fixes: c53431eb696f3c ("HID: core: store the collections as a basic tree")
Reported-by: Pandruvada, Srinivas <srinivas.pandruvada@intel.com>
Signed-off-by: Peter Hutterer <peter.hutterer@who-t.net>
Tested-by: Kyle Pelton <kyle.d.pelton@linux.intel.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-core.c | 32 +++++++++++++++++++++-----------
 include/linux/hid.h    |  4 ++--
 2 files changed, 23 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index f41d5fe51abe..f9093dedf647 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -125,6 +125,7 @@ static int open_collection(struct hid_parser *parser, unsigned type)
 {
 	struct hid_collection *collection;
 	unsigned usage;
+	int collection_index;
 
 	usage = parser->local.usage[0];
 
@@ -167,13 +168,13 @@ static int open_collection(struct hid_parser *parser, unsigned type)
 	parser->collection_stack[parser->collection_stack_ptr++] =
 		parser->device->maxcollection;
 
-	collection = parser->device->collection +
-		parser->device->maxcollection++;
+	collection_index = parser->device->maxcollection++;
+	collection = parser->device->collection + collection_index;
 	collection->type = type;
 	collection->usage = usage;
 	collection->level = parser->collection_stack_ptr - 1;
-	collection->parent = parser->active_collection;
-	parser->active_collection = collection;
+	collection->parent_idx = parser->active_collection_idx;
+	parser->active_collection_idx = collection_index;
 
 	if (type == HID_COLLECTION_APPLICATION)
 		parser->device->maxapplication++;
@@ -192,8 +193,13 @@ static int close_collection(struct hid_parser *parser)
 		return -EINVAL;
 	}
 	parser->collection_stack_ptr--;
-	if (parser->active_collection)
-		parser->active_collection = parser->active_collection->parent;
+	if (parser->active_collection_idx != -1) {
+		struct hid_device *device = parser->device;
+		struct hid_collection *c;
+
+		c = &device->collection[parser->active_collection_idx];
+		parser->active_collection_idx = c->parent_idx;
+	}
 	return 0;
 }
 
@@ -819,6 +825,7 @@ static int hid_scan_report(struct hid_device *hid)
 		return -ENOMEM;
 
 	parser->device = hid;
+	parser->active_collection_idx = -1;
 	hid->group = HID_GROUP_GENERIC;
 
 	/*
@@ -1006,10 +1013,12 @@ static void hid_apply_multiplier_to_field(struct hid_device *hid,
 		usage = &field->usage[i];
 
 		collection = &hid->collection[usage->collection_index];
-		while (collection && collection != multiplier_collection)
-			collection = collection->parent;
+		while (collection->parent_idx != -1 &&
+		       collection != multiplier_collection)
+			collection = &hid->collection[collection->parent_idx];
 
-		if (collection || multiplier_collection == NULL)
+		if (collection->parent_idx != -1 ||
+		    multiplier_collection == NULL)
 			usage->resolution_multiplier = effective_multiplier;
 
 	}
@@ -1044,9 +1053,9 @@ static void hid_apply_multiplier(struct hid_device *hid,
 	 * applicable fields later.
 	 */
 	multiplier_collection = &hid->collection[multiplier->usage->collection_index];
-	while (multiplier_collection &&
+	while (multiplier_collection->parent_idx != -1 &&
 	       multiplier_collection->type != HID_COLLECTION_LOGICAL)
-		multiplier_collection = multiplier_collection->parent;
+		multiplier_collection = &hid->collection[multiplier_collection->parent_idx];
 
 	effective_multiplier = hid_calculate_multiplier(hid, multiplier);
 
@@ -1170,6 +1179,7 @@ int hid_open_report(struct hid_device *device)
 	}
 
 	parser->device = device;
+	parser->active_collection_idx = -1;
 
 	end = start + size;
 
diff --git a/include/linux/hid.h b/include/linux/hid.h
index d99287327ef2..992bbb7196df 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -430,7 +430,7 @@ struct hid_local {
  */
 
 struct hid_collection {
-	struct hid_collection *parent;
+	int parent_idx; /* device->collection */
 	unsigned type;
 	unsigned usage;
 	unsigned level;
@@ -658,7 +658,7 @@ struct hid_parser {
 	unsigned int         *collection_stack;
 	unsigned int          collection_stack_ptr;
 	unsigned int          collection_stack_size;
-	struct hid_collection *active_collection;
+	int                   active_collection_idx; /* device->collection */
 	struct hid_device    *device;
 	unsigned int          scan_flags;
 };
-- 
cgit v1.2.3-71-gd317


From 8ce5f84157530ffa64b3e0acf00b9261f41c8da8 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Tue, 11 Dec 2018 14:31:05 -0600
Subject: of: Remove struct device_node.type pointer

Now that all users of device_node.type pointer have been removed in
favor of accessor functions, we can remove it.

Cc: Frank Rowand <frowand.list@gmail.com>
Cc: devicetree@vger.kernel.org
Signed-off-by: Rob Herring <robh@kernel.org>
---
 drivers/of/dynamic.c | 3 ---
 drivers/of/fdt.c     | 4 ----
 drivers/of/overlay.c | 3 ---
 drivers/of/pdt.c     | 1 -
 include/linux/of.h   | 1 -
 5 files changed, 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c
index a09c1c3cf831..49b16f76d78e 100644
--- a/drivers/of/dynamic.c
+++ b/drivers/of/dynamic.c
@@ -207,11 +207,8 @@ static void __of_attach_node(struct device_node *np)
 
 	if (!of_node_check_flag(np, OF_OVERLAY)) {
 		np->name = __of_get_property(np, "name", NULL);
-		np->type = __of_get_property(np, "device_type", NULL);
 		if (!np->name)
 			np->name = "<NULL>";
-		if (!np->type)
-			np->type = "<NULL>";
 
 		phandle = __of_get_property(np, "phandle", &sz);
 		if (!phandle)
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 7099c652c6a5..9cc1461aac7d 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -314,12 +314,8 @@ static bool populate_node(const void *blob,
 	populate_properties(blob, offset, mem, np, pathp, dryrun);
 	if (!dryrun) {
 		np->name = of_get_property(np, "name", NULL);
-		np->type = of_get_property(np, "device_type", NULL);
-
 		if (!np->name)
 			np->name = "<NULL>";
-		if (!np->type)
-			np->type = "<NULL>";
 	}
 
 	*pnp = np;
diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c
index 2b5ac43a5690..c423e94baf0f 100644
--- a/drivers/of/overlay.c
+++ b/drivers/of/overlay.c
@@ -423,12 +423,9 @@ static int add_changeset_node(struct overlay_changeset *ovcs,
 
 		tchild->parent = target->np;
 		tchild->name = __of_get_property(node, "name", NULL);
-		tchild->type = __of_get_property(node, "device_type", NULL);
 
 		if (!tchild->name)
 			tchild->name = "<NULL>";
-		if (!tchild->type)
-			tchild->type = "<NULL>";
 
 		/* ignore obsolete "linux,phandle" */
 		phandle = __of_get_property(node, "phandle", &size);
diff --git a/drivers/of/pdt.c b/drivers/of/pdt.c
index d3185063d369..7eda43c66c91 100644
--- a/drivers/of/pdt.c
+++ b/drivers/of/pdt.c
@@ -155,7 +155,6 @@ static struct device_node * __init of_pdt_create_node(phandle node,
 	dp->parent = parent;
 
 	dp->name = of_pdt_get_one_property(node, "name");
-	dp->type = of_pdt_get_one_property(node, "device_type");
 	dp->phandle = node;
 
 	dp->properties = of_pdt_build_prop_list(node);
diff --git a/include/linux/of.h b/include/linux/of.h
index fe472e5195a9..e240992e5cb6 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -50,7 +50,6 @@ struct of_irq_controller;
 
 struct device_node {
 	const char *name;
-	const char *type;
 	phandle phandle;
 	const char *full_name;
 	struct fwnode_handle fwnode;
-- 
cgit v1.2.3-71-gd317


From 73ab1cb2de9e3efe7f818d5453de271e5371df1d Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Wed, 9 Jan 2019 02:23:56 +0900
Subject: umh: add exit routine for UMH process

A UMH process which is created by the fork_usermode_blob() such as
bpfilter needs to release members of the umh_info when process is
terminated.
But the do_exit() does not release members of the umh_info. hence module
which uses UMH needs own code to detect whether UMH process is
terminated or not.
But this implementation needs extra code for checking the status of
UMH process. it eventually makes the code more complex.

The new PF_UMH flag is added and it is used to identify UMH processes.
The exit_umh() does not release members of the umh_info.
Hence umh_info->cleanup callback should release both members of the
umh_info and the private data.

Suggested-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sched.h |  9 +++++++++
 include/linux/umh.h   |  2 ++
 kernel/exit.c         |  1 +
 kernel/umh.c          | 33 +++++++++++++++++++++++++++++++--
 4 files changed, 43 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 89541d248893..e35e35b9fc48 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1406,6 +1406,7 @@ extern struct pid *cad_pid;
 #define PF_RANDOMIZE		0x00400000	/* Randomize virtual address space */
 #define PF_SWAPWRITE		0x00800000	/* Allowed to write to swap */
 #define PF_MEMSTALL		0x01000000	/* Stalled due to lack of memory */
+#define PF_UMH			0x02000000	/* I'm an Usermodehelper process */
 #define PF_NO_SETAFFINITY	0x04000000	/* Userland is not allowed to meddle with cpus_allowed */
 #define PF_MCE_EARLY		0x08000000      /* Early kill for mce process policy */
 #define PF_MUTEX_TESTER		0x20000000	/* Thread belongs to the rt mutex tester */
@@ -1904,6 +1905,14 @@ static inline void rseq_execve(struct task_struct *t)
 
 #endif
 
+void __exit_umh(struct task_struct *tsk);
+
+static inline void exit_umh(struct task_struct *tsk)
+{
+	if (unlikely(tsk->flags & PF_UMH))
+		__exit_umh(tsk);
+}
+
 #ifdef CONFIG_DEBUG_RSEQ
 
 void rseq_syscall(struct pt_regs *regs);
diff --git a/include/linux/umh.h b/include/linux/umh.h
index 235f51b62c71..0c08de356d0d 100644
--- a/include/linux/umh.h
+++ b/include/linux/umh.h
@@ -47,6 +47,8 @@ struct umh_info {
 	const char *cmdline;
 	struct file *pipe_to_umh;
 	struct file *pipe_from_umh;
+	struct list_head list;
+	void (*cleanup)(struct umh_info *info);
 	pid_t pid;
 };
 int fork_usermode_blob(void *data, size_t len, struct umh_info *info);
diff --git a/kernel/exit.c b/kernel/exit.c
index 8a01b671dc1f..dad70419195c 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -866,6 +866,7 @@ void __noreturn do_exit(long code)
 	exit_task_namespaces(tsk);
 	exit_task_work(tsk);
 	exit_thread(tsk);
+	exit_umh(tsk);
 
 	/*
 	 * Flush inherited counters to the parent - before the parent
diff --git a/kernel/umh.c b/kernel/umh.c
index 0baa672e023c..d937cbad903a 100644
--- a/kernel/umh.c
+++ b/kernel/umh.c
@@ -37,6 +37,8 @@ static kernel_cap_t usermodehelper_bset = CAP_FULL_SET;
 static kernel_cap_t usermodehelper_inheritable = CAP_FULL_SET;
 static DEFINE_SPINLOCK(umh_sysctl_lock);
 static DECLARE_RWSEM(umhelper_sem);
+static LIST_HEAD(umh_list);
+static DEFINE_MUTEX(umh_list_lock);
 
 static void call_usermodehelper_freeinfo(struct subprocess_info *info)
 {
@@ -100,10 +102,12 @@ static int call_usermodehelper_exec_async(void *data)
 	commit_creds(new);
 
 	sub_info->pid = task_pid_nr(current);
-	if (sub_info->file)
+	if (sub_info->file) {
 		retval = do_execve_file(sub_info->file,
 					sub_info->argv, sub_info->envp);
-	else
+		if (!retval)
+			current->flags |= PF_UMH;
+	} else
 		retval = do_execve(getname_kernel(sub_info->path),
 				   (const char __user *const __user *)sub_info->argv,
 				   (const char __user *const __user *)sub_info->envp);
@@ -517,6 +521,11 @@ int fork_usermode_blob(void *data, size_t len, struct umh_info *info)
 		goto out;
 
 	err = call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC);
+	if (!err) {
+		mutex_lock(&umh_list_lock);
+		list_add(&info->list, &umh_list);
+		mutex_unlock(&umh_list_lock);
+	}
 out:
 	fput(file);
 	return err;
@@ -679,6 +688,26 @@ static int proc_cap_handler(struct ctl_table *table, int write,
 	return 0;
 }
 
+void __exit_umh(struct task_struct *tsk)
+{
+	struct umh_info *info;
+	pid_t pid = tsk->pid;
+
+	mutex_lock(&umh_list_lock);
+	list_for_each_entry(info, &umh_list, list) {
+		if (info->pid == pid) {
+			list_del(&info->list);
+			mutex_unlock(&umh_list_lock);
+			goto out;
+		}
+	}
+	mutex_unlock(&umh_list_lock);
+	return;
+out:
+	if (info->cleanup)
+		info->cleanup(info);
+}
+
 struct ctl_table usermodehelper_table[] = {
 	{
 		.procname	= "bset",
-- 
cgit v1.2.3-71-gd317


From 5b4cb650e569db2e6a09d2fa0ef8eb789a0ac5d8 Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Wed, 9 Jan 2019 02:24:34 +0900
Subject: net: bpfilter: use cleanup callback to release umh_info

Now, UMH process is killed, do_exit() calls the umh_info->cleanup callback
to release members of the umh_info.
This patch makes bpfilter_umh's cleanup routine to use the
umh_info->cleanup callback.

Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpfilter.h     | 11 ++++++++---
 net/bpfilter/bpfilter_kern.c | 23 ++++++++++-------------
 net/ipv4/bpfilter/sockopt.c  | 33 ++++++++++++++++++++++++++-------
 3 files changed, 44 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpfilter.h b/include/linux/bpfilter.h
index f02cee0225d4..70ffeed280e9 100644
--- a/include/linux/bpfilter.h
+++ b/include/linux/bpfilter.h
@@ -3,13 +3,18 @@
 #define _LINUX_BPFILTER_H
 
 #include <uapi/linux/bpfilter.h>
+#include <linux/umh.h>
 
 struct sock;
 int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char __user *optval,
 			    unsigned int optlen);
 int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval,
 			    int __user *optlen);
-extern int (*bpfilter_process_sockopt)(struct sock *sk, int optname,
-				       char __user *optval,
-				       unsigned int optlen, bool is_set);
+struct bpfilter_umh_ops {
+	struct umh_info info;
+	int (*sockopt)(struct sock *sk, int optname,
+		       char __user *optval,
+		       unsigned int optlen, bool is_set);
+};
+extern struct bpfilter_umh_ops bpfilter_ops;
 #endif
diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c
index 7acfc83087d5..a68940b74c01 100644
--- a/net/bpfilter/bpfilter_kern.c
+++ b/net/bpfilter/bpfilter_kern.c
@@ -13,7 +13,6 @@
 extern char bpfilter_umh_start;
 extern char bpfilter_umh_end;
 
-static struct umh_info info;
 /* since ip_getsockopt() can run in parallel, serialize access to umh */
 static DEFINE_MUTEX(bpfilter_lock);
 
@@ -28,16 +27,13 @@ static void shutdown_umh(struct umh_info *info)
 		force_sig(SIGKILL, tsk);
 		put_task_struct(tsk);
 	}
-	fput(info->pipe_to_umh);
-	fput(info->pipe_from_umh);
-	info->pid = 0;
 }
 
 static void __stop_umh(void)
 {
 	if (IS_ENABLED(CONFIG_INET)) {
-		bpfilter_process_sockopt = NULL;
-		shutdown_umh(&info);
+		bpfilter_ops.sockopt = NULL;
+		shutdown_umh(&bpfilter_ops.info);
 	}
 }
 
@@ -64,9 +60,10 @@ static int __bpfilter_process_sockopt(struct sock *sk, int optname,
 	req.addr = (long __force __user)optval;
 	req.len = optlen;
 	mutex_lock(&bpfilter_lock);
-	if (!info.pid)
+	if (!bpfilter_ops.info.pid)
 		goto out;
-	n = __kernel_write(info.pipe_to_umh, &req, sizeof(req), &pos);
+	n = __kernel_write(bpfilter_ops.info.pipe_to_umh, &req, sizeof(req),
+			   &pos);
 	if (n != sizeof(req)) {
 		pr_err("write fail %zd\n", n);
 		__stop_umh();
@@ -74,7 +71,8 @@ static int __bpfilter_process_sockopt(struct sock *sk, int optname,
 		goto out;
 	}
 	pos = 0;
-	n = kernel_read(info.pipe_from_umh, &reply, sizeof(reply), &pos);
+	n = kernel_read(bpfilter_ops.info.pipe_from_umh, &reply, sizeof(reply),
+			&pos);
 	if (n != sizeof(reply)) {
 		pr_err("read fail %zd\n", n);
 		__stop_umh();
@@ -92,13 +90,12 @@ static int __init load_umh(void)
 	int err;
 
 	/* fork usermode process */
-	info.cmdline = "bpfilter_umh";
 	err = fork_usermode_blob(&bpfilter_umh_start,
 				 &bpfilter_umh_end - &bpfilter_umh_start,
-				 &info);
+				 &bpfilter_ops.info);
 	if (err)
 		return err;
-	pr_info("Loaded bpfilter_umh pid %d\n", info.pid);
+	pr_info("Loaded bpfilter_umh pid %d\n", bpfilter_ops.info.pid);
 
 	/* health check that usermode process started correctly */
 	if (__bpfilter_process_sockopt(NULL, 0, NULL, 0, 0) != 0) {
@@ -106,7 +103,7 @@ static int __init load_umh(void)
 		return -EFAULT;
 	}
 	if (IS_ENABLED(CONFIG_INET))
-		bpfilter_process_sockopt = &__bpfilter_process_sockopt;
+		bpfilter_ops.sockopt = &__bpfilter_process_sockopt;
 
 	return 0;
 }
diff --git a/net/ipv4/bpfilter/sockopt.c b/net/ipv4/bpfilter/sockopt.c
index 5e04ed25bc0e..c326cfbc0f62 100644
--- a/net/ipv4/bpfilter/sockopt.c
+++ b/net/ipv4/bpfilter/sockopt.c
@@ -1,28 +1,37 @@
 // SPDX-License-Identifier: GPL-2.0
+#include <linux/init.h>
+#include <linux/module.h>
 #include <linux/uaccess.h>
 #include <linux/bpfilter.h>
 #include <uapi/linux/bpf.h>
 #include <linux/wait.h>
 #include <linux/kmod.h>
+#include <linux/fs.h>
+#include <linux/file.h>
 
-int (*bpfilter_process_sockopt)(struct sock *sk, int optname,
-				char __user *optval,
-				unsigned int optlen, bool is_set);
-EXPORT_SYMBOL_GPL(bpfilter_process_sockopt);
+struct bpfilter_umh_ops bpfilter_ops;
+EXPORT_SYMBOL_GPL(bpfilter_ops);
+
+static void bpfilter_umh_cleanup(struct umh_info *info)
+{
+	fput(info->pipe_to_umh);
+	fput(info->pipe_from_umh);
+	info->pid = 0;
+}
 
 static int bpfilter_mbox_request(struct sock *sk, int optname,
 				 char __user *optval,
 				 unsigned int optlen, bool is_set)
 {
-	if (!bpfilter_process_sockopt) {
+	if (!bpfilter_ops.sockopt) {
 		int err = request_module("bpfilter");
 
 		if (err)
 			return err;
-		if (!bpfilter_process_sockopt)
+		if (!bpfilter_ops.sockopt)
 			return -ECHILD;
 	}
-	return bpfilter_process_sockopt(sk, optname, optval, optlen, is_set);
+	return bpfilter_ops.sockopt(sk, optname, optval, optlen, is_set);
 }
 
 int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char __user *optval,
@@ -41,3 +50,13 @@ int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval,
 
 	return bpfilter_mbox_request(sk, optname, optval, len, false);
 }
+
+static int __init bpfilter_sockopt_init(void)
+{
+	bpfilter_ops.info.cmdline = "bpfilter_umh";
+	bpfilter_ops.info.cleanup = &bpfilter_umh_cleanup;
+
+	return 0;
+}
+
+module_init(bpfilter_sockopt_init);
-- 
cgit v1.2.3-71-gd317


From 61fbf5933d42b02f552123af5a87a06335a3b4db Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Wed, 9 Jan 2019 02:24:53 +0900
Subject: net: bpfilter: restart bpfilter_umh when error occurred

The bpfilter_umh will be stopped via __stop_umh() when the bpfilter
error occurred.
The bpfilter_umh() couldn't start again because there is no restart
routine.

The section of the bpfilter_umh_{start/end} is no longer .init.rodata
because these area should be reused in the restart routine. hence
the section name is changed to .bpfilter_umh.

The bpfilter_ops->start() is restart callback. it will be called when
bpfilter_umh is stopped.
The stop bit means bpfilter_umh is stopped. this bit is set by both
start and stop routine.

Before this patch,
Test commands:
   $ iptables -vnL
   $ kill -9 <pid of bpfilter_umh>
   $ iptables -vnL
   [  480.045136] bpfilter: write fail -32
   $ iptables -vnL

All iptables commands will fail.

After this patch,
Test commands:
   $ iptables -vnL
   $ kill -9 <pid of bpfilter_umh>
   $ iptables -vnL
   $ iptables -vnL

Now, all iptables commands will work.

Fixes: d2ba09c17a06 ("net: add skeleton of bpfilter kernel module")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpfilter.h         |  2 ++
 net/bpfilter/bpfilter_kern.c     | 37 +++++++++++++++++++++++++++----------
 net/bpfilter/bpfilter_umh_blob.S |  2 +-
 net/ipv4/bpfilter/sockopt.c      | 11 ++++++++++-
 4 files changed, 40 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpfilter.h b/include/linux/bpfilter.h
index 70ffeed280e9..8ebcbdd70bdc 100644
--- a/include/linux/bpfilter.h
+++ b/include/linux/bpfilter.h
@@ -15,6 +15,8 @@ struct bpfilter_umh_ops {
 	int (*sockopt)(struct sock *sk, int optname,
 		       char __user *optval,
 		       unsigned int optlen, bool is_set);
+	int (*start)(void);
+	bool stop;
 };
 extern struct bpfilter_umh_ops bpfilter_ops;
 #endif
diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c
index a68940b74c01..c0fcde910a7a 100644
--- a/net/bpfilter/bpfilter_kern.c
+++ b/net/bpfilter/bpfilter_kern.c
@@ -16,13 +16,14 @@ extern char bpfilter_umh_end;
 /* since ip_getsockopt() can run in parallel, serialize access to umh */
 static DEFINE_MUTEX(bpfilter_lock);
 
-static void shutdown_umh(struct umh_info *info)
+static void shutdown_umh(void)
 {
 	struct task_struct *tsk;
 
-	if (!info->pid)
+	if (bpfilter_ops.stop)
 		return;
-	tsk = get_pid_task(find_vpid(info->pid), PIDTYPE_PID);
+
+	tsk = get_pid_task(find_vpid(bpfilter_ops.info.pid), PIDTYPE_PID);
 	if (tsk) {
 		force_sig(SIGKILL, tsk);
 		put_task_struct(tsk);
@@ -31,10 +32,8 @@ static void shutdown_umh(struct umh_info *info)
 
 static void __stop_umh(void)
 {
-	if (IS_ENABLED(CONFIG_INET)) {
-		bpfilter_ops.sockopt = NULL;
-		shutdown_umh(&bpfilter_ops.info);
-	}
+	if (IS_ENABLED(CONFIG_INET))
+		shutdown_umh();
 }
 
 static void stop_umh(void)
@@ -85,7 +84,7 @@ out:
 	return ret;
 }
 
-static int __init load_umh(void)
+static int start_umh(void)
 {
 	int err;
 
@@ -95,6 +94,7 @@ static int __init load_umh(void)
 				 &bpfilter_ops.info);
 	if (err)
 		return err;
+	bpfilter_ops.stop = false;
 	pr_info("Loaded bpfilter_umh pid %d\n", bpfilter_ops.info.pid);
 
 	/* health check that usermode process started correctly */
@@ -102,14 +102,31 @@ static int __init load_umh(void)
 		stop_umh();
 		return -EFAULT;
 	}
-	if (IS_ENABLED(CONFIG_INET))
-		bpfilter_ops.sockopt = &__bpfilter_process_sockopt;
 
 	return 0;
 }
 
+static int __init load_umh(void)
+{
+	int err;
+
+	if (!bpfilter_ops.stop)
+		return -EFAULT;
+	err = start_umh();
+	if (!err && IS_ENABLED(CONFIG_INET)) {
+		bpfilter_ops.sockopt = &__bpfilter_process_sockopt;
+		bpfilter_ops.start = &start_umh;
+	}
+
+	return err;
+}
+
 static void __exit fini_umh(void)
 {
+	if (IS_ENABLED(CONFIG_INET)) {
+		bpfilter_ops.start = NULL;
+		bpfilter_ops.sockopt = NULL;
+	}
 	stop_umh();
 }
 module_init(load_umh);
diff --git a/net/bpfilter/bpfilter_umh_blob.S b/net/bpfilter/bpfilter_umh_blob.S
index 40311d10d2f2..7f1c521dcc2f 100644
--- a/net/bpfilter/bpfilter_umh_blob.S
+++ b/net/bpfilter/bpfilter_umh_blob.S
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-	.section .init.rodata, "a"
+	.section .bpfilter_umh, "a"
 	.global bpfilter_umh_start
 bpfilter_umh_start:
 	.incbin "net/bpfilter/bpfilter_umh"
diff --git a/net/ipv4/bpfilter/sockopt.c b/net/ipv4/bpfilter/sockopt.c
index c326cfbc0f62..de84ede4e765 100644
--- a/net/ipv4/bpfilter/sockopt.c
+++ b/net/ipv4/bpfilter/sockopt.c
@@ -14,6 +14,7 @@ EXPORT_SYMBOL_GPL(bpfilter_ops);
 
 static void bpfilter_umh_cleanup(struct umh_info *info)
 {
+	bpfilter_ops.stop = true;
 	fput(info->pipe_to_umh);
 	fput(info->pipe_from_umh);
 	info->pid = 0;
@@ -23,14 +24,21 @@ static int bpfilter_mbox_request(struct sock *sk, int optname,
 				 char __user *optval,
 				 unsigned int optlen, bool is_set)
 {
+	int err;
+
 	if (!bpfilter_ops.sockopt) {
-		int err = request_module("bpfilter");
+		err = request_module("bpfilter");
 
 		if (err)
 			return err;
 		if (!bpfilter_ops.sockopt)
 			return -ECHILD;
 	}
+	if (bpfilter_ops.stop) {
+		err = bpfilter_ops.start();
+		if (err)
+			return err;
+	}
 	return bpfilter_ops.sockopt(sk, optname, optval, optlen, is_set);
 }
 
@@ -53,6 +61,7 @@ int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval,
 
 static int __init bpfilter_sockopt_init(void)
 {
+	bpfilter_ops.stop = true;
 	bpfilter_ops.info.cmdline = "bpfilter_umh";
 	bpfilter_ops.info.cleanup = &bpfilter_umh_cleanup;
 
-- 
cgit v1.2.3-71-gd317


From 71a8508402b570127d6500c1ad456bbd33ccf187 Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Wed, 9 Jan 2019 02:25:10 +0900
Subject: net: bpfilter: disallow to remove bpfilter module while being used

The bpfilter.ko module can be removed while functions of the bpfilter.ko
are executing. so panic can occurred. in order to protect that, locks can
be used. a bpfilter_lock protects routines in the
__bpfilter_process_sockopt() but it's not enough because __exit routine
can be executed concurrently.

Now, the bpfilter_umh can not run in parallel.
So, the module do not removed while it's being used and it do not
double-create UMH process.
The members of the umh_info and the bpfilter_umh_ops are protected by
the bpfilter_umh_ops.lock.

test commands:
   while :
   do
	iptables -I FORWARD -m string --string ap --algo kmp &
	modprobe -rv bpfilter &
   done

splat looks like:
[  298.623435] BUG: unable to handle kernel paging request at fffffbfff807440b
[  298.628512] #PF error: [normal kernel read fault]
[  298.633018] PGD 124327067 P4D 124327067 PUD 11c1a3067 PMD 119eb2067 PTE 0
[  298.638859] Oops: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI
[  298.638859] CPU: 0 PID: 2997 Comm: iptables Not tainted 4.20.0+ #154
[  298.638859] RIP: 0010:__mutex_lock+0x6b9/0x16a0
[  298.638859] Code: c0 00 00 e8 89 82 ff ff 80 bd 8f fc ff ff 00 0f 85 d9 05 00 00 48 8b 85 80 fc ff ff 48 bf 00 00 00 00 00 fc ff df 48 c1 e8 03 <80> 3c 38 00 0f 85 1d 0e 00 00 48 8b 85 c8 fc ff ff 49 39 47 58 c6
[  298.638859] RSP: 0018:ffff88810e7777a0 EFLAGS: 00010202
[  298.638859] RAX: 1ffffffff807440b RBX: ffff888111bd4d80 RCX: 0000000000000000
[  298.638859] RDX: 1ffff110235ff806 RSI: ffff888111bd5538 RDI: dffffc0000000000
[  298.638859] RBP: ffff88810e777b30 R08: 0000000080000002 R09: 0000000000000000
[  298.638859] R10: 0000000000000000 R11: 0000000000000000 R12: fffffbfff168a42c
[  298.638859] R13: ffff888111bd4d80 R14: ffff8881040e9a05 R15: ffffffffc03a2000
[  298.638859] FS:  00007f39e3758700(0000) GS:ffff88811ae00000(0000) knlGS:0000000000000000
[  298.638859] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  298.638859] CR2: fffffbfff807440b CR3: 000000011243e000 CR4: 00000000001006f0
[  298.638859] Call Trace:
[  298.638859]  ? mutex_lock_io_nested+0x1560/0x1560
[  298.638859]  ? kasan_kmalloc+0xa0/0xd0
[  298.638859]  ? kmem_cache_alloc+0x1c2/0x260
[  298.638859]  ? __alloc_file+0x92/0x3c0
[  298.638859]  ? alloc_empty_file+0x43/0x120
[  298.638859]  ? alloc_file_pseudo+0x220/0x330
[  298.638859]  ? sock_alloc_file+0x39/0x160
[  298.638859]  ? __sys_socket+0x113/0x1d0
[  298.638859]  ? __x64_sys_socket+0x6f/0xb0
[  298.638859]  ? do_syscall_64+0x138/0x560
[  298.638859]  ? entry_SYSCALL_64_after_hwframe+0x49/0xbe
[  298.638859]  ? __alloc_file+0x92/0x3c0
[  298.638859]  ? init_object+0x6b/0x80
[  298.638859]  ? cyc2ns_read_end+0x10/0x10
[  298.638859]  ? cyc2ns_read_end+0x10/0x10
[  298.638859]  ? hlock_class+0x140/0x140
[  298.638859]  ? sched_clock_local+0xd4/0x140
[  298.638859]  ? sched_clock_local+0xd4/0x140
[  298.638859]  ? check_flags.part.37+0x440/0x440
[  298.638859]  ? __lock_acquire+0x4f90/0x4f90
[  298.638859]  ? set_rq_offline.part.89+0x140/0x140
[ ... ]

Fixes: d2ba09c17a06 ("net: add skeleton of bpfilter kernel module")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpfilter.h     |  2 ++
 net/bpfilter/bpfilter_kern.c | 28 +++++++++++-----------------
 net/ipv4/bpfilter/sockopt.c  | 22 ++++++++++++++++------
 3 files changed, 29 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpfilter.h b/include/linux/bpfilter.h
index 8ebcbdd70bdc..d815622cd31e 100644
--- a/include/linux/bpfilter.h
+++ b/include/linux/bpfilter.h
@@ -12,6 +12,8 @@ int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval,
 			    int __user *optlen);
 struct bpfilter_umh_ops {
 	struct umh_info info;
+	/* since ip_getsockopt() can run in parallel, serialize access to umh */
+	struct mutex lock;
 	int (*sockopt)(struct sock *sk, int optname,
 		       char __user *optval,
 		       unsigned int optlen, bool is_set);
diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c
index c0fcde910a7a..7ee4fea93637 100644
--- a/net/bpfilter/bpfilter_kern.c
+++ b/net/bpfilter/bpfilter_kern.c
@@ -13,9 +13,6 @@
 extern char bpfilter_umh_start;
 extern char bpfilter_umh_end;
 
-/* since ip_getsockopt() can run in parallel, serialize access to umh */
-static DEFINE_MUTEX(bpfilter_lock);
-
 static void shutdown_umh(void)
 {
 	struct task_struct *tsk;
@@ -36,13 +33,6 @@ static void __stop_umh(void)
 		shutdown_umh();
 }
 
-static void stop_umh(void)
-{
-	mutex_lock(&bpfilter_lock);
-	__stop_umh();
-	mutex_unlock(&bpfilter_lock);
-}
-
 static int __bpfilter_process_sockopt(struct sock *sk, int optname,
 				      char __user *optval,
 				      unsigned int optlen, bool is_set)
@@ -58,7 +48,6 @@ static int __bpfilter_process_sockopt(struct sock *sk, int optname,
 	req.cmd = optname;
 	req.addr = (long __force __user)optval;
 	req.len = optlen;
-	mutex_lock(&bpfilter_lock);
 	if (!bpfilter_ops.info.pid)
 		goto out;
 	n = __kernel_write(bpfilter_ops.info.pipe_to_umh, &req, sizeof(req),
@@ -80,7 +69,6 @@ static int __bpfilter_process_sockopt(struct sock *sk, int optname,
 	}
 	ret = reply.status;
 out:
-	mutex_unlock(&bpfilter_lock);
 	return ret;
 }
 
@@ -99,7 +87,7 @@ static int start_umh(void)
 
 	/* health check that usermode process started correctly */
 	if (__bpfilter_process_sockopt(NULL, 0, NULL, 0, 0) != 0) {
-		stop_umh();
+		shutdown_umh();
 		return -EFAULT;
 	}
 
@@ -110,24 +98,30 @@ static int __init load_umh(void)
 {
 	int err;
 
-	if (!bpfilter_ops.stop)
-		return -EFAULT;
+	mutex_lock(&bpfilter_ops.lock);
+	if (!bpfilter_ops.stop) {
+		err = -EFAULT;
+		goto out;
+	}
 	err = start_umh();
 	if (!err && IS_ENABLED(CONFIG_INET)) {
 		bpfilter_ops.sockopt = &__bpfilter_process_sockopt;
 		bpfilter_ops.start = &start_umh;
 	}
-
+out:
+	mutex_unlock(&bpfilter_ops.lock);
 	return err;
 }
 
 static void __exit fini_umh(void)
 {
+	mutex_lock(&bpfilter_ops.lock);
 	if (IS_ENABLED(CONFIG_INET)) {
+		shutdown_umh();
 		bpfilter_ops.start = NULL;
 		bpfilter_ops.sockopt = NULL;
 	}
-	stop_umh();
+	mutex_unlock(&bpfilter_ops.lock);
 }
 module_init(load_umh);
 module_exit(fini_umh);
diff --git a/net/ipv4/bpfilter/sockopt.c b/net/ipv4/bpfilter/sockopt.c
index de84ede4e765..1e976bb93d99 100644
--- a/net/ipv4/bpfilter/sockopt.c
+++ b/net/ipv4/bpfilter/sockopt.c
@@ -14,10 +14,12 @@ EXPORT_SYMBOL_GPL(bpfilter_ops);
 
 static void bpfilter_umh_cleanup(struct umh_info *info)
 {
+	mutex_lock(&bpfilter_ops.lock);
 	bpfilter_ops.stop = true;
 	fput(info->pipe_to_umh);
 	fput(info->pipe_from_umh);
 	info->pid = 0;
+	mutex_unlock(&bpfilter_ops.lock);
 }
 
 static int bpfilter_mbox_request(struct sock *sk, int optname,
@@ -25,21 +27,28 @@ static int bpfilter_mbox_request(struct sock *sk, int optname,
 				 unsigned int optlen, bool is_set)
 {
 	int err;
-
+	mutex_lock(&bpfilter_ops.lock);
 	if (!bpfilter_ops.sockopt) {
+		mutex_unlock(&bpfilter_ops.lock);
 		err = request_module("bpfilter");
+		mutex_lock(&bpfilter_ops.lock);
 
 		if (err)
-			return err;
-		if (!bpfilter_ops.sockopt)
-			return -ECHILD;
+			goto out;
+		if (!bpfilter_ops.sockopt) {
+			err = -ECHILD;
+			goto out;
+		}
 	}
 	if (bpfilter_ops.stop) {
 		err = bpfilter_ops.start();
 		if (err)
-			return err;
+			goto out;
 	}
-	return bpfilter_ops.sockopt(sk, optname, optval, optlen, is_set);
+	err = bpfilter_ops.sockopt(sk, optname, optval, optlen, is_set);
+out:
+	mutex_unlock(&bpfilter_ops.lock);
+	return err;
 }
 
 int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char __user *optval,
@@ -61,6 +70,7 @@ int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval,
 
 static int __init bpfilter_sockopt_init(void)
 {
+	mutex_init(&bpfilter_ops.lock);
 	bpfilter_ops.stop = true;
 	bpfilter_ops.info.cmdline = "bpfilter_umh";
 	bpfilter_ops.info.cleanup = &bpfilter_umh_cleanup;
-- 
cgit v1.2.3-71-gd317


From 19ba9ecf24189bd74d070aa1b1c4bcb9fe4ae849 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Mon, 14 Jan 2019 11:40:47 +0300
Subject: XArray: Fix typo in comment

Seems copy and paste typo, not a big deal but still
for consistency sake better to fix.

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/linux/xarray.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 12244aa98a69..7da665f5cb20 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -496,7 +496,7 @@ static inline void *xa_store_bh(struct xarray *xa, unsigned long index,
 }
 
 /**
- * xa_store_irq() - Erase this entry from the XArray.
+ * xa_store_irq() - Store this entry in the XArray.
  * @xa: XArray.
  * @index: Index into array.
  * @entry: New entry.
-- 
cgit v1.2.3-71-gd317


From b89a07c4373b27321b1f6d4b4fdc369fd45ef79d Mon Sep 17 00:00:00 2001
From: Cornelia Huck <cohuck@redhat.com>
Date: Thu, 3 Jan 2019 17:08:03 +0100
Subject: virtio: fix virtio_config_ops description

- get_features has returned 64 bits since commit d025477368792
  ("virtio: add support for 64 bit features.")
- properly mark all optional callbacks

Signed-off-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Halil Pasic <pasic@linux.ibm.com>
---
 include/linux/virtio_config.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 32baf8e26735..7087ef946ba7 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -22,7 +22,7 @@ struct irq_affinity;
  *	offset: the offset of the configuration field
  *	buf: the buffer to read the field value from.
  *	len: the length of the buffer
- * @generation: config generation counter
+ * @generation: config generation counter (optional)
  *	vdev: the virtio_device
  *	Returns the config generation counter
  * @get_status: read the status byte
@@ -48,17 +48,17 @@ struct irq_affinity;
  * @del_vqs: free virtqueues found by find_vqs().
  * @get_features: get the array of feature bits for this device.
  *	vdev: the virtio_device
- *	Returns the first 32 feature bits (all we currently need).
+ *	Returns the first 64 feature bits (all we currently need).
  * @finalize_features: confirm what device features we'll be using.
  *	vdev: the virtio_device
  *	This gives the final feature bits for the device: it can change
  *	the dev->feature bits if it wants.
  *	Returns 0 on success or error status
- * @bus_name: return the bus name associated with the device
+ * @bus_name: return the bus name associated with the device (optional)
  *	vdev: the virtio_device
  *      This returns a pointer to the bus name a la pci_name from which
  *      the caller can then copy.
- * @set_vq_affinity: set the affinity for a virtqueue.
+ * @set_vq_affinity: set the affinity for a virtqueue (optional).
  * @get_vq_affinity: get the affinity for a virtqueue (optional).
  */
 typedef void vq_callback_t(struct virtqueue *);
-- 
cgit v1.2.3-71-gd317


From d1c1dad89e7a8be2cfdc7b92deca2c8048f0d263 Mon Sep 17 00:00:00 2001
From: Cornelia Huck <cohuck@redhat.com>
Date: Thu, 3 Jan 2019 17:08:04 +0100
Subject: virtio: document virtio_config_ops restrictions

Some transports (e.g. virtio-ccw) implement virtio operations that
seem to be a simple read/write as something more involved that
cannot be done from an atomic context.

Give at least a hint about that.

Signed-off-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/linux/virtio_config.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 7087ef946ba7..987b6491b946 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -12,6 +12,11 @@ struct irq_affinity;
 
 /**
  * virtio_config_ops - operations for configuring a virtio device
+ * Note: Do not assume that a transport implements all of the operations
+ *       getting/setting a value as a simple read/write! Generally speaking,
+ *       any of @get/@set, @get_status/@set_status, or @get_features/
+ *       @finalize_features are NOT safe to be called from an atomic
+ *       context.
  * @get: read the value of a configuration field
  *	vdev: the virtio_device
  *	offset: the offset of the configuration field
-- 
cgit v1.2.3-71-gd317


From 1950f462916edc9581168ca8d5882a8101e8bbcf Mon Sep 17 00:00:00 2001
From: Philipp Zabel <philipp.zabel@gmail.com>
Date: Mon, 14 Jan 2019 08:19:22 +0100
Subject: HID: core: simplify active collection tracking

Manually tracking an active collection to set collection parents is not
necessary, we just have to look one step back into the collection stack
to find the correct parent.

Signed-off-by: Philipp Zabel <philipp.zabel@gmail.com>
Reviewed-by: Peter Hutterer <peter.hutterer@who-t.net>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
---
 drivers/hid/hid-core.c | 13 ++-----------
 include/linux/hid.h    |  1 -
 2 files changed, 2 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index f9093dedf647..9993b692598f 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -173,8 +173,8 @@ static int open_collection(struct hid_parser *parser, unsigned type)
 	collection->type = type;
 	collection->usage = usage;
 	collection->level = parser->collection_stack_ptr - 1;
-	collection->parent_idx = parser->active_collection_idx;
-	parser->active_collection_idx = collection_index;
+	collection->parent_idx = (collection->level == 0) ? -1 :
+		parser->collection_stack[collection->level - 1];
 
 	if (type == HID_COLLECTION_APPLICATION)
 		parser->device->maxapplication++;
@@ -193,13 +193,6 @@ static int close_collection(struct hid_parser *parser)
 		return -EINVAL;
 	}
 	parser->collection_stack_ptr--;
-	if (parser->active_collection_idx != -1) {
-		struct hid_device *device = parser->device;
-		struct hid_collection *c;
-
-		c = &device->collection[parser->active_collection_idx];
-		parser->active_collection_idx = c->parent_idx;
-	}
 	return 0;
 }
 
@@ -825,7 +818,6 @@ static int hid_scan_report(struct hid_device *hid)
 		return -ENOMEM;
 
 	parser->device = hid;
-	parser->active_collection_idx = -1;
 	hid->group = HID_GROUP_GENERIC;
 
 	/*
@@ -1179,7 +1171,6 @@ int hid_open_report(struct hid_device *device)
 	}
 
 	parser->device = device;
-	parser->active_collection_idx = -1;
 
 	end = start + size;
 
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 992bbb7196df..f9707d1dcb58 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -658,7 +658,6 @@ struct hid_parser {
 	unsigned int         *collection_stack;
 	unsigned int          collection_stack_ptr;
 	unsigned int          collection_stack_size;
-	int                   active_collection_idx; /* device->collection */
 	struct hid_device    *device;
 	unsigned int          scan_flags;
 };
-- 
cgit v1.2.3-71-gd317


From 890d14d2d4b57ff5a149309da3ed36c8a529987f Mon Sep 17 00:00:00 2001
From: Peter Rosin <peda@axentia.se>
Date: Wed, 16 Jan 2019 17:42:35 +0100
Subject: fbdev: fbmem: convert CONFIG_FB_LOGO_CENTER into a cmd line option

A command line option is much more flexible than a config option and
the supporting code is small. Gets rid of #ifdefs in the code too...

Suggested-by: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Peter Rosin <peda@axentia.se>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 Documentation/fb/fbcon.txt       |  8 ++++++++
 drivers/video/fbdev/core/fbcon.c |  7 +++++++
 drivers/video/fbdev/core/fbmem.c | 19 ++++++++++---------
 drivers/video/logo/Kconfig       |  9 ---------
 include/linux/fb.h               |  1 +
 5 files changed, 26 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/fb/fbcon.txt b/Documentation/fb/fbcon.txt
index 62af30511a95..60a5ec04e8f0 100644
--- a/Documentation/fb/fbcon.txt
+++ b/Documentation/fb/fbcon.txt
@@ -163,6 +163,14 @@ C. Boot options
 	be preserved until there actually is some text is output to the console.
 	This option causes fbcon to bind immediately to the fbdev device.
 
+7. fbcon=logo-pos:<location>
+
+	The only possible 'location' is 'center' (without quotes), and when
+	given, the bootup logo is moved from the default top-left corner
+	location to the center of the framebuffer. If more than one logo is
+	displayed due to multiple CPUs, the collected line of logos is moved
+	as a whole.
+
 C. Attaching, Detaching and Unloading
 
 Before going on to how to attach, detach and unload the framebuffer console, an
diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c
index 8976190b6c1f..bfa1360ec750 100644
--- a/drivers/video/fbdev/core/fbcon.c
+++ b/drivers/video/fbdev/core/fbcon.c
@@ -510,6 +510,13 @@ static int __init fb_console_setup(char *this_opt)
 			continue;
 		}
 #endif
+
+		if (!strncmp(options, "logo-pos:", 9)) {
+			options += 9;
+			if (!strcmp(options, "center"))
+				fb_center_logo = true;
+			continue;
+		}
 	}
 	return 1;
 }
diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c
index 558ed2ed3124..cb43a2258c51 100644
--- a/drivers/video/fbdev/core/fbmem.c
+++ b/drivers/video/fbdev/core/fbmem.c
@@ -53,6 +53,9 @@ EXPORT_SYMBOL(registered_fb);
 int num_registered_fb __read_mostly;
 EXPORT_SYMBOL(num_registered_fb);
 
+bool fb_center_logo __read_mostly;
+EXPORT_SYMBOL(fb_center_logo);
+
 static struct fb_info *get_fb_info(unsigned int idx)
 {
 	struct fb_info *fb_info;
@@ -506,8 +509,7 @@ static int fb_show_logo_line(struct fb_info *info, int rotate,
 		fb_set_logo(info, logo, logo_new, fb_logo.depth);
 	}
 
-#ifdef CONFIG_FB_LOGO_CENTER
-	{
+	if (fb_center_logo) {
 		int xres = info->var.xres;
 		int yres = info->var.yres;
 
@@ -520,11 +522,11 @@ static int fb_show_logo_line(struct fb_info *info, int rotate,
 			--n;
 		image.dx = (xres - n * (logo->width + 8) - 8) / 2;
 		image.dy = y ?: (yres - logo->height) / 2;
+	} else {
+		image.dx = 0;
+		image.dy = y;
 	}
-#else
-	image.dx = 0;
-	image.dy = y;
-#endif
+
 	image.width = logo->width;
 	image.height = logo->height;
 
@@ -684,9 +686,8 @@ int fb_prepare_logo(struct fb_info *info, int rotate)
  	}
 
 	height = fb_logo.logo->height;
-#ifdef CONFIG_FB_LOGO_CENTER
-	height += (yres - fb_logo.logo->height) / 2;
-#endif
+	if (fb_center_logo)
+		height += (yres - fb_logo.logo->height) / 2;
 
 	return fb_prepare_extra_logos(info, height, yres);
 }
diff --git a/drivers/video/logo/Kconfig b/drivers/video/logo/Kconfig
index 1e972c4e88b1..d1f6196c8b9a 100644
--- a/drivers/video/logo/Kconfig
+++ b/drivers/video/logo/Kconfig
@@ -10,15 +10,6 @@ menuconfig LOGO
 
 if LOGO
 
-config FB_LOGO_CENTER
-	bool "Center the logo"
-	depends on FB=y
-	help
-	  When this option is selected, the bootup logo is centered both
-	  horizontally and vertically. If more than one logo is displayed
-	  due to multiple CPUs, the collected line of logos is centered
-	  as a whole.
-
 config FB_LOGO_EXTRA
 	bool
 	depends on FB=y
diff --git a/include/linux/fb.h b/include/linux/fb.h
index 7cdd31a69719..f52ef0ad6781 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -653,6 +653,7 @@ extern int fb_new_modelist(struct fb_info *info);
 
 extern struct fb_info *registered_fb[FB_MAX];
 extern int num_registered_fb;
+extern bool fb_center_logo;
 extern struct class *fb_class;
 
 #define for_each_registered_fb(i)		\
-- 
cgit v1.2.3-71-gd317


From 9e857a40dc4eba15a739b4194d7db873d82c28a0 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Tue, 15 Jan 2019 16:55:30 +0100
Subject: net: phy: Add missing features to PHY drivers

The bcm87xx and micrel driver has PHYs which are missing the .features
value. Add them. The bcm87xx is a 10G FEC only PHY. Add the needed
features definition of this PHY.

Fixes: 719655a14971 ("net: phy: Replace phy driver features u32 with link_mode bitmap")
Reported-by: Scott Wood <oss@buserror.net>
Reported-by: Camelia Groza <camelia.groza@nxp.com>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/bcm87xx.c    |  2 ++
 drivers/net/phy/micrel.c     |  1 +
 drivers/net/phy/phy_device.c | 12 ++++++++++++
 include/linux/phy.h          |  2 ++
 4 files changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/phy/bcm87xx.c b/drivers/net/phy/bcm87xx.c
index 1b350183bffb..a271239748f2 100644
--- a/drivers/net/phy/bcm87xx.c
+++ b/drivers/net/phy/bcm87xx.c
@@ -197,6 +197,7 @@ static struct phy_driver bcm87xx_driver[] = {
 	.phy_id		= PHY_ID_BCM8706,
 	.phy_id_mask	= 0xffffffff,
 	.name		= "Broadcom BCM8706",
+	.features	= PHY_10GBIT_FEC_FEATURES,
 	.config_init	= bcm87xx_config_init,
 	.config_aneg	= bcm87xx_config_aneg,
 	.read_status	= bcm87xx_read_status,
@@ -208,6 +209,7 @@ static struct phy_driver bcm87xx_driver[] = {
 	.phy_id		= PHY_ID_BCM8727,
 	.phy_id_mask	= 0xffffffff,
 	.name		= "Broadcom BCM8727",
+	.features	= PHY_10GBIT_FEC_FEATURES,
 	.config_init	= bcm87xx_config_init,
 	.config_aneg	= bcm87xx_config_aneg,
 	.read_status	= bcm87xx_read_status,
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 7828d17f0662..b1f959935f50 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -1099,6 +1099,7 @@ static struct phy_driver ksphy_driver[] = {
 	.phy_id		= PHY_ID_KSZ8873MLL,
 	.phy_id_mask	= MICREL_PHY_ID_MASK,
 	.name		= "Micrel KSZ8873MLL Switch",
+	.features	= PHY_BASIC_FEATURES,
 	.config_init	= kszphy_config_init,
 	.config_aneg	= ksz8873mll_config_aneg,
 	.read_status	= ksz8873mll_read_status,
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 51990002d495..bf3ce48a1e5d 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -61,6 +61,9 @@ EXPORT_SYMBOL_GPL(phy_gbit_all_ports_features);
 __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_features) __ro_after_init;
 EXPORT_SYMBOL_GPL(phy_10gbit_features);
 
+__ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_fec_features) __ro_after_init;
+EXPORT_SYMBOL_GPL(phy_10gbit_fec_features);
+
 static const int phy_basic_ports_array[] = {
 	ETHTOOL_LINK_MODE_Autoneg_BIT,
 	ETHTOOL_LINK_MODE_TP_BIT,
@@ -109,6 +112,11 @@ const int phy_10gbit_features_array[1] = {
 };
 EXPORT_SYMBOL_GPL(phy_10gbit_features_array);
 
+const int phy_10gbit_fec_features_array[1] = {
+	ETHTOOL_LINK_MODE_10000baseR_FEC_BIT,
+};
+EXPORT_SYMBOL_GPL(phy_10gbit_fec_features_array);
+
 __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_full_features) __ro_after_init;
 EXPORT_SYMBOL_GPL(phy_10gbit_full_features);
 
@@ -191,6 +199,10 @@ static void features_init(void)
 	linkmode_set_bit_array(phy_10gbit_full_features_array,
 			       ARRAY_SIZE(phy_10gbit_full_features_array),
 			       phy_10gbit_full_features);
+	/* 10G FEC only */
+	linkmode_set_bit_array(phy_10gbit_fec_features_array,
+			       ARRAY_SIZE(phy_10gbit_fec_features_array),
+			       phy_10gbit_fec_features);
 }
 
 void phy_device_free(struct phy_device *phydev)
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 3b051f761450..55114657a577 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -48,6 +48,7 @@ extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_gbit_features) __ro_after_init;
 extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_gbit_fibre_features) __ro_after_init;
 extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_gbit_all_ports_features) __ro_after_init;
 extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_features) __ro_after_init;
+extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_fec_features) __ro_after_init;
 extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_full_features) __ro_after_init;
 
 #define PHY_BASIC_FEATURES ((unsigned long *)&phy_basic_features)
@@ -56,6 +57,7 @@ extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_full_features) __ro_after_ini
 #define PHY_GBIT_FIBRE_FEATURES ((unsigned long *)&phy_gbit_fibre_features)
 #define PHY_GBIT_ALL_PORTS_FEATURES ((unsigned long *)&phy_gbit_all_ports_features)
 #define PHY_10GBIT_FEATURES ((unsigned long *)&phy_10gbit_features)
+#define PHY_10GBIT_FEC_FEATURES ((unsigned long *)&phy_10gbit_fec_features)
 #define PHY_10GBIT_FULL_FEATURES ((unsigned long *)&phy_10gbit_full_features)
 
 extern const int phy_10_100_features_array[4];
-- 
cgit v1.2.3-71-gd317


From edcddd4c879af48ec922d680b2d56834c085683b Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 17 Jan 2019 07:15:35 -0500
Subject: XArray: Fix an arithmetic error in xa_is_err

There is a math problem here which leads to a lot of static checker
warnings for me:

net/sunrpc/clnt.c:451 rpc_new_client() error: (-4096) too low for ERR_PTR

Error values are from -1 to -4095 or from 0xffffffff to 0xfffff001 in
hexadecimal.  (I am assuming a 32 bit system for simplicity).  We are
using the lowest two bits to hold some internal XArray data so the
error is shifted two spaces to the left.  0xfffff001 << 2 is 0xffffc004.
And finally we want to check that BIT(1) is set so we add 2 which gives
us 0xffffc006.

In other words, we should be checking that "entry >= 0xffffc006", but
the check is actually testing if "entry >= 0xffffc002".

Fixes: 76b4e5299565 ("XArray: Permit storing 2-byte-aligned pointers")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
[Use xa_mk_internal() instead of changing the bracketing]
Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/linux/xarray.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 7da665f5cb20..5d9d318bcf7a 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -177,7 +177,7 @@ static inline bool xa_is_internal(const void *entry)
 static inline bool xa_is_err(const void *entry)
 {
 	return unlikely(xa_is_internal(entry) &&
-			(unsigned long)entry >= -((MAX_ERRNO << 2) + 2));
+			entry >= xa_mk_internal(-MAX_ERRNO));
 }
 
 /**
-- 
cgit v1.2.3-71-gd317


From 70921ae25f944423f0abf096f73455c586da0652 Mon Sep 17 00:00:00 2001
From: Jonathan Corbet <corbet@lwn.net>
Date: Tue, 8 Jan 2019 17:04:32 -0700
Subject: genirq: Fix the kerneldoc comment for struct irq_affinity_desc

A recent commit added a new field but did not update the kerneldoc comment,
leading to this build warning:

  ./include/linux/interrupt.h:268: warning: Function parameter or member 'is_managed' not described in 'irq_affinity_desc'

Add the missing information, making the docs build 0.001% quieter.

Fixes: c410abbbacb9 ("genirq/affinity: Add is_managed to struct irq_affinity_desc")
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Dou Liyang <douliyangs@gmail.com>
Link: https://lkml.kernel.org/r/20190108170432.59bae8a6@lwn.net
---
 include/linux/interrupt.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index c672f34235e7..4a728dba02e2 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -260,6 +260,7 @@ struct irq_affinity {
 /**
  * struct irq_affinity_desc - Interrupt affinity descriptor
  * @mask:	cpumask to hold the affinity assignment
+ * @is_managed: 1 if the interrupt is managed internally
  */
 struct irq_affinity_desc {
 	struct cpumask	mask;
-- 
cgit v1.2.3-71-gd317


From 6c57f0458022298e4da1729c67bd33ce41c14e7a Mon Sep 17 00:00:00 2001
From: Ross Lagerwall <ross.lagerwall@citrix.com>
Date: Thu, 17 Jan 2019 15:34:38 +0000
Subject: net: Fix usage of pskb_trim_rcsum

In certain cases, pskb_trim_rcsum() may change skb pointers.
Reinitialize header pointers afterwards to avoid potential
use-after-frees. Add a note in the documentation of
pskb_trim_rcsum(). Found by KASAN.

Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ppp/pppoe.c                  | 1 +
 include/linux/skbuff.h                   | 1 +
 net/bridge/br_netfilter_ipv6.c           | 1 +
 net/bridge/netfilter/nft_reject_bridge.c | 1 +
 net/ipv4/ip_input.c                      | 1 +
 5 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index 62dc564b251d..f22639f0116a 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -445,6 +445,7 @@ static int pppoe_rcv(struct sk_buff *skb, struct net_device *dev,
 	if (pskb_trim_rcsum(skb, len))
 		goto drop;
 
+	ph = pppoe_hdr(skb);
 	pn = pppoe_pernet(dev_net(dev));
 
 	/* Note that get_item does a sock_hold(), so sk_pppox(po)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 93f56fddd92a..95d25b010a25 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3218,6 +3218,7 @@ int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len);
  *
  *	This is exactly the same as pskb_trim except that it ensures the
  *	checksum of received packets are still valid after the operation.
+ *	It can change skb pointers.
  */
 
 static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len)
diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c
index 94039f588f1d..564710f88f93 100644
--- a/net/bridge/br_netfilter_ipv6.c
+++ b/net/bridge/br_netfilter_ipv6.c
@@ -131,6 +131,7 @@ int br_validate_ipv6(struct net *net, struct sk_buff *skb)
 					IPSTATS_MIB_INDISCARDS);
 			goto drop;
 		}
+		hdr = ipv6_hdr(skb);
 	}
 	if (hdr->nexthdr == NEXTHDR_HOP && br_nf_check_hbh_len(skb))
 		goto drop;
diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c
index 08cbed7d940e..419e8edf23ba 100644
--- a/net/bridge/netfilter/nft_reject_bridge.c
+++ b/net/bridge/netfilter/nft_reject_bridge.c
@@ -229,6 +229,7 @@ static bool reject6_br_csum_ok(struct sk_buff *skb, int hook)
 	    pskb_trim_rcsum(skb, ntohs(ip6h->payload_len) + sizeof(*ip6h)))
 		return false;
 
+	ip6h = ipv6_hdr(skb);
 	thoff = ipv6_skip_exthdr(skb, ((u8*)(ip6h+1) - skb->data), &proto, &fo);
 	if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0)
 		return false;
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 26921f6b3b92..51d8efba6de2 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -488,6 +488,7 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net)
 		goto drop;
 	}
 
+	iph = ip_hdr(skb);
 	skb->transport_header = skb->network_header + iph->ihl*4;
 
 	/* Remove any debris in the socket control block */
-- 
cgit v1.2.3-71-gd317


From 3e64cf7a435ed0500e3adaa8aada2272d3ae8abc Mon Sep 17 00:00:00 2001
From: Camelia Groza <camelia.groza@nxp.com>
Date: Thu, 17 Jan 2019 14:22:36 +0200
Subject: net: phy: phy driver features are mandatory

Since phy driver features became a link_mode bitmap, phy drivers that
don't have a list of features configured will cause the kernel to crash
when probed.

Prevent the phy driver from registering if the features field is missing.

Fixes: 719655a14971 ("net: phy: Replace phy driver features u32 with link_mode bitmap")
Reported-by: Scott Wood <oss@buserror.net>
Signed-off-by: Camelia Groza <camelia.groza@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy_device.c | 5 +++++
 include/linux/phy.h          | 4 ++--
 2 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index bf3ce48a1e5d..46c86725a693 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -2255,6 +2255,11 @@ int phy_driver_register(struct phy_driver *new_driver, struct module *owner)
 {
 	int retval;
 
+	if (WARN_ON(!new_driver->features)) {
+		pr_err("%s: Driver features are missing\n", new_driver->name);
+		return -EINVAL;
+	}
+
 	new_driver->mdiodrv.flags |= MDIO_DEVICE_IS_PHY;
 	new_driver->mdiodrv.driver.name = new_driver->name;
 	new_driver->mdiodrv.driver.bus = &mdio_bus_type;
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 55114657a577..ef20aeea10cc 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -469,8 +469,8 @@ struct phy_device {
  *   only works for PHYs with IDs which match this field
  * name: The friendly name of this PHY type
  * phy_id_mask: Defines the important bits of the phy_id
- * features: A list of features (speed, duplex, etc) supported
- *   by this PHY
+ * features: A mandatory list of features (speed, duplex, etc)
+ *   supported by this PHY
  * flags: A bitfield defining certain other features this PHY
  *   supports (like interrupts)
  *
-- 
cgit v1.2.3-71-gd317


From e6018c0f5c996e61639adce6a0697391a2861916 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 17 Dec 2018 10:14:53 +0100
Subject: sched/wake_q: Document wake_q_add()

The only guarantee provided by wake_q_add() is that a wakeup will
happen after it, it does _NOT_ guarantee the wakeup will be delayed
until the matching wake_up_q().

If wake_q_add() fails the cmpxchg() a concurrent wakeup is pending and
that can happen at any time after the cmpxchg(). This means we should
not rely on the wakeup happening at wake_q_up(), but should be ready
for wake_q_add() to issue the wakeup.

The delay; if provided (most likely); should only result in more efficient
behaviour.

Reported-by: Yongji Xie <elohimes@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman Long <longman@redhat.com>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/wake_q.h |  6 +++++-
 kernel/sched/core.c          | 12 ++++++++++++
 2 files changed, 17 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/wake_q.h b/include/linux/sched/wake_q.h
index 10b19a192b2d..545f37138057 100644
--- a/include/linux/sched/wake_q.h
+++ b/include/linux/sched/wake_q.h
@@ -24,9 +24,13 @@
  * called near the end of a function. Otherwise, the list can be
  * re-initialized for later re-use by wake_q_init().
  *
- * Note that this can cause spurious wakeups. schedule() callers
+ * NOTE that this can cause spurious wakeups. schedule() callers
  * must ensure the call is done inside a loop, confirming that the
  * wakeup condition has in fact occurred.
+ *
+ * NOTE that there is no guarantee the wakeup will happen any later than the
+ * wake_q_add() location. Therefore task must be ready to be woken at the
+ * location of the wake_q_add().
  */
 
 #include <linux/sched.h>
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a674c7db2f29..cc814933f7d6 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -396,6 +396,18 @@ static bool set_nr_if_polling(struct task_struct *p)
 #endif
 #endif
 
+/**
+ * wake_q_add() - queue a wakeup for 'later' waking.
+ * @head: the wake_q_head to add @task to
+ * @task: the task to queue for 'later' wakeup
+ *
+ * Queue a task for later wakeup, most likely by the wake_up_q() call in the
+ * same context, _HOWEVER_ this is not guaranteed, the wakeup can come
+ * instantly.
+ *
+ * This function must be used as-if it were wake_up_process(); IOW the task
+ * must be ready to be woken at this location.
+ */
 void wake_q_add(struct wake_q_head *head, struct task_struct *task)
 {
 	struct wake_q_node *node = &task->wake_q;
-- 
cgit v1.2.3-71-gd317


From 1cd7386549f9b6f2f230da54aa9e7fe2d6c216d2 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Sat, 19 Jan 2019 08:45:56 -0800
Subject: libnvdimm/security: Require nvdimm_security_setup_events() to succeed

The following warning:

    ACPI0012:00: security event setup failed: -19

...is meant to capture exceptional failures of sysfs_get_dirent(),
however it will also fail in the common case when security support is
disabled. A few issues:

1/ A dev_warn() report for a common case is too chatty
2/ The setup of this notifier is generic, no need for it to be driven
   from the nfit driver, it can exist completely in the core.
3/ If it fails for any reason besides security support being disabled,
   that's fatal and should abort DIMM activation. Userspace may hang if
   it never gets overwrite notifications.
4/ The dirent needs to be released.

Move the call to the core 'dimm' driver, make it conditional on security
support being active, make it fatal for the exceptional case, add the
missing sysfs_put() at device disable time.

Fixes: 7d988097c546 ("...Add security DSM overwrite support")
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/acpi/nfit/core.c   |  5 -----
 drivers/nvdimm/dimm.c      |  6 ++++++
 drivers/nvdimm/dimm_devs.c | 22 +++++++++++++++++-----
 drivers/nvdimm/nd.h        |  1 +
 include/linux/libnvdimm.h  |  1 -
 5 files changed, 24 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 5143e11e3b0f..c1fb06654749 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -2042,11 +2042,6 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
 		if (!nvdimm)
 			continue;
 
-		rc = nvdimm_security_setup_events(nvdimm);
-		if (rc < 0)
-			dev_warn(acpi_desc->dev,
-				"security event setup failed: %d\n", rc);
-
 		nfit_kernfs = sysfs_get_dirent(nvdimm_kobj(nvdimm)->sd, "nfit");
 		if (nfit_kernfs)
 			nfit_mem->flags_attr = sysfs_get_dirent(nfit_kernfs,
diff --git a/drivers/nvdimm/dimm.c b/drivers/nvdimm/dimm.c
index 0cf58cabc9ed..3cf50274fadb 100644
--- a/drivers/nvdimm/dimm.c
+++ b/drivers/nvdimm/dimm.c
@@ -26,6 +26,12 @@ static int nvdimm_probe(struct device *dev)
 	struct nvdimm_drvdata *ndd;
 	int rc;
 
+	rc = nvdimm_security_setup_events(dev);
+	if (rc < 0) {
+		dev_err(dev, "security event setup failed: %d\n", rc);
+		return rc;
+	}
+
 	rc = nvdimm_check_config_data(dev);
 	if (rc) {
 		/* not required for non-aliased nvdimm, ex. NVDIMM-N */
diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c
index 4890310df874..efe412a6b5b9 100644
--- a/drivers/nvdimm/dimm_devs.c
+++ b/drivers/nvdimm/dimm_devs.c
@@ -578,13 +578,25 @@ struct nvdimm *__nvdimm_create(struct nvdimm_bus *nvdimm_bus,
 }
 EXPORT_SYMBOL_GPL(__nvdimm_create);
 
-int nvdimm_security_setup_events(struct nvdimm *nvdimm)
+static void shutdown_security_notify(void *data)
 {
-	nvdimm->sec.overwrite_state = sysfs_get_dirent(nvdimm->dev.kobj.sd,
-			"security");
+	struct nvdimm *nvdimm = data;
+
+	sysfs_put(nvdimm->sec.overwrite_state);
+}
+
+int nvdimm_security_setup_events(struct device *dev)
+{
+	struct nvdimm *nvdimm = to_nvdimm(dev);
+
+	if (nvdimm->sec.state < 0 || !nvdimm->sec.ops
+			|| !nvdimm->sec.ops->overwrite)
+		return 0;
+	nvdimm->sec.overwrite_state = sysfs_get_dirent(dev->kobj.sd, "security");
 	if (!nvdimm->sec.overwrite_state)
-		return -ENODEV;
-	return 0;
+		return -ENOMEM;
+
+	return devm_add_action_or_reset(dev, shutdown_security_notify, nvdimm);
 }
 EXPORT_SYMBOL_GPL(nvdimm_security_setup_events);
 
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index cfde992684e7..379bf4305e61 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -250,6 +250,7 @@ long nvdimm_clear_poison(struct device *dev, phys_addr_t phys,
 void nvdimm_set_aliasing(struct device *dev);
 void nvdimm_set_locked(struct device *dev);
 void nvdimm_clear_locked(struct device *dev);
+int nvdimm_security_setup_events(struct device *dev);
 #if IS_ENABLED(CONFIG_NVDIMM_KEYS)
 int nvdimm_security_unlock(struct device *dev);
 #else
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index 7315977b64da..ad609617aeb8 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -235,7 +235,6 @@ static inline struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus,
 			cmd_mask, num_flush, flush_wpq, NULL, NULL);
 }
 
-int nvdimm_security_setup_events(struct nvdimm *nvdimm);
 const struct nd_cmd_desc *nd_cmd_dimm_desc(int cmd);
 const struct nd_cmd_desc *nd_cmd_bus_desc(int cmd);
 u32 nd_cmd_in_size(struct nvdimm *nvdimm, int cmd,
-- 
cgit v1.2.3-71-gd317


From 7fc5854f8c6efae9e7624970ab49a1eac2faefb1 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 12 Dec 2017 08:38:30 -0800
Subject: writeback: synchronize sync(2) against cgroup writeback membership
 switches

sync_inodes_sb() can race against cgwb (cgroup writeback) membership
switches and fail to writeback some inodes.  For example, if an inode
switches to another wb while sync_inodes_sb() is in progress, the new
wb might not be visible to bdi_split_work_to_wbs() at all or the inode
might jump from a wb which hasn't issued writebacks yet to one which
already has.

This patch adds backing_dev_info->wb_switch_rwsem to synchronize cgwb
switch path against sync_inodes_sb() so that sync_inodes_sb() is
guaranteed to see all the target wbs and inodes can't jump wbs to
escape syncing.

v2: Fixed misplaced rwsem init.  Spotted by Jiufei.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Jiufei Xue <xuejiufei@gmail.com>
Link: http://lkml.kernel.org/r/dc694ae2-f07f-61e1-7097-7c8411cee12d@gmail.com
Acked-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/fs-writeback.c                | 40 ++++++++++++++++++++++++++++++++++++++--
 include/linux/backing-dev-defs.h |  1 +
 mm/backing-dev.c                 |  1 +
 3 files changed, 40 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index b40168fcc94a..36855c1f8daf 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -331,11 +331,22 @@ struct inode_switch_wbs_context {
 	struct work_struct	work;
 };
 
+static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi)
+{
+	down_write(&bdi->wb_switch_rwsem);
+}
+
+static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi)
+{
+	up_write(&bdi->wb_switch_rwsem);
+}
+
 static void inode_switch_wbs_work_fn(struct work_struct *work)
 {
 	struct inode_switch_wbs_context *isw =
 		container_of(work, struct inode_switch_wbs_context, work);
 	struct inode *inode = isw->inode;
+	struct backing_dev_info *bdi = inode_to_bdi(inode);
 	struct address_space *mapping = inode->i_mapping;
 	struct bdi_writeback *old_wb = inode->i_wb;
 	struct bdi_writeback *new_wb = isw->new_wb;
@@ -343,6 +354,12 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
 	struct page *page;
 	bool switched = false;
 
+	/*
+	 * If @inode switches cgwb membership while sync_inodes_sb() is
+	 * being issued, sync_inodes_sb() might miss it.  Synchronize.
+	 */
+	down_read(&bdi->wb_switch_rwsem);
+
 	/*
 	 * By the time control reaches here, RCU grace period has passed
 	 * since I_WB_SWITCH assertion and all wb stat update transactions
@@ -428,6 +445,8 @@ skip_switch:
 	spin_unlock(&new_wb->list_lock);
 	spin_unlock(&old_wb->list_lock);
 
+	up_read(&bdi->wb_switch_rwsem);
+
 	if (switched) {
 		wb_wakeup(new_wb);
 		wb_put(old_wb);
@@ -468,9 +487,18 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
 	if (inode->i_state & I_WB_SWITCH)
 		return;
 
+	/*
+	 * Avoid starting new switches while sync_inodes_sb() is in
+	 * progress.  Otherwise, if the down_write protected issue path
+	 * blocks heavily, we might end up starting a large number of
+	 * switches which will block on the rwsem.
+	 */
+	if (!down_read_trylock(&bdi->wb_switch_rwsem))
+		return;
+
 	isw = kzalloc(sizeof(*isw), GFP_ATOMIC);
 	if (!isw)
-		return;
+		goto out_unlock;
 
 	/* find and pin the new wb */
 	rcu_read_lock();
@@ -504,12 +532,14 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
 	 * Let's continue after I_WB_SWITCH is guaranteed to be visible.
 	 */
 	call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn);
-	return;
+	goto out_unlock;
 
 out_free:
 	if (isw->new_wb)
 		wb_put(isw->new_wb);
 	kfree(isw);
+out_unlock:
+	up_read(&bdi->wb_switch_rwsem);
 }
 
 /**
@@ -887,6 +917,9 @@ fs_initcall(cgroup_writeback_init);
 
 #else	/* CONFIG_CGROUP_WRITEBACK */
 
+static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi) { }
+static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi) { }
+
 static struct bdi_writeback *
 locked_inode_to_wb_and_lock_list(struct inode *inode)
 	__releases(&inode->i_lock)
@@ -2413,8 +2446,11 @@ void sync_inodes_sb(struct super_block *sb)
 		return;
 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
 
+	/* protect against inode wb switch, see inode_switch_wbs_work_fn() */
+	bdi_down_write_wb_switch_rwsem(bdi);
 	bdi_split_work_to_wbs(bdi, &work, false);
 	wb_wait_for_completion(bdi, &done);
+	bdi_up_write_wb_switch_rwsem(bdi);
 
 	wait_sb_inodes(sb);
 }
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index c31157135598..07e02d6df5ad 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -190,6 +190,7 @@ struct backing_dev_info {
 	struct radix_tree_root cgwb_tree; /* radix tree of active cgroup wbs */
 	struct rb_root cgwb_congested_tree; /* their congested states */
 	struct mutex cgwb_release_mutex;  /* protect shutdown of wb structs */
+	struct rw_semaphore wb_switch_rwsem; /* no cgwb switch while syncing */
 #else
 	struct bdi_writeback_congested *wb_congested;
 #endif
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 8a8bb8796c6c..72e6d0c55cfa 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -689,6 +689,7 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi)
 	INIT_RADIX_TREE(&bdi->cgwb_tree, GFP_ATOMIC);
 	bdi->cgwb_congested_tree = RB_ROOT;
 	mutex_init(&bdi->cgwb_release_mutex);
+	init_rwsem(&bdi->wb_switch_rwsem);
 
 	ret = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL);
 	if (!ret) {
-- 
cgit v1.2.3-71-gd317


From c75860e48a7634ff8dc050842211f79a0e4e6c46 Mon Sep 17 00:00:00 2001
From: Tomer Tayar <tomer.tayar@cavium.com>
Date: Sun, 20 Jan 2019 11:36:38 +0200
Subject: qed: Add infrastructure for error detection and recovery

This patch adds the detection and handling of a parity error ("process kill
event"), including the update of the protocol drivers, and the prevention
of any HW access that will lead to device access towards the host while
recovery is in progress.
It also provides the means for the protocol drivers to trigger a recovery
process on their decision.

Signed-off-by: Tomer Tayar <tomer.tayar@cavium.com>
Signed-off-by: Ariel Elior <ariel.elior@cavium.com>
Signed-off-by: Michal Kalderon <michal.kalderon@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed.h          |  4 ++
 drivers/net/ethernet/qlogic/qed/qed_dev.c      | 41 +++++++----
 drivers/net/ethernet/qlogic/qed/qed_hsi.h      |  2 +-
 drivers/net/ethernet/qlogic/qed/qed_hw.c       | 11 +++
 drivers/net/ethernet/qlogic/qed/qed_main.c     | 30 ++++++++
 drivers/net/ethernet/qlogic/qed/qed_mcp.c      | 94 ++++++++++++++++++++++++++
 drivers/net/ethernet/qlogic/qed/qed_mcp.h      | 32 +++++++++
 drivers/net/ethernet/qlogic/qed/qed_reg_addr.h |  2 +
 drivers/net/ethernet/qlogic/qed/qed_spq.c      | 22 ++++++
 drivers/net/ethernet/qlogic/qed/qed_sriov.c    |  9 ++-
 include/linux/qed/qed_if.h                     | 20 ++++++
 11 files changed, 251 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index b352e313e1f6..3b0955d34716 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -804,6 +804,9 @@ struct qed_dev {
 
 	u32				mcp_nvm_resp;
 
+	/* Recovery */
+	bool recov_in_prog;
+
 	/* Linux specific here */
 	struct  qede_dev		*edev;
 	struct  pci_dev			*pdev;
@@ -943,6 +946,7 @@ void qed_link_update(struct qed_hwfn *hwfn, struct qed_ptt *ptt);
 u32 qed_unzip_data(struct qed_hwfn *p_hwfn,
 		   u32 input_len, u8 *input_buf,
 		   u32 max_size, u8 *unzip_buf);
+void qed_schedule_recovery_handler(struct qed_hwfn *p_hwfn);
 void qed_get_protocol_stats(struct qed_dev *cdev,
 			    enum qed_mcp_protocol_type type,
 			    union qed_mcp_protocol_stats *stats);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index fa5f07e65672..b17003d9066c 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -2140,6 +2140,11 @@ int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params)
 			   "Load request was sent. Load code: 0x%x\n",
 			   load_code);
 
+		/* Only relevant for recovery:
+		 * Clear the indication after LOAD_REQ is responded by the MFW.
+		 */
+		cdev->recov_in_prog = false;
+
 		qed_mcp_set_capabilities(p_hwfn, p_hwfn->p_main_ptt);
 
 		qed_reset_mb_shadow(p_hwfn, p_hwfn->p_main_ptt);
@@ -2291,6 +2296,9 @@ static void qed_hw_timers_stop(struct qed_dev *cdev,
 	qed_wr(p_hwfn, p_ptt, TM_REG_PF_ENABLE_CONN, 0x0);
 	qed_wr(p_hwfn, p_ptt, TM_REG_PF_ENABLE_TASK, 0x0);
 
+	if (cdev->recov_in_prog)
+		return;
+
 	for (i = 0; i < QED_HW_STOP_RETRY_LIMIT; i++) {
 		if ((!qed_rd(p_hwfn, p_ptt,
 			     TM_REG_PF_SCAN_ACTIVE_CONN)) &&
@@ -2353,12 +2361,14 @@ int qed_hw_stop(struct qed_dev *cdev)
 		p_hwfn->hw_init_done = false;
 
 		/* Send unload command to MCP */
-		rc = qed_mcp_unload_req(p_hwfn, p_ptt);
-		if (rc) {
-			DP_NOTICE(p_hwfn,
-				  "Failed sending a UNLOAD_REQ command. rc = %d.\n",
-				  rc);
-			rc2 = -EINVAL;
+		if (!cdev->recov_in_prog) {
+			rc = qed_mcp_unload_req(p_hwfn, p_ptt);
+			if (rc) {
+				DP_NOTICE(p_hwfn,
+					  "Failed sending a UNLOAD_REQ command. rc = %d.\n",
+					  rc);
+				rc2 = -EINVAL;
+			}
 		}
 
 		qed_slowpath_irq_sync(p_hwfn);
@@ -2400,16 +2410,18 @@ int qed_hw_stop(struct qed_dev *cdev)
 		qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_DB_ENABLE, 0);
 		qed_wr(p_hwfn, p_ptt, QM_REG_PF_EN, 0);
 
-		qed_mcp_unload_done(p_hwfn, p_ptt);
-		if (rc) {
-			DP_NOTICE(p_hwfn,
-				  "Failed sending a UNLOAD_DONE command. rc = %d.\n",
-				  rc);
-			rc2 = -EINVAL;
+		if (!cdev->recov_in_prog) {
+			rc = qed_mcp_unload_done(p_hwfn, p_ptt);
+			if (rc) {
+				DP_NOTICE(p_hwfn,
+					  "Failed sending a UNLOAD_DONE command. rc = %d.\n",
+					  rc);
+				rc2 = -EINVAL;
+			}
 		}
 	}
 
-	if (IS_PF(cdev)) {
+	if (IS_PF(cdev) && !cdev->recov_in_prog) {
 		p_hwfn = QED_LEADING_HWFN(cdev);
 		p_ptt = QED_LEADING_HWFN(cdev)->p_main_ptt;
 
@@ -3459,6 +3471,7 @@ static int qed_hw_prepare_single(struct qed_hwfn *p_hwfn,
 				 void __iomem *p_doorbells,
 				 enum qed_pci_personality personality)
 {
+	struct qed_dev *cdev = p_hwfn->cdev;
 	int rc = 0;
 
 	/* Split PCI bars evenly between hwfns */
@@ -3511,7 +3524,7 @@ static int qed_hw_prepare_single(struct qed_hwfn *p_hwfn,
 	/* Sending a mailbox to the MFW should be done after qed_get_hw_info()
 	 * is called as it sets the ports number in an engine.
 	 */
-	if (IS_LEAD_HWFN(p_hwfn)) {
+	if (IS_LEAD_HWFN(p_hwfn) && !cdev->recov_in_prog) {
 		rc = qed_mcp_initiate_pf_flr(p_hwfn, p_hwfn->p_main_ptt);
 		if (rc)
 			DP_NOTICE(p_hwfn, "Failed to initiate PF FLR\n");
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index b13cfb449d8f..417121e74ee9 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -12827,7 +12827,7 @@ enum MFW_DRV_MSG_TYPE {
 	MFW_DRV_MSG_LLDP_DATA_UPDATED,
 	MFW_DRV_MSG_DCBX_REMOTE_MIB_UPDATED,
 	MFW_DRV_MSG_DCBX_OPERATIONAL_MIB_UPDATED,
-	MFW_DRV_MSG_RESERVED4,
+	MFW_DRV_MSG_ERROR_RECOVERY,
 	MFW_DRV_MSG_BW_UPDATE,
 	MFW_DRV_MSG_S_TAG_UPDATE,
 	MFW_DRV_MSG_GET_LAN_STATS,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hw.c b/drivers/net/ethernet/qlogic/qed/qed_hw.c
index 70504dcf4087..72ec1c6bdf70 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hw.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_hw.c
@@ -703,6 +703,17 @@ static int qed_dmae_execute_command(struct qed_hwfn *p_hwfn,
 	int qed_status = 0;
 	u32 offset = 0;
 
+	if (p_hwfn->cdev->recov_in_prog) {
+		DP_VERBOSE(p_hwfn,
+			   NETIF_MSG_HW,
+			   "Recovery is in progress. Avoid DMAE transaction [{src: addr 0x%llx, type %d}, {dst: addr 0x%llx, type %d}, size %d].\n",
+			   src_addr, src_type, dst_addr, dst_type,
+			   size_in_dwords);
+
+		/* Let the flow complete w/o any error handling */
+		return 0;
+	}
+
 	qed_dmae_opcode(p_hwfn,
 			(src_type == QED_DMAE_ADDRESS_GRC),
 			(dst_type == QED_DMAE_ADDRESS_GRC),
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index 6adf5bda9811..b47352643fb5 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -359,6 +359,8 @@ static struct qed_dev *qed_probe(struct pci_dev *pdev,
 
 	qed_init_dp(cdev, params->dp_module, params->dp_level);
 
+	cdev->recov_in_prog = params->recov_in_prog;
+
 	rc = qed_init_pci(cdev, pdev);
 	if (rc) {
 		DP_ERR(cdev, "init pci failed\n");
@@ -2203,6 +2205,15 @@ static int qed_nvm_get_image(struct qed_dev *cdev, enum qed_nvm_images type,
 	return qed_mcp_get_nvm_image(hwfn, type, buf, len);
 }
 
+void qed_schedule_recovery_handler(struct qed_hwfn *p_hwfn)
+{
+	struct qed_common_cb_ops *ops = p_hwfn->cdev->protocol_ops.common;
+	void *cookie = p_hwfn->cdev->ops_cookie;
+
+	if (ops && ops->schedule_recovery_handler)
+		ops->schedule_recovery_handler(cookie);
+}
+
 static int qed_set_coalesce(struct qed_dev *cdev, u16 rx_coal, u16 tx_coal,
 			    void *handle)
 {
@@ -2226,6 +2237,23 @@ static int qed_set_led(struct qed_dev *cdev, enum qed_led_mode mode)
 	return status;
 }
 
+static int qed_recovery_process(struct qed_dev *cdev)
+{
+	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_ptt *p_ptt;
+	int rc = 0;
+
+	p_ptt = qed_ptt_acquire(p_hwfn);
+	if (!p_ptt)
+		return -EAGAIN;
+
+	rc = qed_start_recovery_process(p_hwfn, p_ptt);
+
+	qed_ptt_release(p_hwfn, p_ptt);
+
+	return rc;
+}
+
 static int qed_update_wol(struct qed_dev *cdev, bool enabled)
 {
 	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
@@ -2380,6 +2408,8 @@ const struct qed_common_ops qed_common_ops_pass = {
 	.nvm_get_image = &qed_nvm_get_image,
 	.set_coalesce = &qed_set_coalesce,
 	.set_led = &qed_set_led,
+	.recovery_process = &qed_recovery_process,
+	.recovery_prolog = &qed_recovery_prolog,
 	.update_drv_state = &qed_update_drv_state,
 	.update_mac = &qed_update_mac,
 	.update_mtu = &qed_update_mtu,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index 1024484d7dd8..bb8541847aa5 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -1549,6 +1549,60 @@ int qed_mcp_set_link(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, bool b_up)
 	return 0;
 }
 
+u32 qed_get_process_kill_counter(struct qed_hwfn *p_hwfn,
+				 struct qed_ptt *p_ptt)
+{
+	u32 path_offsize_addr, path_offsize, path_addr, proc_kill_cnt;
+
+	if (IS_VF(p_hwfn->cdev))
+		return -EINVAL;
+
+	path_offsize_addr = SECTION_OFFSIZE_ADDR(p_hwfn->mcp_info->public_base,
+						 PUBLIC_PATH);
+	path_offsize = qed_rd(p_hwfn, p_ptt, path_offsize_addr);
+	path_addr = SECTION_ADDR(path_offsize, QED_PATH_ID(p_hwfn));
+
+	proc_kill_cnt = qed_rd(p_hwfn, p_ptt,
+			       path_addr +
+			       offsetof(struct public_path, process_kill)) &
+			PROCESS_KILL_COUNTER_MASK;
+
+	return proc_kill_cnt;
+}
+
+static void qed_mcp_handle_process_kill(struct qed_hwfn *p_hwfn,
+					struct qed_ptt *p_ptt)
+{
+	struct qed_dev *cdev = p_hwfn->cdev;
+	u32 proc_kill_cnt;
+
+	/* Prevent possible attentions/interrupts during the recovery handling
+	 * and till its load phase, during which they will be re-enabled.
+	 */
+	qed_int_igu_disable_int(p_hwfn, p_ptt);
+
+	DP_NOTICE(p_hwfn, "Received a process kill indication\n");
+
+	/* The following operations should be done once, and thus in CMT mode
+	 * are carried out by only the first HW function.
+	 */
+	if (p_hwfn != QED_LEADING_HWFN(cdev))
+		return;
+
+	if (cdev->recov_in_prog) {
+		DP_NOTICE(p_hwfn,
+			  "Ignoring the indication since a recovery process is already in progress\n");
+		return;
+	}
+
+	cdev->recov_in_prog = true;
+
+	proc_kill_cnt = qed_get_process_kill_counter(p_hwfn, p_ptt);
+	DP_NOTICE(p_hwfn, "Process kill counter: %d\n", proc_kill_cnt);
+
+	qed_schedule_recovery_handler(p_hwfn);
+}
+
 static void qed_mcp_send_protocol_stats(struct qed_hwfn *p_hwfn,
 					struct qed_ptt *p_ptt,
 					enum MFW_DRV_MSG_TYPE type)
@@ -1779,6 +1833,9 @@ int qed_mcp_handle_events(struct qed_hwfn *p_hwfn,
 		case MFW_DRV_MSG_TRANSCEIVER_STATE_CHANGE:
 			qed_mcp_handle_transceiver_change(p_hwfn, p_ptt);
 			break;
+		case MFW_DRV_MSG_ERROR_RECOVERY:
+			qed_mcp_handle_process_kill(p_hwfn, p_ptt);
+			break;
 		case MFW_DRV_MSG_GET_LAN_STATS:
 		case MFW_DRV_MSG_GET_FCOE_STATS:
 		case MFW_DRV_MSG_GET_ISCSI_STATS:
@@ -2324,6 +2381,43 @@ int qed_mcp_get_flash_size(struct qed_hwfn *p_hwfn,
 	return 0;
 }
 
+int qed_start_recovery_process(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+	struct qed_dev *cdev = p_hwfn->cdev;
+
+	if (cdev->recov_in_prog) {
+		DP_NOTICE(p_hwfn,
+			  "Avoid triggering a recovery since such a process is already in progress\n");
+		return -EAGAIN;
+	}
+
+	DP_NOTICE(p_hwfn, "Triggering a recovery process\n");
+	qed_wr(p_hwfn, p_ptt, MISC_REG_AEU_GENERAL_ATTN_35, 0x1);
+
+	return 0;
+}
+
+#define QED_RECOVERY_PROLOG_SLEEP_MS    100
+
+int qed_recovery_prolog(struct qed_dev *cdev)
+{
+	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_ptt *p_ptt = p_hwfn->p_main_ptt;
+	int rc;
+
+	/* Allow ongoing PCIe transactions to complete */
+	msleep(QED_RECOVERY_PROLOG_SLEEP_MS);
+
+	/* Clear the PF's internal FID_enable in the PXP */
+	rc = qed_pglueb_set_pfid_enable(p_hwfn, p_ptt, false);
+	if (rc)
+		DP_NOTICE(p_hwfn,
+			  "qed_pglueb_set_pfid_enable() failed. rc = %d.\n",
+			  rc);
+
+	return rc;
+}
+
 static int
 qed_mcp_config_vf_msix_bb(struct qed_hwfn *p_hwfn,
 			  struct qed_ptt *p_ptt, u8 vf_id, u8 num)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
index 387c5e649136..6e1d72a669ae 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
@@ -440,6 +440,38 @@ qed_mcp_send_drv_version(struct qed_hwfn *p_hwfn,
 			 struct qed_ptt *p_ptt,
 			 struct qed_mcp_drv_version *p_ver);
 
+/**
+ * @brief Read the MFW process kill counter
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ *
+ * @return u32
+ */
+u32 qed_get_process_kill_counter(struct qed_hwfn *p_hwfn,
+				 struct qed_ptt *p_ptt);
+
+/**
+ * @brief Trigger a recovery process
+ *
+ *  @param p_hwfn
+ *  @param p_ptt
+ *
+ * @return int
+ */
+int qed_start_recovery_process(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+
+/**
+ * @brief A recovery handler must call this function as its first step.
+ *        It is assumed that the handler is not run from an interrupt context.
+ *
+ *  @param cdev
+ *  @param p_ptt
+ *
+ * @return int
+ */
+int qed_recovery_prolog(struct qed_dev *cdev);
+
 /**
  * @brief Notify MFW about the change in base device properties
  *
diff --git a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
index 8939ed6e08b7..5ce825ca5f24 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
@@ -518,6 +518,8 @@
 	0x180824UL
 #define  MISC_REG_AEU_GENERAL_ATTN_0 \
 	0x008400UL
+#define MISC_REG_AEU_GENERAL_ATTN_35 \
+	0x00848cUL
 #define  CAU_REG_SB_ADDR_MEMORY \
 	0x1c8000UL
 #define  CAU_REG_SB_VAR_MEMORY \
diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c
index eb88bbc6b193..3e0f7c46bb1b 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_spq.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c
@@ -790,6 +790,17 @@ static int qed_spq_pend_post(struct qed_hwfn *p_hwfn)
 				 SPQ_HIGH_PRI_RESERVE_DEFAULT);
 }
 
+static void qed_spq_recov_set_ret_code(struct qed_spq_entry *p_ent,
+				       u8 *fw_return_code)
+{
+	if (!fw_return_code)
+		return;
+
+	if (p_ent->elem.hdr.protocol_id == PROTOCOLID_ROCE ||
+	    p_ent->elem.hdr.protocol_id == PROTOCOLID_IWARP)
+		*fw_return_code = RDMA_RETURN_OK;
+}
+
 /* Avoid overriding of SPQ entries when getting out-of-order completions, by
  * marking the completions in a bitmap and increasing the chain consumer only
  * for the first successive completed entries.
@@ -825,6 +836,17 @@ int qed_spq_post(struct qed_hwfn *p_hwfn,
 		return -EINVAL;
 	}
 
+	if (p_hwfn->cdev->recov_in_prog) {
+		DP_VERBOSE(p_hwfn,
+			   QED_MSG_SPQ,
+			   "Recovery is in progress. Skip spq post [cmd %02x protocol %02x]\n",
+			   p_ent->elem.hdr.cmd_id, p_ent->elem.hdr.protocol_id);
+
+		/* Let the flow complete w/o any error handling */
+		qed_spq_recov_set_ret_code(p_ent, fw_return_code);
+		return 0;
+	}
+
 	/* Complete the entry */
 	rc = qed_spq_fill_entry(p_hwfn, p_ent);
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
index ca6290fa0f30..71e28be58102 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
@@ -4447,6 +4447,13 @@ int qed_sriov_disable(struct qed_dev *cdev, bool pci_enabled)
 	if (cdev->p_iov_info && cdev->p_iov_info->num_vfs && pci_enabled)
 		pci_disable_sriov(cdev->pdev);
 
+	if (cdev->recov_in_prog) {
+		DP_VERBOSE(cdev,
+			   QED_MSG_IOV,
+			   "Skip SRIOV disable operations in the device since a recovery is in progress\n");
+		goto out;
+	}
+
 	for_each_hwfn(cdev, i) {
 		struct qed_hwfn *hwfn = &cdev->hwfns[i];
 		struct qed_ptt *ptt = qed_ptt_acquire(hwfn);
@@ -4486,7 +4493,7 @@ int qed_sriov_disable(struct qed_dev *cdev, bool pci_enabled)
 
 		qed_ptt_release(hwfn, ptt);
 	}
-
+out:
 	qed_iov_set_vfs_to_disable(cdev, false);
 
 	return 0;
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index 91c536a01b56..c2a1b7dbe4eb 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -764,6 +764,7 @@ struct qed_probe_params {
 	u32 dp_module;
 	u8 dp_level;
 	bool is_vf;
+	bool recov_in_prog;
 };
 
 #define QED_DRV_VER_STR_SIZE 12
@@ -810,6 +811,7 @@ struct qed_common_cb_ops {
 	void (*arfs_filter_op)(void *dev, void *fltr, u8 fw_rc);
 	void	(*link_update)(void			*dev,
 			       struct qed_link_output	*link);
+	void (*schedule_recovery_handler)(void *dev);
 	void	(*dcbx_aen)(void *dev, struct qed_dcbx_get *get, u32 mib_type);
 	void (*get_generic_tlv_data)(void *dev, struct qed_generic_tlvs *data);
 	void (*get_protocol_tlv_data)(void *dev, void *data);
@@ -1057,6 +1059,24 @@ struct qed_common_ops {
 	int (*db_recovery_del)(struct qed_dev *cdev,
 			       void __iomem *db_addr, void *db_data);
 
+/**
+ * @brief recovery_process - Trigger a recovery process
+ *
+ * @param cdev
+ *
+ * @return 0 on success, error otherwise.
+ */
+	int (*recovery_process)(struct qed_dev *cdev);
+
+/**
+ * @brief recovery_prolog - Execute the prolog operations of a recovery process
+ *
+ * @param cdev
+ *
+ * @return 0 on success, error otherwise.
+ */
+	int (*recovery_prolog)(struct qed_dev *cdev);
+
 /**
  * @brief update_drv_state - API to inform the change in the driver state.
  *
-- 
cgit v1.2.3-71-gd317


From 278396de78a9b59a692bc140233bde3a9d8a8a31 Mon Sep 17 00:00:00 2001
From: Tomer Tayar <tomer.tayar@cavium.com>
Date: Sun, 20 Jan 2019 11:36:39 +0200
Subject: qede: Error recovery process

This patch adds the error recovery process in the qede driver.
The process includes a partial/customized driver unload and load, which
allows it to look like a short suspend period to the kernel while
preserving the net devices' state.

Signed-off-by: Tomer Tayar <tomer.tayar@cavium.com>
Signed-off-by: Ariel Elior <ariel.elior@cavium.com>
Signed-off-by: Michal Kalderon <michal.kalderon@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qede/qede.h      |   3 +
 drivers/net/ethernet/qlogic/qede/qede_main.c | 300 ++++++++++++++++++++++-----
 drivers/net/ethernet/qlogic/qede/qede_rdma.c |  64 ++++--
 include/linux/qed/qede_rdma.h                |  21 +-
 4 files changed, 314 insertions(+), 74 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h
index 613249d1e967..843416404aeb 100644
--- a/drivers/net/ethernet/qlogic/qede/qede.h
+++ b/drivers/net/ethernet/qlogic/qede/qede.h
@@ -162,6 +162,7 @@ struct qede_rdma_dev {
 	struct list_head entry;
 	struct list_head rdma_event_list;
 	struct workqueue_struct *rdma_wq;
+	bool exp_recovery;
 };
 
 struct qede_ptp;
@@ -264,6 +265,7 @@ struct qede_dev {
 enum QEDE_STATE {
 	QEDE_STATE_CLOSED,
 	QEDE_STATE_OPEN,
+	QEDE_STATE_RECOVERY,
 };
 
 #define HILO_U64(hi, lo)		((((u64)(hi)) << 32) + (lo))
@@ -462,6 +464,7 @@ struct qede_fastpath {
 #define QEDE_CSUM_UNNECESSARY		BIT(1)
 #define QEDE_TUNN_CSUM_UNNECESSARY	BIT(2)
 
+#define QEDE_SP_RECOVERY		0
 #define QEDE_SP_RX_MODE			1
 
 #ifdef CONFIG_RFS_ACCEL
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 5a74fcbdbc2b..de955f2b2980 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -133,23 +133,12 @@ static int qede_probe(struct pci_dev *pdev, const struct pci_device_id *id);
 static void qede_remove(struct pci_dev *pdev);
 static void qede_shutdown(struct pci_dev *pdev);
 static void qede_link_update(void *dev, struct qed_link_output *link);
+static void qede_schedule_recovery_handler(void *dev);
+static void qede_recovery_handler(struct qede_dev *edev);
 static void qede_get_eth_tlv_data(void *edev, void *data);
 static void qede_get_generic_tlv_data(void *edev,
 				      struct qed_generic_tlvs *data);
 
-/* The qede lock is used to protect driver state change and driver flows that
- * are not reentrant.
- */
-void __qede_lock(struct qede_dev *edev)
-{
-	mutex_lock(&edev->qede_lock);
-}
-
-void __qede_unlock(struct qede_dev *edev)
-{
-	mutex_unlock(&edev->qede_lock);
-}
-
 #ifdef CONFIG_QED_SRIOV
 static int qede_set_vf_vlan(struct net_device *ndev, int vf, u16 vlan, u8 qos,
 			    __be16 vlan_proto)
@@ -231,6 +220,7 @@ static struct qed_eth_cb_ops qede_ll_ops = {
 		.arfs_filter_op = qede_arfs_filter_op,
 #endif
 		.link_update = qede_link_update,
+		.schedule_recovery_handler = qede_schedule_recovery_handler,
 		.get_generic_tlv_data = qede_get_generic_tlv_data,
 		.get_protocol_tlv_data = qede_get_eth_tlv_data,
 	},
@@ -950,11 +940,57 @@ err:
 	return -ENOMEM;
 }
 
+/* The qede lock is used to protect driver state change and driver flows that
+ * are not reentrant.
+ */
+void __qede_lock(struct qede_dev *edev)
+{
+	mutex_lock(&edev->qede_lock);
+}
+
+void __qede_unlock(struct qede_dev *edev)
+{
+	mutex_unlock(&edev->qede_lock);
+}
+
+/* This version of the lock should be used when acquiring the RTNL lock is also
+ * needed in addition to the internal qede lock.
+ */
+void qede_lock(struct qede_dev *edev)
+{
+	rtnl_lock();
+	__qede_lock(edev);
+}
+
+void qede_unlock(struct qede_dev *edev)
+{
+	__qede_unlock(edev);
+	rtnl_unlock();
+}
+
 static void qede_sp_task(struct work_struct *work)
 {
 	struct qede_dev *edev = container_of(work, struct qede_dev,
 					     sp_task.work);
 
+	/* The locking scheme depends on the specific flag:
+	 * In case of QEDE_SP_RECOVERY, acquiring the RTNL lock is required to
+	 * ensure that ongoing flows are ended and new ones are not started.
+	 * In other cases - only the internal qede lock should be acquired.
+	 */
+
+	if (test_and_clear_bit(QEDE_SP_RECOVERY, &edev->sp_flags)) {
+#ifdef CONFIG_QED_SRIOV
+		/* SRIOV must be disabled outside the lock to avoid a deadlock.
+		 * The recovery of the active VFs is currently not supported.
+		 */
+		qede_sriov_configure(edev->pdev, 0);
+#endif
+		qede_lock(edev);
+		qede_recovery_handler(edev);
+		qede_unlock(edev);
+	}
+
 	__qede_lock(edev);
 
 	if (test_and_clear_bit(QEDE_SP_RX_MODE, &edev->sp_flags))
@@ -1031,8 +1067,13 @@ static void qede_log_probe(struct qede_dev *edev)
 
 enum qede_probe_mode {
 	QEDE_PROBE_NORMAL,
+	QEDE_PROBE_RECOVERY,
 };
 
+#define QEDE_RDMA_PROBE_MODE(mode) \
+	((mode) == QEDE_PROBE_NORMAL ? QEDE_RDMA_PROBE_NORMAL \
+				     : QEDE_RDMA_PROBE_RECOVERY)
+
 static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
 			bool is_vf, enum qede_probe_mode mode)
 {
@@ -1051,6 +1092,7 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
 	probe_params.dp_module = dp_module;
 	probe_params.dp_level = dp_level;
 	probe_params.is_vf = is_vf;
+	probe_params.recov_in_prog = (mode == QEDE_PROBE_RECOVERY);
 	cdev = qed_ops->common->probe(pdev, &probe_params);
 	if (!cdev) {
 		rc = -ENODEV;
@@ -1078,11 +1120,20 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
 	if (rc)
 		goto err2;
 
-	edev = qede_alloc_etherdev(cdev, pdev, &dev_info, dp_module,
-				   dp_level);
-	if (!edev) {
-		rc = -ENOMEM;
-		goto err2;
+	if (mode != QEDE_PROBE_RECOVERY) {
+		edev = qede_alloc_etherdev(cdev, pdev, &dev_info, dp_module,
+					   dp_level);
+		if (!edev) {
+			rc = -ENOMEM;
+			goto err2;
+		}
+	} else {
+		struct net_device *ndev = pci_get_drvdata(pdev);
+
+		edev = netdev_priv(ndev);
+		edev->cdev = cdev;
+		memset(&edev->stats, 0, sizeof(edev->stats));
+		memcpy(&edev->dev_info, &dev_info, sizeof(dev_info));
 	}
 
 	if (is_vf)
@@ -1090,28 +1141,31 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
 
 	qede_init_ndev(edev);
 
-	rc = qede_rdma_dev_add(edev);
+	rc = qede_rdma_dev_add(edev, QEDE_RDMA_PROBE_MODE(mode));
 	if (rc)
 		goto err3;
 
-	/* Prepare the lock prior to the registration of the netdev,
-	 * as once it's registered we might reach flows requiring it
-	 * [it's even possible to reach a flow needing it directly
-	 * from there, although it's unlikely].
-	 */
-	INIT_DELAYED_WORK(&edev->sp_task, qede_sp_task);
-	mutex_init(&edev->qede_lock);
-	rc = register_netdev(edev->ndev);
-	if (rc) {
-		DP_NOTICE(edev, "Cannot register net-device\n");
-		goto err4;
+	if (mode != QEDE_PROBE_RECOVERY) {
+		/* Prepare the lock prior to the registration of the netdev,
+		 * as once it's registered we might reach flows requiring it
+		 * [it's even possible to reach a flow needing it directly
+		 * from there, although it's unlikely].
+		 */
+		INIT_DELAYED_WORK(&edev->sp_task, qede_sp_task);
+		mutex_init(&edev->qede_lock);
+
+		rc = register_netdev(edev->ndev);
+		if (rc) {
+			DP_NOTICE(edev, "Cannot register net-device\n");
+			goto err4;
+		}
 	}
 
 	edev->ops->common->set_name(cdev, edev->ndev->name);
 
 	/* PTP not supported on VFs */
 	if (!is_vf)
-		qede_ptp_enable(edev, true);
+		qede_ptp_enable(edev, (mode == QEDE_PROBE_NORMAL));
 
 	edev->ops->register_ops(cdev, &qede_ll_ops, edev);
 
@@ -1126,7 +1180,7 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
 	return 0;
 
 err4:
-	qede_rdma_dev_remove(edev);
+	qede_rdma_dev_remove(edev, QEDE_RDMA_PROBE_MODE(mode));
 err3:
 	free_netdev(edev->ndev);
 err2:
@@ -1162,8 +1216,13 @@ static int qede_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 enum qede_remove_mode {
 	QEDE_REMOVE_NORMAL,
+	QEDE_REMOVE_RECOVERY,
 };
 
+#define QEDE_RDMA_REMOVE_MODE(mode) \
+	((mode) == QEDE_REMOVE_NORMAL ? QEDE_RDMA_REMOVE_NORMAL \
+			      : QEDE_RDMA_REMOVE_RECOVERY)
+
 static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode)
 {
 	struct net_device *ndev = pci_get_drvdata(pdev);
@@ -1172,15 +1231,19 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode)
 
 	DP_INFO(edev, "Starting qede_remove\n");
 
-	qede_rdma_dev_remove(edev);
-	unregister_netdev(ndev);
-	cancel_delayed_work_sync(&edev->sp_task);
+	qede_rdma_dev_remove(edev, QEDE_RDMA_REMOVE_MODE(mode));
 
-	qede_ptp_disable(edev);
+	if (mode != QEDE_REMOVE_RECOVERY) {
+		unregister_netdev(ndev);
 
-	edev->ops->common->set_power_state(cdev, PCI_D0);
+		cancel_delayed_work_sync(&edev->sp_task);
 
-	pci_set_drvdata(pdev, NULL);
+		edev->ops->common->set_power_state(cdev, PCI_D0);
+
+		pci_set_drvdata(pdev, NULL);
+	}
+
+	qede_ptp_disable(edev);
 
 	/* Use global ops since we've freed edev */
 	qed_ops->common->slowpath_stop(cdev);
@@ -1194,7 +1257,8 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode)
 	 * [e.g., QED register callbacks] won't break anything when
 	 * accessing the netdevice.
 	 */
-	 free_netdev(ndev);
+	if (mode != QEDE_REMOVE_RECOVERY)
+		free_netdev(ndev);
 
 	dev_info(&pdev->dev, "Ending qede_remove successfully\n");
 }
@@ -1539,6 +1603,58 @@ static int qede_alloc_mem_load(struct qede_dev *edev)
 	return 0;
 }
 
+static void qede_empty_tx_queue(struct qede_dev *edev,
+				struct qede_tx_queue *txq)
+{
+	unsigned int pkts_compl = 0, bytes_compl = 0;
+	struct netdev_queue *netdev_txq;
+	int rc, len = 0;
+
+	netdev_txq = netdev_get_tx_queue(edev->ndev, txq->ndev_txq_id);
+
+	while (qed_chain_get_cons_idx(&txq->tx_pbl) !=
+	       qed_chain_get_prod_idx(&txq->tx_pbl)) {
+		DP_VERBOSE(edev, NETIF_MSG_IFDOWN,
+			   "Freeing a packet on tx queue[%d]: chain_cons 0x%x, chain_prod 0x%x\n",
+			   txq->index, qed_chain_get_cons_idx(&txq->tx_pbl),
+			   qed_chain_get_prod_idx(&txq->tx_pbl));
+
+		rc = qede_free_tx_pkt(edev, txq, &len);
+		if (rc) {
+			DP_NOTICE(edev,
+				  "Failed to free a packet on tx queue[%d]: chain_cons 0x%x, chain_prod 0x%x\n",
+				  txq->index,
+				  qed_chain_get_cons_idx(&txq->tx_pbl),
+				  qed_chain_get_prod_idx(&txq->tx_pbl));
+			break;
+		}
+
+		bytes_compl += len;
+		pkts_compl++;
+		txq->sw_tx_cons++;
+	}
+
+	netdev_tx_completed_queue(netdev_txq, pkts_compl, bytes_compl);
+}
+
+static void qede_empty_tx_queues(struct qede_dev *edev)
+{
+	int i;
+
+	for_each_queue(i)
+		if (edev->fp_array[i].type & QEDE_FASTPATH_TX) {
+			int cos;
+
+			for_each_cos_in_txq(edev, cos) {
+				struct qede_fastpath *fp;
+
+				fp = &edev->fp_array[i];
+				qede_empty_tx_queue(edev,
+						    &fp->txq[cos]);
+			}
+		}
+}
+
 /* This function inits fp content and resets the SB, RXQ and TXQ structures */
 static void qede_init_fp(struct qede_dev *edev)
 {
@@ -2053,6 +2169,7 @@ out:
 
 enum qede_unload_mode {
 	QEDE_UNLOAD_NORMAL,
+	QEDE_UNLOAD_RECOVERY,
 };
 
 static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode,
@@ -2068,7 +2185,8 @@ static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode,
 
 	clear_bit(QEDE_FLAGS_LINK_REQUESTED, &edev->flags);
 
-	edev->state = QEDE_STATE_CLOSED;
+	if (mode != QEDE_UNLOAD_RECOVERY)
+		edev->state = QEDE_STATE_CLOSED;
 
 	qede_rdma_dev_event_close(edev);
 
@@ -2076,17 +2194,20 @@ static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode,
 	netif_tx_disable(edev->ndev);
 	netif_carrier_off(edev->ndev);
 
-	/* Reset the link */
-	memset(&link_params, 0, sizeof(link_params));
-	link_params.link_up = false;
-	edev->ops->common->set_link(edev->cdev, &link_params);
-	rc = qede_stop_queues(edev);
-	if (rc) {
-		qede_sync_free_irqs(edev);
-		goto out;
-	}
+	if (mode != QEDE_UNLOAD_RECOVERY) {
+		/* Reset the link */
+		memset(&link_params, 0, sizeof(link_params));
+		link_params.link_up = false;
+		edev->ops->common->set_link(edev->cdev, &link_params);
 
-	DP_INFO(edev, "Stopped Queues\n");
+		rc = qede_stop_queues(edev);
+		if (rc) {
+			qede_sync_free_irqs(edev);
+			goto out;
+		}
+
+		DP_INFO(edev, "Stopped Queues\n");
+	}
 
 	qede_vlan_mark_nonconfigured(edev);
 	edev->ops->fastpath_stop(edev->cdev);
@@ -2102,18 +2223,26 @@ static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode,
 
 	qede_napi_disable_remove(edev);
 
+	if (mode == QEDE_UNLOAD_RECOVERY)
+		qede_empty_tx_queues(edev);
+
 	qede_free_mem_load(edev);
 	qede_free_fp_array(edev);
 
 out:
 	if (!is_locked)
 		__qede_unlock(edev);
+
+	if (mode != QEDE_UNLOAD_RECOVERY)
+		DP_NOTICE(edev, "Link is down\n");
+
 	DP_INFO(edev, "Ending qede unload\n");
 }
 
 enum qede_load_mode {
 	QEDE_LOAD_NORMAL,
 	QEDE_LOAD_RELOAD,
+	QEDE_LOAD_RECOVERY,
 };
 
 static int qede_load(struct qede_dev *edev, enum qede_load_mode mode,
@@ -2293,6 +2422,77 @@ static void qede_link_update(void *dev, struct qed_link_output *link)
 	}
 }
 
+static void qede_schedule_recovery_handler(void *dev)
+{
+	struct qede_dev *edev = dev;
+
+	if (edev->state == QEDE_STATE_RECOVERY) {
+		DP_NOTICE(edev,
+			  "Avoid scheduling a recovery handling since already in recovery state\n");
+		return;
+	}
+
+	set_bit(QEDE_SP_RECOVERY, &edev->sp_flags);
+	schedule_delayed_work(&edev->sp_task, 0);
+
+	DP_INFO(edev, "Scheduled a recovery handler\n");
+}
+
+static void qede_recovery_failed(struct qede_dev *edev)
+{
+	netdev_err(edev->ndev, "Recovery handling has failed. Power cycle is needed.\n");
+
+	netif_device_detach(edev->ndev);
+
+	if (edev->cdev)
+		edev->ops->common->set_power_state(edev->cdev, PCI_D3hot);
+}
+
+static void qede_recovery_handler(struct qede_dev *edev)
+{
+	u32 curr_state = edev->state;
+	int rc;
+
+	DP_NOTICE(edev, "Starting a recovery process\n");
+
+	/* No need to acquire first the qede_lock since is done by qede_sp_task
+	 * before calling this function.
+	 */
+	edev->state = QEDE_STATE_RECOVERY;
+
+	edev->ops->common->recovery_prolog(edev->cdev);
+
+	if (curr_state == QEDE_STATE_OPEN)
+		qede_unload(edev, QEDE_UNLOAD_RECOVERY, true);
+
+	__qede_remove(edev->pdev, QEDE_REMOVE_RECOVERY);
+
+	rc = __qede_probe(edev->pdev, edev->dp_module, edev->dp_level,
+			  IS_VF(edev), QEDE_PROBE_RECOVERY);
+	if (rc) {
+		edev->cdev = NULL;
+		goto err;
+	}
+
+	if (curr_state == QEDE_STATE_OPEN) {
+		rc = qede_load(edev, QEDE_LOAD_RECOVERY, true);
+		if (rc)
+			goto err;
+
+		qede_config_rx_mode(edev->ndev);
+		udp_tunnel_get_rx_info(edev->ndev);
+	}
+
+	edev->state = curr_state;
+
+	DP_NOTICE(edev, "Recovery handling is done\n");
+
+	return;
+
+err:
+	qede_recovery_failed(edev);
+}
+
 static bool qede_is_txq_full(struct qede_dev *edev, struct qede_tx_queue *txq)
 {
 	struct netdev_queue *netdev_txq;
diff --git a/drivers/net/ethernet/qlogic/qede/qede_rdma.c b/drivers/net/ethernet/qlogic/qede/qede_rdma.c
index 1900bf7e67d1..9668e5e47d5f 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_rdma.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_rdma.c
@@ -50,6 +50,8 @@ static void _qede_rdma_dev_add(struct qede_dev *edev)
 	if (!qedr_drv)
 		return;
 
+	/* Leftovers from previous error recovery */
+	edev->rdma_info.exp_recovery = false;
 	edev->rdma_info.qedr_dev = qedr_drv->add(edev->cdev, edev->pdev,
 						 edev->ndev);
 }
@@ -87,21 +89,26 @@ static void qede_rdma_destroy_wq(struct qede_dev *edev)
 	destroy_workqueue(edev->rdma_info.rdma_wq);
 }
 
-int qede_rdma_dev_add(struct qede_dev *edev)
+int qede_rdma_dev_add(struct qede_dev *edev, enum qede_rdma_probe_mode mode)
 {
-	int rc = 0;
+	int rc;
 
-	if (qede_rdma_supported(edev)) {
-		rc = qede_rdma_create_wq(edev);
-		if (rc)
-			return rc;
+	if (!qede_rdma_supported(edev))
+		return 0;
 
-		INIT_LIST_HEAD(&edev->rdma_info.entry);
-		mutex_lock(&qedr_dev_list_lock);
-		list_add_tail(&edev->rdma_info.entry, &qedr_dev_list);
-		_qede_rdma_dev_add(edev);
-		mutex_unlock(&qedr_dev_list_lock);
-	}
+	/* Cannot start qedr while recovering since it wasn't fully stopped */
+	if (mode == QEDE_RDMA_PROBE_RECOVERY)
+		return 0;
+
+	rc = qede_rdma_create_wq(edev);
+	if (rc)
+		return rc;
+
+	INIT_LIST_HEAD(&edev->rdma_info.entry);
+	mutex_lock(&qedr_dev_list_lock);
+	list_add_tail(&edev->rdma_info.entry, &qedr_dev_list);
+	_qede_rdma_dev_add(edev);
+	mutex_unlock(&qedr_dev_list_lock);
 
 	return rc;
 }
@@ -110,19 +117,31 @@ static void _qede_rdma_dev_remove(struct qede_dev *edev)
 {
 	if (qedr_drv && qedr_drv->remove && edev->rdma_info.qedr_dev)
 		qedr_drv->remove(edev->rdma_info.qedr_dev);
-	edev->rdma_info.qedr_dev = NULL;
 }
 
-void qede_rdma_dev_remove(struct qede_dev *edev)
+void qede_rdma_dev_remove(struct qede_dev *edev,
+			  enum qede_rdma_remove_mode mode)
 {
 	if (!qede_rdma_supported(edev))
 		return;
 
-	qede_rdma_destroy_wq(edev);
-	mutex_lock(&qedr_dev_list_lock);
-	_qede_rdma_dev_remove(edev);
-	list_del(&edev->rdma_info.entry);
-	mutex_unlock(&qedr_dev_list_lock);
+	/* Cannot remove qedr while recovering since it wasn't fully stopped */
+	if (mode == QEDE_RDMA_REMOVE_NORMAL) {
+		qede_rdma_destroy_wq(edev);
+		mutex_lock(&qedr_dev_list_lock);
+		if (!edev->rdma_info.exp_recovery)
+			_qede_rdma_dev_remove(edev);
+		edev->rdma_info.qedr_dev = NULL;
+		list_del(&edev->rdma_info.entry);
+		mutex_unlock(&qedr_dev_list_lock);
+	} else {
+		if (!edev->rdma_info.exp_recovery) {
+			mutex_lock(&qedr_dev_list_lock);
+			_qede_rdma_dev_remove(edev);
+			mutex_unlock(&qedr_dev_list_lock);
+		}
+		edev->rdma_info.exp_recovery = true;
+	}
 }
 
 static void _qede_rdma_dev_open(struct qede_dev *edev)
@@ -204,7 +223,8 @@ void qede_rdma_unregister_driver(struct qedr_driver *drv)
 
 	mutex_lock(&qedr_dev_list_lock);
 	list_for_each_entry(edev, &qedr_dev_list, rdma_info.entry) {
-		if (edev->rdma_info.qedr_dev)
+		/* If device has experienced recovery it was already removed */
+		if (edev->rdma_info.qedr_dev && !edev->rdma_info.exp_recovery)
 			_qede_rdma_dev_remove(edev);
 	}
 	qedr_drv = NULL;
@@ -284,6 +304,10 @@ static void qede_rdma_add_event(struct qede_dev *edev,
 {
 	struct qede_rdma_event_work *event_node;
 
+	/* If a recovery was experienced avoid adding the event */
+	if (edev->rdma_info.exp_recovery)
+		return;
+
 	if (!edev->rdma_info.qedr_dev)
 		return;
 
diff --git a/include/linux/qed/qede_rdma.h b/include/linux/qed/qede_rdma.h
index 9904617a9730..e29d7199c10e 100644
--- a/include/linux/qed/qede_rdma.h
+++ b/include/linux/qed/qede_rdma.h
@@ -55,6 +55,16 @@ struct qede_rdma_event_work {
 	enum qede_rdma_event event;
 };
 
+enum qede_rdma_probe_mode {
+	QEDE_RDMA_PROBE_NORMAL,
+	QEDE_RDMA_PROBE_RECOVERY,
+};
+
+enum qede_rdma_remove_mode {
+	QEDE_RDMA_REMOVE_NORMAL,
+	QEDE_RDMA_REMOVE_RECOVERY,
+};
+
 struct qedr_driver {
 	unsigned char name[32];
 
@@ -74,21 +84,24 @@ void qede_rdma_unregister_driver(struct qedr_driver *drv);
 bool qede_rdma_supported(struct qede_dev *dev);
 
 #if IS_ENABLED(CONFIG_QED_RDMA)
-int qede_rdma_dev_add(struct qede_dev *dev);
+int qede_rdma_dev_add(struct qede_dev *dev, enum qede_rdma_probe_mode mode);
 void qede_rdma_dev_event_open(struct qede_dev *dev);
 void qede_rdma_dev_event_close(struct qede_dev *dev);
-void qede_rdma_dev_remove(struct qede_dev *dev);
+void qede_rdma_dev_remove(struct qede_dev *dev,
+			  enum qede_rdma_remove_mode mode);
 void qede_rdma_event_changeaddr(struct qede_dev *edr);
 
 #else
-static inline int qede_rdma_dev_add(struct qede_dev *dev)
+static inline int qede_rdma_dev_add(struct qede_dev *dev,
+				    enum qede_rdma_probe_mode mode)
 {
 	return 0;
 }
 
 static inline void qede_rdma_dev_event_open(struct qede_dev *dev) {}
 static inline void qede_rdma_dev_event_close(struct qede_dev *dev) {}
-static inline void qede_rdma_dev_remove(struct qede_dev *dev) {}
+static inline void qede_rdma_dev_remove(struct qede_dev *dev,
+					enum qede_rdma_remove_mode mode) {}
 static inline void qede_rdma_event_changeaddr(struct qede_dev *edr) {}
 #endif
 #endif
-- 
cgit v1.2.3-71-gd317


From 8367de2c99a13d35960a51d6084631c883e93a4d Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Thu, 24 Jan 2019 18:20:14 +0900
Subject: block: Fix comment typo

Fix typo in REQ_OP_ZONE_RESET description.

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk_types.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 5c7e7f859a24..d66bf5f32610 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -287,7 +287,7 @@ enum req_opf {
 	REQ_OP_DISCARD		= 3,
 	/* securely erase sectors */
 	REQ_OP_SECURE_ERASE	= 5,
-	/* seset a zone write pointer */
+	/* reset a zone write pointer */
 	REQ_OP_ZONE_RESET	= 6,
 	/* write the same sector many times */
 	REQ_OP_WRITE_SAME	= 7,
-- 
cgit v1.2.3-71-gd317


From 3b707c3008cad04604c1f50e39f456621821c414 Mon Sep 17 00:00:00 2001
From: Maciej Żenczykowski <maze@google.com>
Date: Thu, 24 Jan 2019 03:07:02 -0800
Subject: net: dev_is_mac_header_xmit() true for ARPHRD_RAWIP
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

__bpf_redirect() and act_mirred checks this boolean
to determine whether to prefix an ethernet header.

Signed-off-by: Maciej Żenczykowski <maze@google.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_arp.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h
index 6756fea18b69..e44746de95cd 100644
--- a/include/linux/if_arp.h
+++ b/include/linux/if_arp.h
@@ -54,6 +54,7 @@ static inline bool dev_is_mac_header_xmit(const struct net_device *dev)
 	case ARPHRD_IPGRE:
 	case ARPHRD_VOID:
 	case ARPHRD_NONE:
+	case ARPHRD_RAWIP:
 		return false;
 	default:
 		return true;
-- 
cgit v1.2.3-71-gd317


From abfd04f738c2625f63e04c8fc7cadb3b7a70d580 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 25 Jan 2019 15:32:28 -0800
Subject: qed: Revert error handling changes.

This is new code and not bug fixes.

This reverts all changes added by merge commit
8fb18be93efd7292d6ee403b9f61af1008239639

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed.h          |   5 +-
 drivers/net/ethernet/qlogic/qed/qed_dev.c      | 158 ++++++-------
 drivers/net/ethernet/qlogic/qed/qed_dev_api.h  |  12 -
 drivers/net/ethernet/qlogic/qed/qed_hsi.h      |   2 +-
 drivers/net/ethernet/qlogic/qed/qed_hw.c       |  11 -
 drivers/net/ethernet/qlogic/qed/qed_int.c      | 126 +++++------
 drivers/net/ethernet/qlogic/qed/qed_int.h      |   3 -
 drivers/net/ethernet/qlogic/qed/qed_main.c     |  30 ---
 drivers/net/ethernet/qlogic/qed/qed_mcp.c      | 115 ----------
 drivers/net/ethernet/qlogic/qed/qed_mcp.h      |  42 ----
 drivers/net/ethernet/qlogic/qed/qed_reg_addr.h |   2 -
 drivers/net/ethernet/qlogic/qed/qed_spq.c      |  22 --
 drivers/net/ethernet/qlogic/qed/qed_sriov.c    |   9 +-
 drivers/net/ethernet/qlogic/qede/qede.h        |   3 -
 drivers/net/ethernet/qlogic/qede/qede_main.c   | 300 +++++--------------------
 drivers/net/ethernet/qlogic/qede/qede_rdma.c   |  64 ++----
 include/linux/qed/qed_if.h                     |  20 --
 include/linux/qed/qede_rdma.h                  |  21 +-
 18 files changed, 202 insertions(+), 743 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index 3b0955d34716..24a90163775e 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -554,6 +554,7 @@ struct qed_hwfn {
 	u8				dp_level;
 	char				name[NAME_SIZE];
 
+	bool				first_on_engine;
 	bool				hw_init_done;
 
 	u8				num_funcs_on_engine;
@@ -804,9 +805,6 @@ struct qed_dev {
 
 	u32				mcp_nvm_resp;
 
-	/* Recovery */
-	bool recov_in_prog;
-
 	/* Linux specific here */
 	struct  qede_dev		*edev;
 	struct  pci_dev			*pdev;
@@ -946,7 +944,6 @@ void qed_link_update(struct qed_hwfn *hwfn, struct qed_ptt *ptt);
 u32 qed_unzip_data(struct qed_hwfn *p_hwfn,
 		   u32 input_len, u8 *input_buf,
 		   u32 max_size, u8 *unzip_buf);
-void qed_schedule_recovery_handler(struct qed_hwfn *p_hwfn);
 void qed_get_protocol_stats(struct qed_dev *cdev,
 			    enum qed_mcp_protocol_type type,
 			    union qed_mcp_protocol_stats *stats);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index b17003d9066c..8f6551421945 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -1959,6 +1959,11 @@ static int qed_hw_init_pf(struct qed_hwfn *p_hwfn,
 		     (p_hwfn->hw_info.personality == QED_PCI_FCOE) ? 1 : 0);
 	STORE_RT_REG(p_hwfn, PRS_REG_SEARCH_ROCE_RT_OFFSET, 0);
 
+	/* Cleanup chip from previous driver if such remains exist */
+	rc = qed_final_cleanup(p_hwfn, p_ptt, rel_pf_id, false);
+	if (rc)
+		return rc;
+
 	/* Sanity check before the PF init sequence that uses DMAE */
 	rc = qed_dmae_sanity(p_hwfn, p_ptt, "pf_phase");
 	if (rc)
@@ -2002,15 +2007,17 @@ static int qed_hw_init_pf(struct qed_hwfn *p_hwfn,
 	return rc;
 }
 
-int qed_pglueb_set_pfid_enable(struct qed_hwfn *p_hwfn,
-			       struct qed_ptt *p_ptt, bool b_enable)
+static int qed_change_pci_hwfn(struct qed_hwfn *p_hwfn,
+			       struct qed_ptt *p_ptt,
+			       u8 enable)
 {
-	u32 delay_idx = 0, val, set_val = b_enable ? 1 : 0;
+	u32 delay_idx = 0, val, set_val = enable ? 1 : 0;
 
-	/* Configure the PF's internal FID_enable for master transactions */
-	qed_wr(p_hwfn, p_ptt, PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER, set_val);
+	/* Change PF in PXP */
+	qed_wr(p_hwfn, p_ptt,
+	       PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER, set_val);
 
-	/* Wait until value is set - try for 1 second every 50us */
+	/* wait until value is set - try for 1 second every 50us */
 	for (delay_idx = 0; delay_idx < 20000; delay_idx++) {
 		val = qed_rd(p_hwfn, p_ptt,
 			     PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER);
@@ -2064,19 +2071,13 @@ static int qed_vf_start(struct qed_hwfn *p_hwfn,
 	return 0;
 }
 
-static void qed_pglueb_clear_err(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
-{
-	qed_wr(p_hwfn, p_ptt, PGLUE_B_REG_WAS_ERROR_PF_31_0_CLR,
-	       BIT(p_hwfn->abs_pf_id));
-}
-
 int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params)
 {
 	struct qed_load_req_params load_req_params;
 	u32 load_code, resp, param, drv_mb_param;
 	bool b_default_mtu = true;
 	struct qed_hwfn *p_hwfn;
-	int rc = 0, i;
+	int rc = 0, mfw_rc, i;
 	u16 ether_type;
 
 	if ((p_params->int_mode == QED_INT_MODE_MSI) && (cdev->num_hwfns > 1)) {
@@ -2091,7 +2092,7 @@ int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params)
 	}
 
 	for_each_hwfn(cdev, i) {
-		p_hwfn = &cdev->hwfns[i];
+		struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
 
 		/* If management didn't provide a default, set one of our own */
 		if (!p_hwfn->hw_info.mtu) {
@@ -2104,6 +2105,9 @@ int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params)
 			continue;
 		}
 
+		/* Enable DMAE in PXP */
+		rc = qed_change_pci_hwfn(p_hwfn, p_hwfn->p_main_ptt, true);
+
 		rc = qed_calc_hw_mode(p_hwfn);
 		if (rc)
 			return rc;
@@ -2140,43 +2144,12 @@ int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params)
 			   "Load request was sent. Load code: 0x%x\n",
 			   load_code);
 
-		/* Only relevant for recovery:
-		 * Clear the indication after LOAD_REQ is responded by the MFW.
-		 */
-		cdev->recov_in_prog = false;
-
 		qed_mcp_set_capabilities(p_hwfn, p_hwfn->p_main_ptt);
 
 		qed_reset_mb_shadow(p_hwfn, p_hwfn->p_main_ptt);
 
-		/* Clean up chip from previous driver if such remains exist.
-		 * This is not needed when the PF is the first one on the
-		 * engine, since afterwards we are going to init the FW.
-		 */
-		if (load_code != FW_MSG_CODE_DRV_LOAD_ENGINE) {
-			rc = qed_final_cleanup(p_hwfn, p_hwfn->p_main_ptt,
-					       p_hwfn->rel_pf_id, false);
-			if (rc) {
-				DP_NOTICE(p_hwfn, "Final cleanup failed\n");
-				goto load_err;
-			}
-		}
-
-		/* Log and clear previous pglue_b errors if such exist */
-		qed_pglueb_rbc_attn_handler(p_hwfn, p_hwfn->p_main_ptt);
-
-		/* Enable the PF's internal FID_enable in the PXP */
-		rc = qed_pglueb_set_pfid_enable(p_hwfn, p_hwfn->p_main_ptt,
-						true);
-		if (rc)
-			goto load_err;
-
-		/* Clear the pglue_b was_error indication.
-		 * In E4 it must be done after the BME and the internal
-		 * FID_enable for the PF are set, since VDMs may cause the
-		 * indication to be set again.
-		 */
-		qed_pglueb_clear_err(p_hwfn, p_hwfn->p_main_ptt);
+		p_hwfn->first_on_engine = (load_code ==
+					   FW_MSG_CODE_DRV_LOAD_ENGINE);
 
 		switch (load_code) {
 		case FW_MSG_CODE_DRV_LOAD_ENGINE:
@@ -2207,29 +2180,39 @@ int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params)
 			break;
 		}
 
-		if (rc) {
+		if (rc)
 			DP_NOTICE(p_hwfn,
 				  "init phase failed for loadcode 0x%x (rc %d)\n",
-				  load_code, rc);
-			goto load_err;
-		}
+				   load_code, rc);
 
-		rc = qed_mcp_load_done(p_hwfn, p_hwfn->p_main_ptt);
+		/* ACK mfw regardless of success or failure of initialization */
+		mfw_rc = qed_mcp_cmd(p_hwfn, p_hwfn->p_main_ptt,
+				     DRV_MSG_CODE_LOAD_DONE,
+				     0, &load_code, &param);
 		if (rc)
 			return rc;
+		if (mfw_rc) {
+			DP_NOTICE(p_hwfn, "Failed sending LOAD_DONE command\n");
+			return mfw_rc;
+		}
+
+		/* Check if there is a DID mismatch between nvm-cfg/efuse */
+		if (param & FW_MB_PARAM_LOAD_DONE_DID_EFUSE_ERROR)
+			DP_NOTICE(p_hwfn,
+				  "warning: device configuration is not supported on this board type. The device may not function as expected.\n");
 
 		/* send DCBX attention request command */
 		DP_VERBOSE(p_hwfn,
 			   QED_MSG_DCB,
 			   "sending phony dcbx set command to trigger DCBx attention handling\n");
-		rc = qed_mcp_cmd(p_hwfn, p_hwfn->p_main_ptt,
-				 DRV_MSG_CODE_SET_DCBX,
-				 1 << DRV_MB_PARAM_DCBX_NOTIFY_SHIFT,
-				 &resp, &param);
-		if (rc) {
+		mfw_rc = qed_mcp_cmd(p_hwfn, p_hwfn->p_main_ptt,
+				     DRV_MSG_CODE_SET_DCBX,
+				     1 << DRV_MB_PARAM_DCBX_NOTIFY_SHIFT,
+				     &load_code, &param);
+		if (mfw_rc) {
 			DP_NOTICE(p_hwfn,
 				  "Failed to send DCBX attention request\n");
-			return rc;
+			return mfw_rc;
 		}
 
 		p_hwfn->hw_init_done = true;
@@ -2278,12 +2261,6 @@ int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params)
 	}
 
 	return 0;
-
-load_err:
-	/* The MFW load lock should be released also when initialization fails.
-	 */
-	qed_mcp_load_done(p_hwfn, p_hwfn->p_main_ptt);
-	return rc;
 }
 
 #define QED_HW_STOP_RETRY_LIMIT (10)
@@ -2296,9 +2273,6 @@ static void qed_hw_timers_stop(struct qed_dev *cdev,
 	qed_wr(p_hwfn, p_ptt, TM_REG_PF_ENABLE_CONN, 0x0);
 	qed_wr(p_hwfn, p_ptt, TM_REG_PF_ENABLE_TASK, 0x0);
 
-	if (cdev->recov_in_prog)
-		return;
-
 	for (i = 0; i < QED_HW_STOP_RETRY_LIMIT; i++) {
 		if ((!qed_rd(p_hwfn, p_ptt,
 			     TM_REG_PF_SCAN_ACTIVE_CONN)) &&
@@ -2361,14 +2335,12 @@ int qed_hw_stop(struct qed_dev *cdev)
 		p_hwfn->hw_init_done = false;
 
 		/* Send unload command to MCP */
-		if (!cdev->recov_in_prog) {
-			rc = qed_mcp_unload_req(p_hwfn, p_ptt);
-			if (rc) {
-				DP_NOTICE(p_hwfn,
-					  "Failed sending a UNLOAD_REQ command. rc = %d.\n",
-					  rc);
-				rc2 = -EINVAL;
-			}
+		rc = qed_mcp_unload_req(p_hwfn, p_ptt);
+		if (rc) {
+			DP_NOTICE(p_hwfn,
+				  "Failed sending a UNLOAD_REQ command. rc = %d.\n",
+				  rc);
+			rc2 = -EINVAL;
 		}
 
 		qed_slowpath_irq_sync(p_hwfn);
@@ -2410,31 +2382,27 @@ int qed_hw_stop(struct qed_dev *cdev)
 		qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_DB_ENABLE, 0);
 		qed_wr(p_hwfn, p_ptt, QM_REG_PF_EN, 0);
 
-		if (!cdev->recov_in_prog) {
-			rc = qed_mcp_unload_done(p_hwfn, p_ptt);
-			if (rc) {
-				DP_NOTICE(p_hwfn,
-					  "Failed sending a UNLOAD_DONE command. rc = %d.\n",
-					  rc);
-				rc2 = -EINVAL;
-			}
+		qed_mcp_unload_done(p_hwfn, p_ptt);
+		if (rc) {
+			DP_NOTICE(p_hwfn,
+				  "Failed sending a UNLOAD_DONE command. rc = %d.\n",
+				  rc);
+			rc2 = -EINVAL;
 		}
 	}
 
-	if (IS_PF(cdev) && !cdev->recov_in_prog) {
+	if (IS_PF(cdev)) {
 		p_hwfn = QED_LEADING_HWFN(cdev);
 		p_ptt = QED_LEADING_HWFN(cdev)->p_main_ptt;
 
-		/* Clear the PF's internal FID_enable in the PXP.
-		 * In CMT this should only be done for first hw-function, and
-		 * only after all transactions have stopped for all active
-		 * hw-functions.
+		/* Disable DMAE in PXP - in CMT, this should only be done for
+		 * first hw-function, and only after all transactions have
+		 * stopped for all active hw-functions.
 		 */
-		rc = qed_pglueb_set_pfid_enable(p_hwfn, p_ptt, false);
+		rc = qed_change_pci_hwfn(p_hwfn, p_ptt, false);
 		if (rc) {
 			DP_NOTICE(p_hwfn,
-				  "qed_pglueb_set_pfid_enable() failed. rc = %d.\n",
-				  rc);
+				  "qed_change_pci_hwfn failed. rc = %d.\n", rc);
 			rc2 = -EINVAL;
 		}
 	}
@@ -2534,8 +2502,9 @@ static void qed_hw_hwfn_prepare(struct qed_hwfn *p_hwfn)
 		       PGLUE_B_REG_PGL_ADDR_94_F0_BB, 0);
 	}
 
-	/* Clean previous pglue_b errors if such exist */
-	qed_pglueb_clear_err(p_hwfn, p_hwfn->p_main_ptt);
+	/* Clean Previous errors if such exist */
+	qed_wr(p_hwfn, p_hwfn->p_main_ptt,
+	       PGLUE_B_REG_WAS_ERROR_PF_31_0_CLR, 1 << p_hwfn->abs_pf_id);
 
 	/* enable internal target-read */
 	qed_wr(p_hwfn, p_hwfn->p_main_ptt,
@@ -3471,7 +3440,6 @@ static int qed_hw_prepare_single(struct qed_hwfn *p_hwfn,
 				 void __iomem *p_doorbells,
 				 enum qed_pci_personality personality)
 {
-	struct qed_dev *cdev = p_hwfn->cdev;
 	int rc = 0;
 
 	/* Split PCI bars evenly between hwfns */
@@ -3524,7 +3492,7 @@ static int qed_hw_prepare_single(struct qed_hwfn *p_hwfn,
 	/* Sending a mailbox to the MFW should be done after qed_get_hw_info()
 	 * is called as it sets the ports number in an engine.
 	 */
-	if (IS_LEAD_HWFN(p_hwfn) && !cdev->recov_in_prog) {
+	if (IS_LEAD_HWFN(p_hwfn)) {
 		rc = qed_mcp_initiate_pf_flr(p_hwfn, p_hwfn->p_main_ptt);
 		if (rc)
 			DP_NOTICE(p_hwfn, "Failed to initiate PF FLR\n");
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
index e4b4e3b78e8a..acccd85170aa 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
@@ -472,18 +472,6 @@ int qed_get_queue_coalesce(struct qed_hwfn *p_hwfn, u16 *coal, void *handle);
 int
 qed_set_queue_coalesce(u16 rx_coal, u16 tx_coal, void *p_handle);
 
-/**
- * @brief qed_pglueb_set_pfid_enable - Enable or disable PCI BUS MASTER
- *
- * @param p_hwfn
- * @param p_ptt
- * @param b_enable - true/false
- *
- * @return int
- */
-int qed_pglueb_set_pfid_enable(struct qed_hwfn *p_hwfn,
-			       struct qed_ptt *p_ptt, bool b_enable);
-
 /**
  * @brief db_recovery_add - add doorbell information to the doorbell
  * recovery mechanism.
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index 417121e74ee9..b13cfb449d8f 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -12827,7 +12827,7 @@ enum MFW_DRV_MSG_TYPE {
 	MFW_DRV_MSG_LLDP_DATA_UPDATED,
 	MFW_DRV_MSG_DCBX_REMOTE_MIB_UPDATED,
 	MFW_DRV_MSG_DCBX_OPERATIONAL_MIB_UPDATED,
-	MFW_DRV_MSG_ERROR_RECOVERY,
+	MFW_DRV_MSG_RESERVED4,
 	MFW_DRV_MSG_BW_UPDATE,
 	MFW_DRV_MSG_S_TAG_UPDATE,
 	MFW_DRV_MSG_GET_LAN_STATS,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hw.c b/drivers/net/ethernet/qlogic/qed/qed_hw.c
index 72ec1c6bdf70..70504dcf4087 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hw.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_hw.c
@@ -703,17 +703,6 @@ static int qed_dmae_execute_command(struct qed_hwfn *p_hwfn,
 	int qed_status = 0;
 	u32 offset = 0;
 
-	if (p_hwfn->cdev->recov_in_prog) {
-		DP_VERBOSE(p_hwfn,
-			   NETIF_MSG_HW,
-			   "Recovery is in progress. Avoid DMAE transaction [{src: addr 0x%llx, type %d}, {dst: addr 0x%llx, type %d}, size %d].\n",
-			   src_addr, src_type, dst_addr, dst_type,
-			   size_in_dwords);
-
-		/* Let the flow complete w/o any error handling */
-		return 0;
-	}
-
 	qed_dmae_opcode(p_hwfn,
 			(src_type == QED_DMAE_ADDRESS_GRC),
 			(dst_type == QED_DMAE_ADDRESS_GRC),
diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c
index e23980e301b6..92340919d852 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_int.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_int.c
@@ -255,114 +255,112 @@ out:
 #define PGLUE_ATTENTION_ICPL_VALID		(1 << 23)
 #define PGLUE_ATTENTION_ZLR_VALID		(1 << 25)
 #define PGLUE_ATTENTION_ILT_VALID		(1 << 23)
-
-int qed_pglueb_rbc_attn_handler(struct qed_hwfn *p_hwfn,
-				struct qed_ptt *p_ptt)
+static int qed_pglub_rbc_attn_cb(struct qed_hwfn *p_hwfn)
 {
 	u32 tmp;
 
-	tmp = qed_rd(p_hwfn, p_ptt, PGLUE_B_REG_TX_ERR_WR_DETAILS2);
+	tmp = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
+		     PGLUE_B_REG_TX_ERR_WR_DETAILS2);
 	if (tmp & PGLUE_ATTENTION_VALID) {
 		u32 addr_lo, addr_hi, details;
 
-		addr_lo = qed_rd(p_hwfn, p_ptt,
+		addr_lo = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
 				 PGLUE_B_REG_TX_ERR_WR_ADD_31_0);
-		addr_hi = qed_rd(p_hwfn, p_ptt,
+		addr_hi = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
 				 PGLUE_B_REG_TX_ERR_WR_ADD_63_32);
-		details = qed_rd(p_hwfn, p_ptt,
+		details = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
 				 PGLUE_B_REG_TX_ERR_WR_DETAILS);
 
-		DP_NOTICE(p_hwfn,
-			  "Illegal write by chip to [%08x:%08x] blocked.\n"
-			  "Details: %08x [PFID %02x, VFID %02x, VF_VALID %02x]\n"
-			  "Details2 %08x [Was_error %02x BME deassert %02x FID_enable deassert %02x]\n",
-			  addr_hi, addr_lo, details,
-			  (u8)GET_FIELD(details, PGLUE_ATTENTION_DETAILS_PFID),
-			  (u8)GET_FIELD(details, PGLUE_ATTENTION_DETAILS_VFID),
-			  GET_FIELD(details,
-				    PGLUE_ATTENTION_DETAILS_VF_VALID) ? 1 : 0,
-			  tmp,
-			  GET_FIELD(tmp,
-				    PGLUE_ATTENTION_DETAILS2_WAS_ERR) ? 1 : 0,
-			  GET_FIELD(tmp,
-				    PGLUE_ATTENTION_DETAILS2_BME) ? 1 : 0,
-			  GET_FIELD(tmp,
-				    PGLUE_ATTENTION_DETAILS2_FID_EN) ? 1 : 0);
+		DP_INFO(p_hwfn,
+			"Illegal write by chip to [%08x:%08x] blocked.\n"
+			"Details: %08x [PFID %02x, VFID %02x, VF_VALID %02x]\n"
+			"Details2 %08x [Was_error %02x BME deassert %02x FID_enable deassert %02x]\n",
+			addr_hi, addr_lo, details,
+			(u8)GET_FIELD(details, PGLUE_ATTENTION_DETAILS_PFID),
+			(u8)GET_FIELD(details, PGLUE_ATTENTION_DETAILS_VFID),
+			GET_FIELD(details,
+				  PGLUE_ATTENTION_DETAILS_VF_VALID) ? 1 : 0,
+			tmp,
+			GET_FIELD(tmp,
+				  PGLUE_ATTENTION_DETAILS2_WAS_ERR) ? 1 : 0,
+			GET_FIELD(tmp,
+				  PGLUE_ATTENTION_DETAILS2_BME) ? 1 : 0,
+			GET_FIELD(tmp,
+				  PGLUE_ATTENTION_DETAILS2_FID_EN) ? 1 : 0);
 	}
 
-	tmp = qed_rd(p_hwfn, p_ptt, PGLUE_B_REG_TX_ERR_RD_DETAILS2);
+	tmp = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
+		     PGLUE_B_REG_TX_ERR_RD_DETAILS2);
 	if (tmp & PGLUE_ATTENTION_RD_VALID) {
 		u32 addr_lo, addr_hi, details;
 
-		addr_lo = qed_rd(p_hwfn, p_ptt,
+		addr_lo = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
 				 PGLUE_B_REG_TX_ERR_RD_ADD_31_0);
-		addr_hi = qed_rd(p_hwfn, p_ptt,
+		addr_hi = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
 				 PGLUE_B_REG_TX_ERR_RD_ADD_63_32);
-		details = qed_rd(p_hwfn, p_ptt,
+		details = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
 				 PGLUE_B_REG_TX_ERR_RD_DETAILS);
 
-		DP_NOTICE(p_hwfn,
-			  "Illegal read by chip from [%08x:%08x] blocked.\n"
-			  "Details: %08x [PFID %02x, VFID %02x, VF_VALID %02x]\n"
-			  "Details2 %08x [Was_error %02x BME deassert %02x FID_enable deassert %02x]\n",
-			  addr_hi, addr_lo, details,
-			  (u8)GET_FIELD(details, PGLUE_ATTENTION_DETAILS_PFID),
-			  (u8)GET_FIELD(details, PGLUE_ATTENTION_DETAILS_VFID),
-			  GET_FIELD(details,
-				    PGLUE_ATTENTION_DETAILS_VF_VALID) ? 1 : 0,
-			  tmp,
-			  GET_FIELD(tmp,
-				    PGLUE_ATTENTION_DETAILS2_WAS_ERR) ? 1 : 0,
-			  GET_FIELD(tmp,
-				    PGLUE_ATTENTION_DETAILS2_BME) ? 1 : 0,
-			  GET_FIELD(tmp,
-				    PGLUE_ATTENTION_DETAILS2_FID_EN) ? 1 : 0);
+		DP_INFO(p_hwfn,
+			"Illegal read by chip from [%08x:%08x] blocked.\n"
+			" Details: %08x [PFID %02x, VFID %02x, VF_VALID %02x]\n"
+			" Details2 %08x [Was_error %02x BME deassert %02x FID_enable deassert %02x]\n",
+			addr_hi, addr_lo, details,
+			(u8)GET_FIELD(details, PGLUE_ATTENTION_DETAILS_PFID),
+			(u8)GET_FIELD(details, PGLUE_ATTENTION_DETAILS_VFID),
+			GET_FIELD(details,
+				  PGLUE_ATTENTION_DETAILS_VF_VALID) ? 1 : 0,
+			tmp,
+			GET_FIELD(tmp, PGLUE_ATTENTION_DETAILS2_WAS_ERR) ? 1
+									 : 0,
+			GET_FIELD(tmp, PGLUE_ATTENTION_DETAILS2_BME) ? 1 : 0,
+			GET_FIELD(tmp, PGLUE_ATTENTION_DETAILS2_FID_EN) ? 1
+									: 0);
 	}
 
-	tmp = qed_rd(p_hwfn, p_ptt, PGLUE_B_REG_TX_ERR_WR_DETAILS_ICPL);
+	tmp = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
+		     PGLUE_B_REG_TX_ERR_WR_DETAILS_ICPL);
 	if (tmp & PGLUE_ATTENTION_ICPL_VALID)
-		DP_NOTICE(p_hwfn, "ICPL error - %08x\n", tmp);
+		DP_INFO(p_hwfn, "ICPL error - %08x\n", tmp);
 
-	tmp = qed_rd(p_hwfn, p_ptt, PGLUE_B_REG_MASTER_ZLR_ERR_DETAILS);
+	tmp = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
+		     PGLUE_B_REG_MASTER_ZLR_ERR_DETAILS);
 	if (tmp & PGLUE_ATTENTION_ZLR_VALID) {
 		u32 addr_hi, addr_lo;
 
-		addr_lo = qed_rd(p_hwfn, p_ptt,
+		addr_lo = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
 				 PGLUE_B_REG_MASTER_ZLR_ERR_ADD_31_0);
-		addr_hi = qed_rd(p_hwfn, p_ptt,
+		addr_hi = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
 				 PGLUE_B_REG_MASTER_ZLR_ERR_ADD_63_32);
 
-		DP_NOTICE(p_hwfn, "ZLR error - %08x [Address %08x:%08x]\n",
-			  tmp, addr_hi, addr_lo);
+		DP_INFO(p_hwfn, "ZLR eror - %08x [Address %08x:%08x]\n",
+			tmp, addr_hi, addr_lo);
 	}
 
-	tmp = qed_rd(p_hwfn, p_ptt, PGLUE_B_REG_VF_ILT_ERR_DETAILS2);
+	tmp = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
+		     PGLUE_B_REG_VF_ILT_ERR_DETAILS2);
 	if (tmp & PGLUE_ATTENTION_ILT_VALID) {
 		u32 addr_hi, addr_lo, details;
 
-		addr_lo = qed_rd(p_hwfn, p_ptt,
+		addr_lo = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
 				 PGLUE_B_REG_VF_ILT_ERR_ADD_31_0);
-		addr_hi = qed_rd(p_hwfn, p_ptt,
+		addr_hi = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
 				 PGLUE_B_REG_VF_ILT_ERR_ADD_63_32);
-		details = qed_rd(p_hwfn, p_ptt,
+		details = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
 				 PGLUE_B_REG_VF_ILT_ERR_DETAILS);
 
-		DP_NOTICE(p_hwfn,
-			  "ILT error - Details %08x Details2 %08x [Address %08x:%08x]\n",
-			  details, tmp, addr_hi, addr_lo);
+		DP_INFO(p_hwfn,
+			"ILT error - Details %08x Details2 %08x [Address %08x:%08x]\n",
+			details, tmp, addr_hi, addr_lo);
 	}
 
 	/* Clear the indications */
-	qed_wr(p_hwfn, p_ptt, PGLUE_B_REG_LATCHED_ERRORS_CLR, BIT(2));
+	qed_wr(p_hwfn, p_hwfn->p_dpc_ptt,
+	       PGLUE_B_REG_LATCHED_ERRORS_CLR, (1 << 2));
 
 	return 0;
 }
 
-static int qed_pglueb_rbc_attn_cb(struct qed_hwfn *p_hwfn)
-{
-	return qed_pglueb_rbc_attn_handler(p_hwfn, p_hwfn->p_dpc_ptt);
-}
-
 #define QED_DORQ_ATTENTION_REASON_MASK  (0xfffff)
 #define QED_DORQ_ATTENTION_OPAQUE_MASK  (0xffff)
 #define QED_DORQ_ATTENTION_OPAQUE_SHIFT (0x0)
@@ -542,7 +540,7 @@ static struct aeu_invert_reg aeu_descs[NUM_ATTN_REGS] = {
 			{"PGLUE misc_flr", ATTENTION_SINGLE,
 			 NULL, MAX_BLOCK_ID},
 			{"PGLUE B RBC", ATTENTION_PAR_INT,
-			 qed_pglueb_rbc_attn_cb, BLOCK_PGLUE_B},
+			 qed_pglub_rbc_attn_cb, BLOCK_PGLUE_B},
 			{"PGLUE misc_mctp", ATTENTION_SINGLE,
 			 NULL, MAX_BLOCK_ID},
 			{"Flash event", ATTENTION_SINGLE, NULL, MAX_BLOCK_ID},
diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.h b/drivers/net/ethernet/qlogic/qed/qed_int.h
index 1f356ed4f761..d81a62ebd524 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_int.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_int.h
@@ -431,7 +431,4 @@ int qed_int_set_timer_res(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
 
 #define QED_MAPPING_MEMORY_SIZE(dev)	(NUM_OF_SBS(dev))
 
-int qed_pglueb_rbc_attn_handler(struct qed_hwfn *p_hwfn,
-				struct qed_ptt *p_ptt);
-
 #endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index b47352643fb5..6adf5bda9811 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -359,8 +359,6 @@ static struct qed_dev *qed_probe(struct pci_dev *pdev,
 
 	qed_init_dp(cdev, params->dp_module, params->dp_level);
 
-	cdev->recov_in_prog = params->recov_in_prog;
-
 	rc = qed_init_pci(cdev, pdev);
 	if (rc) {
 		DP_ERR(cdev, "init pci failed\n");
@@ -2205,15 +2203,6 @@ static int qed_nvm_get_image(struct qed_dev *cdev, enum qed_nvm_images type,
 	return qed_mcp_get_nvm_image(hwfn, type, buf, len);
 }
 
-void qed_schedule_recovery_handler(struct qed_hwfn *p_hwfn)
-{
-	struct qed_common_cb_ops *ops = p_hwfn->cdev->protocol_ops.common;
-	void *cookie = p_hwfn->cdev->ops_cookie;
-
-	if (ops && ops->schedule_recovery_handler)
-		ops->schedule_recovery_handler(cookie);
-}
-
 static int qed_set_coalesce(struct qed_dev *cdev, u16 rx_coal, u16 tx_coal,
 			    void *handle)
 {
@@ -2237,23 +2226,6 @@ static int qed_set_led(struct qed_dev *cdev, enum qed_led_mode mode)
 	return status;
 }
 
-static int qed_recovery_process(struct qed_dev *cdev)
-{
-	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
-	struct qed_ptt *p_ptt;
-	int rc = 0;
-
-	p_ptt = qed_ptt_acquire(p_hwfn);
-	if (!p_ptt)
-		return -EAGAIN;
-
-	rc = qed_start_recovery_process(p_hwfn, p_ptt);
-
-	qed_ptt_release(p_hwfn, p_ptt);
-
-	return rc;
-}
-
 static int qed_update_wol(struct qed_dev *cdev, bool enabled)
 {
 	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
@@ -2408,8 +2380,6 @@ const struct qed_common_ops qed_common_ops_pass = {
 	.nvm_get_image = &qed_nvm_get_image,
 	.set_coalesce = &qed_set_coalesce,
 	.set_led = &qed_set_led,
-	.recovery_process = &qed_recovery_process,
-	.recovery_prolog = &qed_recovery_prolog,
 	.update_drv_state = &qed_update_drv_state,
 	.update_mac = &qed_update_mac,
 	.update_mtu = &qed_update_mtu,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index bb8541847aa5..e7f18e34ff0d 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -1070,27 +1070,6 @@ int qed_mcp_load_req(struct qed_hwfn *p_hwfn,
 	return 0;
 }
 
-int qed_mcp_load_done(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
-{
-	u32 resp = 0, param = 0;
-	int rc;
-
-	rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_LOAD_DONE, 0, &resp,
-			 &param);
-	if (rc) {
-		DP_NOTICE(p_hwfn,
-			  "Failed to send a LOAD_DONE command, rc = %d\n", rc);
-		return rc;
-	}
-
-	/* Check if there is a DID mismatch between nvm-cfg/efuse */
-	if (param & FW_MB_PARAM_LOAD_DONE_DID_EFUSE_ERROR)
-		DP_NOTICE(p_hwfn,
-			  "warning: device configuration is not supported on this board type. The device may not function as expected.\n");
-
-	return 0;
-}
-
 int qed_mcp_unload_req(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	struct qed_mcp_mb_params mb_params;
@@ -1549,60 +1528,6 @@ int qed_mcp_set_link(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, bool b_up)
 	return 0;
 }
 
-u32 qed_get_process_kill_counter(struct qed_hwfn *p_hwfn,
-				 struct qed_ptt *p_ptt)
-{
-	u32 path_offsize_addr, path_offsize, path_addr, proc_kill_cnt;
-
-	if (IS_VF(p_hwfn->cdev))
-		return -EINVAL;
-
-	path_offsize_addr = SECTION_OFFSIZE_ADDR(p_hwfn->mcp_info->public_base,
-						 PUBLIC_PATH);
-	path_offsize = qed_rd(p_hwfn, p_ptt, path_offsize_addr);
-	path_addr = SECTION_ADDR(path_offsize, QED_PATH_ID(p_hwfn));
-
-	proc_kill_cnt = qed_rd(p_hwfn, p_ptt,
-			       path_addr +
-			       offsetof(struct public_path, process_kill)) &
-			PROCESS_KILL_COUNTER_MASK;
-
-	return proc_kill_cnt;
-}
-
-static void qed_mcp_handle_process_kill(struct qed_hwfn *p_hwfn,
-					struct qed_ptt *p_ptt)
-{
-	struct qed_dev *cdev = p_hwfn->cdev;
-	u32 proc_kill_cnt;
-
-	/* Prevent possible attentions/interrupts during the recovery handling
-	 * and till its load phase, during which they will be re-enabled.
-	 */
-	qed_int_igu_disable_int(p_hwfn, p_ptt);
-
-	DP_NOTICE(p_hwfn, "Received a process kill indication\n");
-
-	/* The following operations should be done once, and thus in CMT mode
-	 * are carried out by only the first HW function.
-	 */
-	if (p_hwfn != QED_LEADING_HWFN(cdev))
-		return;
-
-	if (cdev->recov_in_prog) {
-		DP_NOTICE(p_hwfn,
-			  "Ignoring the indication since a recovery process is already in progress\n");
-		return;
-	}
-
-	cdev->recov_in_prog = true;
-
-	proc_kill_cnt = qed_get_process_kill_counter(p_hwfn, p_ptt);
-	DP_NOTICE(p_hwfn, "Process kill counter: %d\n", proc_kill_cnt);
-
-	qed_schedule_recovery_handler(p_hwfn);
-}
-
 static void qed_mcp_send_protocol_stats(struct qed_hwfn *p_hwfn,
 					struct qed_ptt *p_ptt,
 					enum MFW_DRV_MSG_TYPE type)
@@ -1833,9 +1758,6 @@ int qed_mcp_handle_events(struct qed_hwfn *p_hwfn,
 		case MFW_DRV_MSG_TRANSCEIVER_STATE_CHANGE:
 			qed_mcp_handle_transceiver_change(p_hwfn, p_ptt);
 			break;
-		case MFW_DRV_MSG_ERROR_RECOVERY:
-			qed_mcp_handle_process_kill(p_hwfn, p_ptt);
-			break;
 		case MFW_DRV_MSG_GET_LAN_STATS:
 		case MFW_DRV_MSG_GET_FCOE_STATS:
 		case MFW_DRV_MSG_GET_ISCSI_STATS:
@@ -2381,43 +2303,6 @@ int qed_mcp_get_flash_size(struct qed_hwfn *p_hwfn,
 	return 0;
 }
 
-int qed_start_recovery_process(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
-{
-	struct qed_dev *cdev = p_hwfn->cdev;
-
-	if (cdev->recov_in_prog) {
-		DP_NOTICE(p_hwfn,
-			  "Avoid triggering a recovery since such a process is already in progress\n");
-		return -EAGAIN;
-	}
-
-	DP_NOTICE(p_hwfn, "Triggering a recovery process\n");
-	qed_wr(p_hwfn, p_ptt, MISC_REG_AEU_GENERAL_ATTN_35, 0x1);
-
-	return 0;
-}
-
-#define QED_RECOVERY_PROLOG_SLEEP_MS    100
-
-int qed_recovery_prolog(struct qed_dev *cdev)
-{
-	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
-	struct qed_ptt *p_ptt = p_hwfn->p_main_ptt;
-	int rc;
-
-	/* Allow ongoing PCIe transactions to complete */
-	msleep(QED_RECOVERY_PROLOG_SLEEP_MS);
-
-	/* Clear the PF's internal FID_enable in the PXP */
-	rc = qed_pglueb_set_pfid_enable(p_hwfn, p_ptt, false);
-	if (rc)
-		DP_NOTICE(p_hwfn,
-			  "qed_pglueb_set_pfid_enable() failed. rc = %d.\n",
-			  rc);
-
-	return rc;
-}
-
 static int
 qed_mcp_config_vf_msix_bb(struct qed_hwfn *p_hwfn,
 			  struct qed_ptt *p_ptt, u8 vf_id, u8 num)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
index 6e1d72a669ae..eddf67798d6f 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
@@ -440,38 +440,6 @@ qed_mcp_send_drv_version(struct qed_hwfn *p_hwfn,
 			 struct qed_ptt *p_ptt,
 			 struct qed_mcp_drv_version *p_ver);
 
-/**
- * @brief Read the MFW process kill counter
- *
- * @param p_hwfn
- * @param p_ptt
- *
- * @return u32
- */
-u32 qed_get_process_kill_counter(struct qed_hwfn *p_hwfn,
-				 struct qed_ptt *p_ptt);
-
-/**
- * @brief Trigger a recovery process
- *
- *  @param p_hwfn
- *  @param p_ptt
- *
- * @return int
- */
-int qed_start_recovery_process(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
-
-/**
- * @brief A recovery handler must call this function as its first step.
- *        It is assumed that the handler is not run from an interrupt context.
- *
- *  @param cdev
- *  @param p_ptt
- *
- * @return int
- */
-int qed_recovery_prolog(struct qed_dev *cdev);
-
 /**
  * @brief Notify MFW about the change in base device properties
  *
@@ -832,16 +800,6 @@ int qed_mcp_load_req(struct qed_hwfn *p_hwfn,
 		     struct qed_ptt *p_ptt,
 		     struct qed_load_req_params *p_params);
 
-/**
- * @brief Sends a LOAD_DONE message to the MFW
- *
- * @param p_hwfn
- * @param p_ptt
- *
- * @return int - 0 - Operation was successful.
- */
-int qed_mcp_load_done(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
-
 /**
  * @brief Sends a UNLOAD_REQ message to the MFW
  *
diff --git a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
index 5ce825ca5f24..8939ed6e08b7 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
@@ -518,8 +518,6 @@
 	0x180824UL
 #define  MISC_REG_AEU_GENERAL_ATTN_0 \
 	0x008400UL
-#define MISC_REG_AEU_GENERAL_ATTN_35 \
-	0x00848cUL
 #define  CAU_REG_SB_ADDR_MEMORY \
 	0x1c8000UL
 #define  CAU_REG_SB_VAR_MEMORY \
diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c
index 3e0f7c46bb1b..eb88bbc6b193 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_spq.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c
@@ -790,17 +790,6 @@ static int qed_spq_pend_post(struct qed_hwfn *p_hwfn)
 				 SPQ_HIGH_PRI_RESERVE_DEFAULT);
 }
 
-static void qed_spq_recov_set_ret_code(struct qed_spq_entry *p_ent,
-				       u8 *fw_return_code)
-{
-	if (!fw_return_code)
-		return;
-
-	if (p_ent->elem.hdr.protocol_id == PROTOCOLID_ROCE ||
-	    p_ent->elem.hdr.protocol_id == PROTOCOLID_IWARP)
-		*fw_return_code = RDMA_RETURN_OK;
-}
-
 /* Avoid overriding of SPQ entries when getting out-of-order completions, by
  * marking the completions in a bitmap and increasing the chain consumer only
  * for the first successive completed entries.
@@ -836,17 +825,6 @@ int qed_spq_post(struct qed_hwfn *p_hwfn,
 		return -EINVAL;
 	}
 
-	if (p_hwfn->cdev->recov_in_prog) {
-		DP_VERBOSE(p_hwfn,
-			   QED_MSG_SPQ,
-			   "Recovery is in progress. Skip spq post [cmd %02x protocol %02x]\n",
-			   p_ent->elem.hdr.cmd_id, p_ent->elem.hdr.protocol_id);
-
-		/* Let the flow complete w/o any error handling */
-		qed_spq_recov_set_ret_code(p_ent, fw_return_code);
-		return 0;
-	}
-
 	/* Complete the entry */
 	rc = qed_spq_fill_entry(p_hwfn, p_ent);
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
index 71e28be58102..ca6290fa0f30 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
@@ -4447,13 +4447,6 @@ int qed_sriov_disable(struct qed_dev *cdev, bool pci_enabled)
 	if (cdev->p_iov_info && cdev->p_iov_info->num_vfs && pci_enabled)
 		pci_disable_sriov(cdev->pdev);
 
-	if (cdev->recov_in_prog) {
-		DP_VERBOSE(cdev,
-			   QED_MSG_IOV,
-			   "Skip SRIOV disable operations in the device since a recovery is in progress\n");
-		goto out;
-	}
-
 	for_each_hwfn(cdev, i) {
 		struct qed_hwfn *hwfn = &cdev->hwfns[i];
 		struct qed_ptt *ptt = qed_ptt_acquire(hwfn);
@@ -4493,7 +4486,7 @@ int qed_sriov_disable(struct qed_dev *cdev, bool pci_enabled)
 
 		qed_ptt_release(hwfn, ptt);
 	}
-out:
+
 	qed_iov_set_vfs_to_disable(cdev, false);
 
 	return 0;
diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h
index 843416404aeb..613249d1e967 100644
--- a/drivers/net/ethernet/qlogic/qede/qede.h
+++ b/drivers/net/ethernet/qlogic/qede/qede.h
@@ -162,7 +162,6 @@ struct qede_rdma_dev {
 	struct list_head entry;
 	struct list_head rdma_event_list;
 	struct workqueue_struct *rdma_wq;
-	bool exp_recovery;
 };
 
 struct qede_ptp;
@@ -265,7 +264,6 @@ struct qede_dev {
 enum QEDE_STATE {
 	QEDE_STATE_CLOSED,
 	QEDE_STATE_OPEN,
-	QEDE_STATE_RECOVERY,
 };
 
 #define HILO_U64(hi, lo)		((((u64)(hi)) << 32) + (lo))
@@ -464,7 +462,6 @@ struct qede_fastpath {
 #define QEDE_CSUM_UNNECESSARY		BIT(1)
 #define QEDE_TUNN_CSUM_UNNECESSARY	BIT(2)
 
-#define QEDE_SP_RECOVERY		0
 #define QEDE_SP_RX_MODE			1
 
 #ifdef CONFIG_RFS_ACCEL
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index de955f2b2980..5a74fcbdbc2b 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -133,12 +133,23 @@ static int qede_probe(struct pci_dev *pdev, const struct pci_device_id *id);
 static void qede_remove(struct pci_dev *pdev);
 static void qede_shutdown(struct pci_dev *pdev);
 static void qede_link_update(void *dev, struct qed_link_output *link);
-static void qede_schedule_recovery_handler(void *dev);
-static void qede_recovery_handler(struct qede_dev *edev);
 static void qede_get_eth_tlv_data(void *edev, void *data);
 static void qede_get_generic_tlv_data(void *edev,
 				      struct qed_generic_tlvs *data);
 
+/* The qede lock is used to protect driver state change and driver flows that
+ * are not reentrant.
+ */
+void __qede_lock(struct qede_dev *edev)
+{
+	mutex_lock(&edev->qede_lock);
+}
+
+void __qede_unlock(struct qede_dev *edev)
+{
+	mutex_unlock(&edev->qede_lock);
+}
+
 #ifdef CONFIG_QED_SRIOV
 static int qede_set_vf_vlan(struct net_device *ndev, int vf, u16 vlan, u8 qos,
 			    __be16 vlan_proto)
@@ -220,7 +231,6 @@ static struct qed_eth_cb_ops qede_ll_ops = {
 		.arfs_filter_op = qede_arfs_filter_op,
 #endif
 		.link_update = qede_link_update,
-		.schedule_recovery_handler = qede_schedule_recovery_handler,
 		.get_generic_tlv_data = qede_get_generic_tlv_data,
 		.get_protocol_tlv_data = qede_get_eth_tlv_data,
 	},
@@ -940,57 +950,11 @@ err:
 	return -ENOMEM;
 }
 
-/* The qede lock is used to protect driver state change and driver flows that
- * are not reentrant.
- */
-void __qede_lock(struct qede_dev *edev)
-{
-	mutex_lock(&edev->qede_lock);
-}
-
-void __qede_unlock(struct qede_dev *edev)
-{
-	mutex_unlock(&edev->qede_lock);
-}
-
-/* This version of the lock should be used when acquiring the RTNL lock is also
- * needed in addition to the internal qede lock.
- */
-void qede_lock(struct qede_dev *edev)
-{
-	rtnl_lock();
-	__qede_lock(edev);
-}
-
-void qede_unlock(struct qede_dev *edev)
-{
-	__qede_unlock(edev);
-	rtnl_unlock();
-}
-
 static void qede_sp_task(struct work_struct *work)
 {
 	struct qede_dev *edev = container_of(work, struct qede_dev,
 					     sp_task.work);
 
-	/* The locking scheme depends on the specific flag:
-	 * In case of QEDE_SP_RECOVERY, acquiring the RTNL lock is required to
-	 * ensure that ongoing flows are ended and new ones are not started.
-	 * In other cases - only the internal qede lock should be acquired.
-	 */
-
-	if (test_and_clear_bit(QEDE_SP_RECOVERY, &edev->sp_flags)) {
-#ifdef CONFIG_QED_SRIOV
-		/* SRIOV must be disabled outside the lock to avoid a deadlock.
-		 * The recovery of the active VFs is currently not supported.
-		 */
-		qede_sriov_configure(edev->pdev, 0);
-#endif
-		qede_lock(edev);
-		qede_recovery_handler(edev);
-		qede_unlock(edev);
-	}
-
 	__qede_lock(edev);
 
 	if (test_and_clear_bit(QEDE_SP_RX_MODE, &edev->sp_flags))
@@ -1067,13 +1031,8 @@ static void qede_log_probe(struct qede_dev *edev)
 
 enum qede_probe_mode {
 	QEDE_PROBE_NORMAL,
-	QEDE_PROBE_RECOVERY,
 };
 
-#define QEDE_RDMA_PROBE_MODE(mode) \
-	((mode) == QEDE_PROBE_NORMAL ? QEDE_RDMA_PROBE_NORMAL \
-				     : QEDE_RDMA_PROBE_RECOVERY)
-
 static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
 			bool is_vf, enum qede_probe_mode mode)
 {
@@ -1092,7 +1051,6 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
 	probe_params.dp_module = dp_module;
 	probe_params.dp_level = dp_level;
 	probe_params.is_vf = is_vf;
-	probe_params.recov_in_prog = (mode == QEDE_PROBE_RECOVERY);
 	cdev = qed_ops->common->probe(pdev, &probe_params);
 	if (!cdev) {
 		rc = -ENODEV;
@@ -1120,20 +1078,11 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
 	if (rc)
 		goto err2;
 
-	if (mode != QEDE_PROBE_RECOVERY) {
-		edev = qede_alloc_etherdev(cdev, pdev, &dev_info, dp_module,
-					   dp_level);
-		if (!edev) {
-			rc = -ENOMEM;
-			goto err2;
-		}
-	} else {
-		struct net_device *ndev = pci_get_drvdata(pdev);
-
-		edev = netdev_priv(ndev);
-		edev->cdev = cdev;
-		memset(&edev->stats, 0, sizeof(edev->stats));
-		memcpy(&edev->dev_info, &dev_info, sizeof(dev_info));
+	edev = qede_alloc_etherdev(cdev, pdev, &dev_info, dp_module,
+				   dp_level);
+	if (!edev) {
+		rc = -ENOMEM;
+		goto err2;
 	}
 
 	if (is_vf)
@@ -1141,31 +1090,28 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
 
 	qede_init_ndev(edev);
 
-	rc = qede_rdma_dev_add(edev, QEDE_RDMA_PROBE_MODE(mode));
+	rc = qede_rdma_dev_add(edev);
 	if (rc)
 		goto err3;
 
-	if (mode != QEDE_PROBE_RECOVERY) {
-		/* Prepare the lock prior to the registration of the netdev,
-		 * as once it's registered we might reach flows requiring it
-		 * [it's even possible to reach a flow needing it directly
-		 * from there, although it's unlikely].
-		 */
-		INIT_DELAYED_WORK(&edev->sp_task, qede_sp_task);
-		mutex_init(&edev->qede_lock);
-
-		rc = register_netdev(edev->ndev);
-		if (rc) {
-			DP_NOTICE(edev, "Cannot register net-device\n");
-			goto err4;
-		}
+	/* Prepare the lock prior to the registration of the netdev,
+	 * as once it's registered we might reach flows requiring it
+	 * [it's even possible to reach a flow needing it directly
+	 * from there, although it's unlikely].
+	 */
+	INIT_DELAYED_WORK(&edev->sp_task, qede_sp_task);
+	mutex_init(&edev->qede_lock);
+	rc = register_netdev(edev->ndev);
+	if (rc) {
+		DP_NOTICE(edev, "Cannot register net-device\n");
+		goto err4;
 	}
 
 	edev->ops->common->set_name(cdev, edev->ndev->name);
 
 	/* PTP not supported on VFs */
 	if (!is_vf)
-		qede_ptp_enable(edev, (mode == QEDE_PROBE_NORMAL));
+		qede_ptp_enable(edev, true);
 
 	edev->ops->register_ops(cdev, &qede_ll_ops, edev);
 
@@ -1180,7 +1126,7 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
 	return 0;
 
 err4:
-	qede_rdma_dev_remove(edev, QEDE_RDMA_PROBE_MODE(mode));
+	qede_rdma_dev_remove(edev);
 err3:
 	free_netdev(edev->ndev);
 err2:
@@ -1216,13 +1162,8 @@ static int qede_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 enum qede_remove_mode {
 	QEDE_REMOVE_NORMAL,
-	QEDE_REMOVE_RECOVERY,
 };
 
-#define QEDE_RDMA_REMOVE_MODE(mode) \
-	((mode) == QEDE_REMOVE_NORMAL ? QEDE_RDMA_REMOVE_NORMAL \
-			      : QEDE_RDMA_REMOVE_RECOVERY)
-
 static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode)
 {
 	struct net_device *ndev = pci_get_drvdata(pdev);
@@ -1231,19 +1172,15 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode)
 
 	DP_INFO(edev, "Starting qede_remove\n");
 
-	qede_rdma_dev_remove(edev, QEDE_RDMA_REMOVE_MODE(mode));
-
-	if (mode != QEDE_REMOVE_RECOVERY) {
-		unregister_netdev(ndev);
+	qede_rdma_dev_remove(edev);
+	unregister_netdev(ndev);
+	cancel_delayed_work_sync(&edev->sp_task);
 
-		cancel_delayed_work_sync(&edev->sp_task);
+	qede_ptp_disable(edev);
 
-		edev->ops->common->set_power_state(cdev, PCI_D0);
+	edev->ops->common->set_power_state(cdev, PCI_D0);
 
-		pci_set_drvdata(pdev, NULL);
-	}
-
-	qede_ptp_disable(edev);
+	pci_set_drvdata(pdev, NULL);
 
 	/* Use global ops since we've freed edev */
 	qed_ops->common->slowpath_stop(cdev);
@@ -1257,8 +1194,7 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode)
 	 * [e.g., QED register callbacks] won't break anything when
 	 * accessing the netdevice.
 	 */
-	if (mode != QEDE_REMOVE_RECOVERY)
-		free_netdev(ndev);
+	 free_netdev(ndev);
 
 	dev_info(&pdev->dev, "Ending qede_remove successfully\n");
 }
@@ -1603,58 +1539,6 @@ static int qede_alloc_mem_load(struct qede_dev *edev)
 	return 0;
 }
 
-static void qede_empty_tx_queue(struct qede_dev *edev,
-				struct qede_tx_queue *txq)
-{
-	unsigned int pkts_compl = 0, bytes_compl = 0;
-	struct netdev_queue *netdev_txq;
-	int rc, len = 0;
-
-	netdev_txq = netdev_get_tx_queue(edev->ndev, txq->ndev_txq_id);
-
-	while (qed_chain_get_cons_idx(&txq->tx_pbl) !=
-	       qed_chain_get_prod_idx(&txq->tx_pbl)) {
-		DP_VERBOSE(edev, NETIF_MSG_IFDOWN,
-			   "Freeing a packet on tx queue[%d]: chain_cons 0x%x, chain_prod 0x%x\n",
-			   txq->index, qed_chain_get_cons_idx(&txq->tx_pbl),
-			   qed_chain_get_prod_idx(&txq->tx_pbl));
-
-		rc = qede_free_tx_pkt(edev, txq, &len);
-		if (rc) {
-			DP_NOTICE(edev,
-				  "Failed to free a packet on tx queue[%d]: chain_cons 0x%x, chain_prod 0x%x\n",
-				  txq->index,
-				  qed_chain_get_cons_idx(&txq->tx_pbl),
-				  qed_chain_get_prod_idx(&txq->tx_pbl));
-			break;
-		}
-
-		bytes_compl += len;
-		pkts_compl++;
-		txq->sw_tx_cons++;
-	}
-
-	netdev_tx_completed_queue(netdev_txq, pkts_compl, bytes_compl);
-}
-
-static void qede_empty_tx_queues(struct qede_dev *edev)
-{
-	int i;
-
-	for_each_queue(i)
-		if (edev->fp_array[i].type & QEDE_FASTPATH_TX) {
-			int cos;
-
-			for_each_cos_in_txq(edev, cos) {
-				struct qede_fastpath *fp;
-
-				fp = &edev->fp_array[i];
-				qede_empty_tx_queue(edev,
-						    &fp->txq[cos]);
-			}
-		}
-}
-
 /* This function inits fp content and resets the SB, RXQ and TXQ structures */
 static void qede_init_fp(struct qede_dev *edev)
 {
@@ -2169,7 +2053,6 @@ out:
 
 enum qede_unload_mode {
 	QEDE_UNLOAD_NORMAL,
-	QEDE_UNLOAD_RECOVERY,
 };
 
 static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode,
@@ -2185,8 +2068,7 @@ static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode,
 
 	clear_bit(QEDE_FLAGS_LINK_REQUESTED, &edev->flags);
 
-	if (mode != QEDE_UNLOAD_RECOVERY)
-		edev->state = QEDE_STATE_CLOSED;
+	edev->state = QEDE_STATE_CLOSED;
 
 	qede_rdma_dev_event_close(edev);
 
@@ -2194,21 +2076,18 @@ static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode,
 	netif_tx_disable(edev->ndev);
 	netif_carrier_off(edev->ndev);
 
-	if (mode != QEDE_UNLOAD_RECOVERY) {
-		/* Reset the link */
-		memset(&link_params, 0, sizeof(link_params));
-		link_params.link_up = false;
-		edev->ops->common->set_link(edev->cdev, &link_params);
-
-		rc = qede_stop_queues(edev);
-		if (rc) {
-			qede_sync_free_irqs(edev);
-			goto out;
-		}
-
-		DP_INFO(edev, "Stopped Queues\n");
+	/* Reset the link */
+	memset(&link_params, 0, sizeof(link_params));
+	link_params.link_up = false;
+	edev->ops->common->set_link(edev->cdev, &link_params);
+	rc = qede_stop_queues(edev);
+	if (rc) {
+		qede_sync_free_irqs(edev);
+		goto out;
 	}
 
+	DP_INFO(edev, "Stopped Queues\n");
+
 	qede_vlan_mark_nonconfigured(edev);
 	edev->ops->fastpath_stop(edev->cdev);
 
@@ -2223,26 +2102,18 @@ static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode,
 
 	qede_napi_disable_remove(edev);
 
-	if (mode == QEDE_UNLOAD_RECOVERY)
-		qede_empty_tx_queues(edev);
-
 	qede_free_mem_load(edev);
 	qede_free_fp_array(edev);
 
 out:
 	if (!is_locked)
 		__qede_unlock(edev);
-
-	if (mode != QEDE_UNLOAD_RECOVERY)
-		DP_NOTICE(edev, "Link is down\n");
-
 	DP_INFO(edev, "Ending qede unload\n");
 }
 
 enum qede_load_mode {
 	QEDE_LOAD_NORMAL,
 	QEDE_LOAD_RELOAD,
-	QEDE_LOAD_RECOVERY,
 };
 
 static int qede_load(struct qede_dev *edev, enum qede_load_mode mode,
@@ -2422,77 +2293,6 @@ static void qede_link_update(void *dev, struct qed_link_output *link)
 	}
 }
 
-static void qede_schedule_recovery_handler(void *dev)
-{
-	struct qede_dev *edev = dev;
-
-	if (edev->state == QEDE_STATE_RECOVERY) {
-		DP_NOTICE(edev,
-			  "Avoid scheduling a recovery handling since already in recovery state\n");
-		return;
-	}
-
-	set_bit(QEDE_SP_RECOVERY, &edev->sp_flags);
-	schedule_delayed_work(&edev->sp_task, 0);
-
-	DP_INFO(edev, "Scheduled a recovery handler\n");
-}
-
-static void qede_recovery_failed(struct qede_dev *edev)
-{
-	netdev_err(edev->ndev, "Recovery handling has failed. Power cycle is needed.\n");
-
-	netif_device_detach(edev->ndev);
-
-	if (edev->cdev)
-		edev->ops->common->set_power_state(edev->cdev, PCI_D3hot);
-}
-
-static void qede_recovery_handler(struct qede_dev *edev)
-{
-	u32 curr_state = edev->state;
-	int rc;
-
-	DP_NOTICE(edev, "Starting a recovery process\n");
-
-	/* No need to acquire first the qede_lock since is done by qede_sp_task
-	 * before calling this function.
-	 */
-	edev->state = QEDE_STATE_RECOVERY;
-
-	edev->ops->common->recovery_prolog(edev->cdev);
-
-	if (curr_state == QEDE_STATE_OPEN)
-		qede_unload(edev, QEDE_UNLOAD_RECOVERY, true);
-
-	__qede_remove(edev->pdev, QEDE_REMOVE_RECOVERY);
-
-	rc = __qede_probe(edev->pdev, edev->dp_module, edev->dp_level,
-			  IS_VF(edev), QEDE_PROBE_RECOVERY);
-	if (rc) {
-		edev->cdev = NULL;
-		goto err;
-	}
-
-	if (curr_state == QEDE_STATE_OPEN) {
-		rc = qede_load(edev, QEDE_LOAD_RECOVERY, true);
-		if (rc)
-			goto err;
-
-		qede_config_rx_mode(edev->ndev);
-		udp_tunnel_get_rx_info(edev->ndev);
-	}
-
-	edev->state = curr_state;
-
-	DP_NOTICE(edev, "Recovery handling is done\n");
-
-	return;
-
-err:
-	qede_recovery_failed(edev);
-}
-
 static bool qede_is_txq_full(struct qede_dev *edev, struct qede_tx_queue *txq)
 {
 	struct netdev_queue *netdev_txq;
diff --git a/drivers/net/ethernet/qlogic/qede/qede_rdma.c b/drivers/net/ethernet/qlogic/qede/qede_rdma.c
index 9668e5e47d5f..1900bf7e67d1 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_rdma.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_rdma.c
@@ -50,8 +50,6 @@ static void _qede_rdma_dev_add(struct qede_dev *edev)
 	if (!qedr_drv)
 		return;
 
-	/* Leftovers from previous error recovery */
-	edev->rdma_info.exp_recovery = false;
 	edev->rdma_info.qedr_dev = qedr_drv->add(edev->cdev, edev->pdev,
 						 edev->ndev);
 }
@@ -89,26 +87,21 @@ static void qede_rdma_destroy_wq(struct qede_dev *edev)
 	destroy_workqueue(edev->rdma_info.rdma_wq);
 }
 
-int qede_rdma_dev_add(struct qede_dev *edev, enum qede_rdma_probe_mode mode)
+int qede_rdma_dev_add(struct qede_dev *edev)
 {
-	int rc;
+	int rc = 0;
 
-	if (!qede_rdma_supported(edev))
-		return 0;
-
-	/* Cannot start qedr while recovering since it wasn't fully stopped */
-	if (mode == QEDE_RDMA_PROBE_RECOVERY)
-		return 0;
-
-	rc = qede_rdma_create_wq(edev);
-	if (rc)
-		return rc;
+	if (qede_rdma_supported(edev)) {
+		rc = qede_rdma_create_wq(edev);
+		if (rc)
+			return rc;
 
-	INIT_LIST_HEAD(&edev->rdma_info.entry);
-	mutex_lock(&qedr_dev_list_lock);
-	list_add_tail(&edev->rdma_info.entry, &qedr_dev_list);
-	_qede_rdma_dev_add(edev);
-	mutex_unlock(&qedr_dev_list_lock);
+		INIT_LIST_HEAD(&edev->rdma_info.entry);
+		mutex_lock(&qedr_dev_list_lock);
+		list_add_tail(&edev->rdma_info.entry, &qedr_dev_list);
+		_qede_rdma_dev_add(edev);
+		mutex_unlock(&qedr_dev_list_lock);
+	}
 
 	return rc;
 }
@@ -117,31 +110,19 @@ static void _qede_rdma_dev_remove(struct qede_dev *edev)
 {
 	if (qedr_drv && qedr_drv->remove && edev->rdma_info.qedr_dev)
 		qedr_drv->remove(edev->rdma_info.qedr_dev);
+	edev->rdma_info.qedr_dev = NULL;
 }
 
-void qede_rdma_dev_remove(struct qede_dev *edev,
-			  enum qede_rdma_remove_mode mode)
+void qede_rdma_dev_remove(struct qede_dev *edev)
 {
 	if (!qede_rdma_supported(edev))
 		return;
 
-	/* Cannot remove qedr while recovering since it wasn't fully stopped */
-	if (mode == QEDE_RDMA_REMOVE_NORMAL) {
-		qede_rdma_destroy_wq(edev);
-		mutex_lock(&qedr_dev_list_lock);
-		if (!edev->rdma_info.exp_recovery)
-			_qede_rdma_dev_remove(edev);
-		edev->rdma_info.qedr_dev = NULL;
-		list_del(&edev->rdma_info.entry);
-		mutex_unlock(&qedr_dev_list_lock);
-	} else {
-		if (!edev->rdma_info.exp_recovery) {
-			mutex_lock(&qedr_dev_list_lock);
-			_qede_rdma_dev_remove(edev);
-			mutex_unlock(&qedr_dev_list_lock);
-		}
-		edev->rdma_info.exp_recovery = true;
-	}
+	qede_rdma_destroy_wq(edev);
+	mutex_lock(&qedr_dev_list_lock);
+	_qede_rdma_dev_remove(edev);
+	list_del(&edev->rdma_info.entry);
+	mutex_unlock(&qedr_dev_list_lock);
 }
 
 static void _qede_rdma_dev_open(struct qede_dev *edev)
@@ -223,8 +204,7 @@ void qede_rdma_unregister_driver(struct qedr_driver *drv)
 
 	mutex_lock(&qedr_dev_list_lock);
 	list_for_each_entry(edev, &qedr_dev_list, rdma_info.entry) {
-		/* If device has experienced recovery it was already removed */
-		if (edev->rdma_info.qedr_dev && !edev->rdma_info.exp_recovery)
+		if (edev->rdma_info.qedr_dev)
 			_qede_rdma_dev_remove(edev);
 	}
 	qedr_drv = NULL;
@@ -304,10 +284,6 @@ static void qede_rdma_add_event(struct qede_dev *edev,
 {
 	struct qede_rdma_event_work *event_node;
 
-	/* If a recovery was experienced avoid adding the event */
-	if (edev->rdma_info.exp_recovery)
-		return;
-
 	if (!edev->rdma_info.qedr_dev)
 		return;
 
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index c2a1b7dbe4eb..91c536a01b56 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -764,7 +764,6 @@ struct qed_probe_params {
 	u32 dp_module;
 	u8 dp_level;
 	bool is_vf;
-	bool recov_in_prog;
 };
 
 #define QED_DRV_VER_STR_SIZE 12
@@ -811,7 +810,6 @@ struct qed_common_cb_ops {
 	void (*arfs_filter_op)(void *dev, void *fltr, u8 fw_rc);
 	void	(*link_update)(void			*dev,
 			       struct qed_link_output	*link);
-	void (*schedule_recovery_handler)(void *dev);
 	void	(*dcbx_aen)(void *dev, struct qed_dcbx_get *get, u32 mib_type);
 	void (*get_generic_tlv_data)(void *dev, struct qed_generic_tlvs *data);
 	void (*get_protocol_tlv_data)(void *dev, void *data);
@@ -1059,24 +1057,6 @@ struct qed_common_ops {
 	int (*db_recovery_del)(struct qed_dev *cdev,
 			       void __iomem *db_addr, void *db_data);
 
-/**
- * @brief recovery_process - Trigger a recovery process
- *
- * @param cdev
- *
- * @return 0 on success, error otherwise.
- */
-	int (*recovery_process)(struct qed_dev *cdev);
-
-/**
- * @brief recovery_prolog - Execute the prolog operations of a recovery process
- *
- * @param cdev
- *
- * @return 0 on success, error otherwise.
- */
-	int (*recovery_prolog)(struct qed_dev *cdev);
-
 /**
  * @brief update_drv_state - API to inform the change in the driver state.
  *
diff --git a/include/linux/qed/qede_rdma.h b/include/linux/qed/qede_rdma.h
index e29d7199c10e..9904617a9730 100644
--- a/include/linux/qed/qede_rdma.h
+++ b/include/linux/qed/qede_rdma.h
@@ -55,16 +55,6 @@ struct qede_rdma_event_work {
 	enum qede_rdma_event event;
 };
 
-enum qede_rdma_probe_mode {
-	QEDE_RDMA_PROBE_NORMAL,
-	QEDE_RDMA_PROBE_RECOVERY,
-};
-
-enum qede_rdma_remove_mode {
-	QEDE_RDMA_REMOVE_NORMAL,
-	QEDE_RDMA_REMOVE_RECOVERY,
-};
-
 struct qedr_driver {
 	unsigned char name[32];
 
@@ -84,24 +74,21 @@ void qede_rdma_unregister_driver(struct qedr_driver *drv);
 bool qede_rdma_supported(struct qede_dev *dev);
 
 #if IS_ENABLED(CONFIG_QED_RDMA)
-int qede_rdma_dev_add(struct qede_dev *dev, enum qede_rdma_probe_mode mode);
+int qede_rdma_dev_add(struct qede_dev *dev);
 void qede_rdma_dev_event_open(struct qede_dev *dev);
 void qede_rdma_dev_event_close(struct qede_dev *dev);
-void qede_rdma_dev_remove(struct qede_dev *dev,
-			  enum qede_rdma_remove_mode mode);
+void qede_rdma_dev_remove(struct qede_dev *dev);
 void qede_rdma_event_changeaddr(struct qede_dev *edr);
 
 #else
-static inline int qede_rdma_dev_add(struct qede_dev *dev,
-				    enum qede_rdma_probe_mode mode)
+static inline int qede_rdma_dev_add(struct qede_dev *dev)
 {
 	return 0;
 }
 
 static inline void qede_rdma_dev_event_open(struct qede_dev *dev) {}
 static inline void qede_rdma_dev_event_close(struct qede_dev *dev) {}
-static inline void qede_rdma_dev_remove(struct qede_dev *dev,
-					enum qede_rdma_remove_mode mode) {}
+static inline void qede_rdma_dev_remove(struct qede_dev *dev) {}
 static inline void qede_rdma_event_changeaddr(struct qede_dev *edr) {}
 #endif
 #endif
-- 
cgit v1.2.3-71-gd317


From 13054abbaa4f1fd4e6f3b4b63439ec033b4c8035 Mon Sep 17 00:00:00 2001
From: Vladis Dronov <vdronov@redhat.com>
Date: Tue, 29 Jan 2019 11:58:35 +0100
Subject: HID: debug: fix the ring buffer implementation

Ring buffer implementation in hid_debug_event() and hid_debug_events_read()
is strange allowing lost or corrupted data. After commit 717adfdaf147
("HID: debug: check length before copy_to_user()") it is possible to enter
an infinite loop in hid_debug_events_read() by providing 0 as count, this
locks up a system. Fix this by rewriting the ring buffer implementation
with kfifo and simplify the code.

This fixes CVE-2019-3819.

v2: fix an execution logic and add a comment
v3: use __set_current_state() instead of set_current_state()

Link: https://bugzilla.redhat.com/show_bug.cgi?id=1669187
Cc: stable@vger.kernel.org # v4.18+
Fixes: cd667ce24796 ("HID: use debugfs for events/reports dumping")
Fixes: 717adfdaf147 ("HID: debug: check length before copy_to_user()")
Signed-off-by: Vladis Dronov <vdronov@redhat.com>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
---
 drivers/hid/hid-debug.c   | 120 ++++++++++++++++++----------------------------
 include/linux/hid-debug.h |   9 ++--
 2 files changed, 51 insertions(+), 78 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c
index c530476edba6..ac9fda1b5a72 100644
--- a/drivers/hid/hid-debug.c
+++ b/drivers/hid/hid-debug.c
@@ -30,6 +30,7 @@
 
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
+#include <linux/kfifo.h>
 #include <linux/sched/signal.h>
 #include <linux/export.h>
 #include <linux/slab.h>
@@ -661,17 +662,12 @@ EXPORT_SYMBOL_GPL(hid_dump_device);
 /* enqueue string to 'events' ring buffer */
 void hid_debug_event(struct hid_device *hdev, char *buf)
 {
-	unsigned i;
 	struct hid_debug_list *list;
 	unsigned long flags;
 
 	spin_lock_irqsave(&hdev->debug_list_lock, flags);
-	list_for_each_entry(list, &hdev->debug_list, node) {
-		for (i = 0; buf[i]; i++)
-			list->hid_debug_buf[(list->tail + i) % HID_DEBUG_BUFSIZE] =
-				buf[i];
-		list->tail = (list->tail + i) % HID_DEBUG_BUFSIZE;
-        }
+	list_for_each_entry(list, &hdev->debug_list, node)
+		kfifo_in(&list->hid_debug_fifo, buf, strlen(buf));
 	spin_unlock_irqrestore(&hdev->debug_list_lock, flags);
 
 	wake_up_interruptible(&hdev->debug_wait);
@@ -722,8 +718,7 @@ void hid_dump_input(struct hid_device *hdev, struct hid_usage *usage, __s32 valu
 	hid_debug_event(hdev, buf);
 
 	kfree(buf);
-        wake_up_interruptible(&hdev->debug_wait);
-
+	wake_up_interruptible(&hdev->debug_wait);
 }
 EXPORT_SYMBOL_GPL(hid_dump_input);
 
@@ -1083,8 +1078,8 @@ static int hid_debug_events_open(struct inode *inode, struct file *file)
 		goto out;
 	}
 
-	if (!(list->hid_debug_buf = kzalloc(HID_DEBUG_BUFSIZE, GFP_KERNEL))) {
-		err = -ENOMEM;
+	err = kfifo_alloc(&list->hid_debug_fifo, HID_DEBUG_FIFOSIZE, GFP_KERNEL);
+	if (err) {
 		kfree(list);
 		goto out;
 	}
@@ -1104,77 +1099,57 @@ static ssize_t hid_debug_events_read(struct file *file, char __user *buffer,
 		size_t count, loff_t *ppos)
 {
 	struct hid_debug_list *list = file->private_data;
-	int ret = 0, len;
+	int ret = 0, copied;
 	DECLARE_WAITQUEUE(wait, current);
 
 	mutex_lock(&list->read_mutex);
-	while (ret == 0) {
-		if (list->head == list->tail) {
-			add_wait_queue(&list->hdev->debug_wait, &wait);
-			set_current_state(TASK_INTERRUPTIBLE);
-
-			while (list->head == list->tail) {
-				if (file->f_flags & O_NONBLOCK) {
-					ret = -EAGAIN;
-					break;
-				}
-				if (signal_pending(current)) {
-					ret = -ERESTARTSYS;
-					break;
-				}
+	if (kfifo_is_empty(&list->hid_debug_fifo)) {
+		add_wait_queue(&list->hdev->debug_wait, &wait);
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		while (kfifo_is_empty(&list->hid_debug_fifo)) {
+			if (file->f_flags & O_NONBLOCK) {
+				ret = -EAGAIN;
+				break;
+			}
 
-				if (!list->hdev || !list->hdev->debug) {
-					ret = -EIO;
-					set_current_state(TASK_RUNNING);
-					goto out;
-				}
+			if (signal_pending(current)) {
+				ret = -ERESTARTSYS;
+				break;
+			}
 
-				/* allow O_NONBLOCK from other threads */
-				mutex_unlock(&list->read_mutex);
-				schedule();
-				mutex_lock(&list->read_mutex);
-				set_current_state(TASK_INTERRUPTIBLE);
+			/* if list->hdev is NULL we cannot remove_wait_queue().
+			 * if list->hdev->debug is 0 then hid_debug_unregister()
+			 * was already called and list->hdev is being destroyed.
+			 * if we add remove_wait_queue() here we can hit a race.
+			 */
+			if (!list->hdev || !list->hdev->debug) {
+				ret = -EIO;
+				set_current_state(TASK_RUNNING);
+				goto out;
 			}
 
-			set_current_state(TASK_RUNNING);
-			remove_wait_queue(&list->hdev->debug_wait, &wait);
+			/* allow O_NONBLOCK from other threads */
+			mutex_unlock(&list->read_mutex);
+			schedule();
+			mutex_lock(&list->read_mutex);
+			set_current_state(TASK_INTERRUPTIBLE);
 		}
 
-		if (ret)
-			goto out;
+		__set_current_state(TASK_RUNNING);
+		remove_wait_queue(&list->hdev->debug_wait, &wait);
 
-		/* pass the ringbuffer contents to userspace */
-copy_rest:
-		if (list->tail == list->head)
+		if (ret)
 			goto out;
-		if (list->tail > list->head) {
-			len = list->tail - list->head;
-			if (len > count)
-				len = count;
-
-			if (copy_to_user(buffer + ret, &list->hid_debug_buf[list->head], len)) {
-				ret = -EFAULT;
-				goto out;
-			}
-			ret += len;
-			list->head += len;
-		} else {
-			len = HID_DEBUG_BUFSIZE - list->head;
-			if (len > count)
-				len = count;
-
-			if (copy_to_user(buffer, &list->hid_debug_buf[list->head], len)) {
-				ret = -EFAULT;
-				goto out;
-			}
-			list->head = 0;
-			ret += len;
-			count -= len;
-			if (count > 0)
-				goto copy_rest;
-		}
-
 	}
+
+	/* pass the fifo content to userspace, locking is not needed with only
+	 * one concurrent reader and one concurrent writer
+	 */
+	ret = kfifo_to_user(&list->hid_debug_fifo, buffer, count, &copied);
+	if (ret)
+		goto out;
+	ret = copied;
 out:
 	mutex_unlock(&list->read_mutex);
 	return ret;
@@ -1185,7 +1160,7 @@ static __poll_t hid_debug_events_poll(struct file *file, poll_table *wait)
 	struct hid_debug_list *list = file->private_data;
 
 	poll_wait(file, &list->hdev->debug_wait, wait);
-	if (list->head != list->tail)
+	if (!kfifo_is_empty(&list->hid_debug_fifo))
 		return EPOLLIN | EPOLLRDNORM;
 	if (!list->hdev->debug)
 		return EPOLLERR | EPOLLHUP;
@@ -1200,7 +1175,7 @@ static int hid_debug_events_release(struct inode *inode, struct file *file)
 	spin_lock_irqsave(&list->hdev->debug_list_lock, flags);
 	list_del(&list->node);
 	spin_unlock_irqrestore(&list->hdev->debug_list_lock, flags);
-	kfree(list->hid_debug_buf);
+	kfifo_free(&list->hid_debug_fifo);
 	kfree(list);
 
 	return 0;
@@ -1246,4 +1221,3 @@ void hid_debug_exit(void)
 {
 	debugfs_remove_recursive(hid_debug_root);
 }
-
diff --git a/include/linux/hid-debug.h b/include/linux/hid-debug.h
index 8663f216c563..2d6100edf204 100644
--- a/include/linux/hid-debug.h
+++ b/include/linux/hid-debug.h
@@ -24,7 +24,10 @@
 
 #ifdef CONFIG_DEBUG_FS
 
+#include <linux/kfifo.h>
+
 #define HID_DEBUG_BUFSIZE 512
+#define HID_DEBUG_FIFOSIZE 512
 
 void hid_dump_input(struct hid_device *, struct hid_usage *, __s32);
 void hid_dump_report(struct hid_device *, int , u8 *, int);
@@ -37,11 +40,8 @@ void hid_debug_init(void);
 void hid_debug_exit(void);
 void hid_debug_event(struct hid_device *, char *);
 
-
 struct hid_debug_list {
-	char *hid_debug_buf;
-	int head;
-	int tail;
+	DECLARE_KFIFO_PTR(hid_debug_fifo, char);
 	struct fasync_struct *fasync;
 	struct hid_device *hdev;
 	struct list_head node;
@@ -64,4 +64,3 @@ struct hid_debug_list {
 #endif
 
 #endif
-
-- 
cgit v1.2.3-71-gd317


From b284909abad48b07d3071a9fc9b5692b3e64914b Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Wed, 30 Jan 2019 07:13:58 -0600
Subject: cpu/hotplug: Fix "SMT disabled by BIOS" detection for KVM

With the following commit:

  73d5e2b47264 ("cpu/hotplug: detect SMT disabled by BIOS")

... the hotplug code attempted to detect when SMT was disabled by BIOS,
in which case it reported SMT as permanently disabled.  However, that
code broke a virt hotplug scenario, where the guest is booted with only
primary CPU threads, and a sibling is brought online later.

The problem is that there doesn't seem to be a way to reliably
distinguish between the HW "SMT disabled by BIOS" case and the virt
"sibling not yet brought online" case.  So the above-mentioned commit
was a bit misguided, as it permanently disabled SMT for both cases,
preventing future virt sibling hotplugs.

Going back and reviewing the original problems which were attempted to
be solved by that commit, when SMT was disabled in BIOS:

  1) /sys/devices/system/cpu/smt/control showed "on" instead of
     "notsupported"; and

  2) vmx_vm_init() was incorrectly showing the L1TF_MSG_SMT warning.

I'd propose that we instead consider #1 above to not actually be a
problem.  Because, at least in the virt case, it's possible that SMT
wasn't disabled by BIOS and a sibling thread could be brought online
later.  So it makes sense to just always default the smt control to "on"
to allow for that possibility (assuming cpuid indicates that the CPU
supports SMT).

The real problem is #2, which has a simple fix: change vmx_vm_init() to
query the actual current SMT state -- i.e., whether any siblings are
currently online -- instead of looking at the SMT "control" sysfs value.

So fix it by:

  a) reverting the original "fix" and its followup fix:

     73d5e2b47264 ("cpu/hotplug: detect SMT disabled by BIOS")
     bc2d8d262cba ("cpu/hotplug: Fix SMT supported evaluation")

     and

  b) changing vmx_vm_init() to query the actual current SMT state --
     instead of the sysfs control value -- to determine whether the L1TF
     warning is needed.  This also requires the 'sched_smt_present'
     variable to exported, instead of 'cpu_smt_control'.

Fixes: 73d5e2b47264 ("cpu/hotplug: detect SMT disabled by BIOS")
Reported-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Joe Mario <jmario@redhat.com>
Cc: Jiri Kosina <jikos@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: kvm@vger.kernel.org
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/e3a85d585da28cc333ecbc1e78ee9216e6da9396.1548794349.git.jpoimboe@redhat.com
---
 arch/x86/kernel/cpu/bugs.c |  2 +-
 arch/x86/kvm/vmx/vmx.c     |  3 ++-
 include/linux/cpu.h        |  2 --
 kernel/cpu.c               | 33 ++++-----------------------------
 kernel/sched/fair.c        |  1 +
 kernel/smp.c               |  2 --
 6 files changed, 8 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 1de0f4170178..01874d54f4fd 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -71,7 +71,7 @@ void __init check_bugs(void)
 	 * identify_boot_cpu() initialized SMT support information, let the
 	 * core code know.
 	 */
-	cpu_smt_check_topology_early();
+	cpu_smt_check_topology();
 
 	if (!IS_ENABLED(CONFIG_SMP)) {
 		pr_info("CPU: ");
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 4341175339f3..95d618045001 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -26,6 +26,7 @@
 #include <linux/mod_devicetable.h>
 #include <linux/mm.h>
 #include <linux/sched.h>
+#include <linux/sched/smt.h>
 #include <linux/slab.h>
 #include <linux/tboot.h>
 #include <linux/trace_events.h>
@@ -6823,7 +6824,7 @@ static int vmx_vm_init(struct kvm *kvm)
 			 * Warn upon starting the first VM in a potentially
 			 * insecure environment.
 			 */
-			if (cpu_smt_control == CPU_SMT_ENABLED)
+			if (sched_smt_active())
 				pr_warn_once(L1TF_MSG_SMT);
 			if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER)
 				pr_warn_once(L1TF_MSG_L1D);
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 218df7f4d3e1..5041357d0297 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -180,12 +180,10 @@ enum cpuhp_smt_control {
 #if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT)
 extern enum cpuhp_smt_control cpu_smt_control;
 extern void cpu_smt_disable(bool force);
-extern void cpu_smt_check_topology_early(void);
 extern void cpu_smt_check_topology(void);
 #else
 # define cpu_smt_control		(CPU_SMT_ENABLED)
 static inline void cpu_smt_disable(bool force) { }
-static inline void cpu_smt_check_topology_early(void) { }
 static inline void cpu_smt_check_topology(void) { }
 #endif
 
diff --git a/kernel/cpu.c b/kernel/cpu.c
index c0c7f64573ed..d1c6d152da89 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -376,9 +376,6 @@ void __weak arch_smt_update(void) { }
 
 #ifdef CONFIG_HOTPLUG_SMT
 enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
-EXPORT_SYMBOL_GPL(cpu_smt_control);
-
-static bool cpu_smt_available __read_mostly;
 
 void __init cpu_smt_disable(bool force)
 {
@@ -397,25 +394,11 @@ void __init cpu_smt_disable(bool force)
 
 /*
  * The decision whether SMT is supported can only be done after the full
- * CPU identification. Called from architecture code before non boot CPUs
- * are brought up.
- */
-void __init cpu_smt_check_topology_early(void)
-{
-	if (!topology_smt_supported())
-		cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
-}
-
-/*
- * If SMT was disabled by BIOS, detect it here, after the CPUs have been
- * brought online. This ensures the smt/l1tf sysfs entries are consistent
- * with reality. cpu_smt_available is set to true during the bringup of non
- * boot CPUs when a SMT sibling is detected. Note, this may overwrite
- * cpu_smt_control's previous setting.
+ * CPU identification. Called from architecture code.
  */
 void __init cpu_smt_check_topology(void)
 {
-	if (!cpu_smt_available)
+	if (!topology_smt_supported())
 		cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
 }
 
@@ -428,18 +411,10 @@ early_param("nosmt", smt_cmdline_disable);
 
 static inline bool cpu_smt_allowed(unsigned int cpu)
 {
-	if (topology_is_primary_thread(cpu))
+	if (cpu_smt_control == CPU_SMT_ENABLED)
 		return true;
 
-	/*
-	 * If the CPU is not a 'primary' thread and the booted_once bit is
-	 * set then the processor has SMT support. Store this information
-	 * for the late check of SMT support in cpu_smt_check_topology().
-	 */
-	if (per_cpu(cpuhp_state, cpu).booted_once)
-		cpu_smt_available = true;
-
-	if (cpu_smt_control == CPU_SMT_ENABLED)
+	if (topology_is_primary_thread(cpu))
 		return true;
 
 	/*
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 50aa2aba69bd..310d0637fe4b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5980,6 +5980,7 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p
 
 #ifdef CONFIG_SCHED_SMT
 DEFINE_STATIC_KEY_FALSE(sched_smt_present);
+EXPORT_SYMBOL_GPL(sched_smt_present);
 
 static inline void set_idle_cores(int cpu, int val)
 {
diff --git a/kernel/smp.c b/kernel/smp.c
index 163c451af42e..f4cf1b0bb3b8 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -584,8 +584,6 @@ void __init smp_init(void)
 		num_nodes, (num_nodes > 1 ? "s" : ""),
 		num_cpus,  (num_cpus  > 1 ? "s" : ""));
 
-	/* Final decision about SMT support */
-	cpu_smt_check_topology();
 	/* Any cleanup work */
 	smp_cpus_done(setup_max_cpus);
 }
-- 
cgit v1.2.3-71-gd317


From 7d10f70fc198877b43d92bdcd7604279788b9568 Mon Sep 17 00:00:00 2001
From: Waiman Long <longman@redhat.com>
Date: Wed, 30 Jan 2019 13:52:37 -0500
Subject: fs: Don't need to put list_lru into its own cacheline

The list_lru structure is essentially just a pointer to a table of
per-node LRU lists.  Even if CONFIG_MEMCG_KMEM is defined, the list
field is just used for LRU list registration and shrinker_id is set at
initialization.  Those fields won't need to be touched that often.

So there is no point to make the list_lru structures to sit in their own
cachelines.

Signed-off-by: Waiman Long <longman@redhat.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 811c77743dad..29d8e2cfed0e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1479,11 +1479,12 @@ struct super_block {
 	struct user_namespace *s_user_ns;
 
 	/*
-	 * Keep the lru lists last in the structure so they always sit on their
-	 * own individual cachelines.
+	 * The list_lru structure is essentially just a pointer to a table
+	 * of per-node lru lists, each of which has its own spinlock.
+	 * There is no need to put them into separate cachelines.
 	 */
-	struct list_lru		s_dentry_lru ____cacheline_aligned_in_smp;
-	struct list_lru		s_inode_lru ____cacheline_aligned_in_smp;
+	struct list_lru		s_dentry_lru;
+	struct list_lru		s_inode_lru;
 	struct rcu_head		rcu;
 	struct work_struct	destroy_work;
 
-- 
cgit v1.2.3-71-gd317


From af0c9af1b3f66052c369d08be3f60fa9a9559e48 Mon Sep 17 00:00:00 2001
From: Waiman Long <longman@redhat.com>
Date: Wed, 30 Jan 2019 13:52:38 -0500
Subject: fs/dcache: Track & report number of negative dentries

The current dentry number tracking code doesn't distinguish between
positive & negative dentries.  It just reports the total number of
dentries in the LRU lists.

As excessive number of negative dentries can have an impact on system
performance, it will be wise to track the number of positive and
negative dentries separately.

This patch adds tracking for the total number of negative dentries in
the system LRU lists and reports it in the 5th field in the
/proc/sys/fs/dentry-state file.  The number, however, does not include
negative dentries that are in flight but not in the LRU yet as well as
those in the shrinker lists which are on the way out anyway.

The number of positive dentries in the LRU lists can be roughly found by
subtracting the number of negative dentries from the unused count.

Matthew Wilcox had confirmed that since the introduction of the
dentry_stat structure in 2.1.60, the dummy array was there, probably for
future extension.  They were not replacements of pre-existing fields.
So no sane applications that read the value of /proc/sys/fs/dentry-state
will do dummy thing if the last 2 fields of the sysctl parameter are not
zero.  IOW, it will be safe to use one of the dummy array entry for
negative dentry count.

Signed-off-by: Waiman Long <longman@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/sysctl/fs.txt | 26 ++++++++++++++++----------
 fs/dcache.c                 | 32 ++++++++++++++++++++++++++++++++
 include/linux/dcache.h      |  7 ++++---
 3 files changed, 52 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt
index 819caf8ca05f..58649bd4fcfc 100644
--- a/Documentation/sysctl/fs.txt
+++ b/Documentation/sysctl/fs.txt
@@ -56,26 +56,32 @@ of any kernel data structures.
 
 dentry-state:
 
-From linux/fs/dentry.c:
+From linux/include/linux/dcache.h:
 --------------------------------------------------------------
-struct {
+struct dentry_stat_t dentry_stat {
         int nr_dentry;
         int nr_unused;
         int age_limit;         /* age in seconds */
         int want_pages;        /* pages requested by system */
-        int dummy[2];
-} dentry_stat = {0, 0, 45, 0,};
--------------------------------------------------------------- 
-
-Dentries are dynamically allocated and deallocated, and
-nr_dentry seems to be 0 all the time. Hence it's safe to
-assume that only nr_unused, age_limit and want_pages are
-used. Nr_unused seems to be exactly what its name says.
+        int nr_negative;       /* # of unused negative dentries */
+        int dummy;             /* Reserved for future use */
+};
+--------------------------------------------------------------
+
+Dentries are dynamically allocated and deallocated.
+
+nr_dentry shows the total number of dentries allocated (active
++ unused). nr_unused shows the number of dentries that are not
+actively used, but are saved in the LRU list for future reuse.
+
 Age_limit is the age in seconds after which dcache entries
 can be reclaimed when memory is short and want_pages is
 nonzero when shrink_dcache_pages() has been called and the
 dcache isn't pruned yet.
 
+nr_negative shows the number of unused dentries that are also
+negative dentries which do not mapped to actual files.
+
 ==============================================================
 
 dquot-max & dquot-nr:
diff --git a/fs/dcache.c b/fs/dcache.c
index 44e5652b2664..aac41adf4743 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -119,6 +119,7 @@ struct dentry_stat_t dentry_stat = {
 
 static DEFINE_PER_CPU(long, nr_dentry);
 static DEFINE_PER_CPU(long, nr_dentry_unused);
+static DEFINE_PER_CPU(long, nr_dentry_negative);
 
 #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
 
@@ -152,11 +153,22 @@ static long get_nr_dentry_unused(void)
 	return sum < 0 ? 0 : sum;
 }
 
+static long get_nr_dentry_negative(void)
+{
+	int i;
+	long sum = 0;
+
+	for_each_possible_cpu(i)
+		sum += per_cpu(nr_dentry_negative, i);
+	return sum < 0 ? 0 : sum;
+}
+
 int proc_nr_dentry(struct ctl_table *table, int write, void __user *buffer,
 		   size_t *lenp, loff_t *ppos)
 {
 	dentry_stat.nr_dentry = get_nr_dentry();
 	dentry_stat.nr_unused = get_nr_dentry_unused();
+	dentry_stat.nr_negative = get_nr_dentry_negative();
 	return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
 }
 #endif
@@ -317,6 +329,8 @@ static inline void __d_clear_type_and_inode(struct dentry *dentry)
 	flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU);
 	WRITE_ONCE(dentry->d_flags, flags);
 	dentry->d_inode = NULL;
+	if (dentry->d_flags & DCACHE_LRU_LIST)
+		this_cpu_inc(nr_dentry_negative);
 }
 
 static void dentry_free(struct dentry *dentry)
@@ -371,6 +385,11 @@ static void dentry_unlink_inode(struct dentry * dentry)
  * The per-cpu "nr_dentry_unused" counters are updated with
  * the DCACHE_LRU_LIST bit.
  *
+ * The per-cpu "nr_dentry_negative" counters are only updated
+ * when deleted from or added to the per-superblock LRU list, not
+ * from/to the shrink list. That is to avoid an unneeded dec/inc
+ * pair when moving from LRU to shrink list in select_collect().
+ *
  * These helper functions make sure we always follow the
  * rules. d_lock must be held by the caller.
  */
@@ -380,6 +399,8 @@ static void d_lru_add(struct dentry *dentry)
 	D_FLAG_VERIFY(dentry, 0);
 	dentry->d_flags |= DCACHE_LRU_LIST;
 	this_cpu_inc(nr_dentry_unused);
+	if (d_is_negative(dentry))
+		this_cpu_inc(nr_dentry_negative);
 	WARN_ON_ONCE(!list_lru_add(&dentry->d_sb->s_dentry_lru, &dentry->d_lru));
 }
 
@@ -388,6 +409,8 @@ static void d_lru_del(struct dentry *dentry)
 	D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
 	dentry->d_flags &= ~DCACHE_LRU_LIST;
 	this_cpu_dec(nr_dentry_unused);
+	if (d_is_negative(dentry))
+		this_cpu_dec(nr_dentry_negative);
 	WARN_ON_ONCE(!list_lru_del(&dentry->d_sb->s_dentry_lru, &dentry->d_lru));
 }
 
@@ -418,6 +441,8 @@ static void d_lru_isolate(struct list_lru_one *lru, struct dentry *dentry)
 	D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
 	dentry->d_flags &= ~DCACHE_LRU_LIST;
 	this_cpu_dec(nr_dentry_unused);
+	if (d_is_negative(dentry))
+		this_cpu_dec(nr_dentry_negative);
 	list_lru_isolate(lru, &dentry->d_lru);
 }
 
@@ -426,6 +451,8 @@ static void d_lru_shrink_move(struct list_lru_one *lru, struct dentry *dentry,
 {
 	D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
 	dentry->d_flags |= DCACHE_SHRINK_LIST;
+	if (d_is_negative(dentry))
+		this_cpu_dec(nr_dentry_negative);
 	list_lru_isolate_move(lru, &dentry->d_lru, list);
 }
 
@@ -1816,6 +1843,11 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
 	WARN_ON(d_in_lookup(dentry));
 
 	spin_lock(&dentry->d_lock);
+	/*
+	 * Decrement negative dentry count if it was in the LRU list.
+	 */
+	if (dentry->d_flags & DCACHE_LRU_LIST)
+		this_cpu_dec(nr_dentry_negative);
 	hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
 	raw_write_seqcount_begin(&dentry->d_seq);
 	__d_set_inode_and_type(dentry, inode, add_flags);
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index ef4b70f64f33..60996e64c579 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -62,9 +62,10 @@ extern const struct qstr slash_name;
 struct dentry_stat_t {
 	long nr_dentry;
 	long nr_unused;
-	long age_limit;          /* age in seconds */
-	long want_pages;         /* pages requested by system */
-	long dummy[2];
+	long age_limit;		/* age in seconds */
+	long want_pages;	/* pages requested by system */
+	long nr_negative;	/* # of unused negative dentries */
+	long dummy;		/* Reserved for future use */
 };
 extern struct dentry_stat_t dentry_stat;
 
-- 
cgit v1.2.3-71-gd317


From 15efb47dc560849d0c07db96fdad5121f2cf736e Mon Sep 17 00:00:00 2001
From: Vincent Guittot <vincent.guittot@linaro.org>
Date: Wed, 30 Jan 2019 18:26:02 +0100
Subject: PM-runtime: Fix deadlock with ktime_get()

A deadlock has been seen when swicthing clocksources which use
PM-runtime.  The call path is:

change_clocksource
    ...
    write_seqcount_begin
    ...
    timekeeping_update
        ...
        sh_cmt_clocksource_enable
            ...
            rpm_resume
                pm_runtime_mark_last_busy
                    ktime_get
                        do
                            read_seqcount_begin
                        while read_seqcount_retry
    ....
    write_seqcount_end

Although we should be safe because we haven't yet changed the
clocksource at that time, we can't do that because of seqcount
protection.

Use ktime_get_mono_fast_ns() instead which is lock safe for such
cases.

With ktime_get_mono_fast_ns, the timestamp is not guaranteed to be
monotonic across an update and as a result can goes backward.
According to update_fast_timekeeper() description: "In the worst
case, this can result is a slightly wrong timestamp (a few
nanoseconds)". For PM-runtime autosuspend, this means only that
the suspend decision may be slightly suboptimal.

Fixes: 8234f6734c5d ("PM-runtime: Switch autosuspend over to using hrtimers")
Reported-by: Biju Das <biju.das@bp.renesas.com>
Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/runtime.c | 10 +++++-----
 include/linux/pm_runtime.h   |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 457be03b744d..0ea2139c50d8 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -130,7 +130,7 @@ u64 pm_runtime_autosuspend_expiration(struct device *dev)
 {
 	int autosuspend_delay;
 	u64 last_busy, expires = 0;
-	u64 now = ktime_to_ns(ktime_get());
+	u64 now = ktime_get_mono_fast_ns();
 
 	if (!dev->power.use_autosuspend)
 		goto out;
@@ -909,7 +909,7 @@ static enum hrtimer_restart  pm_suspend_timer_fn(struct hrtimer *timer)
 	 * If 'expires' is after the current time, we've been called
 	 * too early.
 	 */
-	if (expires > 0 && expires < ktime_to_ns(ktime_get())) {
+	if (expires > 0 && expires < ktime_get_mono_fast_ns()) {
 		dev->power.timer_expires = 0;
 		rpm_suspend(dev, dev->power.timer_autosuspends ?
 		    (RPM_ASYNC | RPM_AUTO) : RPM_ASYNC);
@@ -928,7 +928,7 @@ static enum hrtimer_restart  pm_suspend_timer_fn(struct hrtimer *timer)
 int pm_schedule_suspend(struct device *dev, unsigned int delay)
 {
 	unsigned long flags;
-	ktime_t expires;
+	u64 expires;
 	int retval;
 
 	spin_lock_irqsave(&dev->power.lock, flags);
@@ -945,8 +945,8 @@ int pm_schedule_suspend(struct device *dev, unsigned int delay)
 	/* Other scheduled or pending requests need to be canceled. */
 	pm_runtime_cancel_pending(dev);
 
-	expires = ktime_add(ktime_get(), ms_to_ktime(delay));
-	dev->power.timer_expires = ktime_to_ns(expires);
+	expires = ktime_get_mono_fast_ns() + (u64)delay * NSEC_PER_MSEC;
+	dev->power.timer_expires = expires;
 	dev->power.timer_autosuspends = 0;
 	hrtimer_start(&dev->power.suspend_timer, expires, HRTIMER_MODE_ABS);
 
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 54af4eef169f..fed5be706bc9 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -105,7 +105,7 @@ static inline bool pm_runtime_callbacks_present(struct device *dev)
 
 static inline void pm_runtime_mark_last_busy(struct device *dev)
 {
-	WRITE_ONCE(dev->power.last_busy, ktime_to_ns(ktime_get()));
+	WRITE_ONCE(dev->power.last_busy, ktime_get_mono_fast_ns());
 }
 
 static inline bool pm_runtime_is_irq_safe(struct device *dev)
-- 
cgit v1.2.3-71-gd317


From 9a6d5488002fdca7134a0e59b0ae252f61042810 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 30 Jan 2019 08:41:40 -0700
Subject: ide: ensure atapi sense request aren't preempted

There's an issue with how sense requests are handled in IDE. If ide-cd
encounters an error, it queues a sense request. With how IDE request
handling is done, this is the next request we need to handle. But it's
impossible to guarantee this, as another request could come in between
the sense being queued, and ->queue_rq() being run and handling it. If
that request ALSO fails, then we attempt to doubly queue the single
sense request we have.

Since we only support one active request at the time, defer request
processing when a sense request is queued.

Fixes: 600335205b8d "ide: convert to blk-mq"
Reported-by: He Zhe <zhe.he@windriver.com>
Tested-by: He Zhe <zhe.he@windriver.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/ide/ide-atapi.c |  9 +++++++-
 drivers/ide/ide-io.c    | 61 +++++++++++++++++++++++++------------------------
 drivers/ide/ide-park.c  |  2 ++
 drivers/ide/ide-probe.c | 23 +++++++++++++------
 include/linux/ide.h     |  2 ++
 5 files changed, 59 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index da58020a144e..33a28cde126c 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -235,21 +235,28 @@ EXPORT_SYMBOL_GPL(ide_prep_sense);
 
 int ide_queue_sense_rq(ide_drive_t *drive, void *special)
 {
-	struct request *sense_rq = drive->sense_rq;
+	ide_hwif_t *hwif = drive->hwif;
+	struct request *sense_rq;
+	unsigned long flags;
+
+	spin_lock_irqsave(&hwif->lock, flags);
 
 	/* deferred failure from ide_prep_sense() */
 	if (!drive->sense_rq_armed) {
 		printk(KERN_WARNING PFX "%s: error queuing a sense request\n",
 		       drive->name);
+		spin_unlock_irqrestore(&hwif->lock, flags);
 		return -ENOMEM;
 	}
 
+	sense_rq = drive->sense_rq;
 	ide_req(sense_rq)->special = special;
 	drive->sense_rq_armed = false;
 
 	drive->hwif->rq = NULL;
 
 	ide_insert_request_head(drive, sense_rq);
+	spin_unlock_irqrestore(&hwif->lock, flags);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(ide_queue_sense_rq);
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 8445b484ae69..b137f27a34d5 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -68,8 +68,10 @@ int ide_end_rq(ide_drive_t *drive, struct request *rq, blk_status_t error,
 	}
 
 	if (!blk_update_request(rq, error, nr_bytes)) {
-		if (rq == drive->sense_rq)
+		if (rq == drive->sense_rq) {
 			drive->sense_rq = NULL;
+			drive->sense_rq_active = false;
+		}
 
 		__blk_mq_end_request(rq, error);
 		return 0;
@@ -451,16 +453,11 @@ void ide_requeue_and_plug(ide_drive_t *drive, struct request *rq)
 		blk_mq_delay_run_hw_queue(q->queue_hw_ctx[0], 3);
 }
 
-/*
- * Issue a new request to a device.
- */
-blk_status_t ide_queue_rq(struct blk_mq_hw_ctx *hctx,
-			  const struct blk_mq_queue_data *bd)
+blk_status_t ide_issue_rq(ide_drive_t *drive, struct request *rq,
+			  bool local_requeue)
 {
-	ide_drive_t	*drive = hctx->queue->queuedata;
-	ide_hwif_t	*hwif = drive->hwif;
+	ide_hwif_t *hwif = drive->hwif;
 	struct ide_host *host = hwif->host;
-	struct request	*rq = bd->rq;
 	ide_startstop_t	startstop;
 
 	if (!blk_rq_is_passthrough(rq) && !(rq->rq_flags & RQF_DONTPREP)) {
@@ -474,8 +471,6 @@ blk_status_t ide_queue_rq(struct blk_mq_hw_ctx *hctx,
 	if (ide_lock_host(host, hwif))
 		return BLK_STS_DEV_RESOURCE;
 
-	blk_mq_start_request(rq);
-
 	spin_lock_irq(&hwif->lock);
 
 	if (!ide_lock_port(hwif)) {
@@ -510,18 +505,6 @@ repeat:
 		hwif->cur_dev = drive;
 		drive->dev_flags &= ~(IDE_DFLAG_SLEEPING | IDE_DFLAG_PARKED);
 
-		/*
-		 * we know that the queue isn't empty, but this can happen
-		 * if ->prep_rq() decides to kill a request
-		 */
-		if (!rq) {
-			rq = bd->rq;
-			if (!rq) {
-				ide_unlock_port(hwif);
-				goto out;
-			}
-		}
-
 		/*
 		 * Sanity: don't accept a request that isn't a PM request
 		 * if we are currently power managed. This is very important as
@@ -560,9 +543,12 @@ repeat:
 		}
 	} else {
 plug_device:
+		if (local_requeue)
+			list_add(&rq->queuelist, &drive->rq_list);
 		spin_unlock_irq(&hwif->lock);
 		ide_unlock_host(host);
-		ide_requeue_and_plug(drive, rq);
+		if (!local_requeue)
+			ide_requeue_and_plug(drive, rq);
 		return BLK_STS_OK;
 	}
 
@@ -573,6 +559,26 @@ out:
 	return BLK_STS_OK;
 }
 
+/*
+ * Issue a new request to a device.
+ */
+blk_status_t ide_queue_rq(struct blk_mq_hw_ctx *hctx,
+			  const struct blk_mq_queue_data *bd)
+{
+	ide_drive_t *drive = hctx->queue->queuedata;
+	ide_hwif_t *hwif = drive->hwif;
+
+	spin_lock_irq(&hwif->lock);
+	if (drive->sense_rq_active) {
+		spin_unlock_irq(&hwif->lock);
+		return BLK_STS_DEV_RESOURCE;
+	}
+	spin_unlock_irq(&hwif->lock);
+
+	blk_mq_start_request(bd->rq);
+	return ide_issue_rq(drive, bd->rq, false);
+}
+
 static int drive_is_ready(ide_drive_t *drive)
 {
 	ide_hwif_t *hwif = drive->hwif;
@@ -893,13 +899,8 @@ EXPORT_SYMBOL_GPL(ide_pad_transfer);
 
 void ide_insert_request_head(ide_drive_t *drive, struct request *rq)
 {
-	ide_hwif_t *hwif = drive->hwif;
-	unsigned long flags;
-
-	spin_lock_irqsave(&hwif->lock, flags);
+	drive->sense_rq_active = true;
 	list_add_tail(&rq->queuelist, &drive->rq_list);
-	spin_unlock_irqrestore(&hwif->lock, flags);
-
 	kblockd_schedule_work(&drive->rq_work);
 }
 EXPORT_SYMBOL_GPL(ide_insert_request_head);
diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c
index 102aa3bc3e7f..8af7af6001eb 100644
--- a/drivers/ide/ide-park.c
+++ b/drivers/ide/ide-park.c
@@ -54,7 +54,9 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
 	scsi_req(rq)->cmd[0] = REQ_UNPARK_HEADS;
 	scsi_req(rq)->cmd_len = 1;
 	ide_req(rq)->type = ATA_PRIV_MISC;
+	spin_lock_irq(&hwif->lock);
 	ide_insert_request_head(drive, rq);
+	spin_unlock_irq(&hwif->lock);
 
 out:
 	return;
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 63627be0811a..5aeaca24a28f 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -1159,18 +1159,27 @@ static void drive_rq_insert_work(struct work_struct *work)
 	ide_drive_t *drive = container_of(work, ide_drive_t, rq_work);
 	ide_hwif_t *hwif = drive->hwif;
 	struct request *rq;
+	blk_status_t ret;
 	LIST_HEAD(list);
 
-	spin_lock_irq(&hwif->lock);
-	if (!list_empty(&drive->rq_list))
-		list_splice_init(&drive->rq_list, &list);
-	spin_unlock_irq(&hwif->lock);
+	blk_mq_quiesce_queue(drive->queue);
 
-	while (!list_empty(&list)) {
-		rq = list_first_entry(&list, struct request, queuelist);
+	ret = BLK_STS_OK;
+	spin_lock_irq(&hwif->lock);
+	while (!list_empty(&drive->rq_list)) {
+		rq = list_first_entry(&drive->rq_list, struct request, queuelist);
 		list_del_init(&rq->queuelist);
-		blk_execute_rq_nowait(drive->queue, rq->rq_disk, rq, true, NULL);
+
+		spin_unlock_irq(&hwif->lock);
+		ret = ide_issue_rq(drive, rq, true);
+		spin_lock_irq(&hwif->lock);
 	}
+	spin_unlock_irq(&hwif->lock);
+
+	blk_mq_unquiesce_queue(drive->queue);
+
+	if (ret != BLK_STS_OK)
+		kblockd_schedule_work(&drive->rq_work);
 }
 
 static const u8 ide_hwif_to_major[] =
diff --git a/include/linux/ide.h b/include/linux/ide.h
index e7d29ae633cd..971cf76a78a0 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -615,6 +615,7 @@ struct ide_drive_s {
 
 	/* current sense rq and buffer */
 	bool sense_rq_armed;
+	bool sense_rq_active;
 	struct request *sense_rq;
 	struct request_sense sense_data;
 
@@ -1219,6 +1220,7 @@ extern void ide_stall_queue(ide_drive_t *drive, unsigned long timeout);
 extern void ide_timer_expiry(struct timer_list *t);
 extern irqreturn_t ide_intr(int irq, void *dev_id);
 extern blk_status_t ide_queue_rq(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data *);
+extern blk_status_t ide_issue_rq(ide_drive_t *, struct request *, bool);
 extern void ide_requeue_and_plug(ide_drive_t *drive, struct request *rq);
 
 void ide_init_disk(struct gendisk *, ide_drive_t *);
-- 
cgit v1.2.3-71-gd317


From 9bcdeb51bd7d2ae9fe65ea4d60643d2aeef5bfe3 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Fri, 1 Feb 2019 14:20:31 -0800
Subject: oom, oom_reaper: do not enqueue same task twice

Arkadiusz reported that enabling memcg's group oom killing causes
strange memcg statistics where there is no task in a memcg despite the
number of tasks in that memcg is not 0.  It turned out that there is a
bug in wake_oom_reaper() which allows enqueuing same task twice which
makes impossible to decrease the number of tasks in that memcg due to a
refcount leak.

This bug existed since the OOM reaper became invokable from
task_will_free_mem(current) path in out_of_memory() in Linux 4.7,

  T1@P1     |T2@P1     |T3@P1     |OOM reaper
  ----------+----------+----------+------------
                                   # Processing an OOM victim in a different memcg domain.
                        try_charge()
                          mem_cgroup_out_of_memory()
                            mutex_lock(&oom_lock)
             try_charge()
               mem_cgroup_out_of_memory()
                 mutex_lock(&oom_lock)
  try_charge()
    mem_cgroup_out_of_memory()
      mutex_lock(&oom_lock)
                            out_of_memory()
                              oom_kill_process(P1)
                                do_send_sig_info(SIGKILL, @P1)
                                mark_oom_victim(T1@P1)
                                wake_oom_reaper(T1@P1) # T1@P1 is enqueued.
                            mutex_unlock(&oom_lock)
                 out_of_memory()
                   mark_oom_victim(T2@P1)
                   wake_oom_reaper(T2@P1) # T2@P1 is enqueued.
                 mutex_unlock(&oom_lock)
      out_of_memory()
        mark_oom_victim(T1@P1)
        wake_oom_reaper(T1@P1) # T1@P1 is enqueued again due to oom_reaper_list == T2@P1 && T1@P1->oom_reaper_list == NULL.
      mutex_unlock(&oom_lock)
                                   # Completed processing an OOM victim in a different memcg domain.
                                   spin_lock(&oom_reaper_lock)
                                   # T1P1 is dequeued.
                                   spin_unlock(&oom_reaper_lock)

but memcg's group oom killing made it easier to trigger this bug by
calling wake_oom_reaper() on the same task from one out_of_memory()
request.

Fix this bug using an approach used by commit 855b018325737f76 ("oom,
oom_reaper: disable oom_reaper for oom_kill_allocating_task").  As a
side effect of this patch, this patch also avoids enqueuing multiple
threads sharing memory via task_will_free_mem(current) path.

Link: http://lkml.kernel.org/r/e865a044-2c10-9858-f4ef-254bc71d6cc2@i-love.sakura.ne.jp
Link: http://lkml.kernel.org/r/5ee34fc6-1485-34f8-8790-903ddabaa809@i-love.sakura.ne.jp
Fixes: af8e15cc85a25315 ("oom, oom_reaper: do not enqueue task if it is on the oom_reaper_list head")
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Reported-by: Arkadiusz Miskiewicz <arekm@maven.pl>
Tested-by: Arkadiusz Miskiewicz <arekm@maven.pl>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Roman Gushchin <guro@fb.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Aleksa Sarai <asarai@suse.de>
Cc: Jay Kamat <jgkamat@fb.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched/coredump.h | 1 +
 mm/oom_kill.c                  | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h
index ec912d01126f..ecdc6542070f 100644
--- a/include/linux/sched/coredump.h
+++ b/include/linux/sched/coredump.h
@@ -71,6 +71,7 @@ static inline int get_dumpable(struct mm_struct *mm)
 #define MMF_HUGE_ZERO_PAGE	23      /* mm has ever used the global huge zero page */
 #define MMF_DISABLE_THP		24	/* disable THP for all VMAs */
 #define MMF_OOM_VICTIM		25	/* mm is the oom victim */
+#define MMF_OOM_REAP_QUEUED	26	/* mm was queued for oom_reaper */
 #define MMF_DISABLE_THP_MASK	(1 << MMF_DISABLE_THP)
 
 #define MMF_INIT_MASK		(MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index f0e8cd9edb1a..059e617a1847 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -647,8 +647,8 @@ static int oom_reaper(void *unused)
 
 static void wake_oom_reaper(struct task_struct *tsk)
 {
-	/* tsk is already queued? */
-	if (tsk == oom_reaper_list || tsk->oom_reaper_list)
+	/* mm is already queued? */
+	if (test_and_set_bit(MMF_OOM_REAP_QUEUED, &tsk->signal->oom_mm->flags))
 		return;
 
 	get_task_struct(tsk);
-- 
cgit v1.2.3-71-gd317


From b13bc35193d9e7a8c050a24928ca5c9e7c9a009b Mon Sep 17 00:00:00 2001
From: Qian Cai <cai@lca.pw>
Date: Fri, 1 Feb 2019 14:20:51 -0800
Subject: mm/hotplug: invalid PFNs from pfn_to_online_page()

On an arm64 ThunderX2 server, the first kmemleak scan would crash [1]
with CONFIG_DEBUG_VM_PGFLAGS=y due to page_to_nid() found a pfn that is
not directly mapped (MEMBLOCK_NOMAP).  Hence, the page->flags is
uninitialized.

This is due to the commit 9f1eb38e0e11 ("mm, kmemleak: little
optimization while scanning") starts to use pfn_to_online_page() instead
of pfn_valid().  However, in the CONFIG_MEMORY_HOTPLUG=y case,
pfn_to_online_page() does not call memblock_is_map_memory() while
pfn_valid() does.

Historically, the commit 68709f45385a ("arm64: only consider memblocks
with NOMAP cleared for linear mapping") causes pages marked as nomap
being no long reassigned to the new zone in memmap_init_zone() by
calling __init_single_page().

Since the commit 2d070eab2e82 ("mm: consider zone which is not fully
populated to have holes") introduced pfn_to_online_page() and was
designed to return a valid pfn only, but it is clearly broken on arm64.

Therefore, let pfn_to_online_page() call pfn_valid_within(), so it can
handle nomap thanks to the commit f52bb98f5ade ("arm64: mm: always
enable CONFIG_HOLES_IN_ZONE"), while it will be optimized away on
architectures where have no HOLES_IN_ZONE.

[1]
  Unable to handle kernel NULL pointer dereference at virtual address 0000000000000006
  Mem abort info:
    ESR = 0x96000005
    Exception class = DABT (current EL), IL = 32 bits
    SET = 0, FnV = 0
    EA = 0, S1PTW = 0
  Data abort info:
    ISV = 0, ISS = 0x00000005
    CM = 0, WnR = 0
  Internal error: Oops: 96000005 [#1] SMP
  CPU: 60 PID: 1408 Comm: kmemleak Not tainted 5.0.0-rc2+ #8
  pstate: 60400009 (nZCv daif +PAN -UAO)
  pc : page_mapping+0x24/0x144
  lr : __dump_page+0x34/0x3dc
  sp : ffff00003a5cfd10
  x29: ffff00003a5cfd10 x28: 000000000000802f
  x27: 0000000000000000 x26: 0000000000277d00
  x25: ffff000010791f56 x24: ffff7fe000000000
  x23: ffff000010772f8b x22: ffff00001125f670
  x21: ffff000011311000 x20: ffff000010772f8b
  x19: fffffffffffffffe x18: 0000000000000000
  x17: 0000000000000000 x16: 0000000000000000
  x15: 0000000000000000 x14: ffff802698b19600
  x13: ffff802698b1a200 x12: ffff802698b16f00
  x11: ffff802698b1a400 x10: 0000000000001400
  x9 : 0000000000000001 x8 : ffff00001121a000
  x7 : 0000000000000000 x6 : ffff0000102c53b8
  x5 : 0000000000000000 x4 : 0000000000000003
  x3 : 0000000000000100 x2 : 0000000000000000
  x1 : ffff000010772f8b x0 : ffffffffffffffff
  Process kmemleak (pid: 1408, stack limit = 0x(____ptrval____))
  Call trace:
   page_mapping+0x24/0x144
   __dump_page+0x34/0x3dc
   dump_page+0x28/0x4c
   kmemleak_scan+0x4ac/0x680
   kmemleak_scan_thread+0xb4/0xdc
   kthread+0x12c/0x13c
   ret_from_fork+0x10/0x18
  Code: d503201f f9400660 36000040 d1000413 (f9400661)
  ---[ end trace 4d4bd7f573490c8e ]---
  Kernel panic - not syncing: Fatal exception
  SMP: stopping secondary CPUs
  Kernel Offset: disabled
  CPU features: 0x002,20000c38
  Memory Limit: none
  ---[ end Kernel panic - not syncing: Fatal exception ]---

Link: http://lkml.kernel.org/r/20190122132916.28360-1-cai@lca.pw
Fixes: 9f1eb38e0e11 ("mm, kmemleak: little optimization while scanning")
Signed-off-by: Qian Cai <cai@lca.pw>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memory_hotplug.h | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 07da5c6c5ba0..368267c1b71b 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -21,14 +21,16 @@ struct vmem_altmap;
  * walkers which rely on the fully initialized page->flags and others
  * should use this rather than pfn_valid && pfn_to_page
  */
-#define pfn_to_online_page(pfn)				\
-({							\
-	struct page *___page = NULL;			\
-	unsigned long ___nr = pfn_to_section_nr(pfn);	\
-							\
-	if (___nr < NR_MEM_SECTIONS && online_section_nr(___nr))\
-		___page = pfn_to_page(pfn);		\
-	___page;					\
+#define pfn_to_online_page(pfn)					   \
+({								   \
+	struct page *___page = NULL;				   \
+	unsigned long ___pfn = pfn;				   \
+	unsigned long ___nr = pfn_to_section_nr(___pfn);	   \
+								   \
+	if (___nr < NR_MEM_SECTIONS && online_section_nr(___nr) && \
+	    pfn_valid_within(___pfn))				   \
+		___page = pfn_to_page(___pfn);			   \
+	___page;						   \
 })
 
 /*
-- 
cgit v1.2.3-71-gd317


From e6d429313ea5c776d2e76b4494df69102e6b7115 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Tue, 29 Jan 2019 17:44:36 -0500
Subject: x86/resctrl: Avoid confusion over the new X86_RESCTRL config

"Resource Control" is a very broad term for this CPU feature, and a term
that is also associated with containers, cgroups etc. This can easily
cause confusion.

Make the user prompt more specific. Match the config symbol name.

 [ bp: In the future, the corresponding ARM arch-specific code will be
   under ARM_CPU_RESCTRL and the arch-agnostic bits will be carved out
   under the CPU_RESCTRL umbrella symbol. ]

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Babu Moger <Babu.Moger@amd.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Morse <james.morse@arm.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: linux-doc@vger.kernel.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Pu Wen <puwen@hygon.cn>
Cc: Reinette Chatre <reinette.chatre@intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: x86-ml <x86@kernel.org>
Link: https://lkml.kernel.org/r/20190130195621.GA30653@cmpxchg.org
---
 Documentation/x86/resctrl_ui.txt     | 2 +-
 arch/x86/Kconfig                     | 6 +++---
 arch/x86/include/asm/resctrl_sched.h | 4 ++--
 arch/x86/kernel/cpu/Makefile         | 2 +-
 arch/x86/kernel/cpu/resctrl/Makefile | 4 ++--
 include/linux/sched.h                | 2 +-
 6 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/x86/resctrl_ui.txt b/Documentation/x86/resctrl_ui.txt
index e8e8d14d3c4e..c1f95b59e14d 100644
--- a/Documentation/x86/resctrl_ui.txt
+++ b/Documentation/x86/resctrl_ui.txt
@@ -9,7 +9,7 @@ Fenghua Yu <fenghua.yu@intel.com>
 Tony Luck <tony.luck@intel.com>
 Vikas Shivappa <vikas.shivappa@intel.com>
 
-This feature is enabled by the CONFIG_X86_RESCTRL and the x86 /proc/cpuinfo
+This feature is enabled by the CONFIG_X86_CPU_RESCTRL and the x86 /proc/cpuinfo
 flag bits:
 RDT (Resource Director Technology) Allocation - "rdt_a"
 CAT (Cache Allocation Technology) - "cat_l3", "cat_l2"
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 26387c7bf305..68261430fe6e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -446,12 +446,12 @@ config RETPOLINE
 	  branches. Requires a compiler with -mindirect-branch=thunk-extern
 	  support for full protection. The kernel may run slower.
 
-config X86_RESCTRL
-	bool "Resource Control support"
+config X86_CPU_RESCTRL
+	bool "x86 CPU resource control support"
 	depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD)
 	select KERNFS
 	help
-	  Enable Resource Control support.
+	  Enable x86 CPU resource control support.
 
 	  Provide support for the allocation and monitoring of system resources
 	  usage by the CPU.
diff --git a/arch/x86/include/asm/resctrl_sched.h b/arch/x86/include/asm/resctrl_sched.h
index 40ebddde6ac2..f6b7fe2833cc 100644
--- a/arch/x86/include/asm/resctrl_sched.h
+++ b/arch/x86/include/asm/resctrl_sched.h
@@ -2,7 +2,7 @@
 #ifndef _ASM_X86_RESCTRL_SCHED_H
 #define _ASM_X86_RESCTRL_SCHED_H
 
-#ifdef CONFIG_X86_RESCTRL
+#ifdef CONFIG_X86_CPU_RESCTRL
 
 #include <linux/sched.h>
 #include <linux/jump_label.h>
@@ -88,6 +88,6 @@ static inline void resctrl_sched_in(void)
 
 static inline void resctrl_sched_in(void) {}
 
-#endif /* CONFIG_X86_RESCTRL */
+#endif /* CONFIG_X86_CPU_RESCTRL */
 
 #endif /* _ASM_X86_RESCTRL_SCHED_H */
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index b6fa0869f7aa..cfd24f9f7614 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -39,7 +39,7 @@ obj-$(CONFIG_CPU_SUP_UMC_32)		+= umc.o
 obj-$(CONFIG_X86_MCE)			+= mce/
 obj-$(CONFIG_MTRR)			+= mtrr/
 obj-$(CONFIG_MICROCODE)			+= microcode/
-obj-$(CONFIG_X86_RESCTRL)		+= resctrl/
+obj-$(CONFIG_X86_CPU_RESCTRL)		+= resctrl/
 
 obj-$(CONFIG_X86_LOCAL_APIC)		+= perfctr-watchdog.o
 
diff --git a/arch/x86/kernel/cpu/resctrl/Makefile b/arch/x86/kernel/cpu/resctrl/Makefile
index 1cabe6fd8e11..4a06c37b9cf1 100644
--- a/arch/x86/kernel/cpu/resctrl/Makefile
+++ b/arch/x86/kernel/cpu/resctrl/Makefile
@@ -1,4 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_X86_RESCTRL)	+= core.o rdtgroup.o monitor.o
-obj-$(CONFIG_X86_RESCTRL)	+= ctrlmondata.o pseudo_lock.o
+obj-$(CONFIG_X86_CPU_RESCTRL)	+= core.o rdtgroup.o monitor.o
+obj-$(CONFIG_X86_CPU_RESCTRL)	+= ctrlmondata.o pseudo_lock.o
 CFLAGS_pseudo_lock.o = -I$(src)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 224666226e87..8c328b14c424 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -995,7 +995,7 @@ struct task_struct {
 	/* cg_list protected by css_set_lock and tsk->alloc_lock: */
 	struct list_head		cg_list;
 #endif
-#ifdef CONFIG_X86_RESCTRL
+#ifdef CONFIG_X86_CPU_RESCTRL
 	u32				closid;
 	u32				rmid;
 #endif
-- 
cgit v1.2.3-71-gd317