From 36d8593ec74dc04d3bd7c1c897a7b7cfbd0b0dc6 Mon Sep 17 00:00:00 2001 From: Russell King - ARM Linux Date: Sat, 19 Feb 2011 15:34:50 +0000 Subject: Make clocksource name const As nothing should be writing to the clocksource name string, make the clocksource name pointer const. Build-tested on ARM Versatile Express. Signed-off-by: Russell King Signed-off-by: John Stultz --- include/linux/clocksource.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index c37b21ad5a3b..94c1f38e922a 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -161,7 +161,7 @@ struct clocksource { /* * First part of structure is read mostly */ - char *name; + const char *name; struct list_head list; int rating; cycle_t (*read)(struct clocksource *cs); -- cgit v1.2.3-71-gd317 From 09e10d7fe509408d15818db6a0299f563668a7ba Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 16 Mar 2011 22:57:47 +0000 Subject: ASoC: Add WM8958 VSS support With appropriate firmware the WM8958 can support Virtual Surround Sound or VSS, widening the stereo audio image for improved user experience. Enable support for this mode of operation when the appropriate firmware can be loaded at runtime. Signed-off-by: Mark Brown Acked-by: Liam Girdwood --- include/linux/mfd/wm8994/pdata.h | 34 ++++ sound/soc/codecs/wm8958-dsp2.c | 363 ++++++++++++++++++++++++++++++++++++++- sound/soc/codecs/wm8994.c | 2 + sound/soc/codecs/wm8994.h | 14 ++ 4 files changed, 405 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/wm8994/pdata.h b/include/linux/mfd/wm8994/pdata.h index 466b1c777aff..c72174aff1fe 100644 --- a/include/linux/mfd/wm8994/pdata.h +++ b/include/linux/mfd/wm8994/pdata.h @@ -32,6 +32,9 @@ struct wm8994_ldo_pdata { #define WM8994_EQ_REGS 20 #define WM8958_MBC_CUTOFF_REGS 20 #define WM8958_MBC_COEFF_REGS 48 +#define WM8958_MBC_COMBINED_REGS 56 +#define WM8958_VSS_HPF_REGS 2 +#define WM8958_VSS_REGS 148 /** * DRC configurations are specified with a label and a set of register @@ -71,6 +74,31 @@ struct wm8958_mbc_cfg { const char *name; u16 cutoff_regs[WM8958_MBC_CUTOFF_REGS]; u16 coeff_regs[WM8958_MBC_COEFF_REGS]; + + /* Coefficient layout when using MBC+VSS firmware */ + u16 combined_regs[WM8958_MBC_COMBINED_REGS]; +}; + +/** + * VSS HPF configurations are specified with a label and two values to + * write. Configurations are expected to be generated using the + * multiband compressor configuration panel in WISCE - see + * http://www.wolfsonmicro.com/wisce/ + */ +struct wm8958_vss_hpf_cfg { + const char *name; + u16 regs[WM8958_VSS_HPF_REGS]; +}; + +/** + * VSS configurations are specified with a label and array of values + * to write. 
Configurations are expected to be generated using the + * multiband compressor configuration panel in WISCE - see + * http://www.wolfsonmicro.com/wisce/ + */ +struct wm8958_vss_cfg { + const char *name; + u16 regs[WM8958_VSS_REGS]; }; struct wm8994_pdata { @@ -95,6 +123,12 @@ struct wm8994_pdata { int num_mbc_cfgs; struct wm8958_mbc_cfg *mbc_cfgs; + int num_vss_cfgs; + struct wm8958_vss_cfg *vss_cfgs; + + int num_vss_hpf_cfgs; + struct wm8958_vss_hpf_cfg *vss_hpf_cfgs; + /* LINEOUT can be differential or single ended */ unsigned int lineout1_diff:1; unsigned int lineout2_diff:1; diff --git a/sound/soc/codecs/wm8958-dsp2.c b/sound/soc/codecs/wm8958-dsp2.c index 9c1cbe5b61ae..d0e257315d97 100644 --- a/sound/soc/codecs/wm8958-dsp2.c +++ b/sound/soc/codecs/wm8958-dsp2.c @@ -233,6 +233,68 @@ static void wm8958_dsp_start_mbc(struct snd_soc_codec *codec, int path) WM8958_MBC_ENA); } +static void wm8958_dsp_start_vss(struct snd_soc_codec *codec, int path) +{ + struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec); + struct wm8994_pdata *pdata = wm8994->pdata; + int i, ena; + + if (wm8994->mbc_vss) + wm8958_dsp2_fw(codec, "MBC+VSS", wm8994->mbc_vss, false); + + snd_soc_update_bits(codec, WM8958_DSP2_PROGRAM, + WM8958_DSP2_ENA, WM8958_DSP2_ENA); + + /* If we've got user supplied settings use them */ + if (pdata && pdata->num_mbc_cfgs) { + struct wm8958_mbc_cfg *cfg + = &pdata->mbc_cfgs[wm8994->mbc_cfg]; + + for (i = 0; i < ARRAY_SIZE(cfg->combined_regs); i++) + snd_soc_write(codec, i + 0x2800, + cfg->combined_regs[i]); + } + + if (pdata && pdata->num_vss_cfgs) { + struct wm8958_vss_cfg *cfg + = &pdata->vss_cfgs[wm8994->vss_cfg]; + + for (i = 0; i < ARRAY_SIZE(cfg->regs); i++) + snd_soc_write(codec, i + 0x2600, cfg->regs[i]); + } + + if (pdata && pdata->num_vss_hpf_cfgs) { + struct wm8958_vss_hpf_cfg *cfg + = &pdata->vss_hpf_cfgs[wm8994->vss_hpf_cfg]; + + for (i = 0; i < ARRAY_SIZE(cfg->regs); i++) + snd_soc_write(codec, i + 0x2400, cfg->regs[i]); + } + + /* Run the DSP */ + snd_soc_write(codec, WM8958_DSP2_EXECCONTROL, + WM8958_DSP2_RUNR); + + /* Enable the algorithms we've selected */ + ena = 0; + if (wm8994->mbc_ena[path]) + ena |= 0x8; + if (wm8994->hpf2_ena[path]) + ena |= 0x4; + if (wm8994->hpf1_ena[path]) + ena |= 0x2; + if (wm8994->vss_ena[path]) + ena |= 0x1; + + snd_soc_write(codec, 0x2201, ena); + + /* Switch the DSP into the data path */ + snd_soc_update_bits(codec, WM8958_DSP2_CONFIG, + WM8958_MBC_SEL_MASK | WM8958_MBC_ENA, + path << WM8958_MBC_SEL_SHIFT | WM8958_MBC_ENA); +} + + static void wm8958_dsp_apply(struct snd_soc_codec *codec, int path, int start) { struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec); @@ -258,7 +320,8 @@ static void wm8958_dsp_apply(struct snd_soc_codec *codec, int path, int start) } /* Do we have both an active AIF and an active algorithm? 
*/ - ena = wm8994->mbc_ena[path]; + ena = wm8994->mbc_ena[path] || wm8994->vss_ena[path] || + wm8994->hpf1_ena[path] || wm8994->hpf2_ena[path]; if (!pwr_reg) ena = 0; @@ -281,11 +344,18 @@ static void wm8958_dsp_apply(struct snd_soc_codec *codec, int path, int start) aif << WM8958_DSP2CLK_SRC_SHIFT | WM8958_DSP2CLK_ENA); - if (wm8994->mbc_ena[path]) + if (wm8994->vss_ena[path] || wm8994->hpf1_ena[path] || + wm8994->hpf2_ena[path]) + wm8958_dsp_start_vss(codec, path); + else if (wm8994->mbc_ena[path]) wm8958_dsp_start_mbc(codec, path); - dev_dbg(codec->dev, "DSP running\n"); - } else { + wm8994->dsp_active = path; + + dev_dbg(codec->dev, "DSP running in path %d\n", path); + } + + if (!start && wm8994->dsp_active == path) { /* If the DSP is already stopped then noop */ if (!(reg & WM8958_DSP2_ENA)) return; @@ -335,7 +405,8 @@ static int wm8958_dsp2_busy(struct wm8994_priv *wm8994, int aif) for (i = 0; i < ARRAY_SIZE(wm8994->mbc_ena); i++) { if (i == aif) continue; - if (wm8994->mbc_ena[i]) + if (wm8994->mbc_ena[i] || wm8994->vss_ena[i] || + wm8994->hpf1_ena[i] || wm8994->hpf2_ena[i]) return 1; } @@ -426,22 +497,239 @@ static int wm8958_mbc_put(struct snd_kcontrol *kcontrol, .get = wm8958_mbc_get, .put = wm8958_mbc_put, \ .private_value = xval } +static int wm8958_put_vss_enum(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); + struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec); + struct wm8994_pdata *pdata = wm8994->pdata; + int value = ucontrol->value.integer.value[0]; + int reg; + + /* Don't allow on the fly reconfiguration */ + reg = snd_soc_read(codec, WM8994_CLOCKING_1); + if (reg < 0 || reg & WM8958_DSP2CLK_ENA) + return -EBUSY; + + if (value >= pdata->num_vss_cfgs) + return -EINVAL; + + wm8994->vss_cfg = value; + + return 0; +} + +static int wm8958_get_vss_enum(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); + struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec); + + ucontrol->value.enumerated.item[0] = wm8994->vss_cfg; + + return 0; +} + +static int wm8958_put_vss_hpf_enum(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); + struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec); + struct wm8994_pdata *pdata = wm8994->pdata; + int value = ucontrol->value.integer.value[0]; + int reg; + + /* Don't allow on the fly reconfiguration */ + reg = snd_soc_read(codec, WM8994_CLOCKING_1); + if (reg < 0 || reg & WM8958_DSP2CLK_ENA) + return -EBUSY; + + if (value >= pdata->num_vss_hpf_cfgs) + return -EINVAL; + + wm8994->vss_hpf_cfg = value; + + return 0; +} + +static int wm8958_get_vss_hpf_enum(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); + struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec); + + ucontrol->value.enumerated.item[0] = wm8994->vss_hpf_cfg; + + return 0; +} + +static int wm8958_vss_info(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_info *uinfo) +{ + uinfo->type = SNDRV_CTL_ELEM_TYPE_BOOLEAN; + uinfo->count = 1; + uinfo->value.integer.min = 0; + uinfo->value.integer.max = 1; + return 0; +} + +static int wm8958_vss_get(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + int vss = kcontrol->private_value; + struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); + struct wm8994_priv 
*wm8994 = snd_soc_codec_get_drvdata(codec); + + ucontrol->value.integer.value[0] = wm8994->vss_ena[vss]; + + return 0; +} + +static int wm8958_vss_put(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + int vss = kcontrol->private_value; + struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); + struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec); + + if (ucontrol->value.integer.value[0] > 1) + return -EINVAL; + + if (!wm8994->mbc_vss) + return -ENODEV; + + if (wm8958_dsp2_busy(wm8994, vss)) { + dev_dbg(codec->dev, "DSP2 active on %d already\n", vss); + return -EBUSY; + } + + wm8994->vss_ena[vss] = ucontrol->value.integer.value[0]; + + wm8958_dsp_apply(codec, vss, wm8994->vss_ena[vss]); + + return 0; +} + + +#define WM8958_VSS_SWITCH(xname, xval) {\ + .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = (xname), \ + .access = SNDRV_CTL_ELEM_ACCESS_READWRITE,\ + .info = wm8958_vss_info, \ + .get = wm8958_vss_get, .put = wm8958_vss_put, \ + .private_value = xval } + +static int wm8958_hpf_info(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_info *uinfo) +{ + uinfo->type = SNDRV_CTL_ELEM_TYPE_BOOLEAN; + uinfo->count = 1; + uinfo->value.integer.min = 0; + uinfo->value.integer.max = 1; + return 0; +} + +static int wm8958_hpf_get(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + int hpf = kcontrol->private_value; + struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); + struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec); + + if (hpf < 3) + ucontrol->value.integer.value[0] = wm8994->hpf1_ena[hpf % 3]; + else + ucontrol->value.integer.value[0] = wm8994->hpf2_ena[hpf % 3]; + + return 0; +} + +static int wm8958_hpf_put(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + int hpf = kcontrol->private_value; + struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); + struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec); + + if (ucontrol->value.integer.value[0] > 1) + return -EINVAL; + + if (!wm8994->mbc_vss) + return -ENODEV; + + if (wm8958_dsp2_busy(wm8994, hpf % 3)) { + dev_dbg(codec->dev, "DSP2 active on %d already\n", hpf); + return -EBUSY; + } + + if (wm8994->eq[hpf % 3]) + return -EBUSY; + + if (hpf < 3) + wm8994->hpf1_ena[hpf % 3] = ucontrol->value.integer.value[0]; + else + wm8994->hpf2_ena[hpf % 3] = ucontrol->value.integer.value[0]; + + wm8958_dsp_apply(codec, hpf % 3, ucontrol->value.integer.value[0]); + + return 0; +} + +#define WM8958_HPF_SWITCH(xname, xval) {\ + .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = (xname), \ + .access = SNDRV_CTL_ELEM_ACCESS_READWRITE,\ + .info = wm8958_hpf_info, \ + .get = wm8958_hpf_get, .put = wm8958_hpf_put, \ + .private_value = xval } + static const struct snd_kcontrol_new wm8958_mbc_snd_controls[] = { WM8958_MBC_SWITCH("AIF1DAC1 MBC Switch", 0), WM8958_MBC_SWITCH("AIF1DAC2 MBC Switch", 1), WM8958_MBC_SWITCH("AIF2DAC MBC Switch", 2), }; -static void wm8958_mbc_loaded(const struct firmware *fw, void *context) +static const struct snd_kcontrol_new wm8958_vss_snd_controls[] = { +WM8958_VSS_SWITCH("AIF1DAC1 VSS Switch", 0), +WM8958_VSS_SWITCH("AIF1DAC2 VSS Switch", 1), +WM8958_VSS_SWITCH("AIF2DAC VSS Switch", 2), +WM8958_HPF_SWITCH("AIF1DAC1 HPF1 Switch", 0), +WM8958_HPF_SWITCH("AIF1DAC2 HPF1 Switch", 1), +WM8958_HPF_SWITCH("AIF2DAC HPF1 Switch", 2), +WM8958_HPF_SWITCH("AIF1DAC1 HPF2 Switch", 3), +WM8958_HPF_SWITCH("AIF1DAC2 HPF2 Switch", 4), +WM8958_HPF_SWITCH("AIF2DAC HPF2 Switch", 5), +}; + +static void wm8958_mbc_vss_loaded(const struct firmware *fw, 
void *context) { struct snd_soc_codec *codec = context; struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec); - if (fw && wm8958_dsp2_fw(codec, "MBC", fw, true) != 0) { + if (fw && (wm8958_dsp2_fw(codec, "MBC+VSS", fw, true) == 0)) { mutex_lock(&codec->mutex); - wm8994->mbc = fw; + wm8994->mbc_vss = fw; mutex_unlock(&codec->mutex); } + +} + +static void wm8958_mbc_loaded(const struct firmware *fw, void *context) +{ + struct snd_soc_codec *codec = context; + struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec); + + if (wm8958_dsp2_fw(codec, "MBC", fw, true) != 0) + return; + + mutex_lock(&codec->mutex); + wm8994->mbc = fw; + mutex_unlock(&codec->mutex); + + /* We can't have more than one request outstanding at once so + * we daisy chain. + */ + request_firmware_nowait(THIS_MODULE, FW_ACTION_HOTPLUG, + "wm8958_mbc_vss.wfw", codec->dev, GFP_KERNEL, + codec, wm8958_mbc_vss_loaded); } void wm8958_dsp2_init(struct snd_soc_codec *codec) @@ -454,6 +742,9 @@ void wm8958_dsp2_init(struct snd_soc_codec *codec) snd_soc_add_controls(codec, wm8958_mbc_snd_controls, ARRAY_SIZE(wm8958_mbc_snd_controls)); + snd_soc_add_controls(codec, wm8958_vss_snd_controls, + ARRAY_SIZE(wm8958_vss_snd_controls)); + /* We don't *require* firmware and don't want to delay boot */ request_firmware_nowait(THIS_MODULE, FW_ACTION_HOTPLUG, @@ -491,5 +782,61 @@ void wm8958_dsp2_init(struct snd_soc_codec *codec) "Failed to add MBC mode controls: %d\n", ret); } + if (pdata->num_vss_cfgs) { + struct snd_kcontrol_new control[] = { + SOC_ENUM_EXT("VSS Mode", wm8994->vss_enum, + wm8958_get_vss_enum, wm8958_put_vss_enum), + }; + /* We need an array of texts for the enum API */ + wm8994->vss_texts = kmalloc(sizeof(char *) + * pdata->num_vss_cfgs, GFP_KERNEL); + if (!wm8994->vss_texts) { + dev_err(wm8994->codec->dev, + "Failed to allocate %d VSS config texts\n", + pdata->num_vss_cfgs); + return; + } + + for (i = 0; i < pdata->num_vss_cfgs; i++) + wm8994->vss_texts[i] = pdata->vss_cfgs[i].name; + + wm8994->vss_enum.max = pdata->num_vss_cfgs; + wm8994->vss_enum.texts = wm8994->vss_texts; + + ret = snd_soc_add_controls(wm8994->codec, control, 1); + if (ret != 0) + dev_err(wm8994->codec->dev, + "Failed to add VSS mode controls: %d\n", ret); + } + + if (pdata->num_vss_hpf_cfgs) { + struct snd_kcontrol_new control[] = { + SOC_ENUM_EXT("VSS HPF Mode", wm8994->vss_hpf_enum, + wm8958_get_vss_hpf_enum, + wm8958_put_vss_hpf_enum), + }; + + /* We need an array of texts for the enum API */ + wm8994->vss_hpf_texts = kmalloc(sizeof(char *) + * pdata->num_vss_hpf_cfgs, GFP_KERNEL); + if (!wm8994->vss_hpf_texts) { + dev_err(wm8994->codec->dev, + "Failed to allocate %d VSS HPF config texts\n", + pdata->num_vss_hpf_cfgs); + return; + } + + for (i = 0; i < pdata->num_vss_hpf_cfgs; i++) + wm8994->vss_hpf_texts[i] = pdata->vss_hpf_cfgs[i].name; + + wm8994->vss_hpf_enum.max = pdata->num_vss_hpf_cfgs; + wm8994->vss_hpf_enum.texts = wm8994->vss_hpf_texts; + + ret = snd_soc_add_controls(wm8994->codec, control, 1); + if (ret != 0) + dev_err(wm8994->codec->dev, + "Failed to add VSS HPFmode controls: %d\n", + ret); + } } diff --git a/sound/soc/codecs/wm8994.c b/sound/soc/codecs/wm8994.c index f622ff691b41..01ef5704091e 100644 --- a/sound/soc/codecs/wm8994.c +++ b/sound/soc/codecs/wm8994.c @@ -3140,6 +3140,8 @@ static int wm8994_codec_remove(struct snd_soc_codec *codec) } if (wm8994->mbc) release_firmware(wm8994->mbc); + if (wm8994->mbc_vss) + release_firmware(wm8994->mbc_vss); kfree(wm8994->retune_mobile_texts); kfree(wm8994->drc_texts); 
kfree(wm8994); diff --git a/sound/soc/codecs/wm8994.h b/sound/soc/codecs/wm8994.h index a4bfde83065f..f337f3d50590 100644 --- a/sound/soc/codecs/wm8994.h +++ b/sound/soc/codecs/wm8994.h @@ -84,6 +84,9 @@ struct wm8994_priv { int lrclk_shared[2]; int mbc_ena[3]; + int hpf1_ena[3]; + int hpf2_ena[3]; + int vss_ena[3]; /* Platform dependant DRC configuration */ const char **drc_texts; @@ -101,6 +104,16 @@ struct wm8994_priv { const char **mbc_texts; struct soc_enum mbc_enum; + /* Platform dependant VSS configuration */ + int vss_cfg; + const char **vss_texts; + struct soc_enum vss_enum; + + /* Platform dependant VSS HPF configuration */ + int vss_hpf_cfg; + const char **vss_hpf_texts; + struct soc_enum vss_hpf_enum; + struct wm8994_micdet micdet[2]; wm8958_micdet_cb jack_cb; @@ -119,6 +132,7 @@ struct wm8994_priv { int dsp_active; const struct firmware *cur_fw; const struct firmware *mbc; + const struct firmware *mbc_vss; }; #endif -- cgit v1.2.3-71-gd317 From 312158718fe2056703b2744801165a9574560495 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 17 Mar 2011 20:23:43 +0000 Subject: ASoC: Add WM8958 enhanced EQ support DSP2 in the WM8958 can be used to support an upgraded EQ for use in demanding applications. Signed-off-by: Mark Brown Acked-by: Liam Girdwood --- include/linux/mfd/wm8994/pdata.h | 15 +++ sound/soc/codecs/wm8958-dsp2.c | 192 ++++++++++++++++++++++++++++++++++++++- sound/soc/codecs/wm8994.c | 2 + sound/soc/codecs/wm8994.h | 7 ++ 4 files changed, 213 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/wm8994/pdata.h b/include/linux/mfd/wm8994/pdata.h index c72174aff1fe..d12f8d635a81 100644 --- a/include/linux/mfd/wm8994/pdata.h +++ b/include/linux/mfd/wm8994/pdata.h @@ -35,6 +35,7 @@ struct wm8994_ldo_pdata { #define WM8958_MBC_COMBINED_REGS 56 #define WM8958_VSS_HPF_REGS 2 #define WM8958_VSS_REGS 148 +#define WM8958_ENH_EQ_REGS 32 /** * DRC configurations are specified with a label and a set of register @@ -101,6 +102,17 @@ struct wm8958_vss_cfg { u16 regs[WM8958_VSS_REGS]; }; +/** + * Enhanced EQ configurations are specified with a label and array of + * values to write. 
Configurations are expected to be generated using + * the multiband compressor configuration panel in WISCE - see + * http://www.wolfsonmicro.com/wisce/ + */ +struct wm8958_enh_eq_cfg { + const char *name; + u16 regs[WM8958_ENH_EQ_REGS]; +}; + struct wm8994_pdata { int gpio_base; @@ -129,6 +141,9 @@ struct wm8994_pdata { int num_vss_hpf_cfgs; struct wm8958_vss_hpf_cfg *vss_hpf_cfgs; + int num_enh_eq_cfgs; + struct wm8958_enh_eq_cfg *enh_eq_cfgs; + /* LINEOUT can be differential or single ended */ unsigned int lineout1_diff:1; unsigned int lineout2_diff:1; diff --git a/sound/soc/codecs/wm8958-dsp2.c b/sound/soc/codecs/wm8958-dsp2.c index d0e257315d97..74983ee2b87f 100644 --- a/sound/soc/codecs/wm8958-dsp2.c +++ b/sound/soc/codecs/wm8958-dsp2.c @@ -294,6 +294,36 @@ static void wm8958_dsp_start_vss(struct snd_soc_codec *codec, int path) path << WM8958_MBC_SEL_SHIFT | WM8958_MBC_ENA); } +static void wm8958_dsp_start_enh_eq(struct snd_soc_codec *codec, int path) +{ + struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec); + struct wm8994_pdata *pdata = wm8994->pdata; + int i; + + wm8958_dsp2_fw(codec, "ENH_EQ", wm8994->enh_eq, false); + + snd_soc_update_bits(codec, WM8958_DSP2_PROGRAM, + WM8958_DSP2_ENA, WM8958_DSP2_ENA); + + /* If we've got user supplied settings use them */ + if (pdata && pdata->num_enh_eq_cfgs) { + struct wm8958_enh_eq_cfg *cfg + = &pdata->enh_eq_cfgs[wm8994->enh_eq_cfg]; + + for (i = 0; i < ARRAY_SIZE(cfg->regs); i++) + snd_soc_write(codec, i + 0x2200, + cfg->regs[i]); + } + + /* Run the DSP */ + snd_soc_write(codec, WM8958_DSP2_EXECCONTROL, + WM8958_DSP2_RUNR); + + /* Switch the DSP into the data path */ + snd_soc_update_bits(codec, WM8958_DSP2_CONFIG, + WM8958_MBC_SEL_MASK | WM8958_MBC_ENA, + path << WM8958_MBC_SEL_SHIFT | WM8958_MBC_ENA); +} static void wm8958_dsp_apply(struct snd_soc_codec *codec, int path, int start) { @@ -321,7 +351,8 @@ static void wm8958_dsp_apply(struct snd_soc_codec *codec, int path, int start) /* Do we have both an active AIF and an active algorithm? 
*/ ena = wm8994->mbc_ena[path] || wm8994->vss_ena[path] || - wm8994->hpf1_ena[path] || wm8994->hpf2_ena[path]; + wm8994->hpf1_ena[path] || wm8994->hpf2_ena[path] || + wm8994->enh_eq_ena[path]; if (!pwr_reg) ena = 0; @@ -344,7 +375,9 @@ static void wm8958_dsp_apply(struct snd_soc_codec *codec, int path, int start) aif << WM8958_DSP2CLK_SRC_SHIFT | WM8958_DSP2CLK_ENA); - if (wm8994->vss_ena[path] || wm8994->hpf1_ena[path] || + if (wm8994->enh_eq_ena[path]) + wm8958_dsp_start_enh_eq(codec, path); + else if (wm8994->vss_ena[path] || wm8994->hpf1_ena[path] || wm8994->hpf2_ena[path]) wm8958_dsp_start_vss(codec, path); else if (wm8994->mbc_ena[path]) @@ -483,6 +516,9 @@ static int wm8958_mbc_put(struct snd_kcontrol *kcontrol, return -EBUSY; } + if (wm8994->enh_eq_ena[mbc]) + return -EBUSY; + wm8994->mbc_ena[mbc] = ucontrol->value.integer.value[0]; wm8958_dsp_apply(codec, mbc, wm8994->mbc_ena[mbc]); @@ -603,6 +639,9 @@ static int wm8958_vss_put(struct snd_kcontrol *kcontrol, return -EBUSY; } + if (wm8994->enh_eq_ena[vss]) + return -EBUSY; + wm8994->vss_ena[vss] = ucontrol->value.integer.value[0]; wm8958_dsp_apply(codec, vss, wm8994->vss_ena[vss]); @@ -661,7 +700,7 @@ static int wm8958_hpf_put(struct snd_kcontrol *kcontrol, return -EBUSY; } - if (wm8994->eq[hpf % 3]) + if (wm8994->enh_eq_ena[hpf % 3]) return -EBUSY; if (hpf < 3) @@ -681,6 +720,97 @@ static int wm8958_hpf_put(struct snd_kcontrol *kcontrol, .get = wm8958_hpf_get, .put = wm8958_hpf_put, \ .private_value = xval } +static int wm8958_put_enh_eq_enum(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); + struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec); + struct wm8994_pdata *pdata = wm8994->pdata; + int value = ucontrol->value.integer.value[0]; + int reg; + + /* Don't allow on the fly reconfiguration */ + reg = snd_soc_read(codec, WM8994_CLOCKING_1); + if (reg < 0 || reg & WM8958_DSP2CLK_ENA) + return -EBUSY; + + if (value >= pdata->num_enh_eq_cfgs) + return -EINVAL; + + wm8994->enh_eq_cfg = value; + + return 0; +} + +static int wm8958_get_enh_eq_enum(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); + struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec); + + ucontrol->value.enumerated.item[0] = wm8994->enh_eq_cfg; + + return 0; +} + +static int wm8958_enh_eq_info(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_info *uinfo) +{ + uinfo->type = SNDRV_CTL_ELEM_TYPE_BOOLEAN; + uinfo->count = 1; + uinfo->value.integer.min = 0; + uinfo->value.integer.max = 1; + return 0; +} + +static int wm8958_enh_eq_get(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + int eq = kcontrol->private_value; + struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); + struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec); + + ucontrol->value.integer.value[0] = wm8994->enh_eq_ena[eq]; + + return 0; +} + +static int wm8958_enh_eq_put(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + int eq = kcontrol->private_value; + struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); + struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec); + + if (ucontrol->value.integer.value[0] > 1) + return -EINVAL; + + if (!wm8994->enh_eq) + return -ENODEV; + + if (wm8958_dsp2_busy(wm8994, eq)) { + dev_dbg(codec->dev, "DSP2 active on %d already\n", eq); + return -EBUSY; + } + + if (wm8994->mbc_ena[eq] || wm8994->vss_ena[eq] || + 
wm8994->hpf1_ena[eq] || wm8994->hpf2_ena[eq]) + return -EBUSY; + + wm8994->enh_eq_ena[eq] = ucontrol->value.integer.value[0]; + + wm8958_dsp_apply(codec, eq, ucontrol->value.integer.value[0]); + + return 0; +} + +#define WM8958_ENH_EQ_SWITCH(xname, xval) {\ + .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = (xname), \ + .access = SNDRV_CTL_ELEM_ACCESS_READWRITE,\ + .info = wm8958_enh_eq_info, \ + .get = wm8958_enh_eq_get, .put = wm8958_enh_eq_put, \ + .private_value = xval } + static const struct snd_kcontrol_new wm8958_mbc_snd_controls[] = { WM8958_MBC_SWITCH("AIF1DAC1 MBC Switch", 0), WM8958_MBC_SWITCH("AIF1DAC2 MBC Switch", 1), @@ -699,6 +829,24 @@ WM8958_HPF_SWITCH("AIF1DAC2 HPF2 Switch", 4), WM8958_HPF_SWITCH("AIF2DAC HPF2 Switch", 5), }; +static const struct snd_kcontrol_new wm8958_enh_eq_snd_controls[] = { +WM8958_ENH_EQ_SWITCH("AIF1DAC1 Enhanced EQ Switch", 0), +WM8958_ENH_EQ_SWITCH("AIF1DAC2 Enhanced EQ Switch", 1), +WM8958_ENH_EQ_SWITCH("AIF2DAC Enhanced EQ Switch", 2), +}; + +static void wm8958_enh_eq_loaded(const struct firmware *fw, void *context) +{ + struct snd_soc_codec *codec = context; + struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec); + + if (fw && (wm8958_dsp2_fw(codec, "ENH_EQ", fw, true) == 0)) { + mutex_lock(&codec->mutex); + wm8994->enh_eq = fw; + mutex_unlock(&codec->mutex); + } +} + static void wm8958_mbc_vss_loaded(const struct firmware *fw, void *context) { struct snd_soc_codec *codec = context; @@ -710,6 +858,12 @@ static void wm8958_mbc_vss_loaded(const struct firmware *fw, void *context) mutex_unlock(&codec->mutex); } + /* We can't have more than one request outstanding at once so + * we daisy chain. + */ + request_firmware_nowait(THIS_MODULE, FW_ACTION_HOTPLUG, + "wm8958_enh_eq.wfw", codec->dev, GFP_KERNEL, + codec, wm8958_enh_eq_loaded); } static void wm8958_mbc_loaded(const struct firmware *fw, void *context) @@ -744,6 +898,8 @@ void wm8958_dsp2_init(struct snd_soc_codec *codec) ARRAY_SIZE(wm8958_mbc_snd_controls)); snd_soc_add_controls(codec, wm8958_vss_snd_controls, ARRAY_SIZE(wm8958_vss_snd_controls)); + snd_soc_add_controls(codec, wm8958_enh_eq_snd_controls, + ARRAY_SIZE(wm8958_enh_eq_snd_controls)); /* We don't *require* firmware and don't want to delay boot */ @@ -839,4 +995,34 @@ void wm8958_dsp2_init(struct snd_soc_codec *codec) "Failed to add VSS HPFmode controls: %d\n", ret); } + + if (pdata->num_enh_eq_cfgs) { + struct snd_kcontrol_new control[] = { + SOC_ENUM_EXT("Enhanced EQ Mode", wm8994->enh_eq_enum, + wm8958_get_enh_eq_enum, + wm8958_put_enh_eq_enum), + }; + + /* We need an array of texts for the enum API */ + wm8994->enh_eq_texts = kmalloc(sizeof(char *) + * pdata->num_enh_eq_cfgs, GFP_KERNEL); + if (!wm8994->enh_eq_texts) { + dev_err(wm8994->codec->dev, + "Failed to allocate %d enhanced EQ config texts\n", + pdata->num_enh_eq_cfgs); + return; + } + + for (i = 0; i < pdata->num_enh_eq_cfgs; i++) + wm8994->enh_eq_texts[i] = pdata->enh_eq_cfgs[i].name; + + wm8994->enh_eq_enum.max = pdata->num_enh_eq_cfgs; + wm8994->enh_eq_enum.texts = wm8994->enh_eq_texts; + + ret = snd_soc_add_controls(wm8994->codec, control, 1); + if (ret != 0) + dev_err(wm8994->codec->dev, + "Failed to add enhanced EQ controls: %d\n", + ret); + } } diff --git a/sound/soc/codecs/wm8994.c b/sound/soc/codecs/wm8994.c index 01ef5704091e..03ef62462bab 100644 --- a/sound/soc/codecs/wm8994.c +++ b/sound/soc/codecs/wm8994.c @@ -3142,6 +3142,8 @@ static int wm8994_codec_remove(struct snd_soc_codec *codec) release_firmware(wm8994->mbc); if (wm8994->mbc_vss) 
release_firmware(wm8994->mbc_vss); + if (wm8994->enh_eq) + release_firmware(wm8994->enh_eq); kfree(wm8994->retune_mobile_texts); kfree(wm8994->drc_texts); kfree(wm8994); diff --git a/sound/soc/codecs/wm8994.h b/sound/soc/codecs/wm8994.h index f337f3d50590..0a1db04b73bd 100644 --- a/sound/soc/codecs/wm8994.h +++ b/sound/soc/codecs/wm8994.h @@ -87,6 +87,7 @@ struct wm8994_priv { int hpf1_ena[3]; int hpf2_ena[3]; int vss_ena[3]; + int enh_eq_ena[3]; /* Platform dependant DRC configuration */ const char **drc_texts; @@ -114,6 +115,11 @@ struct wm8994_priv { const char **vss_hpf_texts; struct soc_enum vss_hpf_enum; + /* Platform dependant enhanced EQ configuration */ + int enh_eq_cfg; + const char **enh_eq_texts; + struct soc_enum enh_eq_enum; + struct wm8994_micdet micdet[2]; wm8958_micdet_cb jack_cb; @@ -133,6 +139,7 @@ struct wm8994_priv { const struct firmware *cur_fw; const struct firmware *mbc; const struct firmware *mbc_vss; + const struct firmware *enh_eq; }; #endif -- cgit v1.2.3-71-gd317 From edf2ed153bcae52de70db00a98b0e81a5668e563 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 Mar 2011 10:37:00 +0100 Subject: ptrace: Kill tracehook_notify_jctl() tracehook_notify_jctl() aids in determining whether and what to report to the parent when a task is stopped or continued. The function also adds an extra requirement that siglock may be released across it, which is currently unused and quite difficult to satisfy in well-defined manner. As job control and the notifications are about to receive major overhaul, remove the tracehook and open code it. If ever necessary, let's factor it out after the overhaul. * Oleg spotted incorrect CLD_CONTINUED/STOPPED selection when ptraced. Fixed. Signed-off-by: Tejun Heo Cc: Oleg Nesterov Cc: Roland McGrath --- include/linux/tracehook.h | 27 --------------------------- kernel/signal.c | 34 ++++++++++++++-------------------- 2 files changed, 14 insertions(+), 47 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 3a2e66d88a32..b073f3c8adc3 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -468,33 +468,6 @@ static inline int tracehook_get_signal(struct task_struct *task, return 0; } -/** - * tracehook_notify_jctl - report about job control stop/continue - * @notify: zero, %CLD_STOPPED or %CLD_CONTINUED - * @why: %CLD_STOPPED or %CLD_CONTINUED - * - * This is called when we might call do_notify_parent_cldstop(). - * - * @notify is zero if we would not ordinarily send a %SIGCHLD, - * or is the %CLD_STOPPED or %CLD_CONTINUED .si_code for %SIGCHLD. - * - * @why is %CLD_STOPPED when about to stop for job control; - * we are already in %TASK_STOPPED state, about to call schedule(). - * It might also be that we have just exited (check %PF_EXITING), - * but need to report that a group-wide stop is complete. - * - * @why is %CLD_CONTINUED when waking up after job control stop and - * ready to make a delayed @notify report. - * - * Return the %CLD_* value for %SIGCHLD, or zero to generate no signal. - * - * Called with the siglock held. - */ -static inline int tracehook_notify_jctl(int notify, int why) -{ - return notify ?: (current->ptrace & PT_PTRACED) ? 
why : 0; -} - /** * tracehook_finish_jctl - report about return from job control stop * diff --git a/kernel/signal.c b/kernel/signal.c index f4db76986ec1..03d874e1058f 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1727,7 +1727,7 @@ void ptrace_notify(int exit_code) static int do_signal_stop(int signr) { struct signal_struct *sig = current->signal; - int notify; + int notify = 0; if (!sig->group_stop_count) { struct task_struct *t; @@ -1759,19 +1759,16 @@ static int do_signal_stop(int signr) * a group stop in progress and we are the last to stop, report * to the parent. When ptraced, every thread reports itself. */ - notify = sig->group_stop_count == 1 ? CLD_STOPPED : 0; - notify = tracehook_notify_jctl(notify, CLD_STOPPED); - /* - * tracehook_notify_jctl() can drop and reacquire siglock, so - * we keep ->group_stop_count != 0 before the call. If SIGCONT - * or SIGKILL comes in between ->group_stop_count == 0. - */ - if (sig->group_stop_count) { - if (!--sig->group_stop_count) - sig->flags = SIGNAL_STOP_STOPPED; - current->exit_code = sig->group_exit_code; - __set_current_state(TASK_STOPPED); + if (!--sig->group_stop_count) { + sig->flags = SIGNAL_STOP_STOPPED; + notify = CLD_STOPPED; } + if (task_ptrace(current)) + notify = CLD_STOPPED; + + current->exit_code = sig->group_exit_code; + __set_current_state(TASK_STOPPED); + spin_unlock_irq(¤t->sighand->siglock); if (notify) { @@ -1860,14 +1857,11 @@ relock: signal->flags &= ~SIGNAL_CLD_MASK; - why = tracehook_notify_jctl(why, CLD_CONTINUED); spin_unlock_irq(&sighand->siglock); - if (why) { - read_lock(&tasklist_lock); - do_notify_parent_cldstop(current->group_leader, why); - read_unlock(&tasklist_lock); - } + read_lock(&tasklist_lock); + do_notify_parent_cldstop(current->group_leader, why); + read_unlock(&tasklist_lock); goto relock; } @@ -2034,7 +2028,7 @@ void exit_signals(struct task_struct *tsk) if (unlikely(tsk->signal->group_stop_count) && !--tsk->signal->group_stop_count) { tsk->signal->flags = SIGNAL_STOP_STOPPED; - group_stop = tracehook_notify_jctl(CLD_STOPPED, CLD_STOPPED); + group_stop = CLD_STOPPED; } out: spin_unlock_irq(&tsk->sighand->siglock); -- cgit v1.2.3-71-gd317 From e5c1902e9260a0075ea52cb5ef627a8d9aaede89 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 Mar 2011 10:37:00 +0100 Subject: signal: Fix premature completion of group stop when interfered by ptrace task->signal->group_stop_count is used to track the progress of group stop. It's initialized to the number of tasks which need to stop for group stop to finish and each stopping or trapping task decrements. However, each task doesn't keep track of whether it decremented the counter or not and if woken up before the group stop is complete and stops again, it can decrement the counter multiple times. Please consider the following example code. static void *worker(void *arg) { while (1) ; return NULL; } int main(void) { pthread_t thread; pid_t pid; int i; pid = fork(); if (!pid) { for (i = 0; i < 5; i++) pthread_create(&thread, NULL, worker, NULL); while (1) ; return 0; } ptrace(PTRACE_ATTACH, pid, NULL, NULL); while (1) { waitid(P_PID, pid, NULL, WSTOPPED); ptrace(PTRACE_SINGLESTEP, pid, NULL, (void *)(long)SIGSTOP); } return 0; } The child creates five threads and the parent continuously traps the first thread and whenever the child gets a signal, SIGSTOP is delivered. If an external process sends SIGSTOP to the child, all other threads in the process should reliably stop. 
However, due to the above bug, the first thread will often end up consuming group_stop_count multiple times and SIGSTOP often ends up stopping none or part of the other four threads. This patch adds a new field task->group_stop which is protected by siglock and uses GROUP_STOP_CONSUME flag to track which task is still to consume group_stop_count to fix this bug. task_clear_group_stop_pending() and task_participate_group_stop() are added to help manipulating group stop states. As ptrace_stop() now also uses task_participate_group_stop(), it will set SIGNAL_STOP_STOPPED if it completes a group stop. There still are many issues regarding the interaction between group stop and ptrace. Patches to address them will follow. - Oleg spotted duplicate GROUP_STOP_CONSUME. Dropped. Signed-off-by: Tejun Heo Acked-by: Oleg Nesterov Cc: Roland McGrath --- include/linux/sched.h | 6 +++++ kernel/signal.c | 62 ++++++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 60 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4b601be3dace..85f51042c2b8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1260,6 +1260,7 @@ struct task_struct { int exit_state; int exit_code, exit_signal; int pdeath_signal; /* The signal sent when the parent dies */ + unsigned int group_stop; /* GROUP_STOP_*, siglock protected */ /* ??? */ unsigned int personality; unsigned did_exec:1; @@ -1771,6 +1772,11 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * #define tsk_used_math(p) ((p)->flags & PF_USED_MATH) #define used_math() tsk_used_math(current) +/* + * task->group_stop flags + */ +#define GROUP_STOP_CONSUME (1 << 17) /* consume group stop count */ + #ifdef CONFIG_PREEMPT_RCU #define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */ diff --git a/kernel/signal.c b/kernel/signal.c index 95ac42dc3bcb..ecb20089eaff 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -223,6 +223,52 @@ static inline void print_dropped_signal(int sig) current->comm, current->pid, sig); } +/** + * task_clear_group_stop_pending - clear pending group stop + * @task: target task + * + * Clear group stop states for @task. + * + * CONTEXT: + * Must be called with @task->sighand->siglock held. + */ +static void task_clear_group_stop_pending(struct task_struct *task) +{ + task->group_stop &= ~GROUP_STOP_CONSUME; +} + +/** + * task_participate_group_stop - participate in a group stop + * @task: task participating in a group stop + * + * @task is participating in a group stop. Group stop states are cleared + * and the group stop count is consumed if %GROUP_STOP_CONSUME was set. If + * the consumption completes the group stop, the appropriate %SIGNAL_* + * flags are set. + * + * CONTEXT: + * Must be called with @task->sighand->siglock held. 
+ */ +static bool task_participate_group_stop(struct task_struct *task) +{ + struct signal_struct *sig = task->signal; + bool consume = task->group_stop & GROUP_STOP_CONSUME; + + task_clear_group_stop_pending(task); + + if (!consume) + return false; + + if (!WARN_ON_ONCE(sig->group_stop_count == 0)) + sig->group_stop_count--; + + if (!sig->group_stop_count) { + sig->flags = SIGNAL_STOP_STOPPED; + return true; + } + return false; +} + /* * allocate a new signal queue record * - this may be called without locks if and only if t == current, otherwise an @@ -1645,7 +1691,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info) * we must participate in the bookkeeping. */ if (current->signal->group_stop_count > 0) - --current->signal->group_stop_count; + task_participate_group_stop(current); current->last_siginfo = info; current->exit_code = exit_code; @@ -1730,6 +1776,7 @@ static int do_signal_stop(int signr) int notify = 0; if (!sig->group_stop_count) { + unsigned int gstop = GROUP_STOP_CONSUME; struct task_struct *t; if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED) || @@ -1741,6 +1788,7 @@ static int do_signal_stop(int signr) */ sig->group_exit_code = signr; + current->group_stop = gstop; sig->group_stop_count = 1; for (t = next_thread(current); t != current; t = next_thread(t)) /* @@ -1750,19 +1798,19 @@ static int do_signal_stop(int signr) */ if (!(t->flags & PF_EXITING) && !task_is_stopped_or_traced(t)) { + t->group_stop = gstop; sig->group_stop_count++; signal_wake_up(t, 0); - } + } else + task_clear_group_stop_pending(t); } /* * If there are no other threads in the group, or if there is * a group stop in progress and we are the last to stop, report * to the parent. When ptraced, every thread reports itself. */ - if (!--sig->group_stop_count) { - sig->flags = SIGNAL_STOP_STOPPED; + if (task_participate_group_stop(current)) notify = CLD_STOPPED; - } if (task_ptrace(current)) notify = CLD_STOPPED; @@ -2026,10 +2074,8 @@ void exit_signals(struct task_struct *tsk) recalc_sigpending_and_wake(t); if (unlikely(tsk->signal->group_stop_count) && - !--tsk->signal->group_stop_count) { - tsk->signal->flags = SIGNAL_STOP_STOPPED; + task_participate_group_stop(tsk)) group_stop = CLD_STOPPED; - } out: spin_unlock_irq(&tsk->sighand->siglock); -- cgit v1.2.3-71-gd317 From 39efa3ef3a376a4e53de2f82fc91182459d34200 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 Mar 2011 10:37:00 +0100 Subject: signal: Use GROUP_STOP_PENDING to stop once for a single group stop Currently task->signal->group_stop_count is used to decide whether to stop for group stop. However, if there is a task in the group which is taking a long time to stop, other tasks which are continued by ptrace would repeatedly stop for the same group stop until the group stop is complete. Conversely, if a ptraced task is in TASK_TRACED state, the debugger won't get notified of group stops which is inconsistent compared to the ptraced task in any other state. This patch introduces GROUP_STOP_PENDING which tracks whether a task is yet to stop for the group stop in progress. The flag is set when a group stop starts and cleared when the task stops the first time for the group stop, and consulted whenever whether the task should participate in a group stop needs to be determined. Note that now tasks in TASK_TRACED also participate in group stop. This results in the following behavior changes. * For a single group stop, a ptracer would see at most one stop reported. 
* A ptracee in TASK_TRACED now also participates in group stop and the tracer would get the notification. However, as a ptraced task could be in TASK_STOPPED state or any ptrace trap could consume group stop, the notification may still be missing. These will be addressed with further patches. * A ptracee may start a group stop while one is still in progress if the tracer let it continue with stop signal delivery. Group stop code handles this correctly. Oleg: * Spotted that a task might skip signal check even when its GROUP_STOP_PENDING is set. Fixed by updating recalc_sigpending_tsk() to check GROUP_STOP_PENDING instead of group_stop_count. * Pointed out that task->group_stop should be cleared whenever task->signal->group_stop_count is cleared. Fixed accordingly. * Pointed out the behavior inconsistency between TASK_TRACED and RUNNING and the last behavior change. Signed-off-by: Tejun Heo Acked-by: Oleg Nesterov Cc: Roland McGrath --- fs/exec.c | 1 + include/linux/sched.h | 3 +++ kernel/signal.c | 36 +++++++++++++++++++++--------------- 3 files changed, 25 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index 5e62d26a4fec..8328beb9016f 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1659,6 +1659,7 @@ static int zap_process(struct task_struct *start, int exit_code) t = start; do { + task_clear_group_stop_pending(t); if (t != current && t->mm) { sigaddset(&t->pending.signal, SIGKILL); signal_wake_up(t, 1); diff --git a/include/linux/sched.h b/include/linux/sched.h index 85f51042c2b8..b2a17dfbdbad 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1775,8 +1775,11 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * /* * task->group_stop flags */ +#define GROUP_STOP_PENDING (1 << 16) /* task should stop for group stop */ #define GROUP_STOP_CONSUME (1 << 17) /* consume group stop count */ +extern void task_clear_group_stop_pending(struct task_struct *task); + #ifdef CONFIG_PREEMPT_RCU #define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */ diff --git a/kernel/signal.c b/kernel/signal.c index ecb20089eaff..a2e7a6527d24 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -124,7 +124,7 @@ static inline int has_pending_signals(sigset_t *signal, sigset_t *blocked) static int recalc_sigpending_tsk(struct task_struct *t) { - if (t->signal->group_stop_count > 0 || + if ((t->group_stop & GROUP_STOP_PENDING) || PENDING(&t->pending, &t->blocked) || PENDING(&t->signal->shared_pending, &t->blocked)) { set_tsk_thread_flag(t, TIF_SIGPENDING); @@ -232,19 +232,19 @@ static inline void print_dropped_signal(int sig) * CONTEXT: * Must be called with @task->sighand->siglock held. */ -static void task_clear_group_stop_pending(struct task_struct *task) +void task_clear_group_stop_pending(struct task_struct *task) { - task->group_stop &= ~GROUP_STOP_CONSUME; + task->group_stop &= ~(GROUP_STOP_PENDING | GROUP_STOP_CONSUME); } /** * task_participate_group_stop - participate in a group stop * @task: task participating in a group stop * - * @task is participating in a group stop. Group stop states are cleared - * and the group stop count is consumed if %GROUP_STOP_CONSUME was set. If - * the consumption completes the group stop, the appropriate %SIGNAL_* - * flags are set. + * @task has GROUP_STOP_PENDING set and is participating in a group stop. + * Group stop states are cleared and the group stop count is consumed if + * %GROUP_STOP_CONSUME was set. 
If the consumption completes the group + * stop, the appropriate %SIGNAL_* flags are set. * * CONTEXT: * Must be called with @task->sighand->siglock held. @@ -254,6 +254,8 @@ static bool task_participate_group_stop(struct task_struct *task) struct signal_struct *sig = task->signal; bool consume = task->group_stop & GROUP_STOP_CONSUME; + WARN_ON_ONCE(!(task->group_stop & GROUP_STOP_PENDING)); + task_clear_group_stop_pending(task); if (!consume) @@ -765,6 +767,9 @@ static int prepare_signal(int sig, struct task_struct *p, int from_ancestor_ns) t = p; do { unsigned int state; + + task_clear_group_stop_pending(t); + rm_from_queue(SIG_KERNEL_STOP_MASK, &t->pending); /* * If there is a handler for SIGCONT, we must make @@ -906,6 +911,7 @@ static void complete_signal(int sig, struct task_struct *p, int group) signal->group_stop_count = 0; t = p; do { + task_clear_group_stop_pending(t); sigaddset(&t->pending.signal, SIGKILL); signal_wake_up(t, 1); } while_each_thread(p, t); @@ -1139,6 +1145,7 @@ int zap_other_threads(struct task_struct *p) p->signal->group_stop_count = 0; while_each_thread(p, t) { + task_clear_group_stop_pending(t); count++; /* Don't bother with already dead threads */ @@ -1690,7 +1697,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info) * If there is a group stop in progress, * we must participate in the bookkeeping. */ - if (current->signal->group_stop_count > 0) + if (current->group_stop & GROUP_STOP_PENDING) task_participate_group_stop(current); current->last_siginfo = info; @@ -1775,8 +1782,8 @@ static int do_signal_stop(int signr) struct signal_struct *sig = current->signal; int notify = 0; - if (!sig->group_stop_count) { - unsigned int gstop = GROUP_STOP_CONSUME; + if (!(current->group_stop & GROUP_STOP_PENDING)) { + unsigned int gstop = GROUP_STOP_PENDING | GROUP_STOP_CONSUME; struct task_struct *t; if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED) || @@ -1796,8 +1803,7 @@ static int do_signal_stop(int signr) * stop is always done with the siglock held, * so this check has no races. */ - if (!(t->flags & PF_EXITING) && - !task_is_stopped_or_traced(t)) { + if (!(t->flags & PF_EXITING) && !task_is_stopped(t)) { t->group_stop = gstop; sig->group_stop_count++; signal_wake_up(t, 0); @@ -1926,8 +1932,8 @@ relock: if (unlikely(signr != 0)) ka = return_ka; else { - if (unlikely(signal->group_stop_count > 0) && - do_signal_stop(0)) + if (unlikely(current->group_stop & + GROUP_STOP_PENDING) && do_signal_stop(0)) goto relock; signr = dequeue_signal(current, ¤t->blocked, @@ -2073,7 +2079,7 @@ void exit_signals(struct task_struct *tsk) if (!signal_pending(t) && !(t->flags & PF_EXITING)) recalc_sigpending_and_wake(t); - if (unlikely(tsk->signal->group_stop_count) && + if (unlikely(tsk->group_stop & GROUP_STOP_PENDING) && task_participate_group_stop(tsk)) group_stop = CLD_STOPPED; out: -- cgit v1.2.3-71-gd317 From d79fdd6d96f46fabb779d86332e3677c6f5c2a4f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 Mar 2011 10:37:00 +0100 Subject: ptrace: Clean transitions between TASK_STOPPED and TRACED Currently, if the task is STOPPED on ptrace attach, it's left alone and the state is silently changed to TRACED on the next ptrace call. The behavior breaks the assumption that arch_ptrace_stop() is called before any task is poked by ptrace and is ugly in that a task manipulates the state of another task directly. With GROUP_STOP_PENDING, the transitions between TASK_STOPPED and TRACED can be made clean. 
The tracer can use the flag to tell the tracee to retry stop on attach and detach. On retry, the tracee will enter the desired state in the correct way. The lower 16bits of task->group_stop is used to remember the signal number which caused the last group stop. This is used while retrying for ptrace attach as the original group_exit_code could have been consumed with wait(2) by then. As the real parent may wait(2) and consume the group_exit_code anytime, the group_exit_code needs to be saved separately so that it can be used when switching from regular sleep to ptrace_stop(). This is recorded in the lower 16bits of task->group_stop. If a task is already stopped and there's no intervening SIGCONT, a ptrace request immediately following a successful PTRACE_ATTACH should always succeed even if the tracer doesn't wait(2) for attach completion; however, with this change, the tracee might still be TASK_RUNNING trying to enter TASK_TRACED which would cause the following request to fail with -ESRCH. This intermediate state is hidden from the ptracer by setting GROUP_STOP_TRAPPING on attach and making ptrace_check_attach() wait for it to clear on its signal->wait_chldexit. Completing the transition or getting killed clears TRAPPING and wakes up the tracer. Note that the STOPPED -> RUNNING -> TRACED transition is still visible to other threads which are in the same group as the ptracer and the reverse transition is visible to all. Please read the comments for details. Oleg: * Spotted a race condition where a task may retry group stop without proper bookkeeping. Fixed by redoing bookkeeping on retry. * Spotted that the transition is visible to userland in several different ways. Most are fixed with GROUP_STOP_TRAPPING. Unhandled corner case is documented. * Pointed out not setting GROUP_STOP_SIGMASK on an already stopped task would result in more consistent behavior. * Pointed out that calling ptrace_stop() from do_signal_stop() in TASK_STOPPED can race with group stop start logic and then confuse the TRAPPING wait in ptrace_check_attach(). ptrace_stop() is now called with TASK_RUNNING. * Suggested using signal->wait_chldexit instead of bit wait. * Spotted a race condition between TRACED transition and clearing of TRAPPING. 
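For illustration only (not part of this patch): a minimal userspace sketch of the userland-visible guarantee described above. It assumes an x86 tracer and a target pid that is already group-stopped with no intervening SIGCONT; error handling is reduced to perror().

#include <errno.h>
#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/user.h>

/* Attach to an already-stopped task and immediately issue another
 * request without wait(2).  The GROUP_STOP_TRAPPING handshake makes
 * the attach path wait for the STOPPED -> TRACED transition, so the
 * follow-up request should not fail with -ESRCH. */
static int attach_and_getregs(pid_t pid)
{
	struct user_regs_struct regs;

	if (ptrace(PTRACE_ATTACH, pid, NULL, NULL) < 0) {
		perror("PTRACE_ATTACH");
		return -1;
	}

	/* Deliberately no waitpid() between the two requests. */
	if (ptrace(PTRACE_GETREGS, pid, NULL, &regs) < 0) {
		perror("PTRACE_GETREGS");
		return -1;
	}

	return 0;
}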
Signed-off-by: Tejun Heo Acked-by: Oleg Nesterov Cc: Roland McGrath Cc: Jan Kratochvil --- include/linux/sched.h | 2 ++ kernel/ptrace.c | 49 ++++++++++++++++++++++++++++---- kernel/signal.c | 79 ++++++++++++++++++++++++++++++++++++++++++--------- 3 files changed, 112 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index b2a17dfbdbad..456d80ed3b78 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1775,8 +1775,10 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * /* * task->group_stop flags */ +#define GROUP_STOP_SIGMASK 0xffff /* signr of the last group stop */ #define GROUP_STOP_PENDING (1 << 16) /* task should stop for group stop */ #define GROUP_STOP_CONSUME (1 << 17) /* consume group stop count */ +#define GROUP_STOP_TRAPPING (1 << 18) /* switching from STOPPED to TRACED */ extern void task_clear_group_stop_pending(struct task_struct *task); diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 6acf8954017c..745fc2dd00c5 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -49,14 +49,22 @@ static void ptrace_untrace(struct task_struct *child) spin_lock(&child->sighand->siglock); if (task_is_traced(child)) { /* - * If the group stop is completed or in progress, - * this thread was already counted as stopped. + * If group stop is completed or in progress, it should + * participate in the group stop. Set GROUP_STOP_PENDING + * before kicking it. + * + * This involves TRACED -> RUNNING -> STOPPED transition + * which is similar to but in the opposite direction of + * what happens while attaching to a stopped task. + * However, in this direction, the intermediate RUNNING + * state is not hidden even from the current ptracer and if + * it immediately re-attaches and performs a WNOHANG + * wait(2), it may fail. */ if (child->signal->flags & SIGNAL_STOP_STOPPED || child->signal->group_stop_count) - __set_task_state(child, TASK_STOPPED); - else - signal_wake_up(child, 1); + child->group_stop |= GROUP_STOP_PENDING; + signal_wake_up(child, 1); } spin_unlock(&child->sighand->siglock); } @@ -165,6 +173,7 @@ bool ptrace_may_access(struct task_struct *task, unsigned int mode) static int ptrace_attach(struct task_struct *task) { + bool wait_trap = false; int retval; audit_ptrace(task); @@ -204,12 +213,42 @@ static int ptrace_attach(struct task_struct *task) __ptrace_link(task, current); send_sig_info(SIGSTOP, SEND_SIG_FORCED, task); + spin_lock(&task->sighand->siglock); + + /* + * If the task is already STOPPED, set GROUP_STOP_PENDING and + * TRAPPING, and kick it so that it transits to TRACED. TRAPPING + * will be cleared if the child completes the transition or any + * event which clears the group stop states happens. We'll wait + * for the transition to complete before returning from this + * function. + * + * This hides STOPPED -> RUNNING -> TRACED transition from the + * attaching thread but a different thread in the same group can + * still observe the transient RUNNING state. IOW, if another + * thread's WNOHANG wait(2) on the stopped tracee races against + * ATTACH, the wait(2) may fail due to the transient RUNNING. + * + * The following task_is_stopped() test is safe as both transitions + * in and out of STOPPED are protected by siglock. 
+ */ + if (task_is_stopped(task)) { + task->group_stop |= GROUP_STOP_PENDING | GROUP_STOP_TRAPPING; + signal_wake_up(task, 1); + wait_trap = true; + } + + spin_unlock(&task->sighand->siglock); + retval = 0; unlock_tasklist: write_unlock_irq(&tasklist_lock); unlock_creds: mutex_unlock(&task->signal->cred_guard_mutex); out: + if (wait_trap) + wait_event(current->signal->wait_chldexit, + !(task->group_stop & GROUP_STOP_TRAPPING)); return retval; } diff --git a/kernel/signal.c b/kernel/signal.c index 418776c41d24..1e199919ae09 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -223,6 +223,26 @@ static inline void print_dropped_signal(int sig) current->comm, current->pid, sig); } +/** + * task_clear_group_stop_trapping - clear group stop trapping bit + * @task: target task + * + * If GROUP_STOP_TRAPPING is set, a ptracer is waiting for us. Clear it + * and wake up the ptracer. Note that we don't need any further locking. + * @task->siglock guarantees that @task->parent points to the ptracer. + * + * CONTEXT: + * Must be called with @task->sighand->siglock held. + */ +static void task_clear_group_stop_trapping(struct task_struct *task) +{ + if (unlikely(task->group_stop & GROUP_STOP_TRAPPING)) { + task->group_stop &= ~GROUP_STOP_TRAPPING; + __wake_up_sync(&task->parent->signal->wait_chldexit, + TASK_UNINTERRUPTIBLE, 1); + } +} + /** * task_clear_group_stop_pending - clear pending group stop * @task: target task @@ -1706,8 +1726,20 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info) current->last_siginfo = info; current->exit_code = exit_code; - /* Let the debugger run. */ - __set_current_state(TASK_TRACED); + /* + * TRACED should be visible before TRAPPING is cleared; otherwise, + * the tracer might fail do_wait(). + */ + set_current_state(TASK_TRACED); + + /* + * We're committing to trapping. Clearing GROUP_STOP_TRAPPING and + * transition to TASK_TRACED should be atomic with respect to + * siglock. This hsould be done after the arch hook as siglock is + * released and regrabbed across it. + */ + task_clear_group_stop_trapping(current); + spin_unlock_irq(¤t->sighand->siglock); read_lock(&tasklist_lock); if (may_ptrace_stop()) { @@ -1788,6 +1820,9 @@ static int do_signal_stop(int signr) unsigned int gstop = GROUP_STOP_PENDING | GROUP_STOP_CONSUME; struct task_struct *t; + /* signr will be recorded in task->group_stop for retries */ + WARN_ON_ONCE(signr & ~GROUP_STOP_SIGMASK); + if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED) || unlikely(signal_group_exit(sig))) return 0; @@ -1797,25 +1832,27 @@ static int do_signal_stop(int signr) */ sig->group_exit_code = signr; - current->group_stop = gstop; + current->group_stop &= ~GROUP_STOP_SIGMASK; + current->group_stop |= signr | gstop; sig->group_stop_count = 1; - for (t = next_thread(current); t != current; t = next_thread(t)) + for (t = next_thread(current); t != current; + t = next_thread(t)) { + t->group_stop &= ~GROUP_STOP_SIGMASK; /* * Setting state to TASK_STOPPED for a group * stop is always done with the siglock held, * so this check has no races. 
*/ if (!(t->flags & PF_EXITING) && !task_is_stopped(t)) { - t->group_stop = gstop; + t->group_stop |= signr | gstop; sig->group_stop_count++; signal_wake_up(t, 0); - } else + } else { task_clear_group_stop_pending(t); + } + } } - - current->exit_code = sig->group_exit_code; - __set_current_state(TASK_STOPPED); - +retry: if (likely(!task_ptrace(current))) { int notify = 0; @@ -1827,6 +1864,7 @@ static int do_signal_stop(int signr) if (task_participate_group_stop(current)) notify = CLD_STOPPED; + __set_current_state(TASK_STOPPED); spin_unlock_irq(¤t->sighand->siglock); if (notify) { @@ -1839,13 +1877,28 @@ static int do_signal_stop(int signr) schedule(); spin_lock_irq(¤t->sighand->siglock); - } else - ptrace_stop(current->exit_code, CLD_STOPPED, 0, NULL); + } else { + ptrace_stop(current->group_stop & GROUP_STOP_SIGMASK, + CLD_STOPPED, 0, NULL); + current->exit_code = 0; + } + + /* + * GROUP_STOP_PENDING could be set if another group stop has + * started since being woken up or ptrace wants us to transit + * between TASK_STOPPED and TRACED. Retry group stop. + */ + if (current->group_stop & GROUP_STOP_PENDING) { + WARN_ON_ONCE(!(current->group_stop & GROUP_STOP_SIGMASK)); + goto retry; + } + + /* PTRACE_ATTACH might have raced with task killing, clear trapping */ + task_clear_group_stop_trapping(current); spin_unlock_irq(¤t->sighand->siglock); tracehook_finish_jctl(); - current->exit_code = 0; return 1; } -- cgit v1.2.3-71-gd317 From f6d5b33125c4fa63c16f7f54c533338c9695d82c Mon Sep 17 00:00:00 2001 From: John Stultz Date: Tue, 29 Mar 2011 18:00:27 -0700 Subject: RTC: Fix early irqs caused by calling rtc_set_alarm too early When we register an rtc device at boot, we read the alarm value in hardware and set the rtc device's aie_timer to that value. The initial method to do this was to simply call rtc_set_alarm() with the value read from hardware. However, this may cause problems as rtc_set_alarm may enable interupts, and the RTC alarm might fire, which can cause invalid pointer dereferencing since the RTC registration is not complete. This patch solves the issue by initializing the rtc_device.aie_timer y hand via rtc_initialize_alarm(). This avoids any calls to the RTC hardware which might enable interrupts too early. 
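For context, a hedged driver-side sketch (driver name hypothetical, ops table abridged): a driver only implements read_alarm() to report what is programmed in hardware; after this patch the core seeds its alarm timer from that value with rtc_initialize_alarm() during rtc_device_register(), instead of going through rtc_set_alarm(), so no alarm interrupt can be enabled before registration completes.

#include <linux/device.h>
#include <linux/rtc.h>

/* Hypothetical driver callback: report the alarm currently set in
 * hardware.  rtc_device_register() reads this via __rtc_read_alarm()
 * and now uses rtc_initialize_alarm() to seed rtc->aie_timer. */
static int foo_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
{
	/* ... fill alrm->time and alrm->enabled from the chip ... */
	return 0;
}

static const struct rtc_class_ops foo_rtc_ops = {
	.read_alarm	= foo_rtc_read_alarm,
	/* .read_time, .set_time, .set_alarm, ... */
};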
CC: Thomas Gleixner CC: Alessandro Zummo Reported-by: Konrad Rzeszutek Wilk Tested-by: Konrad Rzeszutek Wilk Signed-off-by: John Stultz --- drivers/rtc/class.c | 2 +- drivers/rtc/interface.c | 26 ++++++++++++++++++++++++++ include/linux/rtc.h | 2 ++ 3 files changed, 29 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c index 09b4437b3e61..39013867cbd6 100644 --- a/drivers/rtc/class.c +++ b/drivers/rtc/class.c @@ -171,7 +171,7 @@ struct rtc_device *rtc_device_register(const char *name, struct device *dev, err = __rtc_read_alarm(rtc, &alrm); if (!err && !rtc_valid_tm(&alrm.time)) - rtc_set_alarm(rtc, &alrm); + rtc_initialize_alarm(rtc, &alrm); strlcpy(rtc->name, name, RTC_DEVICE_NAME_SIZE); dev_set_name(&rtc->dev, "rtc%d", id); diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c index 8ec6b069a7f5..b2fea80dfb65 100644 --- a/drivers/rtc/interface.c +++ b/drivers/rtc/interface.c @@ -375,6 +375,32 @@ int rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) } EXPORT_SYMBOL_GPL(rtc_set_alarm); +/* Called once per device from rtc_device_register */ +int rtc_initialize_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) +{ + int err; + + err = rtc_valid_tm(&alarm->time); + if (err != 0) + return err; + + err = mutex_lock_interruptible(&rtc->ops_lock); + if (err) + return err; + + rtc->aie_timer.node.expires = rtc_tm_to_ktime(alarm->time); + rtc->aie_timer.period = ktime_set(0, 0); + if (alarm->enabled) { + rtc->aie_timer.enabled = 1; + timerqueue_add(&rtc->timerqueue, &rtc->aie_timer.node); + } + mutex_unlock(&rtc->ops_lock); + return err; +} +EXPORT_SYMBOL_GPL(rtc_initialize_alarm); + + + int rtc_alarm_irq_enable(struct rtc_device *rtc, unsigned int enabled) { int err = mutex_lock_interruptible(&rtc->ops_lock); diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 2ca7e8a78060..877ece45426f 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -228,6 +228,8 @@ extern int rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alrm); extern int rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alrm); +extern int rtc_initialize_alarm(struct rtc_device *rtc, + struct rtc_wkalrm *alrm); extern void rtc_update_irq(struct rtc_device *rtc, unsigned long num, unsigned long events); -- cgit v1.2.3-71-gd317 From 6fb1b1e18fe3d141c54182c5d5b3af823bed455f Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 19 Mar 2011 13:55:39 +0100 Subject: ath9k: add support for overriding the MAC address through platform data On some devices the correct MAC address is not in the EEPROM data, but stored somewhere else. Signed-off-by: Felix Fietkau Signed-off-by: John W. 
Linville --- drivers/net/wireless/ath/ath9k/init.c | 7 ++++++- include/linux/ath9k_platform.h | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c index 79aec983279f..e22e8215d941 100644 --- a/drivers/net/wireless/ath/ath9k/init.c +++ b/drivers/net/wireless/ath/ath9k/init.c @@ -15,6 +15,7 @@ */ #include +#include #include "ath9k.h" @@ -537,6 +538,7 @@ static void ath9k_init_misc(struct ath_softc *sc) static int ath9k_init_softc(u16 devid, struct ath_softc *sc, u16 subsysid, const struct ath_bus_ops *bus_ops) { + struct ath9k_platform_data *pdata = sc->dev->platform_data; struct ath_hw *ah = NULL; struct ath_common *common; int ret = 0, i; @@ -551,7 +553,7 @@ static int ath9k_init_softc(u16 devid, struct ath_softc *sc, u16 subsysid, ah->hw_version.subsysid = subsysid; sc->sc_ah = ah; - if (!sc->dev->platform_data) + if (!pdata) ah->ah_flags |= AH_USE_EEPROM; common = ath9k_hw_common(ah); @@ -587,6 +589,9 @@ static int ath9k_init_softc(u16 devid, struct ath_softc *sc, u16 subsysid, if (ret) goto err_hw; + if (pdata && pdata->macaddr) + memcpy(common->macaddr, pdata->macaddr, ETH_ALEN); + ret = ath9k_init_queues(sc); if (ret) goto err_queues; diff --git a/include/linux/ath9k_platform.h b/include/linux/ath9k_platform.h index b847fc7b93f9..b5f06583a1ba 100644 --- a/include/linux/ath9k_platform.h +++ b/include/linux/ath9k_platform.h @@ -23,6 +23,7 @@ struct ath9k_platform_data { u16 eeprom_data[ATH9K_PLAT_EEP_MAX_WORDS]; + u8 *macaddr; }; #endif /* _LINUX_ATH9K_PLATFORM_H */ -- cgit v1.2.3-71-gd317 From 6de66dd963ddd669667a81a2401f2fd6472ff55c Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 19 Mar 2011 13:55:40 +0100 Subject: ath9k: add support for overriding LED pin and GPIO settings from platform data Signed-off-by: Felix Fietkau Signed-off-by: John W. 
Linville --- drivers/net/wireless/ath/ath9k/gpio.c | 14 ++++++++------ drivers/net/wireless/ath/ath9k/hw.h | 2 +- drivers/net/wireless/ath/ath9k/init.c | 8 +++++++- include/linux/ath9k_platform.h | 4 ++++ 4 files changed, 20 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/wireless/ath/ath9k/gpio.c b/drivers/net/wireless/ath/ath9k/gpio.c index 0fb8f8ac275a..44a0a886124d 100644 --- a/drivers/net/wireless/ath/ath9k/gpio.c +++ b/drivers/net/wireless/ath/ath9k/gpio.c @@ -41,12 +41,14 @@ void ath_init_leds(struct ath_softc *sc) { int ret; - if (AR_SREV_9287(sc->sc_ah)) - sc->sc_ah->led_pin = ATH_LED_PIN_9287; - else if (AR_SREV_9485(sc->sc_ah)) - sc->sc_ah->led_pin = ATH_LED_PIN_9485; - else - sc->sc_ah->led_pin = ATH_LED_PIN_DEF; + if (sc->sc_ah->led_pin < 0) { + if (AR_SREV_9287(sc->sc_ah)) + sc->sc_ah->led_pin = ATH_LED_PIN_9287; + else if (AR_SREV_9485(sc->sc_ah)) + sc->sc_ah->led_pin = ATH_LED_PIN_9485; + else + sc->sc_ah->led_pin = ATH_LED_PIN_DEF; + } /* Configure gpio 1 for output */ ath9k_hw_cfg_output(sc->sc_ah, sc->sc_ah->led_pin, diff --git a/drivers/net/wireless/ath/ath9k/hw.h b/drivers/net/wireless/ath/ath9k/hw.h index 775c0eb10b95..3d9fc6e391a7 100644 --- a/drivers/net/wireless/ath/ath9k/hw.h +++ b/drivers/net/wireless/ath/ath9k/hw.h @@ -798,7 +798,7 @@ struct ath_hw { u32 originalGain[22]; int initPDADC; int PDADCdelta; - u8 led_pin; + int led_pin; u32 gpio_mask; u32 gpio_val; diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c index e22e8215d941..cdb0f1c89a0e 100644 --- a/drivers/net/wireless/ath/ath9k/init.c +++ b/drivers/net/wireless/ath/ath9k/init.c @@ -553,8 +553,14 @@ static int ath9k_init_softc(u16 devid, struct ath_softc *sc, u16 subsysid, ah->hw_version.subsysid = subsysid; sc->sc_ah = ah; - if (!pdata) + if (!pdata) { ah->ah_flags |= AH_USE_EEPROM; + sc->sc_ah->led_pin = -1; + } else { + sc->sc_ah->gpio_mask = pdata->gpio_mask; + sc->sc_ah->gpio_val = pdata->gpio_val; + sc->sc_ah->led_pin = pdata->led_pin; + } common = ath9k_hw_common(ah); common->ops = &ath9k_common_ops; diff --git a/include/linux/ath9k_platform.h b/include/linux/ath9k_platform.h index b5f06583a1ba..020387a114e3 100644 --- a/include/linux/ath9k_platform.h +++ b/include/linux/ath9k_platform.h @@ -24,6 +24,10 @@ struct ath9k_platform_data { u16 eeprom_data[ATH9K_PLAT_EEP_MAX_WORDS]; u8 *macaddr; + + int led_pin; + u32 gpio_mask; + u32 gpio_val; }; #endif /* _LINUX_ATH9K_PLATFORM_H */ -- cgit v1.2.3-71-gd317 From 6cb6a27c45cec9184302c2e350b3593c64bc7f6c Mon Sep 17 00:00:00 2001 From: MichaÅ‚ MirosÅ‚aw Date: Sat, 2 Apr 2011 22:48:47 -0700 Subject: net: Call netdev_features_change() from netdev_update_features() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Issue FEAT_CHANGE notification when features are changed by netdev_update_features(). This will allow changes made by extra constraints on e.g. MTU change to be properly propagated like changes via ethtool. Signed-off-by: MichaÅ‚ MirosÅ‚aw Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 1 + net/core/dev.c | 23 +++++++++++++++++------ net/core/ethtool.c | 6 +++--- 3 files changed, 21 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5eeb2cd3631c..423a5447d2ed 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2550,6 +2550,7 @@ static inline u32 netdev_get_wanted_features(struct net_device *dev) } u32 netdev_increment_features(u32 all, u32 one, u32 mask); u32 netdev_fix_features(struct net_device *dev, u32 features); +int __netdev_update_features(struct net_device *dev); void netdev_update_features(struct net_device *dev); void netif_stacked_transfer_operstate(const struct net_device *rootdev, diff --git a/net/core/dev.c b/net/core/dev.c index 3da9fb06d47a..02f56376fe99 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5236,7 +5236,7 @@ u32 netdev_fix_features(struct net_device *dev, u32 features) } EXPORT_SYMBOL(netdev_fix_features); -void netdev_update_features(struct net_device *dev) +int __netdev_update_features(struct net_device *dev) { u32 features; int err = 0; @@ -5250,7 +5250,7 @@ void netdev_update_features(struct net_device *dev) features = netdev_fix_features(dev, features); if (dev->features == features) - return; + return 0; netdev_info(dev, "Features changed: 0x%08x -> 0x%08x\n", dev->features, features); @@ -5258,12 +5258,23 @@ void netdev_update_features(struct net_device *dev) if (dev->netdev_ops->ndo_set_features) err = dev->netdev_ops->ndo_set_features(dev, features); - if (!err) - dev->features = features; - else if (err < 0) + if (unlikely(err < 0)) { netdev_err(dev, "set_features() failed (%d); wanted 0x%08x, left 0x%08x\n", err, features, dev->features); + return -1; + } + + if (!err) + dev->features = features; + + return 1; +} + +void netdev_update_features(struct net_device *dev) +{ + if (__netdev_update_features(dev)) + netdev_features_change(dev); } EXPORT_SYMBOL(netdev_update_features); @@ -5430,7 +5441,7 @@ int register_netdevice(struct net_device *dev) goto err_uninit; dev->reg_state = NETREG_REGISTERED; - netdev_update_features(dev); + __netdev_update_features(dev); /* * Default initial state at registry is that the diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 74ead9eca126..439e4b0e1312 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -317,7 +317,7 @@ static int ethtool_set_features(struct net_device *dev, void __user *useraddr) dev->wanted_features &= ~features[0].valid; dev->wanted_features |= features[0].valid & features[0].requested; - netdev_update_features(dev); + __netdev_update_features(dev); if ((dev->wanted_features ^ dev->features) & features[0].valid) ret |= ETHTOOL_F_WISH; @@ -499,7 +499,7 @@ static int ethtool_set_one_feature(struct net_device *dev, else dev->wanted_features &= ~mask; - netdev_update_features(dev); + __netdev_update_features(dev); return 0; } @@ -551,7 +551,7 @@ int __ethtool_set_flags(struct net_device *dev, u32 data) dev->wanted_features = (dev->wanted_features & ~changed) | data; - netdev_update_features(dev); + __netdev_update_features(dev); return 0; } -- cgit v1.2.3-71-gd317 From ee77f075921730b2b465880f9fd4367003bdab39 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 1 Apr 2011 20:12:38 +0200 Subject: signal: Turn SIGNAL_STOP_DEQUEUED into GROUP_STOP_DEQUEUED This patch moves SIGNAL_STOP_DEQUEUED from signal_struct->flags to task_struct->group_stop, and thus makes it per-thread. 
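As a rough before/after sketch of the check that matters, condensed from the kernel/signal.c hunks below (a simplified sketch, not a literal quote of the source):

	/* before: dequeue_signal() sets a bit shared by the whole group ... */
	tsk->signal->flags |= SIGNAL_STOP_DEQUEUED;
	/* ... and do_signal_stop() tests that shared bit */
	if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED))
		return 0;

	/* after: the dequeueing thread records and tests its own bit */
	current->group_stop |= GROUP_STOP_DEQUEUED;
	if (!likely(current->group_stop & GROUP_STOP_DEQUEUED))
		return 0;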
Like SIGNAL_STOP_DEQUEUED, GROUP_STOP_DEQUEUED can be false-positive after return from get_signal_to_deliver(); this is fine. The only purpose of this bit is: we can drop ->siglock after __dequeue_signal() returns the sig_kernel_stop() signal and before we call do_signal_stop(); in this case we must not miss SIGCONT if it comes in between. But, unlike SIGNAL_STOP_DEQUEUED, GROUP_STOP_DEQUEUED can not be false-positive in do_signal_stop() if multiple threads dequeue the sig_kernel_stop() signal at the same time. Consider two threads T1 and T2, where SIGTTIN has a handler. - T1 dequeues SIGTSTP and sets SIGNAL_STOP_DEQUEUED, then it drops ->siglock - SIGCONT comes and clears SIGNAL_STOP_DEQUEUED, SIGTSTP should be cancelled. - T2 dequeues SIGTTIN and sets SIGNAL_STOP_DEQUEUED again. Since we have a handler we should not stop, T2 returns to usermode to run the handler. - T1 continues, calls do_signal_stop() and wrongly starts the group stop because SIGNAL_STOP_DEQUEUED was restored in between. With or without this change: - we need to do something with ptrace_signal() which can return SIGSTOP, but this needs another discussion - SIGSTOP can be lost if it races with the mt exec, will be fixed later. Signed-off-by: Oleg Nesterov Signed-off-by: Tejun Heo --- include/linux/sched.h | 6 +++--- kernel/signal.c | 14 ++++---------- 2 files changed, 7 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 456d80ed3b78..8cef82d4cf77 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -652,9 +652,8 @@ struct signal_struct { * Bits in flags field of signal_struct. */ #define SIGNAL_STOP_STOPPED 0x00000001 /* job control stop in effect */ -#define SIGNAL_STOP_DEQUEUED 0x00000002 /* stop signal dequeued */ -#define SIGNAL_STOP_CONTINUED 0x00000004 /* SIGCONT since WCONTINUED reap */ -#define SIGNAL_GROUP_EXIT 0x00000008 /* group exit in progress */ +#define SIGNAL_STOP_CONTINUED 0x00000002 /* SIGCONT since WCONTINUED reap */ +#define SIGNAL_GROUP_EXIT 0x00000004 /* group exit in progress */ /* * Pending notifications to parent. */ @@ -1779,6 +1778,7 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * #define GROUP_STOP_PENDING (1 << 16) /* task should stop for group stop */ #define GROUP_STOP_CONSUME (1 << 17) /* consume group stop count */ #define GROUP_STOP_TRAPPING (1 << 18) /* switching from STOPPED to TRACED */ +#define GROUP_STOP_DEQUEUED (1 << 19) /* stop signal dequeued */ extern void task_clear_group_stop_pending(struct task_struct *task); diff --git a/kernel/signal.c b/kernel/signal.c index e9abc69dc0d8..4f7312b49b2d 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -254,7 +254,8 @@ static void task_clear_group_stop_trapping(struct task_struct *task) */ void task_clear_group_stop_pending(struct task_struct *task) { - task->group_stop &= ~(GROUP_STOP_PENDING | GROUP_STOP_CONSUME); + task->group_stop &= ~(GROUP_STOP_PENDING | GROUP_STOP_CONSUME | + GROUP_STOP_DEQUEUED); } /** @@ -602,7 +603,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) * is to alert stop-signal processing code when another * processor has come along and cleared the flag.
*/ - tsk->signal->flags |= SIGNAL_STOP_DEQUEUED; + current->group_stop |= GROUP_STOP_DEQUEUED; } if ((info->si_code & __SI_MASK) == __SI_TIMER && info->si_sys_private) { /* @@ -821,13 +822,6 @@ static int prepare_signal(int sig, struct task_struct *p, int from_ancestor_ns) signal->flags = why | SIGNAL_STOP_CONTINUED; signal->group_stop_count = 0; signal->group_exit_code = 0; - } else { - /* - * We are not stopped, but there could be a stop - * signal in the middle of being processed after - * being removed from the queue. Clear that too. - */ - signal->flags &= ~SIGNAL_STOP_DEQUEUED; } } @@ -1855,7 +1849,7 @@ static int do_signal_stop(int signr) /* signr will be recorded in task->group_stop for retries */ WARN_ON_ONCE(signr & ~GROUP_STOP_SIGMASK); - if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED) || + if (!likely(current->group_stop & GROUP_STOP_DEQUEUED) || unlikely(signal_group_exit(sig))) return 0; /* -- cgit v1.2.3-71-gd317 From 7f5c6d4f665bb57a19a34ce1fb16cc708c04f219 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 4 Apr 2011 17:04:03 +0200 Subject: netfilter: get rid of atomic ops in fast path We currently use a percpu spinlock to 'protect' rule bytes/packets counters, after various attempts to use RCU instead. Lately we added a seqlock so that get_counters() can run without blocking BH or 'writers'. But we really only need the seqcount in it. Spinlock itself is only locked by the current/owner cpu, so we can remove it completely. This cleanups api, using correct 'writer' vs 'reader' semantic. At replace time, the get_counters() call makes sure all cpus are done using the old table. Signed-off-by: Eric Dumazet Cc: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter/x_tables.h | 96 +++++++++++++++++--------------------- net/ipv4/netfilter/arp_tables.c | 18 ++++--- net/ipv4/netfilter/ip_tables.c | 28 +++++------ net/ipv6/netfilter/ip6_tables.c | 19 +++++--- net/netfilter/x_tables.c | 9 ++-- 5 files changed, 80 insertions(+), 90 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 37219525ff6f..32cddf78b13e 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -456,72 +456,60 @@ extern void xt_proto_fini(struct net *net, u_int8_t af); extern struct xt_table_info *xt_alloc_table_info(unsigned int size); extern void xt_free_table_info(struct xt_table_info *info); -/* - * Per-CPU spinlock associated with per-cpu table entries, and - * with a counter for the "reading" side that allows a recursive - * reader to avoid taking the lock and deadlocking. - * - * "reading" is used by ip/arp/ip6 tables rule processing which runs per-cpu. - * It needs to ensure that the rules are not being changed while the packet - * is being processed. In some cases, the read lock will be acquired - * twice on the same CPU; this is okay because of the count. - * - * "writing" is used when reading counters. - * During replace any readers that are using the old tables have to complete - * before freeing the old table. This is handled by the write locking - * necessary for reading the counters. +/** + * xt_recseq - recursive seqcount for netfilter use + * + * Packet processing changes the seqcount only if no recursion happened + * get_counters() can use read_seqcount_begin()/read_seqcount_retry(), + * because we use the normal seqcount convention : + * Low order bit set to 1 if a writer is active. 
*/ -struct xt_info_lock { - seqlock_t lock; - unsigned char readers; -}; -DECLARE_PER_CPU(struct xt_info_lock, xt_info_locks); +DECLARE_PER_CPU(seqcount_t, xt_recseq); -/* - * Note: we need to ensure that preemption is disabled before acquiring - * the per-cpu-variable, so we do it as a two step process rather than - * using "spin_lock_bh()". - * - * We _also_ need to disable bottom half processing before updating our - * nesting count, to make sure that the only kind of re-entrancy is this - * code being called by itself: since the count+lock is not an atomic - * operation, we can allow no races. +/** + * xt_write_recseq_begin - start of a write section * - * _Only_ that special combination of being per-cpu and never getting - * re-entered asynchronously means that the count is safe. + * Begin packet processing : all readers must wait the end + * 1) Must be called with preemption disabled + * 2) softirqs must be disabled too (or we should use irqsafe_cpu_add()) + * Returns : + * 1 if no recursion on this cpu + * 0 if recursion detected */ -static inline void xt_info_rdlock_bh(void) +static inline unsigned int xt_write_recseq_begin(void) { - struct xt_info_lock *lock; + unsigned int addend; - local_bh_disable(); - lock = &__get_cpu_var(xt_info_locks); - if (likely(!lock->readers++)) - write_seqlock(&lock->lock); -} + /* + * Low order bit of sequence is set if we already + * called xt_write_recseq_begin(). + */ + addend = (__this_cpu_read(xt_recseq.sequence) + 1) & 1; -static inline void xt_info_rdunlock_bh(void) -{ - struct xt_info_lock *lock = &__get_cpu_var(xt_info_locks); + /* + * This is kind of a write_seqcount_begin(), but addend is 0 or 1 + * We dont check addend value to avoid a test and conditional jump, + * since addend is most likely 1 + */ + __this_cpu_add(xt_recseq.sequence, addend); + smp_wmb(); - if (likely(!--lock->readers)) - write_sequnlock(&lock->lock); - local_bh_enable(); + return addend; } -/* - * The "writer" side needs to get exclusive access to the lock, - * regardless of readers. This must be called with bottom half - * processing (and thus also preemption) disabled. +/** + * xt_write_recseq_end - end of a write section + * @addend: return value from previous xt_write_recseq_begin() + * + * End packet processing : all readers can proceed + * 1) Must be called with preemption disabled + * 2) softirqs must be disabled too (or we should use irqsafe_cpu_add()) */ -static inline void xt_info_wrlock(unsigned int cpu) -{ - write_seqlock(&per_cpu(xt_info_locks, cpu).lock); -} - -static inline void xt_info_wrunlock(unsigned int cpu) +static inline void xt_write_recseq_end(unsigned int addend) { - write_sequnlock(&per_cpu(xt_info_locks, cpu).lock); + /* this is kind of a write_seqcount_end(), but addend is 0 or 1 */ + smp_wmb(); + __this_cpu_add(xt_recseq.sequence, addend); } /* diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 4b5d457c2d76..2ea743336836 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -260,6 +260,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, void *table_base; const struct xt_table_info *private; struct xt_action_param acpar; + unsigned int addend; if (!pskb_may_pull(skb, arp_hdr_len(skb->dev))) return NF_DROP; @@ -267,7 +268,8 @@ unsigned int arpt_do_table(struct sk_buff *skb, indev = in ? in->name : nulldevname; outdev = out ? 
out->name : nulldevname; - xt_info_rdlock_bh(); + local_bh_disable(); + addend = xt_write_recseq_begin(); private = table->private; table_base = private->entries[smp_processor_id()]; @@ -338,7 +340,8 @@ unsigned int arpt_do_table(struct sk_buff *skb, /* Verdict */ break; } while (!acpar.hotdrop); - xt_info_rdunlock_bh(); + xt_write_recseq_end(addend); + local_bh_enable(); if (acpar.hotdrop) return NF_DROP; @@ -712,7 +715,7 @@ static void get_counters(const struct xt_table_info *t, unsigned int i; for_each_possible_cpu(cpu) { - seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock; + seqcount_t *s = &per_cpu(xt_recseq, cpu); i = 0; xt_entry_foreach(iter, t->entries[cpu], t->size) { @@ -720,10 +723,10 @@ static void get_counters(const struct xt_table_info *t, unsigned int start; do { - start = read_seqbegin(lock); + start = read_seqcount_begin(s); bcnt = iter->counters.bcnt; pcnt = iter->counters.pcnt; - } while (read_seqretry(lock, start)); + } while (read_seqcount_retry(s, start)); ADD_COUNTER(counters[i], bcnt, pcnt); ++i; @@ -1115,6 +1118,7 @@ static int do_add_counters(struct net *net, const void __user *user, int ret = 0; void *loc_cpu_entry; struct arpt_entry *iter; + unsigned int addend; #ifdef CONFIG_COMPAT struct compat_xt_counters_info compat_tmp; @@ -1171,12 +1175,12 @@ static int do_add_counters(struct net *net, const void __user *user, /* Choose the copy that is on our node */ curcpu = smp_processor_id(); loc_cpu_entry = private->entries[curcpu]; - xt_info_wrlock(curcpu); + addend = xt_write_recseq_begin(); xt_entry_foreach(iter, loc_cpu_entry, private->size) { ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt); ++i; } - xt_info_wrunlock(curcpu); + xt_write_recseq_end(addend); unlock_up_free: local_bh_enable(); xt_table_unlock(t); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index ffcea0d1678e..2b6b700949eb 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -68,15 +68,6 @@ void *ipt_alloc_initial_table(const struct xt_table *info) } EXPORT_SYMBOL_GPL(ipt_alloc_initial_table); -/* - We keep a set of rules for each CPU, so we can avoid write-locking - them in the softirq when updating the counters and therefore - only need to read-lock in the softirq; doing a write_lock_bh() in user - context stops packets coming through and allows user context to read - the counters or update the rules. - - Hence the start of any table is given by get_table() below. */ - /* Returns whether matches rule or not. 
*/ /* Performance critical - called for every packet */ static inline bool @@ -311,6 +302,7 @@ ipt_do_table(struct sk_buff *skb, unsigned int *stackptr, origptr, cpu; const struct xt_table_info *private; struct xt_action_param acpar; + unsigned int addend; /* Initialization */ ip = ip_hdr(skb); @@ -331,7 +323,8 @@ ipt_do_table(struct sk_buff *skb, acpar.hooknum = hook; IP_NF_ASSERT(table->valid_hooks & (1 << hook)); - xt_info_rdlock_bh(); + local_bh_disable(); + addend = xt_write_recseq_begin(); private = table->private; cpu = smp_processor_id(); table_base = private->entries[cpu]; @@ -430,7 +423,9 @@ ipt_do_table(struct sk_buff *skb, pr_debug("Exiting %s; resetting sp from %u to %u\n", __func__, *stackptr, origptr); *stackptr = origptr; - xt_info_rdunlock_bh(); + xt_write_recseq_end(addend); + local_bh_enable(); + #ifdef DEBUG_ALLOW_ALL return NF_ACCEPT; #else @@ -886,7 +881,7 @@ get_counters(const struct xt_table_info *t, unsigned int i; for_each_possible_cpu(cpu) { - seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock; + seqcount_t *s = &per_cpu(xt_recseq, cpu); i = 0; xt_entry_foreach(iter, t->entries[cpu], t->size) { @@ -894,10 +889,10 @@ get_counters(const struct xt_table_info *t, unsigned int start; do { - start = read_seqbegin(lock); + start = read_seqcount_begin(s); bcnt = iter->counters.bcnt; pcnt = iter->counters.pcnt; - } while (read_seqretry(lock, start)); + } while (read_seqcount_retry(s, start)); ADD_COUNTER(counters[i], bcnt, pcnt); ++i; /* macro does multi eval of i */ @@ -1312,6 +1307,7 @@ do_add_counters(struct net *net, const void __user *user, int ret = 0; void *loc_cpu_entry; struct ipt_entry *iter; + unsigned int addend; #ifdef CONFIG_COMPAT struct compat_xt_counters_info compat_tmp; @@ -1368,12 +1364,12 @@ do_add_counters(struct net *net, const void __user *user, /* Choose the copy that is on our node */ curcpu = smp_processor_id(); loc_cpu_entry = private->entries[curcpu]; - xt_info_wrlock(curcpu); + addend = xt_write_recseq_begin(); xt_entry_foreach(iter, loc_cpu_entry, private->size) { ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt); ++i; } - xt_info_wrunlock(curcpu); + xt_write_recseq_end(addend); unlock_up_free: local_bh_enable(); xt_table_unlock(t); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 0b2af9b85cec..ec7cf579cdd4 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -340,6 +340,7 @@ ip6t_do_table(struct sk_buff *skb, unsigned int *stackptr, origptr, cpu; const struct xt_table_info *private; struct xt_action_param acpar; + unsigned int addend; /* Initialization */ indev = in ? 
in->name : nulldevname; @@ -358,7 +359,8 @@ ip6t_do_table(struct sk_buff *skb, IP_NF_ASSERT(table->valid_hooks & (1 << hook)); - xt_info_rdlock_bh(); + local_bh_disable(); + addend = xt_write_recseq_begin(); private = table->private; cpu = smp_processor_id(); table_base = private->entries[cpu]; @@ -442,7 +444,9 @@ ip6t_do_table(struct sk_buff *skb, } while (!acpar.hotdrop); *stackptr = origptr; - xt_info_rdunlock_bh(); + + xt_write_recseq_end(addend); + local_bh_enable(); #ifdef DEBUG_ALLOW_ALL return NF_ACCEPT; @@ -899,7 +903,7 @@ get_counters(const struct xt_table_info *t, unsigned int i; for_each_possible_cpu(cpu) { - seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock; + seqcount_t *s = &per_cpu(xt_recseq, cpu); i = 0; xt_entry_foreach(iter, t->entries[cpu], t->size) { @@ -907,10 +911,10 @@ get_counters(const struct xt_table_info *t, unsigned int start; do { - start = read_seqbegin(lock); + start = read_seqcount_begin(s); bcnt = iter->counters.bcnt; pcnt = iter->counters.pcnt; - } while (read_seqretry(lock, start)); + } while (read_seqcount_retry(s, start)); ADD_COUNTER(counters[i], bcnt, pcnt); ++i; @@ -1325,6 +1329,7 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, int ret = 0; const void *loc_cpu_entry; struct ip6t_entry *iter; + unsigned int addend; #ifdef CONFIG_COMPAT struct compat_xt_counters_info compat_tmp; @@ -1381,13 +1386,13 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, i = 0; /* Choose the copy that is on our node */ curcpu = smp_processor_id(); - xt_info_wrlock(curcpu); + addend = xt_write_recseq_begin(); loc_cpu_entry = private->entries[curcpu]; xt_entry_foreach(iter, loc_cpu_entry, private->size) { ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt); ++i; } - xt_info_wrunlock(curcpu); + xt_write_recseq_end(addend); unlock_up_free: local_bh_enable(); diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index a9adf4c6b299..52959efca858 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -762,8 +762,8 @@ void xt_compat_unlock(u_int8_t af) EXPORT_SYMBOL_GPL(xt_compat_unlock); #endif -DEFINE_PER_CPU(struct xt_info_lock, xt_info_locks); -EXPORT_PER_CPU_SYMBOL_GPL(xt_info_locks); +DEFINE_PER_CPU(seqcount_t, xt_recseq); +EXPORT_PER_CPU_SYMBOL_GPL(xt_recseq); static int xt_jumpstack_alloc(struct xt_table_info *i) { @@ -1362,10 +1362,7 @@ static int __init xt_init(void) int rv; for_each_possible_cpu(i) { - struct xt_info_lock *lock = &per_cpu(xt_info_locks, i); - - seqlock_init(&lock->lock); - lock->readers = 0; + seqcount_init(&per_cpu(xt_recseq, i)); } xt = kmalloc(sizeof(struct xt_af) * NFPROTO_NUMPROTO, GFP_KERNEL); -- cgit v1.2.3-71-gd317 From d430d3d7e646eb1eac2bb4aa244a644312e67c76 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Wed, 16 Mar 2011 17:29:47 -0400 Subject: jump label: Introduce static_branch() interface Introduce: static __always_inline bool static_branch(struct jump_label_key *key); instead of the old JUMP_LABEL(key, label) macro. In this way, jump labels become really easy to use: Define: struct jump_label_key jump_key; Can be used as: if (static_branch(&jump_key)) do unlikely code enable/disale via: jump_label_inc(&jump_key); jump_label_dec(&jump_key); that's it! For the jump labels disabled case, the static_branch() becomes an atomic_read(), and jump_label_inc()/dec() are simply atomic_inc(), atomic_dec() operations. We show testing results for this change below. Thanks to H. Peter Anvin for suggesting the 'static_branch()' construct. 
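To make the usage described above concrete, here is a minimal sketch of a caller of the new interface; the my_feature_* names are invented for illustration, while struct jump_label_key, static_branch(), jump_label_inc() and jump_label_dec() are the primitives this patch introduces:

	#include <linux/jump_label.h>

	/* one key per rarely-enabled feature; a static key starts out disabled */
	static struct jump_label_key my_feature_key;

	extern void my_feature_slow_path(void);		/* hypothetical */

	static inline void my_hot_path(void)
	{
		/*
		 * With arch support this compiles to a single nop that is
		 * live-patched into a jump when the key is enabled; without
		 * it, it degrades to an atomic_read() of the key.
		 */
		if (static_branch(&my_feature_key))
			my_feature_slow_path();
	}

	/* slow-path control, e.g. from a sysfs knob or tracepoint registration */
	void my_feature_enable(void)
	{
		jump_label_inc(&my_feature_key);
	}

	void my_feature_disable(void)
	{
		jump_label_dec(&my_feature_key);
	}

Enable/disable is reference-counted, so nested users (for example several probes on the same tracepoint) keep the branch patched in until the last one goes away.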
Since we now require a 'struct jump_label_key *key', we can store a pointer into the jump table addresses. In this way, we can enable/disable jump labels, in basically constant time. This change allows us to completely remove the previous hashtable scheme. Thanks to Peter Zijlstra for this re-write. Testing: I ran a series of 'tbench 20' runs 5 times (with reboots) for 3 configurations, where tracepoints were disabled. jump label configured in avg: 815.6 jump label *not* configured in (using atomic reads) avg: 800.1 jump label *not* configured in (regular reads) avg: 803.4 Signed-off-by: Peter Zijlstra LKML-Reference: <20110316212947.GA8792@redhat.com> Signed-off-by: Jason Baron Suggested-by: H. Peter Anvin Tested-by: David Daney Acked-by: Ralf Baechle Acked-by: David S. Miller Acked-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt --- arch/mips/include/asm/jump_label.h | 22 +- arch/sparc/include/asm/jump_label.h | 25 +- arch/x86/include/asm/alternative.h | 3 +- arch/x86/include/asm/jump_label.h | 26 +- arch/x86/kernel/alternative.c | 2 +- arch/x86/kernel/module.c | 1 + include/asm-generic/vmlinux.lds.h | 14 +- include/linux/dynamic_debug.h | 2 - include/linux/jump_label.h | 89 +++--- include/linux/jump_label_ref.h | 44 --- include/linux/perf_event.h | 26 +- include/linux/tracepoint.h | 22 +- kernel/jump_label.c | 539 +++++++++++++++--------------------- kernel/perf_event.c | 4 +- kernel/tracepoint.c | 23 +- 15 files changed, 356 insertions(+), 486 deletions(-) delete mode 100644 include/linux/jump_label_ref.h (limited to 'include/linux') diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h index 7622ccf75076..1881b316ca45 100644 --- a/arch/mips/include/asm/jump_label.h +++ b/arch/mips/include/asm/jump_label.h @@ -20,16 +20,18 @@ #define WORD_INSN ".word" #endif -#define JUMP_LABEL(key, label) \ - do { \ - asm goto("1:\tnop\n\t" \ - "nop\n\t" \ - ".pushsection __jump_table, \"a\"\n\t" \ - WORD_INSN " 1b, %l[" #label "], %0\n\t" \ - ".popsection\n\t" \ - : : "i" (key) : : label); \ - } while (0) - +static __always_inline bool arch_static_branch(struct jump_label_key *key) +{ + asm goto("1:\tnop\n\t" + "nop\n\t" + ".pushsection __jump_table, \"aw\"\n\t" + WORD_INSN " 1b, %l[l_yes], %0\n\t" + ".popsection\n\t" + : : "i" (key) : : l_yes); + return false; +l_yes: + return true; +} #endif /* __KERNEL__ */ diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h index 427d4684e0d2..fc73a82366f8 100644 --- a/arch/sparc/include/asm/jump_label.h +++ b/arch/sparc/include/asm/jump_label.h @@ -7,17 +7,20 @@ #define JUMP_LABEL_NOP_SIZE 4 -#define JUMP_LABEL(key, label) \ - do { \ - asm goto("1:\n\t" \ - "nop\n\t" \ - "nop\n\t" \ - ".pushsection __jump_table, \"a\"\n\t"\ - ".align 4\n\t" \ - ".word 1b, %l[" #label "], %c0\n\t" \ - ".popsection \n\t" \ - : : "i" (key) : : label);\ - } while (0) +static __always_inline bool arch_static_branch(struct jump_label_key *key) +{ + asm goto("1:\n\t" + "nop\n\t" + "nop\n\t" + ".pushsection __jump_table, \"aw\"\n\t" + ".align 4\n\t" + ".word 1b, %l[l_yes], %c0\n\t" + ".popsection \n\t" + : : "i" (key) : : l_yes); + return false; +l_yes: + return true; +} #endif /* __KERNEL__ */ diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 13009d1af99a..8cdd1e247975 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -4,7 +4,6 @@ #include #include #include -#include #include /* @@ -191,7 +190,7 @@ extern void *text_poke(void *addr, 
const void *opcode, size_t len); extern void *text_poke_smp(void *addr, const void *opcode, size_t len); extern void text_poke_smp_batch(struct text_poke_param *params, int n); -#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL) +#if defined(CONFIG_DYNAMIC_FTRACE) || defined(CONFIG_JUMP_LABEL) #define IDEAL_NOP_SIZE_5 5 extern unsigned char ideal_nop5[IDEAL_NOP_SIZE_5]; extern void arch_init_ideal_nop5(void); diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index 574dbc22893a..f217cee86533 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -5,20 +5,24 @@ #include #include +#include #define JUMP_LABEL_NOP_SIZE 5 -# define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t" - -# define JUMP_LABEL(key, label) \ - do { \ - asm goto("1:" \ - JUMP_LABEL_INITIAL_NOP \ - ".pushsection __jump_table, \"aw\" \n\t"\ - _ASM_PTR "1b, %l[" #label "], %c0 \n\t" \ - ".popsection \n\t" \ - : : "i" (key) : : label); \ - } while (0) +#define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t" + +static __always_inline bool arch_static_branch(struct jump_label_key *key) +{ + asm goto("1:" + JUMP_LABEL_INITIAL_NOP + ".pushsection __jump_table, \"aw\" \n\t" + _ASM_PTR "1b, %l[l_yes], %c0 \n\t" + ".popsection \n\t" + : : "i" (key) : : l_yes); + return false; +l_yes: + return true; +} #endif /* __KERNEL__ */ diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 4a234677e213..651454b0c811 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -679,7 +679,7 @@ void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n) __stop_machine(stop_machine_text_poke, (void *)&tpp, NULL); } -#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL) +#if defined(CONFIG_DYNAMIC_FTRACE) || defined(CONFIG_JUMP_LABEL) #ifdef CONFIG_X86_64 unsigned char ideal_nop5[5] = { 0x66, 0x66, 0x66, 0x66, 0x90 }; diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index ab23f1ad4bf1..52f256f2cc81 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 32c45e5fe0ab..79522166d7f1 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -170,6 +170,10 @@ STRUCT_ALIGN(); \ *(__tracepoints) \ /* implement dynamic printk debug */ \ + . = ALIGN(8); \ + VMLINUX_SYMBOL(__start___jump_table) = .; \ + *(__jump_table) \ + VMLINUX_SYMBOL(__stop___jump_table) = .; \ . = ALIGN(8); \ VMLINUX_SYMBOL(__start___verbose) = .; \ *(__verbose) \ @@ -228,8 +232,6 @@ \ BUG_TABLE \ \ - JUMP_TABLE \ - \ /* PCI quirks */ \ .pci_fixup : AT(ADDR(.pci_fixup) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start_pci_fixups_early) = .; \ @@ -589,14 +591,6 @@ #define BUG_TABLE #endif -#define JUMP_TABLE \ - . = ALIGN(8); \ - __jump_table : AT(ADDR(__jump_table) - LOAD_OFFSET) { \ - VMLINUX_SYMBOL(__start___jump_table) = .; \ - *(__jump_table) \ - VMLINUX_SYMBOL(__stop___jump_table) = .; \ - } - #ifdef CONFIG_PM_TRACE #define TRACEDATA \ . 
= ALIGN(4); \ diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h index 0c9653f11c18..e747ecd48e1c 100644 --- a/include/linux/dynamic_debug.h +++ b/include/linux/dynamic_debug.h @@ -1,8 +1,6 @@ #ifndef _DYNAMIC_DEBUG_H #define _DYNAMIC_DEBUG_H -#include - /* dynamic_printk_enabled, and dynamic_printk_enabled2 are bitmasks in which * bit n is set to 1 if any modname hashes into the bucket n, 0 otherwise. They * use independent hash functions, to reduce the chance of false positives. diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 7880f18e4b86..83e745f3ead7 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -1,20 +1,43 @@ #ifndef _LINUX_JUMP_LABEL_H #define _LINUX_JUMP_LABEL_H +#include +#include + #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL) + +struct jump_label_key { + atomic_t enabled; + struct jump_entry *entries; +#ifdef CONFIG_MODULES + struct jump_label_mod *next; +#endif +}; + # include # define HAVE_JUMP_LABEL #endif enum jump_label_type { + JUMP_LABEL_DISABLE = 0, JUMP_LABEL_ENABLE, - JUMP_LABEL_DISABLE }; struct module; #ifdef HAVE_JUMP_LABEL +#ifdef CONFIG_MODULES +#define JUMP_LABEL_INIT {{ 0 }, NULL, NULL} +#else +#define JUMP_LABEL_INIT {{ 0 }, NULL} +#endif + +static __always_inline bool static_branch(struct jump_label_key *key) +{ + return arch_static_branch(key); +} + extern struct jump_entry __start___jump_table[]; extern struct jump_entry __stop___jump_table[]; @@ -23,37 +46,37 @@ extern void jump_label_unlock(void); extern void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type); extern void arch_jump_label_text_poke_early(jump_label_t addr); -extern void jump_label_update(unsigned long key, enum jump_label_type type); -extern void jump_label_apply_nops(struct module *mod); extern int jump_label_text_reserved(void *start, void *end); +extern void jump_label_inc(struct jump_label_key *key); +extern void jump_label_dec(struct jump_label_key *key); +extern bool jump_label_enabled(struct jump_label_key *key); +extern void jump_label_apply_nops(struct module *mod); -#define jump_label_enable(key) \ - jump_label_update((unsigned long)key, JUMP_LABEL_ENABLE); +#else -#define jump_label_disable(key) \ - jump_label_update((unsigned long)key, JUMP_LABEL_DISABLE); +#include -#else +#define JUMP_LABEL_INIT {ATOMIC_INIT(0)} -#define JUMP_LABEL(key, label) \ -do { \ - if (unlikely(*key)) \ - goto label; \ -} while (0) +struct jump_label_key { + atomic_t enabled; +}; -#define jump_label_enable(cond_var) \ -do { \ - *(cond_var) = 1; \ -} while (0) +static __always_inline bool static_branch(struct jump_label_key *key) +{ + if (unlikely(atomic_read(&key->enabled))) + return true; + return false; +} -#define jump_label_disable(cond_var) \ -do { \ - *(cond_var) = 0; \ -} while (0) +static inline void jump_label_inc(struct jump_label_key *key) +{ + atomic_inc(&key->enabled); +} -static inline int jump_label_apply_nops(struct module *mod) +static inline void jump_label_dec(struct jump_label_key *key) { - return 0; + atomic_dec(&key->enabled); } static inline int jump_label_text_reserved(void *start, void *end) @@ -64,16 +87,16 @@ static inline int jump_label_text_reserved(void *start, void *end) static inline void jump_label_lock(void) {} static inline void jump_label_unlock(void) {} -#endif +static inline bool jump_label_enabled(struct jump_label_key *key) +{ + return !!atomic_read(&key->enabled); +} -#define COND_STMT(key, stmt) \ -do { \ - __label__ jl_enabled; \ - 
JUMP_LABEL(key, jl_enabled); \ - if (0) { \ -jl_enabled: \ - stmt; \ - } \ -} while (0) +static inline int jump_label_apply_nops(struct module *mod) +{ + return 0; +} + +#endif #endif diff --git a/include/linux/jump_label_ref.h b/include/linux/jump_label_ref.h deleted file mode 100644 index e5d012ad92c6..000000000000 --- a/include/linux/jump_label_ref.h +++ /dev/null @@ -1,44 +0,0 @@ -#ifndef _LINUX_JUMP_LABEL_REF_H -#define _LINUX_JUMP_LABEL_REF_H - -#include -#include - -#ifdef HAVE_JUMP_LABEL - -static inline void jump_label_inc(atomic_t *key) -{ - if (atomic_add_return(1, key) == 1) - jump_label_enable(key); -} - -static inline void jump_label_dec(atomic_t *key) -{ - if (atomic_dec_and_test(key)) - jump_label_disable(key); -} - -#else /* !HAVE_JUMP_LABEL */ - -static inline void jump_label_inc(atomic_t *key) -{ - atomic_inc(key); -} - -static inline void jump_label_dec(atomic_t *key) -{ - atomic_dec(key); -} - -#undef JUMP_LABEL -#define JUMP_LABEL(key, label) \ -do { \ - if (unlikely(__builtin_choose_expr( \ - __builtin_types_compatible_p(typeof(key), atomic_t *), \ - atomic_read((atomic_t *)(key)), *(key)))) \ - goto label; \ -} while (0) - -#endif /* HAVE_JUMP_LABEL */ - -#endif /* _LINUX_JUMP_LABEL_REF_H */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 311b4dc785a1..730b7821690f 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -505,7 +505,7 @@ struct perf_guest_info_callbacks { #include #include #include -#include +#include #include #include @@ -1034,7 +1034,7 @@ static inline int is_software_event(struct perf_event *event) return event->pmu->task_ctx_nr == perf_sw_context; } -extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; +extern struct jump_label_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64); @@ -1063,22 +1063,21 @@ perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr) { struct pt_regs hot_regs; - JUMP_LABEL(&perf_swevent_enabled[event_id], have_event); - return; - -have_event: - if (!regs) { - perf_fetch_caller_regs(&hot_regs); - regs = &hot_regs; + if (static_branch(&perf_swevent_enabled[event_id])) { + if (!regs) { + perf_fetch_caller_regs(&hot_regs); + regs = &hot_regs; + } + __perf_sw_event(event_id, nr, nmi, regs, addr); } - __perf_sw_event(event_id, nr, nmi, regs, addr); } -extern atomic_t perf_sched_events; +extern struct jump_label_key perf_sched_events; static inline void perf_event_task_sched_in(struct task_struct *task) { - COND_STMT(&perf_sched_events, __perf_event_task_sched_in(task)); + if (static_branch(&perf_sched_events)) + __perf_event_task_sched_in(task); } static inline @@ -1086,7 +1085,8 @@ void perf_event_task_sched_out(struct task_struct *task, struct task_struct *nex { perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0); - COND_STMT(&perf_sched_events, __perf_event_task_sched_out(task, next)); + if (static_branch(&perf_sched_events)) + __perf_event_task_sched_out(task, next); } extern void perf_event_mmap(struct vm_area_struct *vma); diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 97c84a58efb8..d530a4460a0b 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -29,7 +29,7 @@ struct tracepoint_func { struct tracepoint { const char *name; /* Tracepoint name */ - int state; /* State. 
*/ + struct jump_label_key key; void (*regfunc)(void); void (*unregfunc)(void); struct tracepoint_func __rcu *funcs; @@ -146,9 +146,7 @@ void tracepoint_update_probe_range(struct tracepoint * const *begin, extern struct tracepoint __tracepoint_##name; \ static inline void trace_##name(proto) \ { \ - JUMP_LABEL(&__tracepoint_##name.state, do_trace); \ - return; \ -do_trace: \ + if (static_branch(&__tracepoint_##name.key)) \ __DO_TRACE(&__tracepoint_##name, \ TP_PROTO(data_proto), \ TP_ARGS(data_args), \ @@ -176,14 +174,14 @@ do_trace: \ * structures, so we create an array of pointers that will be used for iteration * on the tracepoints. */ -#define DEFINE_TRACE_FN(name, reg, unreg) \ - static const char __tpstrtab_##name[] \ - __attribute__((section("__tracepoints_strings"))) = #name; \ - struct tracepoint __tracepoint_##name \ - __attribute__((section("__tracepoints"))) = \ - { __tpstrtab_##name, 0, reg, unreg, NULL }; \ - static struct tracepoint * const __tracepoint_ptr_##name __used \ - __attribute__((section("__tracepoints_ptrs"))) = \ +#define DEFINE_TRACE_FN(name, reg, unreg) \ + static const char __tpstrtab_##name[] \ + __attribute__((section("__tracepoints_strings"))) = #name; \ + struct tracepoint __tracepoint_##name \ + __attribute__((section("__tracepoints"))) = \ + { __tpstrtab_##name, JUMP_LABEL_INIT, reg, unreg, NULL };\ + static struct tracepoint * const __tracepoint_ptr_##name __used \ + __attribute__((section("__tracepoints_ptrs"))) = \ &__tracepoint_##name; #define DEFINE_TRACE(name) \ diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 3b79bd938330..74d1c099fbd1 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -2,43 +2,23 @@ * jump label support * * Copyright (C) 2009 Jason Baron + * Copyright (C) 2011 Peter Zijlstra * */ -#include #include #include #include #include -#include #include #include #include +#include #ifdef HAVE_JUMP_LABEL -#define JUMP_LABEL_HASH_BITS 6 -#define JUMP_LABEL_TABLE_SIZE (1 << JUMP_LABEL_HASH_BITS) -static struct hlist_head jump_label_table[JUMP_LABEL_TABLE_SIZE]; - /* mutex to protect coming/going of the the jump_label table */ static DEFINE_MUTEX(jump_label_mutex); -struct jump_label_entry { - struct hlist_node hlist; - struct jump_entry *table; - int nr_entries; - /* hang modules off here */ - struct hlist_head modules; - unsigned long key; -}; - -struct jump_label_module_entry { - struct hlist_node hlist; - struct jump_entry *table; - int nr_entries; - struct module *mod; -}; - void jump_label_lock(void) { mutex_lock(&jump_label_mutex); @@ -49,6 +29,11 @@ void jump_label_unlock(void) mutex_unlock(&jump_label_mutex); } +bool jump_label_enabled(struct jump_label_key *key) +{ + return !!atomic_read(&key->enabled); +} + static int jump_label_cmp(const void *a, const void *b) { const struct jump_entry *jea = a; @@ -64,7 +49,7 @@ static int jump_label_cmp(const void *a, const void *b) } static void -sort_jump_label_entries(struct jump_entry *start, struct jump_entry *stop) +jump_label_sort_entries(struct jump_entry *start, struct jump_entry *stop) { unsigned long size; @@ -73,118 +58,25 @@ sort_jump_label_entries(struct jump_entry *start, struct jump_entry *stop) sort(start, size, sizeof(struct jump_entry), jump_label_cmp, NULL); } -static struct jump_label_entry *get_jump_label_entry(jump_label_t key) -{ - struct hlist_head *head; - struct hlist_node *node; - struct jump_label_entry *e; - u32 hash = jhash((void *)&key, sizeof(jump_label_t), 0); - - head = &jump_label_table[hash & (JUMP_LABEL_TABLE_SIZE - 1)]; - 
hlist_for_each_entry(e, node, head, hlist) { - if (key == e->key) - return e; - } - return NULL; -} +static void jump_label_update(struct jump_label_key *key, int enable); -static struct jump_label_entry * -add_jump_label_entry(jump_label_t key, int nr_entries, struct jump_entry *table) +void jump_label_inc(struct jump_label_key *key) { - struct hlist_head *head; - struct jump_label_entry *e; - u32 hash; - - e = get_jump_label_entry(key); - if (e) - return ERR_PTR(-EEXIST); - - e = kmalloc(sizeof(struct jump_label_entry), GFP_KERNEL); - if (!e) - return ERR_PTR(-ENOMEM); - - hash = jhash((void *)&key, sizeof(jump_label_t), 0); - head = &jump_label_table[hash & (JUMP_LABEL_TABLE_SIZE - 1)]; - e->key = key; - e->table = table; - e->nr_entries = nr_entries; - INIT_HLIST_HEAD(&(e->modules)); - hlist_add_head(&e->hlist, head); - return e; -} + if (atomic_inc_not_zero(&key->enabled)) + return; -static int -build_jump_label_hashtable(struct jump_entry *start, struct jump_entry *stop) -{ - struct jump_entry *iter, *iter_begin; - struct jump_label_entry *entry; - int count; - - sort_jump_label_entries(start, stop); - iter = start; - while (iter < stop) { - entry = get_jump_label_entry(iter->key); - if (!entry) { - iter_begin = iter; - count = 0; - while ((iter < stop) && - (iter->key == iter_begin->key)) { - iter++; - count++; - } - entry = add_jump_label_entry(iter_begin->key, - count, iter_begin); - if (IS_ERR(entry)) - return PTR_ERR(entry); - } else { - WARN_ONCE(1, KERN_ERR "build_jump_hashtable: unexpected entry!\n"); - return -1; - } - } - return 0; + jump_label_lock(); + if (atomic_add_return(1, &key->enabled) == 1) + jump_label_update(key, JUMP_LABEL_ENABLE); + jump_label_unlock(); } -/*** - * jump_label_update - update jump label text - * @key - key value associated with a a jump label - * @type - enum set to JUMP_LABEL_ENABLE or JUMP_LABEL_DISABLE - * - * Will enable/disable the jump for jump label @key, depending on the - * value of @type. 
- * - */ - -void jump_label_update(unsigned long key, enum jump_label_type type) +void jump_label_dec(struct jump_label_key *key) { - struct jump_entry *iter; - struct jump_label_entry *entry; - struct hlist_node *module_node; - struct jump_label_module_entry *e_module; - int count; + if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex)) + return; - jump_label_lock(); - entry = get_jump_label_entry((jump_label_t)key); - if (entry) { - count = entry->nr_entries; - iter = entry->table; - while (count--) { - if (kernel_text_address(iter->code)) - arch_jump_label_transform(iter, type); - iter++; - } - /* eanble/disable jump labels in modules */ - hlist_for_each_entry(e_module, module_node, &(entry->modules), - hlist) { - count = e_module->nr_entries; - iter = e_module->table; - while (count--) { - if (iter->key && - kernel_text_address(iter->code)) - arch_jump_label_transform(iter, type); - iter++; - } - } - } + jump_label_update(key, JUMP_LABEL_DISABLE); jump_label_unlock(); } @@ -197,77 +89,33 @@ static int addr_conflict(struct jump_entry *entry, void *start, void *end) return 0; } -#ifdef CONFIG_MODULES - -static int module_conflict(void *start, void *end) +static int __jump_label_text_reserved(struct jump_entry *iter_start, + struct jump_entry *iter_stop, void *start, void *end) { - struct hlist_head *head; - struct hlist_node *node, *node_next, *module_node, *module_node_next; - struct jump_label_entry *e; - struct jump_label_module_entry *e_module; struct jump_entry *iter; - int i, count; - int conflict = 0; - - for (i = 0; i < JUMP_LABEL_TABLE_SIZE; i++) { - head = &jump_label_table[i]; - hlist_for_each_entry_safe(e, node, node_next, head, hlist) { - hlist_for_each_entry_safe(e_module, module_node, - module_node_next, - &(e->modules), hlist) { - count = e_module->nr_entries; - iter = e_module->table; - while (count--) { - if (addr_conflict(iter, start, end)) { - conflict = 1; - goto out; - } - iter++; - } - } - } - } -out: - return conflict; -} - -#endif - -/*** - * jump_label_text_reserved - check if addr range is reserved - * @start: start text addr - * @end: end text addr - * - * checks if the text addr located between @start and @end - * overlaps with any of the jump label patch addresses. Code - * that wants to modify kernel text should first verify that - * it does not overlap with any of the jump label addresses. - * Caller must hold jump_label_mutex. - * - * returns 1 if there is an overlap, 0 otherwise - */ -int jump_label_text_reserved(void *start, void *end) -{ - struct jump_entry *iter; - struct jump_entry *iter_start = __start___jump_table; - struct jump_entry *iter_stop = __start___jump_table; - int conflict = 0; iter = iter_start; while (iter < iter_stop) { - if (addr_conflict(iter, start, end)) { - conflict = 1; - goto out; - } + if (addr_conflict(iter, start, end)) + return 1; iter++; } - /* now check modules */ -#ifdef CONFIG_MODULES - conflict = module_conflict(start, end); -#endif -out: - return conflict; + return 0; +} + +static void __jump_label_update(struct jump_label_key *key, + struct jump_entry *entry, int enable) +{ + for (; entry->key == (jump_label_t)(unsigned long)key; entry++) { + /* + * entry->code set to 0 invalidates module init text sections + * kernel_text_address() verifies we are not in core kernel + * init code, see jump_label_invalidate_module_init(). 
+ */ + if (entry->code && kernel_text_address(entry->code)) + arch_jump_label_transform(entry, enable); + } } /* @@ -277,142 +125,173 @@ void __weak arch_jump_label_text_poke_early(jump_label_t addr) { } -static __init int init_jump_label(void) +static __init int jump_label_init(void) { - int ret; struct jump_entry *iter_start = __start___jump_table; struct jump_entry *iter_stop = __stop___jump_table; + struct jump_label_key *key = NULL; struct jump_entry *iter; jump_label_lock(); - ret = build_jump_label_hashtable(__start___jump_table, - __stop___jump_table); - iter = iter_start; - while (iter < iter_stop) { + jump_label_sort_entries(iter_start, iter_stop); + + for (iter = iter_start; iter < iter_stop; iter++) { arch_jump_label_text_poke_early(iter->code); - iter++; + if (iter->key == (jump_label_t)(unsigned long)key) + continue; + + key = (struct jump_label_key *)(unsigned long)iter->key; + atomic_set(&key->enabled, 0); + key->entries = iter; +#ifdef CONFIG_MODULES + key->next = NULL; +#endif } jump_label_unlock(); - return ret; + + return 0; } -early_initcall(init_jump_label); +early_initcall(jump_label_init); #ifdef CONFIG_MODULES -static struct jump_label_module_entry * -add_jump_label_module_entry(struct jump_label_entry *entry, - struct jump_entry *iter_begin, - int count, struct module *mod) +struct jump_label_mod { + struct jump_label_mod *next; + struct jump_entry *entries; + struct module *mod; +}; + +static int __jump_label_mod_text_reserved(void *start, void *end) +{ + struct module *mod; + + mod = __module_text_address((unsigned long)start); + if (!mod) + return 0; + + WARN_ON_ONCE(__module_text_address((unsigned long)end) != mod); + + return __jump_label_text_reserved(mod->jump_entries, + mod->jump_entries + mod->num_jump_entries, + start, end); +} + +static void __jump_label_mod_update(struct jump_label_key *key, int enable) +{ + struct jump_label_mod *mod = key->next; + + while (mod) { + __jump_label_update(key, mod->entries, enable); + mod = mod->next; + } +} + +/*** + * apply_jump_label_nops - patch module jump labels with arch_get_jump_label_nop() + * @mod: module to patch + * + * Allow for run-time selection of the optimal nops. Before the module + * loads patch these with arch_get_jump_label_nop(), which is specified by + * the arch specific jump label code. 
+ */ +void jump_label_apply_nops(struct module *mod) { - struct jump_label_module_entry *e; - - e = kmalloc(sizeof(struct jump_label_module_entry), GFP_KERNEL); - if (!e) - return ERR_PTR(-ENOMEM); - e->mod = mod; - e->nr_entries = count; - e->table = iter_begin; - hlist_add_head(&e->hlist, &entry->modules); - return e; + struct jump_entry *iter_start = mod->jump_entries; + struct jump_entry *iter_stop = iter_start + mod->num_jump_entries; + struct jump_entry *iter; + + /* if the module doesn't have jump label entries, just return */ + if (iter_start == iter_stop) + return; + + for (iter = iter_start; iter < iter_stop; iter++) + arch_jump_label_text_poke_early(iter->code); } -static int add_jump_label_module(struct module *mod) +static int jump_label_add_module(struct module *mod) { - struct jump_entry *iter, *iter_begin; - struct jump_label_entry *entry; - struct jump_label_module_entry *module_entry; - int count; + struct jump_entry *iter_start = mod->jump_entries; + struct jump_entry *iter_stop = iter_start + mod->num_jump_entries; + struct jump_entry *iter; + struct jump_label_key *key = NULL; + struct jump_label_mod *jlm; /* if the module doesn't have jump label entries, just return */ - if (!mod->num_jump_entries) + if (iter_start == iter_stop) return 0; - sort_jump_label_entries(mod->jump_entries, - mod->jump_entries + mod->num_jump_entries); - iter = mod->jump_entries; - while (iter < mod->jump_entries + mod->num_jump_entries) { - entry = get_jump_label_entry(iter->key); - iter_begin = iter; - count = 0; - while ((iter < mod->jump_entries + mod->num_jump_entries) && - (iter->key == iter_begin->key)) { - iter++; - count++; - } - if (!entry) { - entry = add_jump_label_entry(iter_begin->key, 0, NULL); - if (IS_ERR(entry)) - return PTR_ERR(entry); + jump_label_sort_entries(iter_start, iter_stop); + + for (iter = iter_start; iter < iter_stop; iter++) { + if (iter->key == (jump_label_t)(unsigned long)key) + continue; + + key = (struct jump_label_key *)(unsigned long)iter->key; + + if (__module_address(iter->key) == mod) { + atomic_set(&key->enabled, 0); + key->entries = iter; + key->next = NULL; + continue; } - module_entry = add_jump_label_module_entry(entry, iter_begin, - count, mod); - if (IS_ERR(module_entry)) - return PTR_ERR(module_entry); + + jlm = kzalloc(sizeof(struct jump_label_mod), GFP_KERNEL); + if (!jlm) + return -ENOMEM; + + jlm->mod = mod; + jlm->entries = iter; + jlm->next = key->next; + key->next = jlm; + + if (jump_label_enabled(key)) + __jump_label_update(key, iter, JUMP_LABEL_ENABLE); } + return 0; } -static void remove_jump_label_module(struct module *mod) +static void jump_label_del_module(struct module *mod) { - struct hlist_head *head; - struct hlist_node *node, *node_next, *module_node, *module_node_next; - struct jump_label_entry *e; - struct jump_label_module_entry *e_module; - int i; + struct jump_entry *iter_start = mod->jump_entries; + struct jump_entry *iter_stop = iter_start + mod->num_jump_entries; + struct jump_entry *iter; + struct jump_label_key *key = NULL; + struct jump_label_mod *jlm, **prev; - /* if the module doesn't have jump label entries, just return */ - if (!mod->num_jump_entries) - return; + for (iter = iter_start; iter < iter_stop; iter++) { + if (iter->key == (jump_label_t)(unsigned long)key) + continue; + + key = (struct jump_label_key *)(unsigned long)iter->key; + + if (__module_address(iter->key) == mod) + continue; + + prev = &key->next; + jlm = key->next; - for (i = 0; i < JUMP_LABEL_TABLE_SIZE; i++) { - head = &jump_label_table[i]; 
- hlist_for_each_entry_safe(e, node, node_next, head, hlist) { - hlist_for_each_entry_safe(e_module, module_node, - module_node_next, - &(e->modules), hlist) { - if (e_module->mod == mod) { - hlist_del(&e_module->hlist); - kfree(e_module); - } - } - if (hlist_empty(&e->modules) && (e->nr_entries == 0)) { - hlist_del(&e->hlist); - kfree(e); - } + while (jlm && jlm->mod != mod) { + prev = &jlm->next; + jlm = jlm->next; + } + + if (jlm) { + *prev = jlm->next; + kfree(jlm); } } } -static void remove_jump_label_module_init(struct module *mod) +static void jump_label_invalidate_module_init(struct module *mod) { - struct hlist_head *head; - struct hlist_node *node, *node_next, *module_node, *module_node_next; - struct jump_label_entry *e; - struct jump_label_module_entry *e_module; + struct jump_entry *iter_start = mod->jump_entries; + struct jump_entry *iter_stop = iter_start + mod->num_jump_entries; struct jump_entry *iter; - int i, count; - - /* if the module doesn't have jump label entries, just return */ - if (!mod->num_jump_entries) - return; - for (i = 0; i < JUMP_LABEL_TABLE_SIZE; i++) { - head = &jump_label_table[i]; - hlist_for_each_entry_safe(e, node, node_next, head, hlist) { - hlist_for_each_entry_safe(e_module, module_node, - module_node_next, - &(e->modules), hlist) { - if (e_module->mod != mod) - continue; - count = e_module->nr_entries; - iter = e_module->table; - while (count--) { - if (within_module_init(iter->code, mod)) - iter->key = 0; - iter++; - } - } - } + for (iter = iter_start; iter < iter_stop; iter++) { + if (within_module_init(iter->code, mod)) + iter->code = 0; } } @@ -426,59 +305,77 @@ jump_label_module_notify(struct notifier_block *self, unsigned long val, switch (val) { case MODULE_STATE_COMING: jump_label_lock(); - ret = add_jump_label_module(mod); + ret = jump_label_add_module(mod); if (ret) - remove_jump_label_module(mod); + jump_label_del_module(mod); jump_label_unlock(); break; case MODULE_STATE_GOING: jump_label_lock(); - remove_jump_label_module(mod); + jump_label_del_module(mod); jump_label_unlock(); break; case MODULE_STATE_LIVE: jump_label_lock(); - remove_jump_label_module_init(mod); + jump_label_invalidate_module_init(mod); jump_label_unlock(); break; } - return ret; -} -/*** - * apply_jump_label_nops - patch module jump labels with arch_get_jump_label_nop() - * @mod: module to patch - * - * Allow for run-time selection of the optimal nops. Before the module - * loads patch these with arch_get_jump_label_nop(), which is specified by - * the arch specific jump label code. 
- */ -void jump_label_apply_nops(struct module *mod) -{ - struct jump_entry *iter; - - /* if the module doesn't have jump label entries, just return */ - if (!mod->num_jump_entries) - return; - - iter = mod->jump_entries; - while (iter < mod->jump_entries + mod->num_jump_entries) { - arch_jump_label_text_poke_early(iter->code); - iter++; - } + return notifier_from_errno(ret); } struct notifier_block jump_label_module_nb = { .notifier_call = jump_label_module_notify, - .priority = 0, + .priority = 1, /* higher than tracepoints */ }; -static __init int init_jump_label_module(void) +static __init int jump_label_init_module(void) { return register_module_notifier(&jump_label_module_nb); } -early_initcall(init_jump_label_module); +early_initcall(jump_label_init_module); #endif /* CONFIG_MODULES */ +/*** + * jump_label_text_reserved - check if addr range is reserved + * @start: start text addr + * @end: end text addr + * + * checks if the text addr located between @start and @end + * overlaps with any of the jump label patch addresses. Code + * that wants to modify kernel text should first verify that + * it does not overlap with any of the jump label addresses. + * Caller must hold jump_label_mutex. + * + * returns 1 if there is an overlap, 0 otherwise + */ +int jump_label_text_reserved(void *start, void *end) +{ + int ret = __jump_label_text_reserved(__start___jump_table, + __stop___jump_table, start, end); + + if (ret) + return ret; + +#ifdef CONFIG_MODULES + ret = __jump_label_mod_text_reserved(start, end); +#endif + return ret; +} + +static void jump_label_update(struct jump_label_key *key, int enable) +{ + struct jump_entry *entry = key->entries; + + /* if there are no users, entry can be NULL */ + if (entry) + __jump_label_update(key, entry, enable); + +#ifdef CONFIG_MODULES + __jump_label_mod_update(key, enable); +#endif +} + #endif diff --git a/kernel/perf_event.c b/kernel/perf_event.c index c75925c4d1e2..d665e92fbd44 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -125,7 +125,7 @@ enum event_type_t { * perf_sched_events : >0 events exist * perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu */ -atomic_t perf_sched_events __read_mostly; +struct jump_label_key perf_sched_events __read_mostly; static DEFINE_PER_CPU(atomic_t, perf_cgroup_events); static atomic_t nr_mmap_events __read_mostly; @@ -5417,7 +5417,7 @@ fail: return err; } -atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; +struct jump_label_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; static void sw_perf_event_destroy(struct perf_event *event) { diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 68187af4889e..b219f1449c54 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -251,9 +251,9 @@ static void set_tracepoint(struct tracepoint_entry **entry, { WARN_ON(strcmp((*entry)->name, elem->name) != 0); - if (elem->regfunc && !elem->state && active) + if (elem->regfunc && !jump_label_enabled(&elem->key) && active) elem->regfunc(); - else if (elem->unregfunc && elem->state && !active) + else if (elem->unregfunc && jump_label_enabled(&elem->key) && !active) elem->unregfunc(); /* @@ -264,13 +264,10 @@ static void set_tracepoint(struct tracepoint_entry **entry, * is used. 
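 *
 * With this change the tracepoint holds at most one reference on
 * elem->key: jump_label_inc() when the first probe makes it active and
 * jump_label_dec() when the last probe goes away, so the static branch
 * at the tracepoint site stays patched in only while probes are
 * attached.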
*/ rcu_assign_pointer(elem->funcs, (*entry)->funcs); - if (!elem->state && active) { - jump_label_enable(&elem->state); - elem->state = active; - } else if (elem->state && !active) { - jump_label_disable(&elem->state); - elem->state = active; - } + if (active && !jump_label_enabled(&elem->key)) + jump_label_inc(&elem->key); + else if (!active && jump_label_enabled(&elem->key)) + jump_label_dec(&elem->key); } /* @@ -281,13 +278,11 @@ static void set_tracepoint(struct tracepoint_entry **entry, */ static void disable_tracepoint(struct tracepoint *elem) { - if (elem->unregfunc && elem->state) + if (elem->unregfunc && jump_label_enabled(&elem->key)) elem->unregfunc(); - if (elem->state) { - jump_label_disable(&elem->state); - elem->state = 0; - } + if (jump_label_enabled(&elem->key)) + jump_label_dec(&elem->key); rcu_assign_pointer(elem->funcs, NULL); } -- cgit v1.2.3-71-gd317 From 0545a3037773512d3448557ba048cebb73b3e4af Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 4 Apr 2011 05:30:58 +0000 Subject: pkt_sched: QFQ - quick fair queue scheduler This is an implementation of the Quick Fair Queue scheduler developed by Fabio Checconi. The same algorithm is already implemented in ipfw in FreeBSD. Fabio had an earlier version developed on Linux, I just cleaned it up. Thanks to Eric Dumazet for testing this under load. Signed-off-by: Stephen Hemminger Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/pkt_sched.h | 15 + net/sched/Kconfig | 11 + net/sched/Makefile | 1 + net/sched/sch_qfq.c | 1137 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 1164 insertions(+) create mode 100644 net/sched/sch_qfq.c (limited to 'include/linux') diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index b1032a3fafdc..8062e0a68dda 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -588,4 +588,19 @@ struct tc_sfb_xstats { #define SFB_MAX_PROB 0xFFFF +/* QFQ */ +enum { + TCA_QFQ_UNSPEC, + TCA_QFQ_WEIGHT, + TCA_QFQ_LMAX, + __TCA_QFQ_MAX +}; + +#define TCA_QFQ_MAX (__TCA_QFQ_MAX - 1) + +struct tc_qfq_stats { + __u32 weight; + __u32 lmax; +}; + #endif diff --git a/net/sched/Kconfig b/net/sched/Kconfig index a7a5583d4f68..aeaa2110b699 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -239,6 +239,17 @@ config NET_SCH_CHOKE To compile this code as a module, choose M here: the module will be called sch_choke. +config NET_SCH_QFQ + tristate "Quick Fair Queueing scheduler (QFQ)" + help + Say Y here if you want to use the Quick Fair Queueing Scheduler (QFQ) + packet scheduling algorithm. + + To compile this driver as a module, choose M here: the module + will be called sch_qfq. + + If unsure, say N. + config NET_SCH_INGRESS tristate "Ingress Qdisc" depends on NET_CLS_ACT diff --git a/net/sched/Makefile b/net/sched/Makefile index 2e77b8dba22e..dc5889c0a15a 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -35,6 +35,7 @@ obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o obj-$(CONFIG_NET_SCH_MQPRIO) += sch_mqprio.o obj-$(CONFIG_NET_SCH_CHOKE) += sch_choke.o +obj-$(CONFIG_NET_SCH_QFQ) += sch_qfq.o obj-$(CONFIG_NET_CLS_U32) += cls_u32.o obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c new file mode 100644 index 000000000000..103343408593 --- /dev/null +++ b/net/sched/sch_qfq.c @@ -0,0 +1,1137 @@ +/* + * net/sched/sch_qfq.c Quick Fair Queueing Scheduler. + * + * Copyright (c) 2009 Fabio Checconi, Luigi Rizzo, and Paolo Valente. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* Quick Fair Queueing + =================== + + Sources: + + Fabio Checconi, Luigi Rizzo, and Paolo Valente: "QFQ: Efficient + Packet Scheduling with Tight Bandwidth Distribution Guarantees." + + See also: + http://retis.sssup.it/~fabio/linux/qfq/ + */ + +/* + + Virtual time computations. + + S, F and V are all computed in fixed point arithmetic with + FRAC_BITS decimal bits. + + QFQ_MAX_INDEX is the maximum index allowed for a group. We need + one bit per index. + QFQ_MAX_WSHIFT is the maximum power of two supported as a weight. + + The layout of the bits is as below: + + [ MTU_SHIFT ][ FRAC_BITS ] + [ MAX_INDEX ][ MIN_SLOT_SHIFT ] + ^.__grp->index = 0 + *.__grp->slot_shift + + where MIN_SLOT_SHIFT is derived by difference from the others. + + The max group index corresponds to Lmax/w_min, where + Lmax=1<group mapping. We allow class weights that are + * in the range [1, 2^MAX_WSHIFT], and we try to map each class i to the + * group with the smallest index that can support the L_i / r_i configured + * for the class. + * + * grp->index is the index of the group; and grp->slot_shift + * is the shift for the corresponding (scaled) sigma_i. + */ +#define QFQ_MAX_INDEX 19 +#define QFQ_MAX_WSHIFT 16 + +#define QFQ_MAX_WEIGHT (1<clhash, classid); + if (clc == NULL) + return NULL; + return container_of(clc, struct qfq_class, common); +} + +static void qfq_purge_queue(struct qfq_class *cl) +{ + unsigned int len = cl->qdisc->q.qlen; + + qdisc_reset(cl->qdisc); + qdisc_tree_decrease_qlen(cl->qdisc, len); +} + +static const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = { + [TCA_QFQ_WEIGHT] = { .type = NLA_U32 }, + [TCA_QFQ_LMAX] = { .type = NLA_U32 }, +}; + +/* + * Calculate a flow index, given its weight and maximum packet length. + * index = log_2(maxlen/weight) but we need to apply the scaling. + * This is used only once at flow creation. 
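+ *
+ * As a rough illustration: a weight w is stored as inv_w = ONE_FP/w, so
+ * slot_size = maxlen * inv_w is (maxlen/w) scaled up by FRAC_BITS
+ * fractional bits.  Dropping QFQ_MIN_SLOT_SHIFT bits and locating the
+ * highest set bit therefore yields (roughly) log_2(maxlen/w) relative
+ * to the minimum slot size; doubling maxlen, or halving the weight,
+ * moves the flow one group index higher.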
+ */ +static int qfq_calc_index(u32 inv_w, unsigned int maxlen) +{ + u64 slot_size = (u64)maxlen * inv_w; + unsigned long size_map; + int index = 0; + + size_map = slot_size >> QFQ_MIN_SLOT_SHIFT; + if (!size_map) + goto out; + + index = __fls(size_map) + 1; /* basically a log_2 */ + index -= !(slot_size - (1ULL << (index + QFQ_MIN_SLOT_SHIFT - 1))); + + if (index < 0) + index = 0; +out: + pr_debug("qfq calc_index: W = %lu, L = %u, I = %d\n", + (unsigned long) ONE_FP/inv_w, maxlen, index); + + return index; +} + +static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, + struct nlattr **tca, unsigned long *arg) +{ + struct qfq_sched *q = qdisc_priv(sch); + struct qfq_class *cl = (struct qfq_class *)*arg; + struct nlattr *tb[TCA_QFQ_MAX + 1]; + u32 weight, lmax, inv_w; + int i, err; + + if (tca[TCA_OPTIONS] == NULL) { + pr_notice("qfq: no options\n"); + return -EINVAL; + } + + err = nla_parse_nested(tb, TCA_QFQ_MAX, tca[TCA_OPTIONS], qfq_policy); + if (err < 0) + return err; + + if (tb[TCA_QFQ_WEIGHT]) { + weight = nla_get_u32(tb[TCA_QFQ_WEIGHT]); + if (!weight || weight > (1UL << QFQ_MAX_WSHIFT)) { + pr_notice("qfq: invalid weight %u\n", weight); + return -EINVAL; + } + } else + weight = 1; + + inv_w = ONE_FP / weight; + weight = ONE_FP / inv_w; + if (q->wsum + weight > QFQ_MAX_WSUM) { + pr_notice("qfq: total weight out of range (%u + %u)\n", + weight, q->wsum); + return -EINVAL; + } + + if (tb[TCA_QFQ_LMAX]) { + lmax = nla_get_u32(tb[TCA_QFQ_LMAX]); + if (!lmax || lmax > (1UL << QFQ_MTU_SHIFT)) { + pr_notice("qfq: invalid max length %u\n", lmax); + return -EINVAL; + } + } else + lmax = 1UL << QFQ_MTU_SHIFT; + + if (cl != NULL) { + if (tca[TCA_RATE]) { + err = gen_replace_estimator(&cl->bstats, &cl->rate_est, + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE]); + if (err) + return err; + } + + sch_tree_lock(sch); + if (tb[TCA_QFQ_WEIGHT]) { + q->wsum = weight - ONE_FP / cl->inv_w; + cl->inv_w = inv_w; + } + sch_tree_unlock(sch); + + return 0; + } + + cl = kzalloc(sizeof(struct qfq_class), GFP_KERNEL); + if (cl == NULL) + return -ENOBUFS; + + cl->refcnt = 1; + cl->common.classid = classid; + cl->lmax = lmax; + cl->inv_w = inv_w; + i = qfq_calc_index(cl->inv_w, cl->lmax); + + cl->grp = &q->groups[i]; + q->wsum += weight; + + cl->qdisc = qdisc_create_dflt(sch->dev_queue, + &pfifo_qdisc_ops, classid); + if (cl->qdisc == NULL) + cl->qdisc = &noop_qdisc; + + if (tca[TCA_RATE]) { + err = gen_new_estimator(&cl->bstats, &cl->rate_est, + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE]); + if (err) { + qdisc_destroy(cl->qdisc); + kfree(cl); + return err; + } + } + + sch_tree_lock(sch); + qdisc_class_hash_insert(&q->clhash, &cl->common); + sch_tree_unlock(sch); + + qdisc_class_hash_grow(sch, &q->clhash); + + *arg = (unsigned long)cl; + return 0; +} + +static void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl) +{ + struct qfq_sched *q = qdisc_priv(sch); + + if (cl->inv_w) { + q->wsum -= ONE_FP / cl->inv_w; + cl->inv_w = 0; + } + + gen_kill_estimator(&cl->bstats, &cl->rate_est); + qdisc_destroy(cl->qdisc); + kfree(cl); +} + +static int qfq_delete_class(struct Qdisc *sch, unsigned long arg) +{ + struct qfq_sched *q = qdisc_priv(sch); + struct qfq_class *cl = (struct qfq_class *)arg; + + if (cl->filter_cnt > 0) + return -EBUSY; + + sch_tree_lock(sch); + + qfq_purge_queue(cl); + qdisc_class_hash_remove(&q->clhash, &cl->common); + + BUG_ON(--cl->refcnt == 0); + /* + * This shouldn't happen: we "hold" one cops->get() when called + * from tc_ctl_tclass; the destroy method is done 
from cops->put(). + */ + + sch_tree_unlock(sch); + return 0; +} + +static unsigned long qfq_get_class(struct Qdisc *sch, u32 classid) +{ + struct qfq_class *cl = qfq_find_class(sch, classid); + + if (cl != NULL) + cl->refcnt++; + + return (unsigned long)cl; +} + +static void qfq_put_class(struct Qdisc *sch, unsigned long arg) +{ + struct qfq_class *cl = (struct qfq_class *)arg; + + if (--cl->refcnt == 0) + qfq_destroy_class(sch, cl); +} + +static struct tcf_proto **qfq_tcf_chain(struct Qdisc *sch, unsigned long cl) +{ + struct qfq_sched *q = qdisc_priv(sch); + + if (cl) + return NULL; + + return &q->filter_list; +} + +static unsigned long qfq_bind_tcf(struct Qdisc *sch, unsigned long parent, + u32 classid) +{ + struct qfq_class *cl = qfq_find_class(sch, classid); + + if (cl != NULL) + cl->filter_cnt++; + + return (unsigned long)cl; +} + +static void qfq_unbind_tcf(struct Qdisc *sch, unsigned long arg) +{ + struct qfq_class *cl = (struct qfq_class *)arg; + + cl->filter_cnt--; +} + +static int qfq_graft_class(struct Qdisc *sch, unsigned long arg, + struct Qdisc *new, struct Qdisc **old) +{ + struct qfq_class *cl = (struct qfq_class *)arg; + + if (new == NULL) { + new = qdisc_create_dflt(sch->dev_queue, + &pfifo_qdisc_ops, cl->common.classid); + if (new == NULL) + new = &noop_qdisc; + } + + sch_tree_lock(sch); + qfq_purge_queue(cl); + *old = cl->qdisc; + cl->qdisc = new; + sch_tree_unlock(sch); + return 0; +} + +static struct Qdisc *qfq_class_leaf(struct Qdisc *sch, unsigned long arg) +{ + struct qfq_class *cl = (struct qfq_class *)arg; + + return cl->qdisc; +} + +static int qfq_dump_class(struct Qdisc *sch, unsigned long arg, + struct sk_buff *skb, struct tcmsg *tcm) +{ + struct qfq_class *cl = (struct qfq_class *)arg; + struct nlattr *nest; + + tcm->tcm_parent = TC_H_ROOT; + tcm->tcm_handle = cl->common.classid; + tcm->tcm_info = cl->qdisc->handle; + + nest = nla_nest_start(skb, TCA_OPTIONS); + if (nest == NULL) + goto nla_put_failure; + NLA_PUT_U32(skb, TCA_QFQ_WEIGHT, ONE_FP/cl->inv_w); + NLA_PUT_U32(skb, TCA_QFQ_LMAX, cl->lmax); + return nla_nest_end(skb, nest); + +nla_put_failure: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + +static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg, + struct gnet_dump *d) +{ + struct qfq_class *cl = (struct qfq_class *)arg; + struct tc_qfq_stats xstats; + + memset(&xstats, 0, sizeof(xstats)); + cl->qdisc->qstats.qlen = cl->qdisc->q.qlen; + + xstats.weight = ONE_FP/cl->inv_w; + xstats.lmax = cl->lmax; + + if (gnet_stats_copy_basic(d, &cl->bstats) < 0 || + gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 || + gnet_stats_copy_queue(d, &cl->qdisc->qstats) < 0) + return -1; + + return gnet_stats_copy_app(d, &xstats, sizeof(xstats)); +} + +static void qfq_walk(struct Qdisc *sch, struct qdisc_walker *arg) +{ + struct qfq_sched *q = qdisc_priv(sch); + struct qfq_class *cl; + struct hlist_node *n; + unsigned int i; + + if (arg->stop) + return; + + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) { + if (arg->count < arg->skip) { + arg->count++; + continue; + } + if (arg->fn(sch, (unsigned long)cl, arg) < 0) { + arg->stop = 1; + return; + } + arg->count++; + } + } +} + +static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch, + int *qerr) +{ + struct qfq_sched *q = qdisc_priv(sch); + struct qfq_class *cl; + struct tcf_result res; + int result; + + if (TC_H_MAJ(skb->priority ^ sch->handle) == 0) { + pr_debug("qfq_classify: found %d\n", skb->priority); + 
cl = qfq_find_class(sch, skb->priority); + if (cl != NULL) + return cl; + } + + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; + result = tc_classify(skb, q->filter_list, &res); + if (result >= 0) { +#ifdef CONFIG_NET_CLS_ACT + switch (result) { + case TC_ACT_QUEUED: + case TC_ACT_STOLEN: + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + case TC_ACT_SHOT: + return NULL; + } +#endif + cl = (struct qfq_class *)res.class; + if (cl == NULL) + cl = qfq_find_class(sch, res.classid); + return cl; + } + + return NULL; +} + +/* Generic comparison function, handling wraparound. */ +static inline int qfq_gt(u64 a, u64 b) +{ + return (s64)(a - b) > 0; +} + +/* Round a precise timestamp to its slotted value. */ +static inline u64 qfq_round_down(u64 ts, unsigned int shift) +{ + return ts & ~((1ULL << shift) - 1); +} + +/* return the pointer to the group with lowest index in the bitmap */ +static inline struct qfq_group *qfq_ffs(struct qfq_sched *q, + unsigned long bitmap) +{ + int index = __ffs(bitmap); + return &q->groups[index]; +} +/* Calculate a mask to mimic what would be ffs_from(). */ +static inline unsigned long mask_from(unsigned long bitmap, int from) +{ + return bitmap & ~((1UL << from) - 1); +} + +/* + * The state computation relies on ER=0, IR=1, EB=2, IB=3 + * First compute eligibility comparing grp->S, q->V, + * then check if someone is blocking us and possibly add EB + */ +static int qfq_calc_state(struct qfq_sched *q, const struct qfq_group *grp) +{ + /* if S > V we are not eligible */ + unsigned int state = qfq_gt(grp->S, q->V); + unsigned long mask = mask_from(q->bitmaps[ER], grp->index); + struct qfq_group *next; + + if (mask) { + next = qfq_ffs(q, mask); + if (qfq_gt(grp->F, next->F)) + state |= EB; + } + + return state; +} + + +/* + * In principle + * q->bitmaps[dst] |= q->bitmaps[src] & mask; + * q->bitmaps[src] &= ~mask; + * but we should make sure that src != dst + */ +static inline void qfq_move_groups(struct qfq_sched *q, unsigned long mask, + int src, int dst) +{ + q->bitmaps[dst] |= q->bitmaps[src] & mask; + q->bitmaps[src] &= ~mask; +} + +static void qfq_unblock_groups(struct qfq_sched *q, int index, u64 old_F) +{ + unsigned long mask = mask_from(q->bitmaps[ER], index + 1); + struct qfq_group *next; + + if (mask) { + next = qfq_ffs(q, mask); + if (!qfq_gt(next->F, old_F)) + return; + } + + mask = (1UL << index) - 1; + qfq_move_groups(q, mask, EB, ER); + qfq_move_groups(q, mask, IB, IR); +} + +/* + * perhaps + * + old_V ^= q->V; + old_V >>= QFQ_MIN_SLOT_SHIFT; + if (old_V) { + ... + } + * + */ +static void qfq_make_eligible(struct qfq_sched *q, u64 old_V) +{ + unsigned long vslot = q->V >> QFQ_MIN_SLOT_SHIFT; + unsigned long old_vslot = old_V >> QFQ_MIN_SLOT_SHIFT; + + if (vslot != old_vslot) { + unsigned long mask = (1UL << fls(vslot ^ old_vslot)) - 1; + qfq_move_groups(q, mask, IR, ER); + qfq_move_groups(q, mask, IB, EB); + } +} + + +/* + * XXX we should make sure that slot becomes less than 32. + * This is guaranteed by the input values. + * roundedS is always cl->S rounded on grp->slot_shift bits. + */ +static void qfq_slot_insert(struct qfq_group *grp, struct qfq_class *cl, + u64 roundedS) +{ + u64 slot = (roundedS - grp->S) >> grp->slot_shift; + unsigned int i = (grp->front + slot) % QFQ_MAX_SLOTS; + + hlist_add_head(&cl->next, &grp->slots[i]); + __set_bit(slot, &grp->full_slots); +} + +/* Maybe introduce hlist_first_entry?? 
*/ +static struct qfq_class *qfq_slot_head(struct qfq_group *grp) +{ + return hlist_entry(grp->slots[grp->front].first, + struct qfq_class, next); +} + +/* + * remove the entry from the slot + */ +static void qfq_front_slot_remove(struct qfq_group *grp) +{ + struct qfq_class *cl = qfq_slot_head(grp); + + BUG_ON(!cl); + hlist_del(&cl->next); + if (hlist_empty(&grp->slots[grp->front])) + __clear_bit(0, &grp->full_slots); +} + +/* + * Returns the first full queue in a group. As a side effect, + * adjust the bucket list so the first non-empty bucket is at + * position 0 in full_slots. + */ +static struct qfq_class *qfq_slot_scan(struct qfq_group *grp) +{ + unsigned int i; + + pr_debug("qfq slot_scan: grp %u full %#lx\n", + grp->index, grp->full_slots); + + if (grp->full_slots == 0) + return NULL; + + i = __ffs(grp->full_slots); /* zero based */ + if (i > 0) { + grp->front = (grp->front + i) % QFQ_MAX_SLOTS; + grp->full_slots >>= i; + } + + return qfq_slot_head(grp); +} + +/* + * adjust the bucket list. When the start time of a group decreases, + * we move the index down (modulo QFQ_MAX_SLOTS) so we don't need to + * move the objects. The mask of occupied slots must be shifted + * because we use ffs() to find the first non-empty slot. + * This covers decreases in the group's start time, but what about + * increases of the start time ? + * Here too we should make sure that i is less than 32 + */ +static void qfq_slot_rotate(struct qfq_group *grp, u64 roundedS) +{ + unsigned int i = (grp->S - roundedS) >> grp->slot_shift; + + grp->full_slots <<= i; + grp->front = (grp->front - i) % QFQ_MAX_SLOTS; +} + +static void qfq_update_eligible(struct qfq_sched *q, u64 old_V) +{ + struct qfq_group *grp; + unsigned long ineligible; + + ineligible = q->bitmaps[IR] | q->bitmaps[IB]; + if (ineligible) { + if (!q->bitmaps[ER]) { + grp = qfq_ffs(q, ineligible); + if (qfq_gt(grp->S, q->V)) + q->V = grp->S; + } + qfq_make_eligible(q, old_V); + } +} + +/* What is length of next packet in queue (0 if queue is empty) */ +static unsigned int qdisc_peek_len(struct Qdisc *sch) +{ + struct sk_buff *skb; + + skb = sch->ops->peek(sch); + return skb ? qdisc_pkt_len(skb) : 0; +} + +/* + * Updates the class, returns true if also the group needs to be updated. 
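+ * (true means the head of the group's front bucket changed: the class
+ * either emptied and left its slot, or its new rounded start no longer
+ * matches grp->S, so the caller must rescan the group and recompute its
+ * S, F and state.)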
+ */ +static bool qfq_update_class(struct qfq_group *grp, struct qfq_class *cl) +{ + unsigned int len = qdisc_peek_len(cl->qdisc); + + cl->S = cl->F; + if (!len) + qfq_front_slot_remove(grp); /* queue is empty */ + else { + u64 roundedS; + + cl->F = cl->S + (u64)len * cl->inv_w; + roundedS = qfq_round_down(cl->S, grp->slot_shift); + if (roundedS == grp->S) + return false; + + qfq_front_slot_remove(grp); + qfq_slot_insert(grp, cl, roundedS); + } + + return true; +} + +static struct sk_buff *qfq_dequeue(struct Qdisc *sch) +{ + struct qfq_sched *q = qdisc_priv(sch); + struct qfq_group *grp; + struct qfq_class *cl; + struct sk_buff *skb; + unsigned int len; + u64 old_V; + + if (!q->bitmaps[ER]) + return NULL; + + grp = qfq_ffs(q, q->bitmaps[ER]); + + cl = qfq_slot_head(grp); + skb = qdisc_dequeue_peeked(cl->qdisc); + if (!skb) { + WARN_ONCE(1, "qfq_dequeue: non-workconserving leaf\n"); + return NULL; + } + + sch->q.qlen--; + qdisc_bstats_update(sch, skb); + + old_V = q->V; + len = qdisc_pkt_len(skb); + q->V += (u64)len * IWSUM; + pr_debug("qfq dequeue: len %u F %lld now %lld\n", + len, (unsigned long long) cl->F, (unsigned long long) q->V); + + if (qfq_update_class(grp, cl)) { + u64 old_F = grp->F; + + cl = qfq_slot_scan(grp); + if (!cl) + __clear_bit(grp->index, &q->bitmaps[ER]); + else { + u64 roundedS = qfq_round_down(cl->S, grp->slot_shift); + unsigned int s; + + if (grp->S == roundedS) + goto skip_unblock; + grp->S = roundedS; + grp->F = roundedS + (2ULL << grp->slot_shift); + __clear_bit(grp->index, &q->bitmaps[ER]); + s = qfq_calc_state(q, grp); + __set_bit(grp->index, &q->bitmaps[s]); + } + + qfq_unblock_groups(q, grp->index, old_F); + } + +skip_unblock: + qfq_update_eligible(q, old_V); + + return skb; +} + +/* + * Assign a reasonable start time for a new flow k in group i. + * Admissible values for \hat(F) are multiples of \sigma_i + * no greater than V+\sigma_i . Larger values mean that + * we had a wraparound so we consider the timestamp to be stale. + * + * If F is not stale and F >= V then we set S = F. + * Otherwise we should assign S = V, but this may violate + * the ordering in ER. So, if we have groups in ER, set S to + * the F_j of the first group j which would be blocking us. + * We are guaranteed not to move S backward because + * otherwise our group i would still be blocked. 
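+ *
+ * In the code below this translates to: F is stale when its rounded
+ * value exceeds qfq_round_down(V) by more than one slot, and the
+ * "first blocking group" is the lowest-index group still set in the
+ * ER bitmap from our own group index upwards.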
+ */ +static void qfq_update_start(struct qfq_sched *q, struct qfq_class *cl) +{ + unsigned long mask; + uint32_t limit, roundedF; + int slot_shift = cl->grp->slot_shift; + + roundedF = qfq_round_down(cl->F, slot_shift); + limit = qfq_round_down(q->V, slot_shift) + (1UL << slot_shift); + + if (!qfq_gt(cl->F, q->V) || qfq_gt(roundedF, limit)) { + /* timestamp was stale */ + mask = mask_from(q->bitmaps[ER], cl->grp->index); + if (mask) { + struct qfq_group *next = qfq_ffs(q, mask); + if (qfq_gt(roundedF, next->F)) { + cl->S = next->F; + return; + } + } + cl->S = q->V; + } else /* timestamp is not stale */ + cl->S = cl->F; +} + +static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) +{ + struct qfq_sched *q = qdisc_priv(sch); + struct qfq_group *grp; + struct qfq_class *cl; + int err; + u64 roundedS; + int s; + + cl = qfq_classify(skb, sch, &err); + if (cl == NULL) { + if (err & __NET_XMIT_BYPASS) + sch->qstats.drops++; + kfree_skb(skb); + return err; + } + pr_debug("qfq_enqueue: cl = %x\n", cl->common.classid); + + err = qdisc_enqueue(skb, cl->qdisc); + if (unlikely(err != NET_XMIT_SUCCESS)) { + pr_debug("qfq_enqueue: enqueue failed %d\n", err); + if (net_xmit_drop_count(err)) { + cl->qstats.drops++; + sch->qstats.drops++; + } + return err; + } + + bstats_update(&cl->bstats, skb); + ++sch->q.qlen; + + /* If the new skb is not the head of queue, then done here. */ + if (cl->qdisc->q.qlen != 1) + return err; + + /* If reach this point, queue q was idle */ + grp = cl->grp; + qfq_update_start(q, cl); + + /* compute new finish time and rounded start. */ + cl->F = cl->S + (u64)qdisc_pkt_len(skb) * cl->inv_w; + roundedS = qfq_round_down(cl->S, grp->slot_shift); + + /* + * insert cl in the correct bucket. + * If cl->S >= grp->S we don't need to adjust the + * bucket list and simply go to the insertion phase. + * Otherwise grp->S is decreasing, we must make room + * in the bucket list, and also recompute the group state. + * Finally, if there were no flows in this group and nobody + * was in ER make sure to adjust V. + */ + if (grp->full_slots) { + if (!qfq_gt(grp->S, cl->S)) + goto skip_update; + + /* create a slot for this cl->S */ + qfq_slot_rotate(grp, roundedS); + /* group was surely ineligible, remove */ + __clear_bit(grp->index, &q->bitmaps[IR]); + __clear_bit(grp->index, &q->bitmaps[IB]); + } else if (!q->bitmaps[ER] && qfq_gt(roundedS, q->V)) + q->V = roundedS; + + grp->S = roundedS; + grp->F = roundedS + (2ULL << grp->slot_shift); + s = qfq_calc_state(q, grp); + __set_bit(grp->index, &q->bitmaps[s]); + + pr_debug("qfq enqueue: new state %d %#lx S %lld F %lld V %lld\n", + s, q->bitmaps[s], + (unsigned long long) cl->S, + (unsigned long long) cl->F, + (unsigned long long) q->V); + +skip_update: + qfq_slot_insert(grp, cl, roundedS); + + return err; +} + + +static void qfq_slot_remove(struct qfq_sched *q, struct qfq_group *grp, + struct qfq_class *cl) +{ + unsigned int i, offset; + u64 roundedS; + + roundedS = qfq_round_down(cl->S, grp->slot_shift); + offset = (roundedS - grp->S) >> grp->slot_shift; + i = (grp->front + offset) % QFQ_MAX_SLOTS; + + hlist_del(&cl->next); + if (hlist_empty(&grp->slots[i])) + __clear_bit(offset, &grp->full_slots); +} + +/* + * called to forcibly destroy a queue. + * If the queue is not in the front bucket, or if it has + * other queues in the front bucket, we can simply remove + * the queue with no other side effects. + * Otherwise we must propagate the event up. 
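+ * ("Propagating up" is what qfq_deactivate_class() does below: drop the
+ * group from the scheduler bitmaps, possibly unblocking other groups,
+ * or recompute the group's S and F around the new head class, and
+ * finally refresh the set of eligible groups.)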
+ */ +static void qfq_deactivate_class(struct qfq_sched *q, struct qfq_class *cl) +{ + struct qfq_group *grp = cl->grp; + unsigned long mask; + u64 roundedS; + int s; + + cl->F = cl->S; + qfq_slot_remove(q, grp, cl); + + if (!grp->full_slots) { + __clear_bit(grp->index, &q->bitmaps[IR]); + __clear_bit(grp->index, &q->bitmaps[EB]); + __clear_bit(grp->index, &q->bitmaps[IB]); + + if (test_bit(grp->index, &q->bitmaps[ER]) && + !(q->bitmaps[ER] & ~((1UL << grp->index) - 1))) { + mask = q->bitmaps[ER] & ((1UL << grp->index) - 1); + if (mask) + mask = ~((1UL << __fls(mask)) - 1); + else + mask = ~0UL; + qfq_move_groups(q, mask, EB, ER); + qfq_move_groups(q, mask, IB, IR); + } + __clear_bit(grp->index, &q->bitmaps[ER]); + } else if (hlist_empty(&grp->slots[grp->front])) { + cl = qfq_slot_scan(grp); + roundedS = qfq_round_down(cl->S, grp->slot_shift); + if (grp->S != roundedS) { + __clear_bit(grp->index, &q->bitmaps[ER]); + __clear_bit(grp->index, &q->bitmaps[IR]); + __clear_bit(grp->index, &q->bitmaps[EB]); + __clear_bit(grp->index, &q->bitmaps[IB]); + grp->S = roundedS; + grp->F = roundedS + (2ULL << grp->slot_shift); + s = qfq_calc_state(q, grp); + __set_bit(grp->index, &q->bitmaps[s]); + } + } + + qfq_update_eligible(q, q->V); +} + +static void qfq_qlen_notify(struct Qdisc *sch, unsigned long arg) +{ + struct qfq_sched *q = qdisc_priv(sch); + struct qfq_class *cl = (struct qfq_class *)arg; + + if (cl->qdisc->q.qlen == 0) + qfq_deactivate_class(q, cl); +} + +static unsigned int qfq_drop(struct Qdisc *sch) +{ + struct qfq_sched *q = qdisc_priv(sch); + struct qfq_group *grp; + unsigned int i, j, len; + + for (i = 0; i <= QFQ_MAX_INDEX; i++) { + grp = &q->groups[i]; + for (j = 0; j < QFQ_MAX_SLOTS; j++) { + struct qfq_class *cl; + struct hlist_node *n; + + hlist_for_each_entry(cl, n, &grp->slots[j], next) { + + if (!cl->qdisc->ops->drop) + continue; + + len = cl->qdisc->ops->drop(cl->qdisc); + if (len > 0) { + sch->q.qlen--; + if (!cl->qdisc->q.qlen) + qfq_deactivate_class(q, cl); + + return len; + } + } + } + } + + return 0; +} + +static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt) +{ + struct qfq_sched *q = qdisc_priv(sch); + struct qfq_group *grp; + int i, j, err; + + err = qdisc_class_hash_init(&q->clhash); + if (err < 0) + return err; + + for (i = 0; i <= QFQ_MAX_INDEX; i++) { + grp = &q->groups[i]; + grp->index = i; + grp->slot_shift = QFQ_MTU_SHIFT + FRAC_BITS + - (QFQ_MAX_INDEX - i); + for (j = 0; j < QFQ_MAX_SLOTS; j++) + INIT_HLIST_HEAD(&grp->slots[j]); + } + + return 0; +} + +static void qfq_reset_qdisc(struct Qdisc *sch) +{ + struct qfq_sched *q = qdisc_priv(sch); + struct qfq_group *grp; + struct qfq_class *cl; + struct hlist_node *n, *tmp; + unsigned int i, j; + + for (i = 0; i <= QFQ_MAX_INDEX; i++) { + grp = &q->groups[i]; + for (j = 0; j < QFQ_MAX_SLOTS; j++) { + hlist_for_each_entry_safe(cl, n, tmp, + &grp->slots[j], next) { + qfq_deactivate_class(q, cl); + } + } + } + + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) + qdisc_reset(cl->qdisc); + } + sch->q.qlen = 0; +} + +static void qfq_destroy_qdisc(struct Qdisc *sch) +{ + struct qfq_sched *q = qdisc_priv(sch); + struct qfq_class *cl; + struct hlist_node *n, *next; + unsigned int i; + + tcf_destroy_chain(&q->filter_list); + + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i], + common.hnode) { + qfq_destroy_class(sch, cl); + } + } + qdisc_class_hash_destroy(&q->clhash); +} + +static const struct 
Qdisc_class_ops qfq_class_ops = { + .change = qfq_change_class, + .delete = qfq_delete_class, + .get = qfq_get_class, + .put = qfq_put_class, + .tcf_chain = qfq_tcf_chain, + .bind_tcf = qfq_bind_tcf, + .unbind_tcf = qfq_unbind_tcf, + .graft = qfq_graft_class, + .leaf = qfq_class_leaf, + .qlen_notify = qfq_qlen_notify, + .dump = qfq_dump_class, + .dump_stats = qfq_dump_class_stats, + .walk = qfq_walk, +}; + +static struct Qdisc_ops qfq_qdisc_ops __read_mostly = { + .cl_ops = &qfq_class_ops, + .id = "qfq", + .priv_size = sizeof(struct qfq_sched), + .enqueue = qfq_enqueue, + .dequeue = qfq_dequeue, + .peek = qdisc_peek_dequeued, + .drop = qfq_drop, + .init = qfq_init_qdisc, + .reset = qfq_reset_qdisc, + .destroy = qfq_destroy_qdisc, + .owner = THIS_MODULE, +}; + +static int __init qfq_init(void) +{ + return register_qdisc(&qfq_qdisc_ops); +} + +static void __exit qfq_exit(void) +{ + unregister_qdisc(&qfq_qdisc_ops); +} + +module_init(qfq_init); +module_exit(qfq_exit); +MODULE_LICENSE("GPL"); -- cgit v1.2.3-71-gd317 From 4dd365fd55991b4e54a1d1c255081e6370b9da29 Mon Sep 17 00:00:00 2001 From: Bing Zhao Date: Wed, 30 Mar 2011 18:01:15 -0700 Subject: ieee80211: add HT extended capabilities masks IEEE Std 802.11n, Oct. 29, 2009: 7.3.2.56.5 HT Extended Capabilities field Signed-off-by: Bing Zhao Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 2d1c6117d92c..79690b710665 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -884,6 +884,15 @@ struct ieee80211_ht_cap { #define IEEE80211_HT_CAP_40MHZ_INTOLERANT 0x4000 #define IEEE80211_HT_CAP_LSIG_TXOP_PROT 0x8000 +/* 802.11n HT extended capabilities masks (for extended_ht_cap_info) */ +#define IEEE80211_HT_EXT_CAP_PCO 0x0001 +#define IEEE80211_HT_EXT_CAP_PCO_TIME 0x0006 +#define IEEE80211_HT_EXT_CAP_PCO_TIME_SHIFT 1 +#define IEEE80211_HT_EXT_CAP_MCS_FB 0x0300 +#define IEEE80211_HT_EXT_CAP_MCS_FB_SHIFT 8 +#define IEEE80211_HT_EXT_CAP_HTC_SUP 0x0400 +#define IEEE80211_HT_EXT_CAP_RD_RESPONDER 0x0800 + /* 802.11n HT capability AMPDU settings (for ampdu_params_info) */ #define IEEE80211_HT_AMPDU_PARM_FACTOR 0x03 #define IEEE80211_HT_AMPDU_PARM_DENSITY 0x1C -- cgit v1.2.3-71-gd317 From ce57d9e694d98e421e329fbac5d6f5dc5e9e101e Mon Sep 17 00:00:00 2001 From: RafaÅ‚ MiÅ‚ecki Date: Fri, 1 Apr 2011 12:06:48 +0200 Subject: ssb: trivial: use u8 for chip_rev (it's mask is 0xF) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: RafaÅ‚ MiÅ‚ecki Signed-off-by: John W. Linville --- drivers/ssb/scan.c | 2 +- include/linux/ssb/ssb.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/ssb/scan.c b/drivers/ssb/scan.c index 29884c00c4d5..7dca719fbcfb 100644 --- a/drivers/ssb/scan.c +++ b/drivers/ssb/scan.c @@ -307,7 +307,7 @@ int ssb_bus_scan(struct ssb_bus *bus, } else { if (bus->bustype == SSB_BUSTYPE_PCI) { bus->chip_id = pcidev_to_chipid(bus->host_pci); - pci_read_config_word(bus->host_pci, PCI_REVISION_ID, + pci_read_config_byte(bus->host_pci, PCI_REVISION_ID, &bus->chip_rev); bus->chip_package = 0; } else { diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h index 9659eff52ca2..7e99b348834c 100644 --- a/include/linux/ssb/ssb.h +++ b/include/linux/ssb/ssb.h @@ -308,7 +308,7 @@ struct ssb_bus { /* ID information about the Chip. 
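 * (The chip revision mask is 0xF, hence a u8 is wide enough for
 * chip_rev below.)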
 */ u16 chip_id; - u16 chip_rev; + u8 chip_rev; u16 sprom_offset; u16 sprom_size; /* number of words in sprom */ u8 chip_package; -- cgit v1.2.3-71-gd317 From c6e1a0d12ca7b4f22c58e55a16beacfb7d3d8462 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 4 Apr 2011 22:30:30 -0700 Subject: net: Allow no-cache copy from user on transmit This patch uses __copy_from_user_nocache on transmit to bypass data cache for a performance improvement. skb_add_data_nocache and skb_copy_to_page_nocache can be called by sendmsg functions to use this feature, initial support is in tcp_sendmsg. This functionality is configurable per device using ethtool. Presumably, this feature would only be useful when the driver does not touch the data. The feature is turned on by default if a device indicates that it does some form of checksum offload; it is off by default for devices that do no checksum offload or indicate no checksum is necessary. For the former case copy-checksum is probably done anyway; in the latter case the device is likely loopback, in which case the no cache copy is probably not beneficial. This patch was tested using 200 instances of netperf TCP_RR with 1400 byte request and one byte reply. Platform is 16 core AMD x86. No-cache copy disabled: 672703 tps, 97.13% utilization 50/90/99% latency: 244.31 484.205 1028.41 No-cache copy enabled: 702113 tps, 96.16% utilization, 50/90/99% latency 238.56 467.56 956.955 Using 14000 byte request and response sizes demonstrates the effects more dramatically: No-cache copy disabled: 79571 tps, 34.34% utilization 50/90/95% latency 1584.46 2319.59 5001.76 No-cache copy enabled: 83856 tps, 34.81% utilization 50/90/95% latency 2508.42 2622.62 2735.88 Note especially the effect on latency tail (95th percentile). This seems to provide a nice performance improvement and is consistent in the tests I ran. Presumably, this would provide the greatest benefits in the presence of an application workload stressing the cache and a lot of transmit data happening. Signed-off-by: Tom Herbert Signed-off-by: David S.
Miller --- drivers/net/bonding/bond_main.c | 2 +- include/linux/netdevice.h | 3 ++- include/net/sock.h | 53 +++++++++++++++++++++++++++++++++++++++++ net/core/dev.c | 12 ++++++++++ net/core/ethtool.c | 2 +- net/ipv4/tcp.c | 7 +++--- 6 files changed, 73 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 16d6fe954695..b51e021354b5 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1407,7 +1407,7 @@ static int bond_compute_features(struct bonding *bond) int i; features &= ~(NETIF_F_ALL_CSUM | BOND_VLAN_FEATURES); - features |= NETIF_F_GSO_MASK | NETIF_F_NO_CSUM; + features |= NETIF_F_GSO_MASK | NETIF_F_NO_CSUM | NETIF_F_NOCACHE_COPY; if (!bond->first_slave) goto done; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a4664cc68e2b..09d262415769 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1066,6 +1066,7 @@ struct net_device { #define NETIF_F_NTUPLE (1 << 27) /* N-tuple filters supported */ #define NETIF_F_RXHASH (1 << 28) /* Receive hashing offload */ #define NETIF_F_RXCSUM (1 << 29) /* Receive checksumming offload */ +#define NETIF_F_NOCACHE_COPY (1 << 30) /* Use no-cache copyfromuser */ /* Segmentation offload features */ #define NETIF_F_GSO_SHIFT 16 @@ -1081,7 +1082,7 @@ struct net_device { /* = all defined minus driver/device-class-related */ #define NETIF_F_NEVER_CHANGE (NETIF_F_HIGHDMA | NETIF_F_VLAN_CHALLENGED | \ NETIF_F_LLTX | NETIF_F_NETNS_LOCAL) -#define NETIF_F_ETHTOOL_BITS (0x3f3fffff & ~NETIF_F_NEVER_CHANGE) +#define NETIF_F_ETHTOOL_BITS (0x7f3fffff & ~NETIF_F_NEVER_CHANGE) /* List of features with software fallbacks. */ #define NETIF_F_GSO_SOFTWARE (NETIF_F_TSO | NETIF_F_TSO_ECN | \ diff --git a/include/net/sock.h b/include/net/sock.h index da0534d3401c..43bd515e92fd 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -52,6 +52,7 @@ #include #include #include +#include #include #include @@ -1389,6 +1390,58 @@ static inline void sk_nocaps_add(struct sock *sk, int flags) sk->sk_route_caps &= ~flags; } +static inline int skb_do_copy_data_nocache(struct sock *sk, struct sk_buff *skb, + char __user *from, char *to, + int copy) +{ + if (skb->ip_summed == CHECKSUM_NONE) { + int err = 0; + __wsum csum = csum_and_copy_from_user(from, to, copy, 0, &err); + if (err) + return err; + skb->csum = csum_block_add(skb->csum, csum, skb->len); + } else if (sk->sk_route_caps & NETIF_F_NOCACHE_COPY) { + if (!access_ok(VERIFY_READ, from, copy) || + __copy_from_user_nocache(to, from, copy)) + return -EFAULT; + } else if (copy_from_user(to, from, copy)) + return -EFAULT; + + return 0; +} + +static inline int skb_add_data_nocache(struct sock *sk, struct sk_buff *skb, + char __user *from, int copy) +{ + int err; + + err = skb_do_copy_data_nocache(sk, skb, from, skb_put(skb, copy), copy); + if (err) + __skb_trim(skb, skb->len); + + return err; +} + +static inline int skb_copy_to_page_nocache(struct sock *sk, char __user *from, + struct sk_buff *skb, + struct page *page, + int off, int copy) +{ + int err; + + err = skb_do_copy_data_nocache(sk, skb, from, + page_address(page) + off, copy); + if (err) + return err; + + skb->len += copy; + skb->data_len += copy; + skb->truesize += copy; + sk->sk_wmem_queued += copy; + sk_mem_charge(sk, copy); + return 0; +} + static inline int skb_copy_to_page(struct sock *sk, char __user *from, struct sk_buff *skb, struct page *page, int off, int copy) diff --git a/net/core/dev.c 
b/net/core/dev.c index 02f56376fe99..5d0b4f6f1a72 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5425,6 +5425,14 @@ int register_netdevice(struct net_device *dev) dev->features &= ~NETIF_F_GSO; } + /* Turn on no cache copy if HW is doing checksum */ + dev->hw_features |= NETIF_F_NOCACHE_COPY; + if ((dev->features & NETIF_F_ALL_CSUM) && + !(dev->features & NETIF_F_NO_CSUM)) { + dev->wanted_features |= NETIF_F_NOCACHE_COPY; + dev->features |= NETIF_F_NOCACHE_COPY; + } + /* Enable GRO and NETIF_F_HIGHDMA for vlans by default, * vlan_dev_init() will do the dev->features check, so these features * are enabled only if supported by underlying device. @@ -6182,6 +6190,10 @@ u32 netdev_increment_features(u32 all, u32 one, u32 mask) } } + /* If device can't no cache copy, don't do for all */ + if (!(one & NETIF_F_NOCACHE_COPY)) + all &= ~NETIF_F_NOCACHE_COPY; + one |= NETIF_F_ALL_CSUM; one |= all & NETIF_F_ONE_FOR_ALL; diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 439e4b0e1312..719670ae199c 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -359,7 +359,7 @@ static const char netdev_features_strings[ETHTOOL_DEV_FEATURE_WORDS * 32][ETH_GS /* NETIF_F_NTUPLE */ "rx-ntuple-filter", /* NETIF_F_RXHASH */ "rx-hashing", /* NETIF_F_RXCSUM */ "rx-checksum", - "", + /* NETIF_F_NOCACHE_COPY */ "tx-nocache-copy" "", }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index b22d45010545..054a59d21eb0 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -999,7 +999,8 @@ new_segment: /* We have some space in skb head. Superb! */ if (copy > skb_tailroom(skb)) copy = skb_tailroom(skb); - if ((err = skb_add_data(skb, from, copy)) != 0) + err = skb_add_data_nocache(sk, skb, from, copy); + if (err) goto do_fault; } else { int merge = 0; @@ -1042,8 +1043,8 @@ new_segment: /* Time to copy data. We are close to * the end! */ - err = skb_copy_to_page(sk, from, skb, page, - off, copy); + err = skb_copy_to_page_nocache(sk, from, skb, + page, off, copy); if (err) { /* If this page was new, give it to the * socket so it does not get leaked. 
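The same helpers are meant to be reusable by other sendmsg implementations, not just tcp_sendmsg(). As a rough sketch (hypothetical caller; only skb_add_data_nocache() and NETIF_F_NOCACHE_COPY come from this patch), a conversion elsewhere would mirror the tcp.c hunk above:

#include <net/sock.h>	/* skb_add_data_nocache() */

/*
 * Hypothetical sendmsg fragment: copy user data into the skb head and
 * let the helper choose a cached or non-cached copy based on
 * sk->sk_route_caps & NETIF_F_NOCACHE_COPY and the checksum state.
 */
static int example_fill_skb(struct sock *sk, struct sk_buff *skb,
			    char __user *from, int copy)
{
	int err;

	if (copy > skb_tailroom(skb))
		copy = skb_tailroom(skb);

	/* replaces a plain skb_add_data()/copy_from_user() call */
	err = skb_add_data_nocache(sk, skb, from, copy);
	if (err)
		return err;

	return copy;
}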
-- cgit v1.2.3-71-gd317 From e20b5b61a36bd5b80eea064c0f2e73285dbe0d3b Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Fri, 1 Apr 2011 22:52:34 +0100 Subject: ethtool: Convert struct ethtool_ops comment to kernel-doc format Signed-off-by: Ben Hutchings --- include/linux/ethtool.h | 80 ++++++++++++++++++++++--------------------------- 1 file changed, 35 insertions(+), 45 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index c8fcbdd2b0e7..ab12f84c17bd 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -683,63 +683,53 @@ void ethtool_ntuple_flush(struct net_device *dev); bool ethtool_invalid_flags(struct net_device *dev, u32 data, u32 supported); /** - * ðtool_ops - Alter and report network device settings - * get_settings: Get device-specific settings - * set_settings: Set device-specific settings - * get_drvinfo: Report driver information - * get_regs: Get device registers - * get_wol: Report whether Wake-on-Lan is enabled - * set_wol: Turn Wake-on-Lan on or off - * get_msglevel: Report driver message level - * set_msglevel: Set driver message level - * nway_reset: Restart autonegotiation - * get_link: Get link status - * get_eeprom: Read data from the device EEPROM - * set_eeprom: Write data to the device EEPROM - * get_coalesce: Get interrupt coalescing parameters - * set_coalesce: Set interrupt coalescing parameters - * get_ringparam: Report ring sizes - * set_ringparam: Set ring sizes - * get_pauseparam: Report pause parameters - * set_pauseparam: Set pause parameters - * get_rx_csum: Report whether receive checksums are turned on or off - * set_rx_csum: Turn receive checksum on or off - * get_tx_csum: Report whether transmit checksums are turned on or off - * set_tx_csum: Turn transmit checksums on or off - * get_sg: Report whether scatter-gather is enabled - * set_sg: Turn scatter-gather on or off - * get_tso: Report whether TCP segmentation offload is enabled - * set_tso: Turn TCP segmentation offload on or off - * get_ufo: Report whether UDP fragmentation offload is enabled - * set_ufo: Turn UDP fragmentation offload on or off - * self_test: Run specified self-tests - * get_strings: Return a set of strings that describe the requested objects - * phys_id: Identify the device - * get_stats: Return statistics about the device - * get_flags: get 32-bit flags bitmap - * set_flags: set 32-bit flags bitmap - * - * Description: - * - * get_settings: + * struct ethtool_ops - Alter and report network device settings + * @get_settings: Get device-specific settings. * @get_settings is passed an ðtool_cmd to fill in. It returns * an negative errno or zero. - * - * set_settings: + * @set_settings: Set device-specific settings. * @set_settings is passed an ðtool_cmd and should attempt to set * all the settings this device supports. It may return an error value * if something goes wrong (otherwise 0). - * - * get_eeprom: + * @get_drvinfo: Report driver information + * @get_regs: Get device registers + * @get_wol: Report whether Wake-on-Lan is enabled + * @set_wol: Turn Wake-on-Lan on or off + * @get_msglevel: Report driver message level + * @set_msglevel: Set driver message level + * @nway_reset: Restart autonegotiation + * @get_link: Get link status + * @get_eeprom: Read data from the device EEPROM. * Should fill in the magic field. Don't need to check len for zero * or wraparound. Fill in the data argument with the eeprom values * from offset to offset + len. Update len to the amount read. * Returns an error or zero. 
- * - * set_eeprom: + * @set_eeprom: Write data to the device EEPROM. * Should validate the magic field. Don't need to check len for zero * or wraparound. Update len to the amount written. Returns an error * or zero. + * @get_coalesce: Get interrupt coalescing parameters + * @set_coalesce: Set interrupt coalescing parameters + * @get_ringparam: Report ring sizes + * @set_ringparam: Set ring sizes + * @get_pauseparam: Report pause parameters + * @set_pauseparam: Set pause parameters + * @get_rx_csum: Report whether receive checksums are turned on or off + * @set_rx_csum: Turn receive checksum on or off + * @get_tx_csum: Report whether transmit checksums are turned on or off + * @set_tx_csum: Turn transmit checksums on or off + * @get_sg: Report whether scatter-gather is enabled + * @set_sg: Turn scatter-gather on or off + * @get_tso: Report whether TCP segmentation offload is enabled + * @set_tso: Turn TCP segmentation offload on or off + * @get_ufo: Report whether UDP fragmentation offload is enabled + * @set_ufo: Turn UDP fragmentation offload on or off + * @self_test: Run specified self-tests + * @get_strings: Return a set of strings that describe the requested objects + * @phys_id: Identify the device + * @get_stats: Return statistics about the device + * @get_flags: get 32-bit flags bitmap + * @set_flags: set 32-bit flags bitmap */ struct ethtool_ops { int (*get_settings)(struct net_device *, struct ethtool_cmd *); -- cgit v1.2.3-71-gd317 From 8717d07b1143e0f150921f5bd7cfe7af579a995a Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Fri, 1 Apr 2011 23:57:41 +0100 Subject: ethtool: Fill out and update comment for struct ethtool_ops Briefly document all operations (except get_rx_ntuple), including whether they may return an error code and whether they are deprecated. Also mention some things that should be handled by the ethtool core rather than by drivers. Briefly document general requirements for callers. Signed-off-by: Ben Hutchings --- include/linux/ethtool.h | 124 ++++++++++++++++++++++++++++++++++++------------ 1 file changed, 93 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index ab12f84c17bd..ead7dcb1bf1e 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -683,22 +683,28 @@ void ethtool_ntuple_flush(struct net_device *dev); bool ethtool_invalid_flags(struct net_device *dev, u32 data, u32 supported); /** - * struct ethtool_ops - Alter and report network device settings - * @get_settings: Get device-specific settings. - * @get_settings is passed an ðtool_cmd to fill in. It returns - * an negative errno or zero. - * @set_settings: Set device-specific settings. - * @set_settings is passed an ðtool_cmd and should attempt to set - * all the settings this device supports. It may return an error value - * if something goes wrong (otherwise 0). - * @get_drvinfo: Report driver information + * struct ethtool_ops - optional netdev operations + * @get_settings: Get various device settings including Ethernet link + * settings. Returns a negative error code or zero. + * @set_settings: Set various device settings including Ethernet link + * settings. Returns a negative error code or zero. + * @get_drvinfo: Report driver/device information. Should only set the + * @driver, @version, @fw_version and @bus_info fields. If not + * implemented, the @driver and @bus_info fields will be filled in + * according to the netdev's parent device. 
+ * @get_regs_len: Get buffer length required for @get_regs * @get_regs: Get device registers * @get_wol: Report whether Wake-on-Lan is enabled - * @set_wol: Turn Wake-on-Lan on or off - * @get_msglevel: Report driver message level + * @set_wol: Turn Wake-on-Lan on or off. Returns a negative error code + * or zero. + * @get_msglevel: Report driver message level. This should be the value + * of the @msg_enable field used by netif logging functions. * @set_msglevel: Set driver message level - * @nway_reset: Restart autonegotiation - * @get_link: Get link status + * @nway_reset: Restart autonegotiation. Returns a negative error code + * or zero. + * @get_link: Report whether physical link is up. Will only be called if + * the netdev is up. Should usually be set to ethtool_op_get_link(), + * which uses netif_carrier_ok(). * @get_eeprom: Read data from the device EEPROM. * Should fill in the magic field. Don't need to check len for zero * or wraparound. Fill in the data argument with the eeprom values @@ -708,28 +714,84 @@ bool ethtool_invalid_flags(struct net_device *dev, u32 data, u32 supported); * Should validate the magic field. Don't need to check len for zero * or wraparound. Update len to the amount written. Returns an error * or zero. - * @get_coalesce: Get interrupt coalescing parameters - * @set_coalesce: Set interrupt coalescing parameters + * @get_coalesce: Get interrupt coalescing parameters. Returns a negative + * error code or zero. + * @set_coalesce: Set interrupt coalescing parameters. Returns a negative + * error code or zero. * @get_ringparam: Report ring sizes - * @set_ringparam: Set ring sizes + * @set_ringparam: Set ring sizes. Returns a negative error code or zero. * @get_pauseparam: Report pause parameters - * @set_pauseparam: Set pause parameters - * @get_rx_csum: Report whether receive checksums are turned on or off - * @set_rx_csum: Turn receive checksum on or off - * @get_tx_csum: Report whether transmit checksums are turned on or off - * @set_tx_csum: Turn transmit checksums on or off - * @get_sg: Report whether scatter-gather is enabled - * @set_sg: Turn scatter-gather on or off - * @get_tso: Report whether TCP segmentation offload is enabled - * @set_tso: Turn TCP segmentation offload on or off - * @get_ufo: Report whether UDP fragmentation offload is enabled - * @set_ufo: Turn UDP fragmentation offload on or off + * @set_pauseparam: Set pause parameters. Returns a negative error code + * or zero. + * @get_rx_csum: Deprecated in favour of the netdev feature %NETIF_F_RXCSUM. + * Report whether receive checksums are turned on or off. + * @set_rx_csum: Deprecated in favour of generic netdev features. Turn + * receive checksum on or off. Returns a negative error code or zero. + * @get_tx_csum: Deprecated as redundant. Report whether transmit checksums + * are turned on or off. + * @set_tx_csum: Deprecated in favour of generic netdev features. Turn + * transmit checksums on or off. Returns a egative error code or zero. + * @get_sg: Deprecated as redundant. Report whether scatter-gather is + * enabled. + * @set_sg: Deprecated in favour of generic netdev features. Turn + * scatter-gather on or off. Returns a negative error code or zero. + * @get_tso: Deprecated as redundant. Report whether TCP segmentation + * offload is enabled. + * @set_tso: Deprecated in favour of generic netdev features. Turn TCP + * segmentation offload on or off. Returns a negative error code or zero. 
* @self_test: Run specified self-tests * @get_strings: Return a set of strings that describe the requested objects - * @phys_id: Identify the device - * @get_stats: Return statistics about the device - * @get_flags: get 32-bit flags bitmap - * @set_flags: set 32-bit flags bitmap + * @phys_id: Identify the physical device, e.g. by flashing an LED + * attached to it until interrupted by a signal or the given time + * (in seconds) elapses. If the given time is zero, use a default + * time limit. Returns a negative error code or zero. Being + * interrupted by a signal is not an error. + * @get_ethtool_stats: Return extended statistics about the device. + * This is only useful if the device maintains statistics not + * included in &struct rtnl_link_stats64. + * @begin: Function to be called before any other operation. Returns a + * negative error code or zero. + * @complete: Function to be called after any other operation except + * @begin. Will be called even if the other operation failed. + * @get_ufo: Deprecated as redundant. Report whether UDP fragmentation + * offload is enabled. + * @set_ufo: Deprecated in favour of generic netdev features. Turn UDP + * fragmentation offload on or off. Returns a negative error code or zero. + * @get_flags: Deprecated as redundant. Report features included in + * &enum ethtool_flags that are enabled. + * @set_flags: Deprecated in favour of generic netdev features. Turn + * features included in &enum ethtool_flags on or off. Returns a + * negative error code or zero. + * @get_priv_flags: Report driver-specific feature flags. + * @set_priv_flags: Set driver-specific feature flags. Returns a negative + * error code or zero. + * @get_sset_count: Get number of strings that @get_strings will write. + * @get_rxnfc: Get RX flow classification rules. Returns a negative + * error code or zero. + * @set_rxnfc: Set RX flow classification rules. Returns a negative + * error code or zero. + * @flash_device: Write a firmware image to device's flash memory. + * Returns a negative error code or zero. + * @reset: Reset (part of) the device, as specified by a bitmask of + * flags from &enum ethtool_reset_flags. Returns a negative + * error code or zero. + * @set_rx_ntuple: Set an RX n-tuple rule. Returns a negative error code + * or zero. + * @get_rx_ntuple: Deprecated. + * @get_rxfh_indir: Get the contents of the RX flow hash indirection table. + * Returns a negative error code or zero. + * @set_rxfh_indir: Set the contents of the RX flow hash indirection table. + * Returns a negative error code or zero. + * + * All operations are optional (i.e. the function pointer may be set + * to %NULL) and callers must take this into account. Callers must + * hold the RTNL, except that for @get_drvinfo the caller may or may + * not hold the RTNL. + * + * See the structures used by these operations for further documentation. + * + * See &struct net_device and &struct net_device_ops for documentation + * of the generic netdev features interface. */ struct ethtool_ops { int (*get_settings)(struct net_device *, struct ethtool_cmd *); -- cgit v1.2.3-71-gd317 From 68f512f21a64c9b264df6c61a9333e7890faf74b Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sat, 2 Apr 2011 00:35:15 +0100 Subject: ethtool: Change ETHTOOL_PHYS_ID implementation to allow dropping RTNL The ethtool ETHTOOL_PHYS_ID command runs for an arbitrarily long period of time, holding the RTNL lock. 
This blocks routing updates, device enumeration, and various important operations that one might want to keep running while hunting for the flashing LED. We need to drop the RTNL lock during this operation, but currently the core implementation is a thin wrapper around a driver operation and drivers may well depend upon holding the lock. Define a new driver operation 'set_phys_id' with an argument that sets the ID indicator on/off/inactive/active (the last optional, for any driver or firmware that prefers to handle blinking asynchronously). When this is defined, the ethtool core drops the lock while waiting and only acquires it around calls to this operation. Deprecate the 'phys_id' operation in favour of this. It can be removed once all in-tree drivers are converted. Signed-off-by: Ben Hutchings --- include/linux/ethtool.h | 30 ++++++++++++++++++++++++++- net/core/ethtool.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 82 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index ead7dcb1bf1e..c04d1316d221 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -663,6 +663,22 @@ struct ethtool_rx_ntuple_list { unsigned int count; }; +/** + * enum ethtool_phys_id_state - indicator state for physical identification + * @ETHTOOL_ID_INACTIVE: Physical ID indicator should be deactivated + * @ETHTOOL_ID_ACTIVE: Physical ID indicator should be activated + * @ETHTOOL_ID_ON: LED should be turned on (used iff %ETHTOOL_ID_ACTIVE + * is not supported) + * @ETHTOOL_ID_OFF: LED should be turned off (used iff %ETHTOOL_ID_ACTIVE + * is not supported) + */ +enum ethtool_phys_id_state { + ETHTOOL_ID_INACTIVE, + ETHTOOL_ID_ACTIVE, + ETHTOOL_ID_ON, + ETHTOOL_ID_OFF +}; + struct net_device; /* Some generic methods drivers may use in their ethtool_ops */ @@ -741,7 +757,18 @@ bool ethtool_invalid_flags(struct net_device *dev, u32 data, u32 supported); * segmentation offload on or off. Returns a negative error code or zero. * @self_test: Run specified self-tests * @get_strings: Return a set of strings that describe the requested objects - * @phys_id: Identify the physical device, e.g. by flashing an LED + * @set_phys_id: Identify the physical devices, e.g. by flashing an LED + * attached to it. The implementation may update the indicator + * asynchronously or synchronously, but in either case it must return + * quickly. It is initially called with the argument %ETHTOOL_ID_ACTIVE, + * and must either activate asynchronous updates or return -%EINVAL. + * If it returns -%EINVAL then it will be called again at intervals with + * argument %ETHTOOL_ID_ON or %ETHTOOL_ID_OFF and should set the state of + * the indicator accordingly. Finally, it is called with the argument + * %ETHTOOL_ID_INACTIVE and must deactivate the indicator. Returns a + * negative error code or zero. + * @phys_id: Deprecated in favour of @set_phys_id. + * Identify the physical device, e.g. by flashing an LED * attached to it until interrupted by a signal or the given time * (in seconds) elapses. If the given time is zero, use a default * time limit. Returns a negative error code or zero. 
Being @@ -830,6 +857,7 @@ struct ethtool_ops { int (*set_tso)(struct net_device *, u32); void (*self_test)(struct net_device *, struct ethtool_test *, u64 *); void (*get_strings)(struct net_device *, u32 stringset, u8 *); + int (*set_phys_id)(struct net_device *, enum ethtool_phys_id_state); int (*phys_id)(struct net_device *, u32); void (*get_ethtool_stats)(struct net_device *, struct ethtool_stats *, u64 *); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 74ead9eca126..1c95f0fb8b3a 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -21,6 +21,8 @@ #include #include #include +#include +#include /* * Some useful ethtool_ops methods that're device independent. @@ -1618,14 +1620,63 @@ out: static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) { struct ethtool_value id; + static bool busy; + int rc; - if (!dev->ethtool_ops->phys_id) + if (!dev->ethtool_ops->set_phys_id && !dev->ethtool_ops->phys_id) return -EOPNOTSUPP; + if (busy) + return -EBUSY; + if (copy_from_user(&id, useraddr, sizeof(id))) return -EFAULT; - return dev->ethtool_ops->phys_id(dev, id.data); + if (!dev->ethtool_ops->set_phys_id) + /* Do it the old way */ + return dev->ethtool_ops->phys_id(dev, id.data); + + rc = dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_ACTIVE); + if (rc && rc != -EINVAL) + return rc; + + /* Drop the RTNL lock while waiting, but prevent reentry or + * removal of the device. + */ + busy = true; + dev_hold(dev); + rtnl_unlock(); + + if (rc == 0) { + /* Driver will handle this itself */ + schedule_timeout_interruptible( + id.data ? id.data : MAX_SCHEDULE_TIMEOUT); + } else { + /* Driver expects to be called periodically */ + do { + rtnl_lock(); + rc = dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_ON); + rtnl_unlock(); + if (rc) + break; + schedule_timeout_interruptible(HZ / 2); + + rtnl_lock(); + rc = dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_OFF); + rtnl_unlock(); + if (rc) + break; + schedule_timeout_interruptible(HZ / 2); + } while (!signal_pending(current) && + (id.data == 0 || --id.data != 0)); + } + + rtnl_lock(); + dev_put(dev); + busy = false; + + (void)dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_INACTIVE); + return rc; } static int ethtool_get_stats(struct net_device *dev, void __user *useraddr) -- cgit v1.2.3-71-gd317 From f4263c9857e6411ef2388868cc6c79a1602a654e Mon Sep 17 00:00:00 2001 From: Paul Stewart Date: Thu, 31 Mar 2011 09:25:41 -0700 Subject: nl80211: Add BSS parameters to station This allows user-space monitoring of BSS parameters for the associated station. This is useful for debugging and verifying that the paramaters are as expected. [Exactly the same as before but bundled into a single message] Signed-off-by: Paul Stewart Cc: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 35 ++++++++++++++++++++++++++++++++++- include/net/cfg80211.h | 34 ++++++++++++++++++++++++++++++++++ net/mac80211/cfg.c | 13 ++++++++++++- net/wireless/nl80211.c | 21 ++++++++++++++++++++- 4 files changed, 100 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 30022189104d..16eea7229e99 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1221,6 +1221,36 @@ enum nl80211_rate_info { NL80211_RATE_INFO_MAX = __NL80211_RATE_INFO_AFTER_LAST - 1 }; +/** + * enum nl80211_sta_bss_param - BSS information collected by STA + * + * These attribute types are used with %NL80211_STA_INFO_BSS_PARAM + * when getting information about the bitrate of a station. 
+ * + * @__NL80211_STA_BSS_PARAM_INVALID: attribute number 0 is reserved + * @NL80211_STA_BSS_PARAM_CTS_PROT: whether CTS protection is enabled (flag) + * @NL80211_STA_BSS_PARAM_SHORT_PREAMBLE: whether short preamble is enabled + * (flag) + * @NL80211_STA_BSS_PARAM_SHORT_SLOT_TIME: whether short slot time is enabled + * (flag) + * @NL80211_STA_BSS_PARAM_DTIM_PERIOD: DTIM period for beaconing (u8) + * @NL80211_STA_BSS_PARAM_BEACON_INTERVAL: Beacon interval (u16) + * @NL80211_STA_BSS_PARAM_MAX: highest sta_bss_param number currently defined + * @__NL80211_STA_BSS_PARAM_AFTER_LAST: internal use + */ +enum nl80211_sta_bss_param { + __NL80211_STA_BSS_PARAM_INVALID, + NL80211_STA_BSS_PARAM_CTS_PROT, + NL80211_STA_BSS_PARAM_SHORT_PREAMBLE, + NL80211_STA_BSS_PARAM_SHORT_SLOT_TIME, + NL80211_STA_BSS_PARAM_DTIM_PERIOD, + NL80211_STA_BSS_PARAM_BEACON_INTERVAL, + + /* keep last */ + __NL80211_STA_BSS_PARAM_AFTER_LAST, + NL80211_STA_BSS_PARAM_MAX = __NL80211_STA_BSS_PARAM_AFTER_LAST - 1 +}; + /** * enum nl80211_sta_info - station information * @@ -1233,7 +1263,7 @@ enum nl80211_rate_info { * @NL80211_STA_INFO_TX_BYTES: total transmitted bytes (u32, to this station) * @NL80211_STA_INFO_SIGNAL: signal strength of last received PPDU (u8, dBm) * @NL80211_STA_INFO_TX_BITRATE: current unicast tx rate, nested attribute - * containing info as possible, see &enum nl80211_sta_info_txrate. + * containing info as possible, see &enum nl80211_rate_info * @NL80211_STA_INFO_RX_PACKETS: total received packet (u32, from this station) * @NL80211_STA_INFO_TX_PACKETS: total transmitted packets (u32, to this * station) @@ -1245,6 +1275,8 @@ enum nl80211_rate_info { * @NL80211_STA_INFO_PLINK_STATE: peer link state for the station * @NL80211_STA_INFO_RX_BITRATE: last unicast data frame rx rate, nested * attribute, like NL80211_STA_INFO_TX_BITRATE. + * @NL80211_STA_INFO_BSS_PARAM: current station's view of BSS, nested attribute + * containing info as possible, see &enum nl80211_sta_bss_param * @__NL80211_STA_INFO_AFTER_LAST: internal * @NL80211_STA_INFO_MAX: highest possible station info attribute */ @@ -1264,6 +1296,7 @@ enum nl80211_sta_info { NL80211_STA_INFO_TX_FAILED, NL80211_STA_INFO_SIGNAL_AVG, NL80211_STA_INFO_RX_BITRATE, + NL80211_STA_INFO_BSS_PARAM, /* keep last */ __NL80211_STA_INFO_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 2c4530451721..ba7384acf4e0 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -422,6 +422,7 @@ struct station_parameters { * @STATION_INFO_RX_DROP_MISC: @rx_dropped_misc filled * @STATION_INFO_SIGNAL_AVG: @signal_avg filled * @STATION_INFO_RX_BITRATE: @rxrate fields are filled + * @STATION_INFO_BSS_PARAM: @bss_param filled */ enum station_info_flags { STATION_INFO_INACTIVE_TIME = 1<<0, @@ -439,6 +440,7 @@ enum station_info_flags { STATION_INFO_RX_DROP_MISC = 1<<12, STATION_INFO_SIGNAL_AVG = 1<<13, STATION_INFO_RX_BITRATE = 1<<14, + STATION_INFO_BSS_PARAM = 1<<15, }; /** @@ -472,6 +474,37 @@ struct rate_info { u16 legacy; }; +/** + * enum station_info_rate_flags - bitrate info flags + * + * Used by the driver to indicate the specific rate transmission + * type for 802.11n transmissions. 
+ * + * @BSS_PARAM_FLAGS_CTS_PROT: whether CTS protection is enabled + * @BSS_PARAM_FLAGS_SHORT_PREAMBLE: whether short preamble is enabled + * @BSS_PARAM_FLAGS_SHORT_SLOT_TIME: whether short slot time is enabled + */ +enum bss_param_flags { + BSS_PARAM_FLAGS_CTS_PROT = 1<<0, + BSS_PARAM_FLAGS_SHORT_PREAMBLE = 1<<1, + BSS_PARAM_FLAGS_SHORT_SLOT_TIME = 1<<2, +}; + +/** + * struct sta_bss_parameters - BSS parameters for the attached station + * + * Information about the currently associated BSS + * + * @flags: bitflag of flags from &enum bss_param_flags + * @dtim_period: DTIM period for the BSS + * @beacon_interval: beacon interval + */ +struct sta_bss_parameters { + u8 flags; + u8 dtim_period; + u16 beacon_interval; +}; + /** * struct station_info - station information * @@ -515,6 +548,7 @@ struct station_info { u32 tx_retries; u32 tx_failed; u32 rx_dropped_misc; + struct sta_bss_parameters bss_param; int generation; }; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 334213571ad0..bf5d28da46e6 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -342,7 +342,8 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) STATION_INFO_TX_FAILED | STATION_INFO_TX_BITRATE | STATION_INFO_RX_BITRATE | - STATION_INFO_RX_DROP_MISC; + STATION_INFO_RX_DROP_MISC | + STATION_INFO_BSS_PARAM; sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx); sinfo->rx_bytes = sta->rx_bytes; @@ -389,6 +390,16 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) sinfo->plink_state = sta->plink_state; #endif } + + sinfo->bss_param.flags = 0; + if (sdata->vif.bss_conf.use_cts_prot) + sinfo->bss_param.flags |= BSS_PARAM_FLAGS_CTS_PROT; + if (sdata->vif.bss_conf.use_short_preamble) + sinfo->bss_param.flags |= BSS_PARAM_FLAGS_SHORT_PREAMBLE; + if (sdata->vif.bss_conf.use_short_slot) + sinfo->bss_param.flags |= BSS_PARAM_FLAGS_SHORT_SLOT_TIME; + sinfo->bss_param.dtim_period = sdata->local->hw.conf.ps_dtim_period; + sinfo->bss_param.beacon_interval = sdata->vif.bss_conf.beacon_int; } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 40c90fb461c4..297d7ce4117b 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2002,7 +2002,7 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, const u8 *mac_addr, struct station_info *sinfo) { void *hdr; - struct nlattr *sinfoattr; + struct nlattr *sinfoattr, *bss_param; hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_STATION); if (!hdr) @@ -2062,6 +2062,25 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, if (sinfo->filled & STATION_INFO_TX_FAILED) NLA_PUT_U32(msg, NL80211_STA_INFO_TX_FAILED, sinfo->tx_failed); + if (sinfo->filled & STATION_INFO_BSS_PARAM) { + bss_param = nla_nest_start(msg, NL80211_STA_INFO_BSS_PARAM); + if (!bss_param) + goto nla_put_failure; + + if (sinfo->bss_param.flags & BSS_PARAM_FLAGS_CTS_PROT) + NLA_PUT_FLAG(msg, NL80211_STA_BSS_PARAM_CTS_PROT); + if (sinfo->bss_param.flags & BSS_PARAM_FLAGS_SHORT_PREAMBLE) + NLA_PUT_FLAG(msg, NL80211_STA_BSS_PARAM_SHORT_PREAMBLE); + if (sinfo->bss_param.flags & BSS_PARAM_FLAGS_SHORT_SLOT_TIME) + NLA_PUT_FLAG(msg, + NL80211_STA_BSS_PARAM_SHORT_SLOT_TIME); + NLA_PUT_U8(msg, NL80211_STA_BSS_PARAM_DTIM_PERIOD, + sinfo->bss_param.dtim_period); + NLA_PUT_U16(msg, NL80211_STA_BSS_PARAM_BEACON_INTERVAL, + sinfo->bss_param.beacon_interval); + + nla_nest_end(msg, bss_param); + } nla_nest_end(msg, sinfoattr); return genlmsg_end(msg, hdr); -- cgit v1.2.3-71-gd317 From 
0e028465d18b7c6797fcbdea632299d16097c5cd Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 6 Mar 2011 18:02:54 +0100 Subject: exec: unify do_execve/compat_do_execve code Add the appropriate members into struct user_arg_ptr and teach get_user_arg_ptr() to handle is_compat = T case correctly. This allows us to remove the compat_do_execve() code from fs/compat.c and reimplement compat_do_execve() as the trivial wrapper on top of do_execve_common(is_compat => true). In fact, this fixes another (minor) bug. "compat_uptr_t str" can overflow after "str += len" in compat_copy_strings() if a 64bit application execs via sys32_execve(). Unexport acct_arg_size() and get_arg_page(), fs/compat.c doesn't need them any longer. Signed-off-by: Oleg Nesterov Reviewed-by: KOSAKI Motohiro Tested-by: KOSAKI Motohiro --- fs/compat.c | 235 ------------------------------------------------ fs/exec.c | 62 ++++++++++--- include/linux/binfmts.h | 4 - 3 files changed, 50 insertions(+), 251 deletions(-) (limited to 'include/linux') diff --git a/fs/compat.c b/fs/compat.c index 72fe6cda9108..0ea00832de23 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1306,241 +1306,6 @@ compat_sys_openat(unsigned int dfd, const char __user *filename, int flags, int return do_sys_open(dfd, filename, flags, mode); } -/* - * compat_count() counts the number of arguments/envelopes. It is basically - * a copy of count() from fs/exec.c, except that it works with 32 bit argv - * and envp pointers. - */ -static int compat_count(compat_uptr_t __user *argv, int max) -{ - int i = 0; - - if (argv != NULL) { - for (;;) { - compat_uptr_t p; - - if (get_user(p, argv)) - return -EFAULT; - if (!p) - break; - argv++; - if (i++ >= max) - return -E2BIG; - - if (fatal_signal_pending(current)) - return -ERESTARTNOHAND; - cond_resched(); - } - } - return i; -} - -/* - * compat_copy_strings() is basically a copy of copy_strings() from fs/exec.c - * except that it works with 32 bit argv and envp pointers. - */ -static int compat_copy_strings(int argc, compat_uptr_t __user *argv, - struct linux_binprm *bprm) -{ - struct page *kmapped_page = NULL; - char *kaddr = NULL; - unsigned long kpos = 0; - int ret; - - while (argc-- > 0) { - compat_uptr_t str; - int len; - unsigned long pos; - - if (get_user(str, argv+argc) || - !(len = strnlen_user(compat_ptr(str), MAX_ARG_STRLEN))) { - ret = -EFAULT; - goto out; - } - - if (len > MAX_ARG_STRLEN) { - ret = -E2BIG; - goto out; - } - - /* We're going to work our way backwords. 
*/ - pos = bprm->p; - str += len; - bprm->p -= len; - - while (len > 0) { - int offset, bytes_to_copy; - - if (fatal_signal_pending(current)) { - ret = -ERESTARTNOHAND; - goto out; - } - cond_resched(); - - offset = pos % PAGE_SIZE; - if (offset == 0) - offset = PAGE_SIZE; - - bytes_to_copy = offset; - if (bytes_to_copy > len) - bytes_to_copy = len; - - offset -= bytes_to_copy; - pos -= bytes_to_copy; - str -= bytes_to_copy; - len -= bytes_to_copy; - - if (!kmapped_page || kpos != (pos & PAGE_MASK)) { - struct page *page; - - page = get_arg_page(bprm, pos, 1); - if (!page) { - ret = -E2BIG; - goto out; - } - - if (kmapped_page) { - flush_kernel_dcache_page(kmapped_page); - kunmap(kmapped_page); - put_page(kmapped_page); - } - kmapped_page = page; - kaddr = kmap(kmapped_page); - kpos = pos & PAGE_MASK; - flush_cache_page(bprm->vma, kpos, - page_to_pfn(kmapped_page)); - } - if (copy_from_user(kaddr+offset, compat_ptr(str), - bytes_to_copy)) { - ret = -EFAULT; - goto out; - } - } - } - ret = 0; -out: - if (kmapped_page) { - flush_kernel_dcache_page(kmapped_page); - kunmap(kmapped_page); - put_page(kmapped_page); - } - return ret; -} - -/* - * compat_do_execve() is mostly a copy of do_execve(), with the exception - * that it processes 32 bit argv and envp pointers. - */ -int compat_do_execve(char * filename, - compat_uptr_t __user *argv, - compat_uptr_t __user *envp, - struct pt_regs * regs) -{ - struct linux_binprm *bprm; - struct file *file; - struct files_struct *displaced; - bool clear_in_exec; - int retval; - - retval = unshare_files(&displaced); - if (retval) - goto out_ret; - - retval = -ENOMEM; - bprm = kzalloc(sizeof(*bprm), GFP_KERNEL); - if (!bprm) - goto out_files; - - retval = prepare_bprm_creds(bprm); - if (retval) - goto out_free; - - retval = check_unsafe_exec(bprm); - if (retval < 0) - goto out_free; - clear_in_exec = retval; - current->in_execve = 1; - - file = open_exec(filename); - retval = PTR_ERR(file); - if (IS_ERR(file)) - goto out_unmark; - - sched_exec(); - - bprm->file = file; - bprm->filename = filename; - bprm->interp = filename; - - retval = bprm_mm_init(bprm); - if (retval) - goto out_file; - - bprm->argc = compat_count(argv, MAX_ARG_STRINGS); - if ((retval = bprm->argc) < 0) - goto out; - - bprm->envc = compat_count(envp, MAX_ARG_STRINGS); - if ((retval = bprm->envc) < 0) - goto out; - - retval = prepare_binprm(bprm); - if (retval < 0) - goto out; - - retval = copy_strings_kernel(1, &bprm->filename, bprm); - if (retval < 0) - goto out; - - bprm->exec = bprm->p; - retval = compat_copy_strings(bprm->envc, envp, bprm); - if (retval < 0) - goto out; - - retval = compat_copy_strings(bprm->argc, argv, bprm); - if (retval < 0) - goto out; - - retval = search_binary_handler(bprm, regs); - if (retval < 0) - goto out; - - /* execve succeeded */ - current->fs->in_exec = 0; - current->in_execve = 0; - acct_update_integrals(current); - free_bprm(bprm); - if (displaced) - put_files_struct(displaced); - return retval; - -out: - if (bprm->mm) { - acct_arg_size(bprm, 0); - mmput(bprm->mm); - } - -out_file: - if (bprm->file) { - allow_write_access(bprm->file); - fput(bprm->file); - } - -out_unmark: - if (clear_in_exec) - current->fs->in_exec = 0; - current->in_execve = 0; - -out_free: - free_bprm(bprm); - -out_files: - if (displaced) - reset_files_struct(displaced); -out_ret: - return retval; -} - #define __COMPAT_NFDBITS (8 * sizeof(compat_ulong_t)) static int poll_select_copy_remaining(struct timespec *end_time, void __user *p, diff --git a/fs/exec.c b/fs/exec.c index 
526a0399d963..89d788ca7829 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -55,6 +55,7 @@ #include #include #include +#include #include #include @@ -167,7 +168,7 @@ out: #ifdef CONFIG_MMU -void acct_arg_size(struct linux_binprm *bprm, unsigned long pages) +static void acct_arg_size(struct linux_binprm *bprm, unsigned long pages) { struct mm_struct *mm = current->mm; long diff = (long)(pages - bprm->vma_pages); @@ -186,7 +187,7 @@ void acct_arg_size(struct linux_binprm *bprm, unsigned long pages) #endif } -struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, +static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, int write) { struct page *page; @@ -305,11 +306,11 @@ static bool valid_arg_len(struct linux_binprm *bprm, long len) #else -void acct_arg_size(struct linux_binprm *bprm, unsigned long pages) +static inline void acct_arg_size(struct linux_binprm *bprm, unsigned long pages) { } -struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, +static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, int write) { struct page *page; @@ -399,17 +400,36 @@ err: } struct user_arg_ptr { - const char __user *const __user *native; +#ifdef CONFIG_COMPAT + bool is_compat; +#endif + union { + const char __user *const __user *native; +#ifdef CONFIG_COMPAT + compat_uptr_t __user *compat; +#endif + } ptr; }; static const char __user *get_user_arg_ptr(struct user_arg_ptr argv, int nr) { - const char __user *ptr; + const char __user *native; + +#ifdef CONFIG_COMPAT + if (unlikely(argv.is_compat)) { + compat_uptr_t compat; + + if (get_user(compat, argv.ptr.compat + nr)) + return ERR_PTR(-EFAULT); - if (get_user(ptr, argv.native + nr)) + return compat_ptr(compat); + } +#endif + + if (get_user(native, argv.ptr.native + nr)) return ERR_PTR(-EFAULT); - return ptr; + return native; } /* @@ -419,7 +439,7 @@ static int count(struct user_arg_ptr argv, int max) { int i = 0; - if (argv.native != NULL) { + if (argv.ptr.native != NULL) { for (;;) { const char __user *p = get_user_arg_ptr(argv, i); @@ -542,7 +562,7 @@ int copy_strings_kernel(int argc, const char *const *__argv, int r; mm_segment_t oldfs = get_fs(); struct user_arg_ptr argv = { - .native = (const char __user *const __user *)__argv, + .ptr.native = (const char __user *const __user *)__argv, }; set_fs(KERNEL_DS); @@ -1516,10 +1536,28 @@ int do_execve(const char *filename, const char __user *const __user *__envp, struct pt_regs *regs) { - struct user_arg_ptr argv = { .native = __argv }; - struct user_arg_ptr envp = { .native = __envp }; + struct user_arg_ptr argv = { .ptr.native = __argv }; + struct user_arg_ptr envp = { .ptr.native = __envp }; + return do_execve_common(filename, argv, envp, regs); +} + +#ifdef CONFIG_COMPAT +int compat_do_execve(char *filename, + compat_uptr_t __user *__argv, + compat_uptr_t __user *__envp, + struct pt_regs *regs) +{ + struct user_arg_ptr argv = { + .is_compat = true, + .ptr.compat = __argv, + }; + struct user_arg_ptr envp = { + .is_compat = true, + .ptr.compat = __envp, + }; return do_execve_common(filename, argv, envp, regs); } +#endif void set_binfmt(struct linux_binfmt *new) { diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index c3d6512eded1..8845613fd7e3 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -60,10 +60,6 @@ struct linux_binprm { unsigned long loader, exec; }; -extern void acct_arg_size(struct linux_binprm *bprm, unsigned long pages); -extern struct page *get_arg_page(struct linux_binprm *bprm, 
unsigned long pos, - int write); - #define BINPRM_FLAGS_ENFORCE_NONDUMP_BIT 0 #define BINPRM_FLAGS_ENFORCE_NONDUMP (1 << BINPRM_FLAGS_ENFORCE_NONDUMP_BIT) -- cgit v1.2.3-71-gd317 From 5cdede2408e80f190c5595e592c24e77c1bf44b2 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 Apr 2011 15:55:18 +0200 Subject: PCI: Move ATS declarations in seperate header file This patch moves the relevant declarations from the local header file in drivers/pci to a more accessible locations so that it can be used by the AMD IOMMU driver too. The file is named pci-ats.h because support for the PCI PRI capability will also be added there in a later patch-set. Signed-off-by: Joerg Roedel Acked-by: Jesse Barnes --- drivers/pci/intel-iommu.c | 1 + drivers/pci/iov.c | 1 + drivers/pci/pci.h | 37 --------------------------------- include/linux/pci-ats.h | 52 +++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 54 insertions(+), 37 deletions(-) create mode 100644 include/linux/pci-ats.h (limited to 'include/linux') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 7da3bef60d87..fdb2cef2b908 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include "pci.h" diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index 553d8ee55c1c..42fae4776515 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "pci.h" #define VIRTFN_ID_LEN 16 diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index a6ec200fe5ee..4020025f854e 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -250,15 +250,6 @@ struct pci_sriov { u8 __iomem *mstate; /* VF Migration State Array */ }; -/* Address Translation Service */ -struct pci_ats { - int pos; /* capability position */ - int stu; /* Smallest Translation Unit */ - int qdep; /* Invalidate Queue Depth */ - int ref_cnt; /* Physical Function reference count */ - unsigned int is_enabled:1; /* Enable bit is set */ -}; - #ifdef CONFIG_PCI_IOV extern int pci_iov_init(struct pci_dev *dev); extern void pci_iov_release(struct pci_dev *dev); @@ -269,19 +260,6 @@ extern resource_size_t pci_sriov_resource_alignment(struct pci_dev *dev, extern void pci_restore_iov_state(struct pci_dev *dev); extern int pci_iov_bus_range(struct pci_bus *bus); -extern int pci_enable_ats(struct pci_dev *dev, int ps); -extern void pci_disable_ats(struct pci_dev *dev); -extern int pci_ats_queue_depth(struct pci_dev *dev); -/** - * pci_ats_enabled - query the ATS status - * @dev: the PCI device - * - * Returns 1 if ATS capability is enabled, or 0 if not. 
- */ -static inline int pci_ats_enabled(struct pci_dev *dev) -{ - return dev->ats && dev->ats->is_enabled; -} #else static inline int pci_iov_init(struct pci_dev *dev) { @@ -304,21 +282,6 @@ static inline int pci_iov_bus_range(struct pci_bus *bus) return 0; } -static inline int pci_enable_ats(struct pci_dev *dev, int ps) -{ - return -ENODEV; -} -static inline void pci_disable_ats(struct pci_dev *dev) -{ -} -static inline int pci_ats_queue_depth(struct pci_dev *dev) -{ - return -ENODEV; -} -static inline int pci_ats_enabled(struct pci_dev *dev) -{ - return 0; -} #endif /* CONFIG_PCI_IOV */ static inline resource_size_t pci_resource_alignment(struct pci_dev *dev, diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h new file mode 100644 index 000000000000..655824fa4c76 --- /dev/null +++ b/include/linux/pci-ats.h @@ -0,0 +1,52 @@ +#ifndef LINUX_PCI_ATS_H +#define LINUX_PCI_ATS_H + +/* Address Translation Service */ +struct pci_ats { + int pos; /* capability position */ + int stu; /* Smallest Translation Unit */ + int qdep; /* Invalidate Queue Depth */ + int ref_cnt; /* Physical Function reference count */ + unsigned int is_enabled:1; /* Enable bit is set */ +}; + +#ifdef CONFIG_PCI_IOV + +extern int pci_enable_ats(struct pci_dev *dev, int ps); +extern void pci_disable_ats(struct pci_dev *dev); +extern int pci_ats_queue_depth(struct pci_dev *dev); +/** + * pci_ats_enabled - query the ATS status + * @dev: the PCI device + * + * Returns 1 if ATS capability is enabled, or 0 if not. + */ +static inline int pci_ats_enabled(struct pci_dev *dev) +{ + return dev->ats && dev->ats->is_enabled; +} + +#else /* CONFIG_PCI_IOV */ + +static inline int pci_enable_ats(struct pci_dev *dev, int ps) +{ + return -ENODEV; +} + +static inline void pci_disable_ats(struct pci_dev *dev) +{ +} + +static inline int pci_ats_queue_depth(struct pci_dev *dev) +{ + return -ENODEV; +} + +static inline int pci_ats_enabled(struct pci_dev *dev) +{ + return 0; +} + +#endif /* CONFIG_PCI_IOV */ + +#endif /* LINUX_PCI_ATS_H*/ -- cgit v1.2.3-71-gd317 From dce840a08702bd13a9a186e07e63d1ef82256b5e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 7 Apr 2011 14:09:50 +0200 Subject: sched: Dynamically allocate sched_domain/sched_group data-structures Instead of relying on static allocations for the sched_domain and sched_group trees, dynamically allocate and RCU free them. Allocating this dynamically also allows for some build_sched_groups() simplification since we can now (like with other simplifications) rely on the sched_domain tree instead of hard-coded knowledge. One tricky to note is that detach_destroy_domains() needs to hold rcu_read_lock() over the entire tear-down, per-cpu is not sufficient since that can lead to partial sched_group existance (could possibly be solved by doing the tear-down backwards but this is much more robust). A concequence of the above is that we can no longer print the sched_domain debug stuff from cpu_attach_domain() since that might now run with preemption disabled (due to classic RCU etc.) and sched_domain_debug() does some GFP_KERNEL allocations. Another thing to note is that we now fully rely on normal RCU and not RCU-sched, this is because with the new and exiting RCU flavours we grew over the years BH doesn't necessarily hold off RCU-sched grace periods (-rt is known to break this). This would in fact already cause us grief since we do sched_domain/sched_group iterations from softirq context. 
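To illustrate the locking rule this change introduces for sched_domain walkers, here is a minimal sketch. The helper name is made up purely for illustration; for_each_domain() is the existing iterator local to the scheduler code, and with this patch it must run under the plain RCU read lock rather than merely with preemption disabled:

/* hypothetical helper, shown only to illustrate the new pattern */
static int example_count_domain_levels(int cpu)
{
	struct sched_domain *sd;
	int levels = 0;

	rcu_read_lock();			/* not rcu_read_lock_sched() */
	for_each_domain(cpu, sd)		/* rcu_dereference()s rq->sd */
		levels++;
	rcu_read_unlock();

	return levels;
}

This is the same pattern the hunks later in this patch wrap around the existing for_each_domain() users in kernel/sched.c and kernel/sched_fair.c.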
This patch is somewhat larger than I would like it to be, but I didn't find any means of shrinking/splitting this. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Nick Piggin Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/20110407122942.245307941@chello.nl Signed-off-by: Ingo Molnar --- include/linux/sched.h | 5 + kernel/sched.c | 479 ++++++++++++++++++++------------------------------ kernel/sched_fair.c | 30 +++- 3 files changed, 218 insertions(+), 296 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4ec2c027e92c..020b79d6c486 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -868,6 +868,7 @@ static inline int sd_power_saving_flags(void) struct sched_group { struct sched_group *next; /* Must be a circular list */ + atomic_t ref; /* * CPU power of this group, SCHED_LOAD_SCALE being max power for a @@ -973,6 +974,10 @@ struct sched_domain { #ifdef CONFIG_SCHED_DEBUG char *name; #endif + union { + void *private; /* used during construction */ + struct rcu_head rcu; /* used during destruction */ + }; unsigned int span_weight; /* diff --git a/kernel/sched.c b/kernel/sched.c index 1cca59ec4a49..65204845063e 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -417,6 +417,7 @@ struct rt_rq { */ struct root_domain { atomic_t refcount; + struct rcu_head rcu; cpumask_var_t span; cpumask_var_t online; @@ -571,7 +572,7 @@ static inline int cpu_of(struct rq *rq) #define rcu_dereference_check_sched_domain(p) \ rcu_dereference_check((p), \ - rcu_read_lock_sched_held() || \ + rcu_read_lock_held() || \ lockdep_is_held(&sched_domains_mutex)) /* @@ -6572,12 +6573,11 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) return 1; } -static void free_rootdomain(struct root_domain *rd) +static void free_rootdomain(struct rcu_head *rcu) { - synchronize_sched(); + struct root_domain *rd = container_of(rcu, struct root_domain, rcu); cpupri_cleanup(&rd->cpupri); - free_cpumask_var(rd->rto_mask); free_cpumask_var(rd->online); free_cpumask_var(rd->span); @@ -6618,7 +6618,7 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd) raw_spin_unlock_irqrestore(&rq->lock, flags); if (old_rd) - free_rootdomain(old_rd); + call_rcu_sched(&old_rd->rcu, free_rootdomain); } static int init_rootdomain(struct root_domain *rd) @@ -6669,6 +6669,25 @@ static struct root_domain *alloc_rootdomain(void) return rd; } +static void free_sched_domain(struct rcu_head *rcu) +{ + struct sched_domain *sd = container_of(rcu, struct sched_domain, rcu); + if (atomic_dec_and_test(&sd->groups->ref)) + kfree(sd->groups); + kfree(sd); +} + +static void destroy_sched_domain(struct sched_domain *sd, int cpu) +{ + call_rcu(&sd->rcu, free_sched_domain); +} + +static void destroy_sched_domains(struct sched_domain *sd, int cpu) +{ + for (; sd; sd = sd->parent) + destroy_sched_domain(sd, cpu); +} + /* * Attach the domain 'sd' to 'cpu' as its base domain. Callers must * hold the hotplug lock. 
@@ -6689,20 +6708,25 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu) tmp->parent = parent->parent; if (parent->parent) parent->parent->child = tmp; + destroy_sched_domain(parent, cpu); } else tmp = tmp->parent; } if (sd && sd_degenerate(sd)) { + tmp = sd; sd = sd->parent; + destroy_sched_domain(tmp, cpu); if (sd) sd->child = NULL; } - sched_domain_debug(sd, cpu); + /* sched_domain_debug(sd, cpu); */ rq_attach_root(rq, rd); + tmp = rq->sd; rcu_assign_pointer(rq->sd, sd); + destroy_sched_domains(tmp, cpu); } /* cpus with isolated domains */ @@ -6718,56 +6742,6 @@ static int __init isolated_cpu_setup(char *str) __setup("isolcpus=", isolated_cpu_setup); -/* - * init_sched_build_groups takes the cpumask we wish to span, and a pointer - * to a function which identifies what group(along with sched group) a CPU - * belongs to. The return value of group_fn must be a >= 0 and < nr_cpu_ids - * (due to the fact that we keep track of groups covered with a struct cpumask). - * - * init_sched_build_groups will build a circular linked list of the groups - * covered by the given span, and will set each group's ->cpumask correctly, - * and ->cpu_power to 0. - */ -static void -init_sched_build_groups(const struct cpumask *span, - const struct cpumask *cpu_map, - int (*group_fn)(int cpu, const struct cpumask *cpu_map, - struct sched_group **sg, - struct cpumask *tmpmask), - struct cpumask *covered, struct cpumask *tmpmask) -{ - struct sched_group *first = NULL, *last = NULL; - int i; - - cpumask_clear(covered); - - for_each_cpu(i, span) { - struct sched_group *sg; - int group = group_fn(i, cpu_map, &sg, tmpmask); - int j; - - if (cpumask_test_cpu(i, covered)) - continue; - - cpumask_clear(sched_group_cpus(sg)); - sg->cpu_power = 0; - - for_each_cpu(j, span) { - if (group_fn(j, cpu_map, NULL, tmpmask) != group) - continue; - - cpumask_set_cpu(j, covered); - cpumask_set_cpu(j, sched_group_cpus(sg)); - } - if (!first) - first = sg; - if (last) - last->next = sg; - last = sg; - } - last->next = first; -} - #define SD_NODES_PER_DOMAIN 16 #ifdef CONFIG_NUMA @@ -6858,154 +6832,96 @@ struct static_sched_domain { DECLARE_BITMAP(span, CONFIG_NR_CPUS); }; +struct sd_data { + struct sched_domain **__percpu sd; + struct sched_group **__percpu sg; +}; + struct s_data { #ifdef CONFIG_NUMA int sd_allnodes; #endif cpumask_var_t nodemask; cpumask_var_t send_covered; - cpumask_var_t tmpmask; struct sched_domain ** __percpu sd; + struct sd_data sdd[SD_LV_MAX]; struct root_domain *rd; }; enum s_alloc { sa_rootdomain, sa_sd, - sa_tmpmask, + sa_sd_storage, sa_send_covered, sa_nodemask, sa_none, }; /* - * SMT sched-domains: + * Assumes the sched_domain tree is fully constructed */ -#ifdef CONFIG_SCHED_SMT -static DEFINE_PER_CPU(struct static_sched_domain, cpu_domains); -static DEFINE_PER_CPU(struct static_sched_group, sched_groups); - -static int -cpu_to_cpu_group(int cpu, const struct cpumask *cpu_map, - struct sched_group **sg, struct cpumask *unused) +static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg) { - if (sg) - *sg = &per_cpu(sched_groups, cpu).sg; - return cpu; -} -#endif /* CONFIG_SCHED_SMT */ + struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu); + struct sched_domain *child = sd->child; -/* - * multi-core sched-domains: - */ -#ifdef CONFIG_SCHED_MC -static DEFINE_PER_CPU(struct static_sched_domain, core_domains); -static DEFINE_PER_CPU(struct static_sched_group, sched_group_core); + if (child) + cpu = cpumask_first(sched_domain_span(child)); -static int 
-cpu_to_core_group(int cpu, const struct cpumask *cpu_map, - struct sched_group **sg, struct cpumask *mask) -{ - int group; -#ifdef CONFIG_SCHED_SMT - cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map); - group = cpumask_first(mask); -#else - group = cpu; -#endif if (sg) - *sg = &per_cpu(sched_group_core, group).sg; - return group; + *sg = *per_cpu_ptr(sdd->sg, cpu); + + return cpu; } -#endif /* CONFIG_SCHED_MC */ /* - * book sched-domains: + * build_sched_groups takes the cpumask we wish to span, and a pointer + * to a function which identifies what group(along with sched group) a CPU + * belongs to. The return value of group_fn must be a >= 0 and < nr_cpu_ids + * (due to the fact that we keep track of groups covered with a struct cpumask). + * + * build_sched_groups will build a circular linked list of the groups + * covered by the given span, and will set each group's ->cpumask correctly, + * and ->cpu_power to 0. */ -#ifdef CONFIG_SCHED_BOOK -static DEFINE_PER_CPU(struct static_sched_domain, book_domains); -static DEFINE_PER_CPU(struct static_sched_group, sched_group_book); - -static int -cpu_to_book_group(int cpu, const struct cpumask *cpu_map, - struct sched_group **sg, struct cpumask *mask) -{ - int group = cpu; -#ifdef CONFIG_SCHED_MC - cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map); - group = cpumask_first(mask); -#elif defined(CONFIG_SCHED_SMT) - cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map); - group = cpumask_first(mask); -#endif - if (sg) - *sg = &per_cpu(sched_group_book, group).sg; - return group; -} -#endif /* CONFIG_SCHED_BOOK */ - -static DEFINE_PER_CPU(struct static_sched_domain, phys_domains); -static DEFINE_PER_CPU(struct static_sched_group, sched_group_phys); - -static int -cpu_to_phys_group(int cpu, const struct cpumask *cpu_map, - struct sched_group **sg, struct cpumask *mask) +static void +build_sched_groups(struct sched_domain *sd, struct cpumask *covered) { - int group; -#ifdef CONFIG_SCHED_BOOK - cpumask_and(mask, cpu_book_mask(cpu), cpu_map); - group = cpumask_first(mask); -#elif defined(CONFIG_SCHED_MC) - cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map); - group = cpumask_first(mask); -#elif defined(CONFIG_SCHED_SMT) - cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map); - group = cpumask_first(mask); -#else - group = cpu; -#endif - if (sg) - *sg = &per_cpu(sched_group_phys, group).sg; - return group; -} - -#ifdef CONFIG_NUMA -static DEFINE_PER_CPU(struct static_sched_domain, node_domains); -static DEFINE_PER_CPU(struct static_sched_group, sched_group_node); + struct sched_group *first = NULL, *last = NULL; + struct sd_data *sdd = sd->private; + const struct cpumask *span = sched_domain_span(sd); + int i; -static int cpu_to_node_group(int cpu, const struct cpumask *cpu_map, - struct sched_group **sg, - struct cpumask *nodemask) -{ - int group; + cpumask_clear(covered); - cpumask_and(nodemask, cpumask_of_node(cpu_to_node(cpu)), cpu_map); - group = cpumask_first(nodemask); + for_each_cpu(i, span) { + struct sched_group *sg; + int group = get_group(i, sdd, &sg); + int j; - if (sg) - *sg = &per_cpu(sched_group_node, group).sg; - return group; -} + if (cpumask_test_cpu(i, covered)) + continue; -static DEFINE_PER_CPU(struct static_sched_domain, allnodes_domains); -static DEFINE_PER_CPU(struct static_sched_group, sched_group_allnodes); + cpumask_clear(sched_group_cpus(sg)); + sg->cpu_power = 0; -static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map, - struct sched_group **sg, - struct cpumask *nodemask) -{ - int group; + 
for_each_cpu(j, span) { + if (get_group(j, sdd, NULL) != group) + continue; - cpumask_and(nodemask, cpumask_of_node(cpu_to_node(cpu)), cpu_map); - group = cpumask_first(nodemask); + cpumask_set_cpu(j, covered); + cpumask_set_cpu(j, sched_group_cpus(sg)); + } - if (sg) - *sg = &per_cpu(sched_group_allnodes, group).sg; - return group; + if (!first) + first = sg; + if (last) + last->next = sg; + last = sg; + } + last->next = first; } -#endif /* CONFIG_NUMA */ - /* * Initialize sched groups cpu_power. * @@ -7039,15 +6955,15 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd) # define SD_INIT_NAME(sd, type) do { } while (0) #endif -#define SD_INIT(sd, type) sd_init_##type(sd) - -#define SD_INIT_FUNC(type) \ -static noinline void sd_init_##type(struct sched_domain *sd) \ -{ \ - memset(sd, 0, sizeof(*sd)); \ - *sd = SD_##type##_INIT; \ - sd->level = SD_LV_##type; \ - SD_INIT_NAME(sd, type); \ +#define SD_INIT_FUNC(type) \ +static noinline struct sched_domain *sd_init_##type(struct s_data *d, int cpu) \ +{ \ + struct sched_domain *sd = *per_cpu_ptr(d->sdd[SD_LV_##type].sd, cpu); \ + *sd = SD_##type##_INIT; \ + sd->level = SD_LV_##type; \ + SD_INIT_NAME(sd, type); \ + sd->private = &d->sdd[SD_LV_##type]; \ + return sd; \ } SD_INIT_FUNC(CPU) @@ -7103,13 +7019,22 @@ static void set_domain_attribute(struct sched_domain *sd, static void __free_domain_allocs(struct s_data *d, enum s_alloc what, const struct cpumask *cpu_map) { + int i, j; + switch (what) { case sa_rootdomain: - free_rootdomain(d->rd); /* fall through */ + free_rootdomain(&d->rd->rcu); /* fall through */ case sa_sd: free_percpu(d->sd); /* fall through */ - case sa_tmpmask: - free_cpumask_var(d->tmpmask); /* fall through */ + case sa_sd_storage: + for (i = 0; i < SD_LV_MAX; i++) { + for_each_cpu(j, cpu_map) { + kfree(*per_cpu_ptr(d->sdd[i].sd, j)); + kfree(*per_cpu_ptr(d->sdd[i].sg, j)); + } + free_percpu(d->sdd[i].sd); + free_percpu(d->sdd[i].sg); + } /* fall through */ case sa_send_covered: free_cpumask_var(d->send_covered); /* fall through */ case sa_nodemask: @@ -7122,25 +7047,70 @@ static void __free_domain_allocs(struct s_data *d, enum s_alloc what, static enum s_alloc __visit_domain_allocation_hell(struct s_data *d, const struct cpumask *cpu_map) { + int i, j; + + memset(d, 0, sizeof(*d)); + if (!alloc_cpumask_var(&d->nodemask, GFP_KERNEL)) return sa_none; if (!alloc_cpumask_var(&d->send_covered, GFP_KERNEL)) return sa_nodemask; - if (!alloc_cpumask_var(&d->tmpmask, GFP_KERNEL)) - return sa_send_covered; - d->sd = alloc_percpu(struct sched_domain *); - if (!d->sd) { - printk(KERN_WARNING "Cannot alloc per-cpu pointers\n"); - return sa_tmpmask; + for (i = 0; i < SD_LV_MAX; i++) { + d->sdd[i].sd = alloc_percpu(struct sched_domain *); + if (!d->sdd[i].sd) + return sa_sd_storage; + + d->sdd[i].sg = alloc_percpu(struct sched_group *); + if (!d->sdd[i].sg) + return sa_sd_storage; + + for_each_cpu(j, cpu_map) { + struct sched_domain *sd; + struct sched_group *sg; + + sd = kzalloc_node(sizeof(struct sched_domain) + cpumask_size(), + GFP_KERNEL, cpu_to_node(j)); + if (!sd) + return sa_sd_storage; + + *per_cpu_ptr(d->sdd[i].sd, j) = sd; + + sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(), + GFP_KERNEL, cpu_to_node(j)); + if (!sg) + return sa_sd_storage; + + *per_cpu_ptr(d->sdd[i].sg, j) = sg; + } } + d->sd = alloc_percpu(struct sched_domain *); + if (!d->sd) + return sa_sd_storage; d->rd = alloc_rootdomain(); - if (!d->rd) { - printk(KERN_WARNING "Cannot alloc root domain\n"); + if (!d->rd) return sa_sd; - 
} return sa_rootdomain; } +/* + * NULL the sd_data elements we've used to build the sched_domain and + * sched_group structure so that the subsequent __free_domain_allocs() + * will not free the data we're using. + */ +static void claim_allocations(int cpu, struct sched_domain *sd) +{ + struct sd_data *sdd = sd->private; + struct sched_group *sg = sd->groups; + + WARN_ON_ONCE(*per_cpu_ptr(sdd->sd, cpu) != sd); + *per_cpu_ptr(sdd->sd, cpu) = NULL; + + if (cpu == cpumask_first(sched_group_cpus(sg))) { + WARN_ON_ONCE(*per_cpu_ptr(sdd->sg, cpu) != sg); + *per_cpu_ptr(sdd->sg, cpu) = NULL; + } +} + static struct sched_domain *__build_numa_sched_domains(struct s_data *d, const struct cpumask *cpu_map, struct sched_domain_attr *attr, int i) { @@ -7151,24 +7121,20 @@ static struct sched_domain *__build_numa_sched_domains(struct s_data *d, d->sd_allnodes = 0; if (cpumask_weight(cpu_map) > SD_NODES_PER_DOMAIN * cpumask_weight(d->nodemask)) { - sd = &per_cpu(allnodes_domains, i).sd; - SD_INIT(sd, ALLNODES); + sd = sd_init_ALLNODES(d, i); set_domain_attribute(sd, attr); cpumask_copy(sched_domain_span(sd), cpu_map); - cpu_to_allnodes_group(i, cpu_map, &sd->groups, d->tmpmask); d->sd_allnodes = 1; } parent = sd; - sd = &per_cpu(node_domains, i).sd; - SD_INIT(sd, NODE); + sd = sd_init_NODE(d, i); set_domain_attribute(sd, attr); sched_domain_node_span(cpu_to_node(i), sched_domain_span(sd)); sd->parent = parent; if (parent) parent->child = sd; cpumask_and(sched_domain_span(sd), sched_domain_span(sd), cpu_map); - cpu_to_node_group(i, cpu_map, &sd->groups, d->tmpmask); #endif return sd; } @@ -7178,14 +7144,12 @@ static struct sched_domain *__build_cpu_sched_domain(struct s_data *d, struct sched_domain *parent, int i) { struct sched_domain *sd; - sd = &per_cpu(phys_domains, i).sd; - SD_INIT(sd, CPU); + sd = sd_init_CPU(d, i); set_domain_attribute(sd, attr); cpumask_copy(sched_domain_span(sd), d->nodemask); sd->parent = parent; if (parent) parent->child = sd; - cpu_to_phys_group(i, cpu_map, &sd->groups, d->tmpmask); return sd; } @@ -7195,13 +7159,11 @@ static struct sched_domain *__build_book_sched_domain(struct s_data *d, { struct sched_domain *sd = parent; #ifdef CONFIG_SCHED_BOOK - sd = &per_cpu(book_domains, i).sd; - SD_INIT(sd, BOOK); + sd = sd_init_BOOK(d, i); set_domain_attribute(sd, attr); cpumask_and(sched_domain_span(sd), cpu_map, cpu_book_mask(i)); sd->parent = parent; parent->child = sd; - cpu_to_book_group(i, cpu_map, &sd->groups, d->tmpmask); #endif return sd; } @@ -7212,13 +7174,11 @@ static struct sched_domain *__build_mc_sched_domain(struct s_data *d, { struct sched_domain *sd = parent; #ifdef CONFIG_SCHED_MC - sd = &per_cpu(core_domains, i).sd; - SD_INIT(sd, MC); + sd = sd_init_MC(d, i); set_domain_attribute(sd, attr); cpumask_and(sched_domain_span(sd), cpu_map, cpu_coregroup_mask(i)); sd->parent = parent; parent->child = sd; - cpu_to_core_group(i, cpu_map, &sd->groups, d->tmpmask); #endif return sd; } @@ -7229,92 +7189,32 @@ static struct sched_domain *__build_smt_sched_domain(struct s_data *d, { struct sched_domain *sd = parent; #ifdef CONFIG_SCHED_SMT - sd = &per_cpu(cpu_domains, i).sd; - SD_INIT(sd, SIBLING); + sd = sd_init_SIBLING(d, i); set_domain_attribute(sd, attr); cpumask_and(sched_domain_span(sd), cpu_map, topology_thread_cpumask(i)); sd->parent = parent; parent->child = sd; - cpu_to_cpu_group(i, cpu_map, &sd->groups, d->tmpmask); #endif return sd; } -static void build_sched_groups(struct s_data *d, struct sched_domain *sd, - const struct cpumask *cpu_map, int cpu) -{ - switch 
(sd->level) { -#ifdef CONFIG_SCHED_SMT - case SD_LV_SIBLING: /* set up CPU (sibling) groups */ - if (cpu == cpumask_first(sched_domain_span(sd))) - init_sched_build_groups(sched_domain_span(sd), cpu_map, - &cpu_to_cpu_group, - d->send_covered, d->tmpmask); - break; -#endif -#ifdef CONFIG_SCHED_MC - case SD_LV_MC: /* set up multi-core groups */ - if (cpu == cpumask_first(sched_domain_span(sd))) - init_sched_build_groups(sched_domain_span(sd), cpu_map, - &cpu_to_core_group, - d->send_covered, d->tmpmask); - break; -#endif -#ifdef CONFIG_SCHED_BOOK - case SD_LV_BOOK: /* set up book groups */ - if (cpu == cpumask_first(sched_domain_span(sd))) - init_sched_build_groups(sched_domain_span(sd), cpu_map, - &cpu_to_book_group, - d->send_covered, d->tmpmask); - break; -#endif - case SD_LV_CPU: /* set up physical groups */ - if (cpu == cpumask_first(sched_domain_span(sd))) - init_sched_build_groups(sched_domain_span(sd), cpu_map, - &cpu_to_phys_group, - d->send_covered, d->tmpmask); - break; -#ifdef CONFIG_NUMA - case SD_LV_NODE: - if (cpu == cpumask_first(sched_domain_span(sd))) - init_sched_build_groups(sched_domain_span(sd), cpu_map, - &cpu_to_node_group, - d->send_covered, d->tmpmask); - - case SD_LV_ALLNODES: - if (cpu == cpumask_first(cpu_map)) - init_sched_build_groups(cpu_map, cpu_map, - &cpu_to_allnodes_group, - d->send_covered, d->tmpmask); - break; -#endif - default: - break; - } -} - /* * Build sched domains for a given set of cpus and attach the sched domains * to the individual cpus */ -static int __build_sched_domains(const struct cpumask *cpu_map, - struct sched_domain_attr *attr) +static int build_sched_domains(const struct cpumask *cpu_map, + struct sched_domain_attr *attr) { enum s_alloc alloc_state = sa_none; + struct sched_domain *sd; struct s_data d; - struct sched_domain *sd, *tmp; int i; -#ifdef CONFIG_NUMA - d.sd_allnodes = 0; -#endif alloc_state = __visit_domain_allocation_hell(&d, cpu_map); if (alloc_state != sa_rootdomain) goto error; - /* - * Set up domains for cpus specified by the cpu_map. - */ + /* Set up domains for cpus specified by the cpu_map. 
*/ for_each_cpu(i, cpu_map) { cpumask_and(d.nodemask, cpumask_of_node(cpu_to_node(i)), cpu_map); @@ -7326,10 +7226,19 @@ static int __build_sched_domains(const struct cpumask *cpu_map, sd = __build_smt_sched_domain(&d, cpu_map, attr, sd, i); *per_cpu_ptr(d.sd, i) = sd; + } + + /* Build the groups for the domains */ + for_each_cpu(i, cpu_map) { + for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) { + sd->span_weight = cpumask_weight(sched_domain_span(sd)); + get_group(i, sd->private, &sd->groups); + atomic_inc(&sd->groups->ref); - for (tmp = sd; tmp; tmp = tmp->parent) { - tmp->span_weight = cpumask_weight(sched_domain_span(tmp)); - build_sched_groups(&d, tmp, cpu_map, i); + if (i != cpumask_first(sched_domain_span(sd))) + continue; + + build_sched_groups(sd, d.send_covered); } } @@ -7338,18 +7247,21 @@ static int __build_sched_domains(const struct cpumask *cpu_map, if (!cpumask_test_cpu(i, cpu_map)) continue; - sd = *per_cpu_ptr(d.sd, i); - for (; sd; sd = sd->parent) + for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) { + claim_allocations(i, sd); init_sched_groups_power(i, sd); + } } /* Attach the domains */ + rcu_read_lock(); for_each_cpu(i, cpu_map) { sd = *per_cpu_ptr(d.sd, i); cpu_attach_domain(sd, d.rd, i); } + rcu_read_unlock(); - __free_domain_allocs(&d, sa_tmpmask, cpu_map); + __free_domain_allocs(&d, sa_sd, cpu_map); return 0; error: @@ -7357,11 +7269,6 @@ error: return -ENOMEM; } -static int build_sched_domains(const struct cpumask *cpu_map) -{ - return __build_sched_domains(cpu_map, NULL); -} - static cpumask_var_t *doms_cur; /* current sched domains */ static int ndoms_cur; /* number of sched domains in 'doms_cur' */ static struct sched_domain_attr *dattr_cur; @@ -7425,31 +7332,24 @@ static int init_sched_domains(const struct cpumask *cpu_map) doms_cur = &fallback_doms; cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map); dattr_cur = NULL; - err = build_sched_domains(doms_cur[0]); + err = build_sched_domains(doms_cur[0], NULL); register_sched_domain_sysctl(); return err; } -static void destroy_sched_domains(const struct cpumask *cpu_map, - struct cpumask *tmpmask) -{ -} - /* * Detach sched domains from a group of cpus specified in cpu_map * These cpus will now be attached to the NULL domain */ static void detach_destroy_domains(const struct cpumask *cpu_map) { - /* Save because hotplug lock held. */ - static DECLARE_BITMAP(tmpmask, CONFIG_NR_CPUS); int i; + rcu_read_lock(); for_each_cpu(i, cpu_map) cpu_attach_domain(NULL, &def_root_domain, i); - synchronize_sched(); - destroy_sched_domains(cpu_map, to_cpumask(tmpmask)); + rcu_read_unlock(); } /* handle null as "default" */ @@ -7538,8 +7438,7 @@ match1: goto match2; } /* no match - add a new doms_new */ - __build_sched_domains(doms_new[i], - dattr_new ? dattr_new + i : NULL); + build_sched_domains(doms_new[i], dattr_new ? dattr_new + i : NULL); match2: ; } diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 4ee50f0af8d1..4a8ac7c2a18e 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1622,6 +1622,7 @@ static int select_idle_sibling(struct task_struct *p, int target) /* * Otherwise, iterate the domains and find an elegible idle cpu. 
*/ + rcu_read_lock(); for_each_domain(target, sd) { if (!(sd->flags & SD_SHARE_PKG_RESOURCES)) break; @@ -1641,6 +1642,7 @@ static int select_idle_sibling(struct task_struct *p, int target) cpumask_test_cpu(prev_cpu, sched_domain_span(sd))) break; } + rcu_read_unlock(); return target; } @@ -1673,6 +1675,7 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_ new_cpu = prev_cpu; } + rcu_read_lock(); for_each_domain(cpu, tmp) { if (!(tmp->flags & SD_LOAD_BALANCE)) continue; @@ -1723,9 +1726,10 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_ if (affine_sd) { if (cpu == prev_cpu || wake_affine(affine_sd, p, sync)) - return select_idle_sibling(p, cpu); - else - return select_idle_sibling(p, prev_cpu); + prev_cpu = cpu; + + new_cpu = select_idle_sibling(p, prev_cpu); + goto unlock; } while (sd) { @@ -1766,6 +1770,8 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_ } /* while loop will break here if sd == NULL */ } +unlock: + rcu_read_unlock(); return new_cpu; } @@ -3462,6 +3468,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq) raw_spin_unlock(&this_rq->lock); update_shares(this_cpu); + rcu_read_lock(); for_each_domain(this_cpu, sd) { unsigned long interval; int balance = 1; @@ -3483,6 +3490,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq) break; } } + rcu_read_unlock(); raw_spin_lock(&this_rq->lock); @@ -3531,6 +3539,7 @@ static int active_load_balance_cpu_stop(void *data) double_lock_balance(busiest_rq, target_rq); /* Search for an sd spanning us and the target CPU. */ + rcu_read_lock(); for_each_domain(target_cpu, sd) { if ((sd->flags & SD_LOAD_BALANCE) && cpumask_test_cpu(busiest_cpu, sched_domain_span(sd))) @@ -3546,6 +3555,7 @@ static int active_load_balance_cpu_stop(void *data) else schedstat_inc(sd, alb_failed); } + rcu_read_unlock(); double_unlock_balance(busiest_rq, target_rq); out_unlock: busiest_rq->active_balance = 0; @@ -3672,6 +3682,7 @@ static int find_new_ilb(int cpu) { struct sched_domain *sd; struct sched_group *ilb_group; + int ilb = nr_cpu_ids; /* * Have idle load balancer selection from semi-idle packages only @@ -3687,20 +3698,25 @@ static int find_new_ilb(int cpu) if (cpumask_weight(nohz.idle_cpus_mask) < 2) goto out_done; + rcu_read_lock(); for_each_flag_domain(cpu, sd, SD_POWERSAVINGS_BALANCE) { ilb_group = sd->groups; do { - if (is_semi_idle_group(ilb_group)) - return cpumask_first(nohz.grp_idle_mask); + if (is_semi_idle_group(ilb_group)) { + ilb = cpumask_first(nohz.grp_idle_mask); + goto unlock; + } ilb_group = ilb_group->next; } while (ilb_group != sd->groups); } +unlock: + rcu_read_unlock(); out_done: - return nr_cpu_ids; + return ilb; } #else /* (CONFIG_SCHED_MC || CONFIG_SCHED_SMT) */ static inline int find_new_ilb(int call_cpu) @@ -3845,6 +3861,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) update_shares(cpu); + rcu_read_lock(); for_each_domain(cpu, sd) { if (!(sd->flags & SD_LOAD_BALANCE)) continue; @@ -3890,6 +3907,7 @@ out: if (!balance) break; } + rcu_read_unlock(); /* * next_balance will be updated only when there is a need. -- cgit v1.2.3-71-gd317 From 3859173d43658d51a749bc0201b943922577d39c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 7 Apr 2011 14:09:53 +0200 Subject: sched: Reduce some allocation pressure Since we now allocate SD_LV_MAX * nr_cpu_ids sched_domain/sched_group structures when rebuilding the scheduler toplogy it might make sense to shrink that depending on the CONFIG_ options. 
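As a rough illustration of the saving (the enum members come from the hunk below; the CPU count is an arbitrary example, not a figure from the changelog):

/*
 * With all options enabled the per-rebuild allocation covers
 *   SD_LV_NONE, SIBLING, MC, BOOK, CPU, NODE, ALLNODES  =>  SD_LV_MAX == 7
 * whereas a !SMT !MC !BOOK !NUMA build keeps only
 *   SD_LV_NONE, CPU                                     =>  SD_LV_MAX == 2
 * so on, say, a 64-CPU configuration a rebuild drops from 7 * 64 to
 * 2 * 64 sched_domain (and sched_group) slots.
 */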
This is only needed until we get rid of SD_LV_* alltogether and provide a full dynamic topology interface. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Nick Piggin Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/20110407122942.406226449@chello.nl Signed-off-by: Ingo Molnar --- include/linux/sched.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 020b79d6c486..5a9168b01db8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -897,12 +897,20 @@ static inline struct cpumask *sched_group_cpus(struct sched_group *sg) enum sched_domain_level { SD_LV_NONE = 0, +#ifdef CONFIG_SCHED_SMT SD_LV_SIBLING, +#endif +#ifdef CONFIG_SCHED_MC SD_LV_MC, +#endif +#ifdef CONFIG_SCHED_BOOK SD_LV_BOOK, +#endif SD_LV_CPU, +#ifdef CONFIG_NUMA SD_LV_NODE, SD_LV_ALLNODES, +#endif SD_LV_MAX }; -- cgit v1.2.3-71-gd317 From 7dd04b730749f957c116f363524fd622b05e5141 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 7 Apr 2011 14:09:56 +0200 Subject: sched: Remove some dead code Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Nick Piggin Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/20110407122942.553814623@chello.nl Signed-off-by: Ingo Molnar --- include/linux/sched.h | 6 ------ kernel/sched.c | 16 ---------------- 2 files changed, 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 5a9168b01db8..09d9e02f2b61 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -883,9 +883,6 @@ struct sched_group { * NOTE: this field is variable length. (Allocated dynamically * by attaching extra space to the end of the structure, * depending on how many CPUs the kernel has booted up with) - * - * It is also be embedded into static data structures at build - * time. (See 'struct static_sched_group' in kernel/sched.c) */ unsigned long cpumask[0]; }; @@ -994,9 +991,6 @@ struct sched_domain { * NOTE: this field is variable length. (Allocated dynamically * by attaching extra space to the end of the structure, * depending on how many CPUs the kernel has booted up with) - * - * It is also be embedded into static data structures at build - * time. (See 'struct static_sched_domain' in kernel/sched.c) */ unsigned long span[0]; }; diff --git a/kernel/sched.c b/kernel/sched.c index f4d3a624c50a..5ec685ce516a 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6816,22 +6816,6 @@ static void sched_domain_node_span(int node, struct cpumask *span) int sched_smt_power_savings = 0, sched_mc_power_savings = 0; -/* - * The cpus mask in sched_group and sched_domain hangs off the end. - * - * ( See the the comments in include/linux/sched.h:struct sched_group - * and struct sched_domain. ) - */ -struct static_sched_group { - struct sched_group sg; - DECLARE_BITMAP(cpus, CONFIG_NR_CPUS); -}; - -struct static_sched_domain { - struct sched_domain sd; - DECLARE_BITMAP(span, CONFIG_NR_CPUS); -}; - struct sd_data { struct sched_domain **__percpu sd; struct sched_group **__percpu sg; -- cgit v1.2.3-71-gd317 From 60495e7760d8ee364695006af37309b0755e0e17 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 7 Apr 2011 14:10:04 +0200 Subject: sched: Dynamic sched_domain::level Remove the SD_LV_ enum and use dynamic level assignments. 
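To sketch what dynamic level assignment means in practice (the topology below is hypothetical; the mechanism is the child->level + 1 chain visible in the build_sched_domain() hunk that follows):

/*
 * Domains are built bottom-up, so on an SMT + multi-core + NUMA machine:
 *   sibling (SMT) domain : level 0   (no child)
 *   MC/core domain       : level 1   (child->level + 1)
 *   CPU/socket domain    : level 2
 *   NUMA node domain     : level 3   =>  sched_domain_level_max == 3
 * and cpuset's relax_domain_level is now validated against
 * sched_domain_level_max instead of the removed SD_LV_MAX.
 */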
Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Nick Piggin Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/20110407122942.969433965@chello.nl Signed-off-by: Ingo Molnar --- include/linux/sched.h | 23 +++-------------------- kernel/cpuset.c | 2 +- kernel/sched.c | 9 ++++++--- 3 files changed, 10 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 09d9e02f2b61..e43e5b0ab0b5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -892,25 +892,6 @@ static inline struct cpumask *sched_group_cpus(struct sched_group *sg) return to_cpumask(sg->cpumask); } -enum sched_domain_level { - SD_LV_NONE = 0, -#ifdef CONFIG_SCHED_SMT - SD_LV_SIBLING, -#endif -#ifdef CONFIG_SCHED_MC - SD_LV_MC, -#endif -#ifdef CONFIG_SCHED_BOOK - SD_LV_BOOK, -#endif - SD_LV_CPU, -#ifdef CONFIG_NUMA - SD_LV_NODE, - SD_LV_ALLNODES, -#endif - SD_LV_MAX -}; - struct sched_domain_attr { int relax_domain_level; }; @@ -919,6 +900,8 @@ struct sched_domain_attr { .relax_domain_level = -1, \ } +extern int sched_domain_level_max; + struct sched_domain { /* These fields must be setup */ struct sched_domain *parent; /* top domain must be null terminated */ @@ -936,7 +919,7 @@ struct sched_domain { unsigned int forkexec_idx; unsigned int smt_gain; int flags; /* See SD_* */ - enum sched_domain_level level; + int level; /* Runtime fields. */ unsigned long last_balance; /* init to jiffies. units in jiffies */ diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 33eee16addb8..2bb8c2e98fff 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1159,7 +1159,7 @@ int current_cpuset_is_being_rebound(void) static int update_relax_domain_level(struct cpuset *cs, s64 val) { #ifdef CONFIG_SMP - if (val < -1 || val >= SD_LV_MAX) + if (val < -1 || val >= sched_domain_level_max) return -EINVAL; #endif diff --git a/kernel/sched.c b/kernel/sched.c index 3231e1997426..506cb8147c70 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6966,7 +6966,6 @@ sd_init_##type(struct sched_domain_topology_level *tl, int cpu) \ { \ struct sched_domain *sd = *per_cpu_ptr(tl->data.sd, cpu); \ *sd = SD_##type##_INIT; \ - sd->level = SD_LV_##type; \ SD_INIT_NAME(sd, type); \ sd->private = &tl->data; \ return sd; \ @@ -6988,13 +6987,14 @@ SD_INIT_FUNC(CPU) #endif static int default_relax_domain_level = -1; +int sched_domain_level_max; static int __init setup_relax_domain_level(char *str) { unsigned long val; val = simple_strtoul(str, NULL, 0); - if (val < SD_LV_MAX) + if (val < sched_domain_level_max) default_relax_domain_level = val; return 1; @@ -7173,8 +7173,11 @@ struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl, set_domain_attribute(sd, attr); cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu)); - if (child) + if (child) { + sd->level = child->level + 1; + sched_domain_level_max = max(sched_domain_level_max, sd->level); child->parent = sd; + } sd->child = child; return sd; -- cgit v1.2.3-71-gd317 From 127fe533ae56d7f4e7b5011869870982eba25723 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 8 Apr 2011 18:01:59 +0000 Subject: v3 ethtool: add ntuple flow specifier data to network flow classifier This change is meant to add an ntuple data extensions to the rx network flow classification specifiers. The idea is to allow ntuple to be displayed via the network flow classification interface. The first patch had some left over stuff from the original flow extension flags I had added. That bit is removed in this patch. 
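As an aside, the intended use of the extension from user space looks roughly like the sketch below (illustrative values only; fill_example_rule() is a hypothetical helper, and the rule would typically be handed to the kernel via the ETHTOOL_SRXCLSRLINS ioctl):

#include <string.h>
#include <arpa/inet.h>
#include <linux/ethtool.h>

/*
 * Hypothetical helper: build a rule matching TCP/IPv4 traffic to
 * 192.168.0.1 on VLAN 100 and steer it to RX ring 1. Setting FLOW_EXT in
 * flow_type tells the driver that h_ext/m_ext carry valid match data;
 * mask bits that are set mark the field bits that must match.
 */
static void fill_example_rule(struct ethtool_rx_flow_spec *fs)
{
	memset(fs, 0, sizeof(*fs));
	fs->flow_type = TCP_V4_FLOW | FLOW_EXT;
	fs->h_u.tcp_ip4_spec.ip4dst = htonl(0xc0a80001);
	fs->m_u.tcp_ip4_spec.ip4dst = htonl(0xffffffff);
	fs->h_ext.vlan_tci = htons(100);
	fs->m_ext.vlan_tci = htons(0x0fff);	/* compare the VID bits only */
	fs->ring_cookie = 1;
	fs->location = 0;			/* first filter slot */
}
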
The second had some left over comments that stated we ignored bits in the masks when we actually match them. This work is based on input from Ben Hutchings. Signed-off-by: Alexander Duyck Reviewed-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/ethtool.h | 53 +++++++++++++++++++++++++++++-------------------- net/socket.c | 14 ++++++------- 2 files changed, 39 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index c04d1316d221..c7eff13d2f34 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -380,27 +380,42 @@ struct ethtool_usrip4_spec { __u8 proto; }; +union ethtool_flow_union { + struct ethtool_tcpip4_spec tcp_ip4_spec; + struct ethtool_tcpip4_spec udp_ip4_spec; + struct ethtool_tcpip4_spec sctp_ip4_spec; + struct ethtool_ah_espip4_spec ah_ip4_spec; + struct ethtool_ah_espip4_spec esp_ip4_spec; + struct ethtool_usrip4_spec usr_ip4_spec; + struct ethhdr ether_spec; + __u8 hdata[60]; +}; + +struct ethtool_flow_ext { + __be16 vlan_etype; + __be16 vlan_tci; + __be32 data[2]; +}; + /** * struct ethtool_rx_flow_spec - specification for RX flow filter * @flow_type: Type of match to perform, e.g. %TCP_V4_FLOW * @h_u: Flow fields to match (dependent on @flow_type) - * @m_u: Masks for flow field bits to be ignored + * @h_ext: Additional fields to match + * @m_u: Masks for flow field bits to be matched + * @m_ext: Masks for additional field bits to be matched + * Note, all additional fields must be ignored unless @flow_type + * includes the %FLOW_EXT flag. * @ring_cookie: RX ring/queue index to deliver to, or %RX_CLS_FLOW_DISC * if packets should be discarded * @location: Index of filter in hardware table */ struct ethtool_rx_flow_spec { __u32 flow_type; - union { - struct ethtool_tcpip4_spec tcp_ip4_spec; - struct ethtool_tcpip4_spec udp_ip4_spec; - struct ethtool_tcpip4_spec sctp_ip4_spec; - struct ethtool_ah_espip4_spec ah_ip4_spec; - struct ethtool_ah_espip4_spec esp_ip4_spec; - struct ethtool_usrip4_spec usr_ip4_spec; - struct ethhdr ether_spec; - __u8 hdata[72]; - } h_u, m_u; + union ethtool_flow_union h_u; + struct ethtool_flow_ext h_ext; + union ethtool_flow_union m_u; + struct ethtool_flow_ext m_ext; __u64 ring_cookie; __u32 location; }; @@ -458,16 +473,10 @@ struct ethtool_rxnfc { struct compat_ethtool_rx_flow_spec { u32 flow_type; - union { - struct ethtool_tcpip4_spec tcp_ip4_spec; - struct ethtool_tcpip4_spec udp_ip4_spec; - struct ethtool_tcpip4_spec sctp_ip4_spec; - struct ethtool_ah_espip4_spec ah_ip4_spec; - struct ethtool_ah_espip4_spec esp_ip4_spec; - struct ethtool_usrip4_spec usr_ip4_spec; - struct ethhdr ether_spec; - u8 hdata[72]; - } h_u, m_u; + union ethtool_flow_union h_u; + struct ethtool_flow_ext h_ext; + union ethtool_flow_union m_u; + struct ethtool_flow_ext m_ext; compat_u64 ring_cookie; u32 location; }; @@ -1072,6 +1081,8 @@ struct ethtool_ops { #define IPV4_FLOW 0x10 /* hash only */ #define IPV6_FLOW 0x11 /* hash only */ #define ETHER_FLOW 0x12 /* spec only (ether_spec) */ +/* Flag to enable additional fields in struct ethtool_rx_flow_spec */ +#define FLOW_EXT 0x80000000 /* L3-L4 network traffic flow hash options */ #define RXH_L2DA (1 << 1) diff --git a/net/socket.c b/net/socket.c index 5212447c86e7..575c84f2af19 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2643,13 +2643,13 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) return -EFAULT; if (convert_in) { - /* We expect there to be holes between fs.m_u and + /* We expect 
there to be holes between fs.m_ext and * fs.ring_cookie and at the end of fs, but nowhere else. */ - BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_u) + - sizeof(compat_rxnfc->fs.m_u) != - offsetof(struct ethtool_rxnfc, fs.m_u) + - sizeof(rxnfc->fs.m_u)); + BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) + + sizeof(compat_rxnfc->fs.m_ext) != + offsetof(struct ethtool_rxnfc, fs.m_ext) + + sizeof(rxnfc->fs.m_ext)); BUILD_BUG_ON( offsetof(struct compat_ethtool_rxnfc, fs.location) - offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) != @@ -2657,7 +2657,7 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) offsetof(struct ethtool_rxnfc, fs.ring_cookie)); if (copy_in_user(rxnfc, compat_rxnfc, - (void *)(&rxnfc->fs.m_u + 1) - + (void *)(&rxnfc->fs.m_ext + 1) - (void *)rxnfc) || copy_in_user(&rxnfc->fs.ring_cookie, &compat_rxnfc->fs.ring_cookie, @@ -2674,7 +2674,7 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) if (convert_out) { if (copy_in_user(compat_rxnfc, rxnfc, - (const void *)(&rxnfc->fs.m_u + 1) - + (const void *)(&rxnfc->fs.m_ext + 1) - (const void *)rxnfc) || copy_in_user(&compat_rxnfc->fs.ring_cookie, &rxnfc->fs.ring_cookie, -- cgit v1.2.3-71-gd317 From 1f112cee07b314e244ee9e71d9c1e6950dc13327 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 11 Apr 2011 22:54:42 +0200 Subject: PM / Hibernate: Introduce CONFIG_HIBERNATE_CALLBACKS Xen save/restore is going to use hibernate device callbacks for quiescing devices and putting them back to normal operations and it would need to select CONFIG_HIBERNATION for this purpose. However, that also would cause the hibernate interfaces for user space to be enabled, which might confuse user space, because the Xen kernels don't support hibernation. Moreover, it would be wasteful, as it would make the Xen kernels include a substantial amount of code that they would never use. To address this issue introduce new power management Kconfig option CONFIG_HIBERNATE_CALLBACKS, such that it will only select the code that is necessary for the hibernate device callbacks to work and make CONFIG_HIBERNATION select it. Then, Xen save/restore will be able to select CONFIG_HIBERNATE_CALLBACKS without dragging the entire hibernate code along with it. Signed-off-by: Rafael J. 
Wysocki Tested-by: Shriram Rajagopalan --- arch/powerpc/kernel/ibmebus.c | 6 +++--- drivers/amba/bus.c | 6 +++--- drivers/base/platform.c | 6 +++--- drivers/base/power/main.c | 8 ++++---- drivers/pci/pci-driver.c | 6 +++--- drivers/xen/manage.c | 6 +++--- include/linux/suspend.h | 11 +++-------- kernel/power/Kconfig | 6 +++++- 8 files changed, 27 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c index c00d4ca1ee15..28581f1ad2c0 100644 --- a/arch/powerpc/kernel/ibmebus.c +++ b/arch/powerpc/kernel/ibmebus.c @@ -527,7 +527,7 @@ static int ibmebus_bus_pm_resume_noirq(struct device *dev) #endif /* !CONFIG_SUSPEND */ -#ifdef CONFIG_HIBERNATION +#ifdef CONFIG_HIBERNATE_CALLBACKS static int ibmebus_bus_pm_freeze(struct device *dev) { @@ -665,7 +665,7 @@ static int ibmebus_bus_pm_restore_noirq(struct device *dev) return ret; } -#else /* !CONFIG_HIBERNATION */ +#else /* !CONFIG_HIBERNATE_CALLBACKS */ #define ibmebus_bus_pm_freeze NULL #define ibmebus_bus_pm_thaw NULL @@ -676,7 +676,7 @@ static int ibmebus_bus_pm_restore_noirq(struct device *dev) #define ibmebus_bus_pm_poweroff_noirq NULL #define ibmebus_bus_pm_restore_noirq NULL -#endif /* !CONFIG_HIBERNATION */ +#endif /* !CONFIG_HIBERNATE_CALLBACKS */ static struct dev_pm_ops ibmebus_bus_dev_pm_ops = { .prepare = ibmebus_bus_pm_prepare, diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c index 821040503154..7025593a58c8 100644 --- a/drivers/amba/bus.c +++ b/drivers/amba/bus.c @@ -214,7 +214,7 @@ static int amba_pm_resume_noirq(struct device *dev) #endif /* !CONFIG_SUSPEND */ -#ifdef CONFIG_HIBERNATION +#ifdef CONFIG_HIBERNATE_CALLBACKS static int amba_pm_freeze(struct device *dev) { @@ -352,7 +352,7 @@ static int amba_pm_restore_noirq(struct device *dev) return ret; } -#else /* !CONFIG_HIBERNATION */ +#else /* !CONFIG_HIBERNATE_CALLBACKS */ #define amba_pm_freeze NULL #define amba_pm_thaw NULL @@ -363,7 +363,7 @@ static int amba_pm_restore_noirq(struct device *dev) #define amba_pm_poweroff_noirq NULL #define amba_pm_restore_noirq NULL -#endif /* !CONFIG_HIBERNATION */ +#endif /* !CONFIG_HIBERNATE_CALLBACKS */ #ifdef CONFIG_PM diff --git a/drivers/base/platform.c b/drivers/base/platform.c index f051cfff18af..92792313d24a 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -771,7 +771,7 @@ int __weak platform_pm_resume_noirq(struct device *dev) #endif /* !CONFIG_SUSPEND */ -#ifdef CONFIG_HIBERNATION +#ifdef CONFIG_HIBERNATE_CALLBACKS static int platform_pm_freeze(struct device *dev) { @@ -909,7 +909,7 @@ static int platform_pm_restore_noirq(struct device *dev) return ret; } -#else /* !CONFIG_HIBERNATION */ +#else /* !CONFIG_HIBERNATE_CALLBACKS */ #define platform_pm_freeze NULL #define platform_pm_thaw NULL @@ -920,7 +920,7 @@ static int platform_pm_restore_noirq(struct device *dev) #define platform_pm_poweroff_noirq NULL #define platform_pm_restore_noirq NULL -#endif /* !CONFIG_HIBERNATION */ +#endif /* !CONFIG_HIBERNATE_CALLBACKS */ #ifdef CONFIG_PM_RUNTIME diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 052dc53eef38..fbc5b6e7c591 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -233,7 +233,7 @@ static int pm_op(struct device *dev, } break; #endif /* CONFIG_SUSPEND */ -#ifdef CONFIG_HIBERNATION +#ifdef CONFIG_HIBERNATE_CALLBACKS case PM_EVENT_FREEZE: case PM_EVENT_QUIESCE: if (ops->freeze) { @@ -260,7 +260,7 @@ static int pm_op(struct device *dev, suspend_report_result(ops->restore, error); } 
break; -#endif /* CONFIG_HIBERNATION */ +#endif /* CONFIG_HIBERNATE_CALLBACKS */ default: error = -EINVAL; } @@ -308,7 +308,7 @@ static int pm_noirq_op(struct device *dev, } break; #endif /* CONFIG_SUSPEND */ -#ifdef CONFIG_HIBERNATION +#ifdef CONFIG_HIBERNATE_CALLBACKS case PM_EVENT_FREEZE: case PM_EVENT_QUIESCE: if (ops->freeze_noirq) { @@ -335,7 +335,7 @@ static int pm_noirq_op(struct device *dev, suspend_report_result(ops->restore_noirq, error); } break; -#endif /* CONFIG_HIBERNATION */ +#endif /* CONFIG_HIBERNATE_CALLBACKS */ default: error = -EINVAL; } diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index d86ea8b01137..135df164a4c1 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -781,7 +781,7 @@ static int pci_pm_resume(struct device *dev) #endif /* !CONFIG_SUSPEND */ -#ifdef CONFIG_HIBERNATION +#ifdef CONFIG_HIBERNATE_CALLBACKS static int pci_pm_freeze(struct device *dev) { @@ -970,7 +970,7 @@ static int pci_pm_restore(struct device *dev) return error; } -#else /* !CONFIG_HIBERNATION */ +#else /* !CONFIG_HIBERNATE_CALLBACKS */ #define pci_pm_freeze NULL #define pci_pm_freeze_noirq NULL @@ -981,7 +981,7 @@ static int pci_pm_restore(struct device *dev) #define pci_pm_restore NULL #define pci_pm_restore_noirq NULL -#endif /* !CONFIG_HIBERNATION */ +#endif /* !CONFIG_HIBERNATE_CALLBACKS */ #ifdef CONFIG_PM_RUNTIME diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index 95143dd6904d..1ac94125bf93 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -61,7 +61,7 @@ static void xen_post_suspend(int cancelled) xen_mm_unpin_all(); } -#ifdef CONFIG_HIBERNATION +#ifdef CONFIG_HIBERNATE_CALLBACKS static int xen_suspend(void *data) { struct suspend_info *si = data; @@ -173,7 +173,7 @@ out: #endif shutting_down = SHUTDOWN_INVALID; } -#endif /* CONFIG_HIBERNATION */ +#endif /* CONFIG_HIBERNATE_CALLBACKS */ struct shutdown_handler { const char *command; @@ -202,7 +202,7 @@ static void shutdown_handler(struct xenbus_watch *watch, { "poweroff", do_poweroff }, { "halt", do_poweroff }, { "reboot", do_reboot }, -#ifdef CONFIG_HIBERNATION +#ifdef CONFIG_HIBERNATE_CALLBACKS { "suspend", do_suspend }, #endif {NULL, NULL}, diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 5a89e3612875..083ffea7ba18 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -249,6 +249,8 @@ extern void hibernation_set_ops(const struct platform_hibernation_ops *ops); extern int hibernate(void); extern bool system_entering_hibernation(void); #else /* CONFIG_HIBERNATION */ +static inline void register_nosave_region(unsigned long b, unsigned long e) {} +static inline void register_nosave_region_late(unsigned long b, unsigned long e) {} static inline int swsusp_page_is_forbidden(struct page *p) { return 0; } static inline void swsusp_set_page_free(struct page *p) {} static inline void swsusp_unset_page_free(struct page *p) {} @@ -297,14 +299,7 @@ static inline bool pm_wakeup_pending(void) { return false; } extern struct mutex pm_mutex; -#ifndef CONFIG_HIBERNATION -static inline void register_nosave_region(unsigned long b, unsigned long e) -{ -} -static inline void register_nosave_region_late(unsigned long b, unsigned long e) -{ -} - +#ifndef CONFIG_HIBERNATE_CALLBACKS static inline void lock_system_sleep(void) {} static inline void unlock_system_sleep(void) {} diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 4603f08dc47b..049791468d37 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -18,9 +18,13 @@ config 
SUSPEND_FREEZER Turning OFF this setting is NOT recommended! If in doubt, say Y. +config HIBERNATE_CALLBACKS + bool + config HIBERNATION bool "Hibernation (aka 'suspend to disk')" depends on SWAP && ARCH_HIBERNATION_POSSIBLE + select HIBERNATE_CALLBACKS select LZO_COMPRESS select LZO_DECOMPRESS ---help--- @@ -85,7 +89,7 @@ config PM_STD_PARTITION config PM_SLEEP def_bool y - depends on SUSPEND || HIBERNATION || XEN_SAVE_RESTORE + depends on SUSPEND || HIBERNATE_CALLBACKS || XEN_SAVE_RESTORE config PM_SLEEP_SMP def_bool y -- cgit v1.2.3-71-gd317 From f75664570d8b75469cc468f23c2b27220984983b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 12 Apr 2011 10:17:31 +0200 Subject: block: add callback function for unplug notification MD would like to know when a queue is unplugged, so it can flush it's bitmap writes. Add such a callback. Signed-off-by: Jens Axboe --- block/blk-core.c | 3 +++ block/blk-settings.c | 16 ++++++++++++++++ include/linux/blkdev.h | 3 +++ 3 files changed, 22 insertions(+) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index 0c0ea10e61ea..76850fc9cf23 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2672,6 +2672,9 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth) { trace_block_unplug_io(q, depth); __blk_run_queue(q, false); + + if (q->unplugged_fn) + q->unplugged_fn(q); } static void flush_plug_list(struct blk_plug *plug) diff --git a/block/blk-settings.c b/block/blk-settings.c index 1fa769293597..eb949045bb12 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -790,6 +790,22 @@ void blk_queue_flush(struct request_queue *q, unsigned int flush) } EXPORT_SYMBOL_GPL(blk_queue_flush); +/** + * blk_queue_unplugged - register a callback for an unplug event + * @q: the request queue for the device + * @fn: the function to call + * + * Some stacked drivers may need to know when IO is dispatched on an + * unplug event. By registrering a callback here, they will be notified + * when someone flushes their on-stack queue plug. The function will be + * called with the queue lock held. 
+ */ +void blk_queue_unplugged(struct request_queue *q, unplugged_fn *fn) +{ + q->unplugged_fn = fn; +} +EXPORT_SYMBOL(blk_queue_unplugged); + static int __init blk_settings_init(void) { blk_max_low_pfn = max_low_pfn - 1; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 32176cc8e715..c07ffafac5d4 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -196,6 +196,7 @@ typedef void (request_fn_proc) (struct request_queue *q); typedef int (make_request_fn) (struct request_queue *q, struct bio *bio); typedef int (prep_rq_fn) (struct request_queue *, struct request *); typedef void (unprep_rq_fn) (struct request_queue *, struct request *); +typedef void (unplugged_fn) (struct request_queue *); struct bio_vec; struct bvec_merge_data { @@ -283,6 +284,7 @@ struct request_queue rq_timed_out_fn *rq_timed_out_fn; dma_drain_needed_fn *dma_drain_needed; lld_busy_fn *lld_busy_fn; + unplugged_fn *unplugged_fn; /* * Dispatch queue sorting @@ -841,6 +843,7 @@ extern void blk_queue_dma_alignment(struct request_queue *, int); extern void blk_queue_update_dma_alignment(struct request_queue *, int); extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); +extern void blk_queue_unplugged(struct request_queue *, unplugged_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern void blk_queue_flush(struct request_queue *q, unsigned int flush); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); -- cgit v1.2.3-71-gd317 From e710d7d5a9cab1041b7a3cf9e655b75d92786857 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Fri, 8 Apr 2011 00:43:01 +0200 Subject: mfd: Fetch cell pointer from platform_device->mfd_cell In order for MFD drivers to fetch their cell pointer but also their platform data one, an mfd cell pointer is added to the platform_device structure. That allows all MFD sub devices drivers to be MFD agnostic, unless they really need to access their MFD cell data. Most of them don't, especially the ones for IPs used by both MFD and non MFD SoCs. 
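A sub-device driver then typically ends up with something like the sketch below (example_probe() and struct example_pdata are hypothetical; the point is that the same probe path works for devices created by the MFD core and for plain platform devices registered by board code):

#include <linux/platform_device.h>
#include <linux/mfd/core.h>

/*
 * Hypothetical sub-device probe: mfd_get_cell() returns the cell for
 * MFD-created devices and NULL otherwise, while mfd_get_data() falls back
 * to dev.platform_data, so the driver stays MFD-agnostic.
 */
static int example_probe(struct platform_device *pdev)
{
	const struct mfd_cell *cell = mfd_get_cell(pdev);
	struct example_pdata *pdata = mfd_get_data(pdev);

	if (cell)
		dev_info(&pdev->dev, "probed as MFD cell %s\n", cell->name);

	return pdata ? 0 : -ENODEV;
}
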
Cc: Grant Likely Acked-by: Greg KH Signed-off-by: Samuel Ortiz --- drivers/base/platform.c | 1 + drivers/mfd/mfd-core.c | 16 ++++++++++++++-- include/linux/mfd/core.h | 13 +++++++++++-- include/linux/platform_device.h | 5 +++++ 4 files changed, 31 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/platform.c b/drivers/base/platform.c index f051cfff18af..6c3a2bdc527a 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -149,6 +149,7 @@ static void platform_device_release(struct device *dev) of_device_node_put(&pa->pdev.dev); kfree(pa->pdev.dev.platform_data); + kfree(pa->pdev.mfd_cell); kfree(pa->pdev.resource); kfree(pa); } diff --git a/drivers/mfd/mfd-core.c b/drivers/mfd/mfd-core.c index d01574d98870..f4c8c844b913 100644 --- a/drivers/mfd/mfd-core.c +++ b/drivers/mfd/mfd-core.c @@ -55,6 +55,19 @@ int mfd_cell_disable(struct platform_device *pdev) } EXPORT_SYMBOL(mfd_cell_disable); +static int mfd_platform_add_cell(struct platform_device *pdev, + const struct mfd_cell *cell) +{ + if (!cell) + return 0; + + pdev->mfd_cell = kmemdup(cell, sizeof(*cell), GFP_KERNEL); + if (!pdev->mfd_cell) + return -ENOMEM; + + return 0; +} + static int mfd_add_device(struct device *parent, int id, const struct mfd_cell *cell, struct resource *mem_base, @@ -75,7 +88,7 @@ static int mfd_add_device(struct device *parent, int id, pdev->dev.parent = parent; - ret = platform_device_add_data(pdev, cell, sizeof(*cell)); + ret = mfd_platform_add_cell(pdev, cell); if (ret) goto fail_res; @@ -123,7 +136,6 @@ static int mfd_add_device(struct device *parent, int id, return 0; -/* platform_device_del(pdev); */ fail_res: kfree(res); fail_device: diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h index ad1b19aa6508..aef23309a742 100644 --- a/include/linux/mfd/core.h +++ b/include/linux/mfd/core.h @@ -86,16 +86,25 @@ extern int mfd_clone_cell(const char *cell, const char **clones, */ static inline const struct mfd_cell *mfd_get_cell(struct platform_device *pdev) { - return pdev->dev.platform_data; + return pdev->mfd_cell; } /* * Given a platform device that's been created by mfd_add_devices(), fetch * the .mfd_data entry from the mfd_cell that created it. + * Otherwise just return the platform_data pointer. + * This maintains compatibility with platform drivers whose devices aren't + * created by the mfd layer, and expect platform_data to contain what would've + * otherwise been in mfd_data. 
*/ static inline void *mfd_get_data(struct platform_device *pdev) { - return mfd_get_cell(pdev)->mfd_data; + const struct mfd_cell *cell = mfd_get_cell(pdev); + + if (cell) + return cell->mfd_data; + else + return pdev->dev.platform_data; } extern int mfd_add_devices(struct device *parent, int id, diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index d96db9825708..744942c95fec 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -14,6 +14,8 @@ #include #include +struct mfd_cell; + struct platform_device { const char * name; int id; @@ -23,6 +25,9 @@ struct platform_device { const struct platform_device_id *id_entry; + /* MFD cell pointer */ + struct mfd_cell *mfd_cell; + /* arch specific additions */ struct pdev_archdata archdata; }; -- cgit v1.2.3-71-gd317 From be85bccaa5aa5a11dcaf85f9e945ffefd253f631 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 12 Apr 2011 13:35:56 -0700 Subject: Revert "vfs: Export file system uuid via /proc//mountinfo" This reverts commit 93f1c20bc8cdb757be50566eff88d65c3b26881f. It turns out that libmount misparses it because it adds a '-' character in the uuid string, which libmount then incorrectly confuses with the separator string (" - ") at the end of all the optional arguments. Upstream libmount (in the util-linux tree) has been fixed, but until that fix actually percolates up to users, we'd better not expose this change in the kernel. Let's revisit this later (possibly by exposing the UUID without any '-' characters in it, avoiding the user-space bug). Reported-by: Dave Jones Cc: Aneesh Kumar K.V Cc: Al Viro Cc: Karel Zak Cc: Ram Pai Cc: Miklos Szeredi Cc: Eric Sandeen Signed-off-by: Linus Torvalds --- fs/namespace.c | 16 ---------------- include/linux/fs.h | 1 - 2 files changed, 17 deletions(-) (limited to 'include/linux') diff --git a/fs/namespace.c b/fs/namespace.c index 7dba2ed03429..d99bcf59e4c2 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1030,18 +1030,6 @@ const struct seq_operations mounts_op = { .show = show_vfsmnt }; -static int uuid_is_nil(u8 *uuid) -{ - int i; - u8 *cp = (u8 *)uuid; - - for (i = 0; i < 16; i++) { - if (*cp++) - return 0; - } - return 1; -} - static int show_mountinfo(struct seq_file *m, void *v) { struct proc_mounts *p = m->private; @@ -1085,10 +1073,6 @@ static int show_mountinfo(struct seq_file *m, void *v) if (IS_MNT_UNBINDABLE(mnt)) seq_puts(m, " unbindable"); - if (!uuid_is_nil(mnt->mnt_sb->s_uuid)) - /* print the uuid */ - seq_printf(m, " uuid:%pU", mnt->mnt_sb->s_uuid); - /* Filesystem specific data */ seq_puts(m, " - "); show_type(m, sb); diff --git a/include/linux/fs.h b/include/linux/fs.h index dbd860af0804..5b14843af9f9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1403,7 +1403,6 @@ struct super_block { wait_queue_head_t s_wait_unfrozen; char s_id[32]; /* Informational name */ - u8 s_uuid[16]; /* UUID */ void *s_fs_info; /* Filesystem private info */ fmode_t s_mode; -- cgit v1.2.3-71-gd317 From 581a8b0feeed8877aab3a8ca4c972419790cd07f Mon Sep 17 00:00:00 2001 From: Javier Cardona Date: Thu, 7 Apr 2011 15:08:27 -0700 Subject: nl80211: rename NL80211_MESH_SETUP_VENDOR_PATH_SEL_IE To NL80211_MESH_SETUP_IE. This reflects our ability to insert any ie into a mesh beacon, not simply path selection ies. Signed-off-by: Javier Cardona Signed-off-by: John W. 
Linville --- include/linux/nl80211.h | 9 +++++---- include/net/cfg80211.h | 8 ++++---- net/mac80211/cfg.c | 15 +++++++-------- net/mac80211/ieee80211_i.h | 4 ++-- net/mac80211/mesh.c | 6 +++--- net/mac80211/mesh_plink.c | 2 +- net/mac80211/tx.c | 2 +- net/wireless/mesh.c | 4 ++-- net/wireless/nl80211.c | 11 ++++++----- 9 files changed, 31 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 16eea7229e99..ecf6b68a96da 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -545,6 +545,7 @@ enum nl80211_commands { /* source-level API compatibility */ #define NL80211_CMD_GET_MESH_PARAMS NL80211_CMD_GET_MESH_CONFIG #define NL80211_CMD_SET_MESH_PARAMS NL80211_CMD_SET_MESH_CONFIG +#define NL80211_MESH_SETUP_VENDOR_PATH_SEL_IE NL80211_MESH_SETUP_IE /** * enum nl80211_attrs - nl80211 netlink attributes @@ -1719,9 +1720,9 @@ enum nl80211_meshconf_params { * vendor specific path metric or disable it to use the default Airtime * metric. * - * @NL80211_MESH_SETUP_VENDOR_PATH_SEL_IE: A vendor specific information - * element that vendors will use to identify the path selection methods and - * metrics in use. + * @NL80211_MESH_SETUP_IE: Information elements for this mesh, for instance, a + * robust security network ie, or a vendor specific information element that + * vendors will use to identify the path selection methods and metrics in use. * * @NL80211_MESH_SETUP_ATTR_MAX: highest possible mesh setup attribute number * @__NL80211_MESH_SETUP_ATTR_AFTER_LAST: Internal use @@ -1730,7 +1731,7 @@ enum nl80211_mesh_setup_params { __NL80211_MESH_SETUP_INVALID, NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL, NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC, - NL80211_MESH_SETUP_VENDOR_PATH_SEL_IE, + NL80211_MESH_SETUP_IE, /* keep last */ __NL80211_MESH_SETUP_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index ba7384acf4e0..1d02ddf5a8a3 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -689,8 +689,8 @@ struct mesh_config { * @mesh_id_len: length of the mesh ID, at least 1 and at most 32 bytes * @path_sel_proto: which path selection protocol to use * @path_metric: which metric to use - * @vendor_ie: vendor information elements (optional) - * @vendor_ie_len: length of vendor information elements + * @ie: vendor information elements (optional) + * @ie_len: length of vendor information elements * * These parameters are fixed when the mesh is created. 
*/ @@ -699,8 +699,8 @@ struct mesh_setup { u8 mesh_id_len; u8 path_sel_proto; u8 path_metric; - const u8 *vendor_ie; - u8 vendor_ie_len; + const u8 *ie; + u8 ie_len; }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index bf5d28da46e6..d9428afd8bf6 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1034,26 +1034,25 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh, u8 *new_ie; const u8 *old_ie; - /* first allocate the new vendor information element */ + /* allocate information elements */ new_ie = NULL; - old_ie = ifmsh->vendor_ie; + old_ie = ifmsh->ie; - ifmsh->vendor_ie_len = setup->vendor_ie_len; - if (setup->vendor_ie_len) { - new_ie = kmemdup(setup->vendor_ie, setup->vendor_ie_len, + if (setup->ie_len) { + new_ie = kmemdup(setup->ie, setup->ie_len, GFP_KERNEL); if (!new_ie) return -ENOMEM; } + ifmsh->ie_len = setup->ie_len; + ifmsh->ie = new_ie; + kfree(old_ie); /* now copy the rest of the setup parameters */ ifmsh->mesh_id_len = setup->mesh_id_len; memcpy(ifmsh->mesh_id, setup->mesh_id, ifmsh->mesh_id_len); ifmsh->mesh_pp_id = setup->path_sel_proto; ifmsh->mesh_pm_id = setup->path_metric; - ifmsh->vendor_ie = new_ie; - - kfree(old_ie); return 0; } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 6eb2c8523eeb..6450100594ba 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -488,8 +488,8 @@ struct ieee80211_if_mesh { struct mesh_config mshcfg; u32 mesh_seqnum; bool accepting_plinks; - const u8 *vendor_ie; - u8 vendor_ie_len; + const u8 *ie; + u8 ie_len; }; #ifdef CONFIG_MAC80211_MESH diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 2a57cc02c618..1c244c0c7664 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -279,9 +279,9 @@ void mesh_mgmt_ies_add(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) MESHCONF_CAPAB_ACCEPT_PLINKS : 0x00; *pos++ = 0x00; - if (sdata->u.mesh.vendor_ie) { - int len = sdata->u.mesh.vendor_ie_len; - const u8 *data = sdata->u.mesh.vendor_ie; + if (sdata->u.mesh.ie) { + int len = sdata->u.mesh.ie_len; + const u8 *data = sdata->u.mesh.ie; if (skb_tailroom(skb) > len) memcpy(skb_put(skb, len), data, len); } diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 44b53931ba5e..c705b20e1acb 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -161,7 +161,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, __le16 reason) { struct ieee80211_local *local = sdata->local; struct sk_buff *skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400 + - sdata->u.mesh.vendor_ie_len); + sdata->u.mesh.ie_len); struct ieee80211_mgmt *mgmt; bool include_plid = false; static const u8 meshpeeringproto[] = { 0x00, 0x0F, 0xAC, 0x2A }; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index ce4596ed1268..17b10be31f55 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2262,7 +2262,7 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, /* headroom, head length, tail length and maximum TIM length */ skb = dev_alloc_skb(local->tx_headroom + 400 + - sdata->u.mesh.vendor_ie_len); + sdata->u.mesh.ie_len); if (!skb) goto out; diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 73e39c171ffb..0d4b2260f96f 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -53,8 +53,8 @@ const struct mesh_config default_mesh_config = { const struct mesh_setup default_mesh_setup = { .path_sel_proto = IEEE80211_PATH_PROTOCOL_HWMP, .path_metric = IEEE80211_PATH_METRIC_AIRTIME, - .vendor_ie = NULL, - 
.vendor_ie_len = 0, + .ie = NULL, + .ie_len = 0, }; int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 297d7ce4117b..ccd825a5857e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2823,7 +2823,7 @@ static const struct nla_policy nl80211_mesh_setup_params_policy[NL80211_MESH_SETUP_ATTR_MAX+1] = { [NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL] = { .type = NLA_U8 }, [NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC] = { .type = NLA_U8 }, - [NL80211_MESH_SETUP_VENDOR_PATH_SEL_IE] = { .type = NLA_BINARY, + [NL80211_MESH_SETUP_IE] = { .type = NLA_BINARY, .len = IEEE80211_MAX_DATA_LEN }, }; @@ -2925,13 +2925,14 @@ static int nl80211_parse_mesh_setup(struct genl_info *info, IEEE80211_PATH_METRIC_VENDOR : IEEE80211_PATH_METRIC_AIRTIME; - if (tb[NL80211_MESH_SETUP_VENDOR_PATH_SEL_IE]) { + + if (tb[NL80211_MESH_SETUP_IE]) { struct nlattr *ieattr = - tb[NL80211_MESH_SETUP_VENDOR_PATH_SEL_IE]; + tb[NL80211_MESH_SETUP_IE]; if (!is_valid_ie_attr(ieattr)) return -EINVAL; - setup->vendor_ie = nla_data(ieattr); - setup->vendor_ie_len = nla_len(ieattr); + setup->ie = nla_data(ieattr); + setup->ie_len = nla_len(ieattr); } return 0; -- cgit v1.2.3-71-gd317 From 15d5dda623139bbf6165030fc251bbd5798f4130 Mon Sep 17 00:00:00 2001 From: Javier Cardona Date: Thu, 7 Apr 2011 15:08:28 -0700 Subject: cfg80211/nl80211: Add userspace authentication flag to mesh setup During mesh setup, use NL80211_MESH_SETUP_USERSPACE_AUTH flag to create a secure mesh and route management frames to userspace. Also, NL80211_CMD_GET_WIPHY now returns a flag NL80211_SUPPORT_MESH_AUTH if the wiphy's mesh implementation supports routing of mesh auth frames to userspace. This is useful for forward compatibility between old kernels and new userspace tools. Signed-off-by: Javier Cardona Signed-off-by: Thomas Pedersen Signed-off-by: John W. Linville --- include/linux/nl80211.h | 9 +++++++++ include/net/cfg80211.h | 5 +++++ net/wireless/mesh.c | 4 ++++ net/wireless/nl80211.c | 5 +++++ 4 files changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index ecf6b68a96da..0e652d860819 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -887,6 +887,9 @@ enum nl80211_commands { * changed once the mesh is active. * @NL80211_ATTR_MESH_CONFIG: Mesh configuration parameters, a nested attribute * containing attributes from &enum nl80211_meshconf_params. + * @NL80211_ATTR_SUPPORT_MESH_AUTH: Currently, this means the underlying driver + * allows auth frames in a mesh to be passed to userspace for processing via + * the @NL80211_MESH_SETUP_USERSPACE_AUTH flag. * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -1075,6 +1078,8 @@ enum nl80211_attrs { NL80211_ATTR_WIPHY_ANTENNA_AVAIL_TX, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_RX, + NL80211_ATTR_SUPPORT_MESH_AUTH, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -1724,6 +1729,9 @@ enum nl80211_meshconf_params { * robust security network ie, or a vendor specific information element that * vendors will use to identify the path selection methods and metrics in use. * + * @NL80211_MESH_SETUP_USERSPACE_AUTH: Enable this option if an authentication + * daemon will be authenticating mesh candidates. 
+ * * @NL80211_MESH_SETUP_ATTR_MAX: highest possible mesh setup attribute number * @__NL80211_MESH_SETUP_ATTR_AFTER_LAST: Internal use */ @@ -1732,6 +1740,7 @@ enum nl80211_mesh_setup_params { NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL, NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC, NL80211_MESH_SETUP_IE, + NL80211_MESH_SETUP_USERSPACE_AUTH, /* keep last */ __NL80211_MESH_SETUP_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 1d02ddf5a8a3..e77603bd1630 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -691,6 +691,7 @@ struct mesh_config { * @path_metric: which metric to use * @ie: vendor information elements (optional) * @ie_len: length of vendor information elements + * @is_secure: or not * * These parameters are fixed when the mesh is created. */ @@ -701,6 +702,7 @@ struct mesh_setup { u8 path_metric; const u8 *ie; u8 ie_len; + bool is_secure; }; /** @@ -1451,6 +1453,8 @@ struct cfg80211_ops { * @WIPHY_FLAG_IBSS_RSN: The device supports IBSS RSN. * @WIPHY_FLAG_SUPPORTS_SEPARATE_DEFAULT_KEYS: The device supports separate * unicast and multicast TX keys. + * @WIPHY_FLAG_MESH_AUTH: The device supports mesh authentication by routing + * auth frames to userspace. See @NL80211_MESH_SETUP_USERSPACE_AUTH. */ enum wiphy_flags { WIPHY_FLAG_CUSTOM_REGULATORY = BIT(0), @@ -1463,6 +1467,7 @@ enum wiphy_flags { WIPHY_FLAG_CONTROL_PORT_PROTOCOL = BIT(7), WIPHY_FLAG_IBSS_RSN = BIT(8), WIPHY_FLAG_SUPPORTS_SEPARATE_DEFAULT_KEYS= BIT(9), + WIPHY_FLAG_MESH_AUTH = BIT(10), }; struct mac_address { diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 0d4b2260f96f..0e5c122ce324 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -72,6 +72,10 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) return -EOPNOTSUPP; + if (!(rdev->wiphy.flags & WIPHY_FLAG_MESH_AUTH) && + setup->is_secure) + return -EOPNOTSUPP; + if (wdev->mesh_id_len) return -EALREADY; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index ccd825a5857e..cbedfc2a42a2 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -124,6 +124,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_BSS_HT_OPMODE] = { .type = NLA_U16 }, [NL80211_ATTR_MESH_CONFIG] = { .type = NLA_NESTED }, + [NL80211_ATTR_SUPPORT_MESH_AUTH] = { .type = NLA_FLAG }, [NL80211_ATTR_HT_CAPABILITY] = { .type = NLA_BINARY, .len = NL80211_HT_CAPABILITY_LEN }, @@ -594,6 +595,8 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, if (dev->wiphy.flags & WIPHY_FLAG_IBSS_RSN) NLA_PUT_FLAG(msg, NL80211_ATTR_SUPPORT_IBSS_RSN); + if (dev->wiphy.flags & WIPHY_FLAG_MESH_AUTH) + NLA_PUT_FLAG(msg, NL80211_ATTR_SUPPORT_MESH_AUTH); NLA_PUT(msg, NL80211_ATTR_CIPHER_SUITES, sizeof(u32) * dev->wiphy.n_cipher_suites, @@ -2823,6 +2826,7 @@ static const struct nla_policy nl80211_mesh_setup_params_policy[NL80211_MESH_SETUP_ATTR_MAX+1] = { [NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL] = { .type = NLA_U8 }, [NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC] = { .type = NLA_U8 }, + [NL80211_MESH_SETUP_USERSPACE_AUTH] = { .type = NLA_FLAG }, [NL80211_MESH_SETUP_IE] = { .type = NLA_BINARY, .len = IEEE80211_MAX_DATA_LEN }, }; @@ -2934,6 +2938,7 @@ static int nl80211_parse_mesh_setup(struct genl_info *info, setup->ie = nla_data(ieattr); setup->ie_len = nla_len(ieattr); } + setup->is_secure = nla_get_flag(tb[NL80211_MESH_SETUP_USERSPACE_AUTH]); return 0; } -- cgit v1.2.3-71-gd317 From 
b39c48fac1fc915a5dcd024bf6e9aabc855ed591 Mon Sep 17 00:00:00 2001 From: Javier Cardona Date: Thu, 7 Apr 2011 15:08:30 -0700 Subject: nl80211/mac80211: let userspace authenticate stations Signed-off-by: Javier Cardona Signed-off-by: John W. Linville --- include/linux/nl80211.h | 2 ++ net/mac80211/cfg.c | 6 ++++++ net/wireless/nl80211.c | 5 ++++- 3 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 0e652d860819..5ec4ac3a0ef4 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1174,6 +1174,7 @@ enum nl80211_iftype { * with short barker preamble * @NL80211_STA_FLAG_WME: station is WME/QoS capable * @NL80211_STA_FLAG_MFP: station uses management frame protection + * @NL80211_STA_FLAG_AUTHENTICATED: station is authenticated * @NL80211_STA_FLAG_MAX: highest station flag number currently defined * @__NL80211_STA_FLAG_AFTER_LAST: internal use */ @@ -1183,6 +1184,7 @@ enum nl80211_sta_flags { NL80211_STA_FLAG_SHORT_PREAMBLE, NL80211_STA_FLAG_WME, NL80211_STA_FLAG_MFP, + NL80211_STA_FLAG_AUTHENTICATED, /* keep last */ __NL80211_STA_FLAG_AFTER_LAST, diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index dc623d884d02..1c25723eacda 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -686,6 +686,12 @@ static void sta_apply_parameters(struct ieee80211_local *local, if (set & BIT(NL80211_STA_FLAG_MFP)) sta->flags |= WLAN_STA_MFP; } + + if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED)) { + sta->flags &= ~WLAN_STA_AUTH; + if (set & BIT(NL80211_STA_FLAG_AUTHENTICATED)) + sta->flags |= WLAN_STA_AUTH; + } spin_unlock_irqrestore(&sta->flaglock, flags); /* diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index cbedfc2a42a2..ce29a0d0e88e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1925,6 +1925,7 @@ static const struct nla_policy sta_flags_policy[NL80211_STA_FLAG_MAX + 1] = { [NL80211_STA_FLAG_SHORT_PREAMBLE] = { .type = NLA_FLAG }, [NL80211_STA_FLAG_WME] = { .type = NLA_FLAG }, [NL80211_STA_FLAG_MFP] = { .type = NLA_FLAG }, + [NL80211_STA_FLAG_AUTHENTICATED] = { .type = NLA_FLAG }, }; static int parse_station_flags(struct genl_info *info, @@ -2284,7 +2285,9 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) err = -EINVAL; if (params.supported_rates) err = -EINVAL; - if (params.sta_flags_mask) + if (params.sta_flags_mask & + ~(BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_AUTHORIZED))) err = -EINVAL; break; default: -- cgit v1.2.3-71-gd317 From c93b5e717ec47b57abfe0229360bc11e77520984 Mon Sep 17 00:00:00 2001 From: Javier Cardona Date: Thu, 7 Apr 2011 15:08:34 -0700 Subject: nl80211: New notification to discover mesh peer candidates. Notify userspace when a beacon/presp is received from a suitable mesh peer candidate for whom no sta information exists. Userspace can then decide to create a sta info for the candidate. If userspace is not ready to authenticate the peer right away, it can create the sta info with the authenticated flag unset and set it later. Signed-off-by: Javier Cardona Signed-off-by: John W. 
Linville --- include/linux/nl80211.h | 12 ++++++++++++ include/net/cfg80211.h | 16 ++++++++++++++++ net/wireless/mesh.c | 14 ++++++++++++++ net/wireless/nl80211.c | 38 ++++++++++++++++++++++++++++++++++++++ net/wireless/nl80211.h | 4 ++++ 5 files changed, 84 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 5ec4ac3a0ef4..b87481866dde 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -410,6 +410,16 @@ * notification. This event is used to indicate that an unprotected * disassociation frame was dropped when MFP is in use. * + * @NL80211_CMD_NEW_PEER_CANDIDATE: Notification on the reception of a + * beacon or probe response from a compatible mesh peer. This is only + * sent while no station information (sta_info) exists for the new peer + * candidate and when @NL80211_MESH_SETUP_USERSPACE_AUTH is set. On + * reception of this notification, userspace may decide to create a new + * station (@NL80211_CMD_NEW_STATION). To stop this notification from + * reoccurring, the userspace authentication daemon may want to create the + * new station with the AUTHENTICATED flag unset and maybe change it later + * depending on the authentication result. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -522,6 +532,8 @@ enum nl80211_commands { NL80211_CMD_UNPROT_DEAUTHENTICATE, NL80211_CMD_UNPROT_DISASSOCIATE, + NL80211_CMD_NEW_PEER_CANDIDATE, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index e77603bd1630..f40cd30847de 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2488,6 +2488,22 @@ void cfg80211_michael_mic_failure(struct net_device *dev, const u8 *addr, */ void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, gfp_t gfp); +/** + * cfg80211_notify_new_candidate - notify cfg80211 of a new mesh peer candidate + * + * @dev: network device + * @macaddr: the MAC address of the new candidate + * @ie: information elements advertised by the peer candidate + * @ie_len: lenght of the information elements buffer + * @gfp: allocation flags + * + * This function notifies cfg80211 that the mesh peer candidate has been + * detected, most likely via a beacon or, less likely, via a probe response. + * cfg80211 then sends a notification to userspace. 
+ */ +void cfg80211_notify_new_peer_candidate(struct net_device *dev, + const u8 *macaddr, const u8 *ie, u8 ie_len, gfp_t gfp); + /** * DOC: RFkill integration * diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index e0226e8265a3..5c116083eeca 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -1,5 +1,6 @@ #include #include +#include "nl80211.h" #include "core.h" /* Default values, timeouts in ms */ @@ -110,6 +111,19 @@ int cfg80211_join_mesh(struct cfg80211_registered_device *rdev, return err; } +void cfg80211_notify_new_peer_candidate(struct net_device *dev, + const u8 *macaddr, const u8* ie, u8 ie_len, gfp_t gfp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_MESH_POINT)) + return; + + nl80211_send_new_peer_candidate(wiphy_to_dev(wdev->wiphy), dev, + macaddr, ie, ie_len, gfp); +} +EXPORT_SYMBOL(cfg80211_notify_new_peer_candidate); + static int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, struct net_device *dev) { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index f4cb8efe2e5f..58f501a35022 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5818,6 +5818,44 @@ void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev, nlmsg_free(msg); } +void nl80211_send_new_peer_candidate(struct cfg80211_registered_device *rdev, + struct net_device *netdev, + const u8 *macaddr, const u8* ie, u8 ie_len, + gfp_t gfp) +{ + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_NEW_PEER_CANDIDATE); + if (!hdr) { + nlmsg_free(msg); + return; + } + + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx); + NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex); + NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, macaddr); + if (ie_len && ie) + NLA_PUT(msg, NL80211_ATTR_IE, ie_len , ie); + + if (genlmsg_end(msg, hdr) < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, gfp); + return; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); +} + void nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *addr, enum nl80211_key_type key_type, int key_id, diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index dcac5cd6f017..f2af6955a665 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -50,6 +50,10 @@ void nl80211_send_disconnected(struct cfg80211_registered_device *rdev, struct net_device *netdev, u16 reason, const u8 *ie, size_t ie_len, bool from_ap); +void nl80211_send_new_peer_candidate(struct cfg80211_registered_device *rdev, + struct net_device *netdev, + const u8 *macaddr, const u8* ie, u8 ie_len, + gfp_t gfp); void nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *addr, -- cgit v1.2.3-71-gd317 From ebe27c91af8b7f4810ae906fbd3eeb2d87850026 Mon Sep 17 00:00:00 2001 From: Mohammed Shafi Shajakhan Date: Fri, 8 Apr 2011 21:24:24 +0530 Subject: {mac|nl}80211: Add station connected time Add station connected time in debugfs. This will be helpful to get a measure of stability of the connection and for debugging stress issues Cc: Senthilkumar Balasubramanian Signed-off-by: Mohammed Shafi Shajakhan Signed-off-by: John W. 
Linville --- include/linux/nl80211.h | 2 ++ include/net/cfg80211.h | 4 ++++ net/mac80211/cfg.c | 7 ++++++- net/mac80211/debugfs_sta.c | 26 ++++++++++++++++++++++++++ net/mac80211/sta_info.c | 3 +++ net/mac80211/sta_info.h | 2 ++ net/wireless/nl80211.c | 3 +++ 7 files changed, 46 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index b87481866dde..be8df57b789d 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1297,6 +1297,7 @@ enum nl80211_sta_bss_param { * attribute, like NL80211_STA_INFO_TX_BITRATE. * @NL80211_STA_INFO_BSS_PARAM: current station's view of BSS, nested attribute * containing info as possible, see &enum nl80211_sta_bss_param + * @NL80211_STA_INFO_CONNECTED_TIME: time since the station is last connected * @__NL80211_STA_INFO_AFTER_LAST: internal * @NL80211_STA_INFO_MAX: highest possible station info attribute */ @@ -1317,6 +1318,7 @@ enum nl80211_sta_info { NL80211_STA_INFO_SIGNAL_AVG, NL80211_STA_INFO_RX_BITRATE, NL80211_STA_INFO_BSS_PARAM, + NL80211_STA_INFO_CONNECTED_TIME, /* keep last */ __NL80211_STA_INFO_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f40cd30847de..d30eada7c6cd 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -423,6 +423,7 @@ struct station_parameters { * @STATION_INFO_SIGNAL_AVG: @signal_avg filled * @STATION_INFO_RX_BITRATE: @rxrate fields are filled * @STATION_INFO_BSS_PARAM: @bss_param filled + * @STATION_INFO_CONNECTED_TIME: @connected_time filled */ enum station_info_flags { STATION_INFO_INACTIVE_TIME = 1<<0, @@ -441,6 +442,7 @@ enum station_info_flags { STATION_INFO_SIGNAL_AVG = 1<<13, STATION_INFO_RX_BITRATE = 1<<14, STATION_INFO_BSS_PARAM = 1<<15, + STATION_INFO_CONNECTED_TIME = 1<<16 }; /** @@ -511,6 +513,7 @@ struct sta_bss_parameters { * Station information filled by driver for get_station() and dump_station. 
* * @filled: bitflag of flags from &enum station_info_flags + * @connected_time: time(in secs) since a station is last connected * @inactive_time: time since last station activity (tx/rx) in milliseconds * @rx_bytes: bytes received from this station * @tx_bytes: bytes transmitted to this station @@ -533,6 +536,7 @@ struct sta_bss_parameters { */ struct station_info { u32 filled; + u32 connected_time; u32 inactive_time; u32 rx_bytes; u32 tx_bytes; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 1c25723eacda..a6d191f2a0fe 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -330,6 +330,7 @@ static void rate_idx_to_bitrate(struct rate_info *rate, struct sta_info *sta, in static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) { struct ieee80211_sub_if_data *sdata = sta->sdata; + struct timespec uptime; sinfo->generation = sdata->local->sta_generation; @@ -343,7 +344,11 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) STATION_INFO_TX_BITRATE | STATION_INFO_RX_BITRATE | STATION_INFO_RX_DROP_MISC | - STATION_INFO_BSS_PARAM; + STATION_INFO_BSS_PARAM | + STATION_INFO_CONNECTED_TIME; + + do_posix_clock_monotonic_gettime(&uptime); + sinfo->connected_time = uptime.tv_sec - sta->last_connected; sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx); sinfo->rx_bytes = sta->rx_bytes; diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index c04a1396cf8d..c008232731eb 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -92,6 +92,31 @@ static ssize_t sta_inactive_ms_read(struct file *file, char __user *userbuf, } STA_OPS(inactive_ms); + +static ssize_t sta_connected_time_read(struct file *file, char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct sta_info *sta = file->private_data; + struct timespec uptime; + struct tm result; + long connected_time_secs; + char buf[100]; + int res; + do_posix_clock_monotonic_gettime(&uptime); + connected_time_secs = uptime.tv_sec - sta->last_connected; + time_to_tm(connected_time_secs, 0, &result); + result.tm_year -= 70; + result.tm_mday -= 1; + res = scnprintf(buf, sizeof(buf), + "years - %d\nmonths - %d\ndays - %d\nclock - %d:%d:%d\n\n", + result.tm_year, result.tm_mon, result.tm_mday, + result.tm_hour, result.tm_min, result.tm_sec); + return simple_read_from_buffer(userbuf, count, ppos, buf, res); +} +STA_OPS(connected_time); + + + static ssize_t sta_last_seq_ctrl_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { @@ -324,6 +349,7 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta) DEBUGFS_ADD(flags); DEBUGFS_ADD(num_ps_buf_frames); DEBUGFS_ADD(inactive_ms); + DEBUGFS_ADD(connected_time); DEBUGFS_ADD(last_seq_ctrl); DEBUGFS_ADD(agg_status); DEBUGFS_ADD(dev); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 999f8fbf0b4b..8a9068ac0673 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -228,6 +228,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; struct sta_info *sta; + struct timespec uptime; int i; sta = kzalloc(sizeof(*sta) + local->hw.sta_data_size, gfp); @@ -245,6 +246,8 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, sta->sdata = sdata; sta->last_rx = jiffies; + do_posix_clock_monotonic_gettime(&uptime); + sta->last_connected = uptime.tv_sec; ewma_init(&sta->avg_signal, 1024, 8); if (sta_prepare_rate_control(local, sta, gfp)) { diff --git a/net/mac80211/sta_info.h 
b/net/mac80211/sta_info.h index 43238e99cfb3..984a03db3553 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -226,6 +226,7 @@ enum plink_state { * @rx_bytes: Number of bytes received from this STA * @wep_weak_iv_count: number of weak WEP IVs received from this station * @last_rx: time (in jiffies) when last frame was received from this STA + * @last_connected: time (in seconds) when a station got connected * @num_duplicates: number of duplicate frames received from this STA * @rx_fragments: number of received MPDUs * @rx_dropped: number of dropped MPDUs from this STA @@ -295,6 +296,7 @@ struct sta_info { unsigned long rx_packets, rx_bytes; unsigned long wep_weak_iv_count; unsigned long last_rx; + long last_connected; unsigned long num_duplicates; unsigned long rx_fragments; unsigned long rx_dropped; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 58f501a35022..0efa7fd01150 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2020,6 +2020,9 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, sinfoattr = nla_nest_start(msg, NL80211_ATTR_STA_INFO); if (!sinfoattr) goto nla_put_failure; + if (sinfo->filled & STATION_INFO_CONNECTED_TIME) + NLA_PUT_U32(msg, NL80211_STA_INFO_CONNECTED_TIME, + sinfo->connected_time); if (sinfo->filled & STATION_INFO_INACTIVE_TIME) NLA_PUT_U32(msg, NL80211_STA_INFO_INACTIVE_TIME, sinfo->inactive_time); -- cgit v1.2.3-71-gd317 From bcc6d47903612c3861201cc3a866fb604f26b8b2 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 7 Apr 2011 19:48:33 +0000 Subject: net: vlan: make non-hw-accel rx path similar to hw-accel Now there are 2 paths for rx vlan frames. When rx-vlan-hw-accel is enabled, skb is untagged by NIC, vlan_tci is set and the skb gets into vlan code in __netif_receive_skb - vlan_hwaccel_do_receive. For non-rx-vlan-hw-accel however, tagged skb goes thru whole __netif_receive_skb, it's untagged in ptype_base hander and reinjected This incosistency is fixed by this patch. Vlan untagging happens early in __netif_receive_skb so the rest of code (ptype_all handlers, rx_handlers) see the skb like it was untagged by hw. Signed-off-by: Jiri Pirko v1->v2: remove "inline" from vlan_core.c functions Signed-off-by: David S. 
Miller --- include/linux/if_vlan.h | 10 ++- net/8021q/vlan.c | 8 --- net/8021q/vlan.h | 2 - net/8021q/vlan_core.c | 85 +++++++++++++++++++++++- net/8021q/vlan_dev.c | 173 ------------------------------------------------ net/core/dev.c | 8 ++- 6 files changed, 99 insertions(+), 187 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 635e1faec412..998b29930b80 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -132,7 +132,8 @@ extern u16 vlan_dev_vlan_id(const struct net_device *dev); extern int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, u16 vlan_tci, int polling); -extern bool vlan_hwaccel_do_receive(struct sk_buff **skb); +extern bool vlan_do_receive(struct sk_buff **skb); +extern struct sk_buff *vlan_untag(struct sk_buff *skb); extern gro_result_t vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp, unsigned int vlan_tci, struct sk_buff *skb); @@ -166,13 +167,18 @@ static inline int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, return NET_XMIT_SUCCESS; } -static inline bool vlan_hwaccel_do_receive(struct sk_buff **skb) +static inline bool vlan_do_receive(struct sk_buff **skb) { if ((*skb)->vlan_tci & VLAN_VID_MASK) (*skb)->pkt_type = PACKET_OTHERHOST; return false; } +inline struct sk_buff *vlan_untag(struct sk_buff *skb) +{ + return skb; +} + static inline gro_result_t vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp, unsigned int vlan_tci, struct sk_buff *skb) diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index e47600b4e2e3..14ef5efbc653 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -49,11 +49,6 @@ const char vlan_version[] = DRV_VERSION; static const char vlan_copyright[] = "Ben Greear "; static const char vlan_buggyright[] = "David S. Miller "; -static struct packet_type vlan_packet_type __read_mostly = { - .type = cpu_to_be16(ETH_P_8021Q), - .func = vlan_skb_recv, /* VLAN receive method */ -}; - /* End of global variables definitions. 
*/ static void vlan_group_free(struct vlan_group *grp) @@ -684,7 +679,6 @@ static int __init vlan_proto_init(void) if (err < 0) goto err4; - dev_add_pack(&vlan_packet_type); vlan_ioctl_set(vlan_ioctl_handler); return 0; @@ -705,8 +699,6 @@ static void __exit vlan_cleanup_module(void) unregister_netdevice_notifier(&vlan_notifier_block); - dev_remove_pack(&vlan_packet_type); - unregister_pernet_subsys(&vlan_net_ops); rcu_barrier(); /* Wait for completion of call_rcu()'s */ diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index 5687c9b95f33..c3408def8a19 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -75,8 +75,6 @@ static inline struct vlan_dev_info *vlan_dev_info(const struct net_device *dev) } /* found in vlan_dev.c */ -int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *ptype, struct net_device *orig_dev); void vlan_dev_set_ingress_priority(const struct net_device *dev, u32 skb_prio, u16 vlan_prio); int vlan_dev_set_egress_priority(const struct net_device *dev, diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index ce8e3ab3e7a5..41495dc2a4c9 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -4,7 +4,7 @@ #include #include "vlan.h" -bool vlan_hwaccel_do_receive(struct sk_buff **skbp) +bool vlan_do_receive(struct sk_buff **skbp) { struct sk_buff *skb = *skbp; u16 vlan_id = skb->vlan_tci & VLAN_VID_MASK; @@ -88,3 +88,86 @@ gro_result_t vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp, return napi_gro_frags(napi); } EXPORT_SYMBOL(vlan_gro_frags); + +static struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb) +{ + if (vlan_dev_info(skb->dev)->flags & VLAN_FLAG_REORDER_HDR) { + if (skb_cow(skb, skb_headroom(skb)) < 0) + skb = NULL; + if (skb) { + /* Lifted from Gleb's VLAN code... */ + memmove(skb->data - ETH_HLEN, + skb->data - VLAN_ETH_HLEN, 12); + skb->mac_header += VLAN_HLEN; + } + } + return skb; +} + +static void vlan_set_encap_proto(struct sk_buff *skb, struct vlan_hdr *vhdr) +{ + __be16 proto; + unsigned char *rawp; + + /* + * Was a VLAN packet, grab the encapsulated protocol, which the layer + * three protocols care about. + */ + + proto = vhdr->h_vlan_encapsulated_proto; + if (ntohs(proto) >= 1536) { + skb->protocol = proto; + return; + } + + rawp = skb->data; + if (*(unsigned short *) rawp == 0xFFFF) + /* + * This is a magic hack to spot IPX packets. Older Novell + * breaks the protocol design and runs IPX over 802.3 without + * an 802.2 LLC layer. We look for FFFF which isn't a used + * 802.2 SSAP/DSAP. This won't work for fault tolerant netware + * but does for the rest. 
+ */ + skb->protocol = htons(ETH_P_802_3); + else + /* + * Real 802.2 LLC + */ + skb->protocol = htons(ETH_P_802_2); +} + +struct sk_buff *vlan_untag(struct sk_buff *skb) +{ + struct vlan_hdr *vhdr; + u16 vlan_tci; + + if (unlikely(vlan_tx_tag_present(skb))) { + /* vlan_tci is already set-up so leave this for another time */ + return skb; + } + + skb = skb_share_check(skb, GFP_ATOMIC); + if (unlikely(!skb)) + goto err_free; + + if (unlikely(!pskb_may_pull(skb, VLAN_HLEN))) + goto err_free; + + vhdr = (struct vlan_hdr *) skb->data; + vlan_tci = ntohs(vhdr->h_vlan_TCI); + __vlan_hwaccel_put_tag(skb, vlan_tci); + + skb_pull_rcsum(skb, VLAN_HLEN); + vlan_set_encap_proto(skb, vhdr); + + skb = vlan_check_reorder_header(skb); + if (unlikely(!skb)) + goto err_free; + + return skb; + +err_free: + kfree_skb(skb); + return NULL; +} diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index b84a46b30c0c..d174c312b7f1 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -65,179 +65,6 @@ static int vlan_dev_rebuild_header(struct sk_buff *skb) return 0; } -static inline struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb) -{ - if (vlan_dev_info(skb->dev)->flags & VLAN_FLAG_REORDER_HDR) { - if (skb_cow(skb, skb_headroom(skb)) < 0) - skb = NULL; - if (skb) { - /* Lifted from Gleb's VLAN code... */ - memmove(skb->data - ETH_HLEN, - skb->data - VLAN_ETH_HLEN, 12); - skb->mac_header += VLAN_HLEN; - } - } - - return skb; -} - -static inline void vlan_set_encap_proto(struct sk_buff *skb, - struct vlan_hdr *vhdr) -{ - __be16 proto; - unsigned char *rawp; - - /* - * Was a VLAN packet, grab the encapsulated protocol, which the layer - * three protocols care about. - */ - - proto = vhdr->h_vlan_encapsulated_proto; - if (ntohs(proto) >= 1536) { - skb->protocol = proto; - return; - } - - rawp = skb->data; - if (*(unsigned short *)rawp == 0xFFFF) - /* - * This is a magic hack to spot IPX packets. Older Novell - * breaks the protocol design and runs IPX over 802.3 without - * an 802.2 LLC layer. We look for FFFF which isn't a used - * 802.2 SSAP/DSAP. This won't work for fault tolerant netware - * but does for the rest. - */ - skb->protocol = htons(ETH_P_802_3); - else - /* - * Real 802.2 LLC - */ - skb->protocol = htons(ETH_P_802_2); -} - -/* - * Determine the packet's protocol ID. The rule here is that we - * assume 802.3 if the type field is short enough to be a length. - * This is normal practice and works for any 'now in use' protocol. - * - * Also, at this point we assume that we ARE dealing exclusively with - * VLAN packets, or packets that should be made into VLAN packets based - * on a default VLAN ID. - * - * NOTE: Should be similar to ethernet/eth.c. - * - * SANITY NOTE: This method is called when a packet is moving up the stack - * towards userland. To get here, it would have already passed - * through the ethernet/eth.c eth_type_trans() method. - * SANITY NOTE 2: We are referencing to the VLAN_HDR frields, which MAY be - * stored UNALIGNED in the memory. RISC systems don't like - * such cases very much... - * SANITY NOTE 2a: According to Dave Miller & Alexey, it will always be - * aligned, so there doesn't need to be any of the unaligned - * stuff. It has been commented out now... 
--Ben - * - */ -int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *ptype, struct net_device *orig_dev) -{ - struct vlan_hdr *vhdr; - struct vlan_pcpu_stats *rx_stats; - struct net_device *vlan_dev; - u16 vlan_id; - u16 vlan_tci; - - skb = skb_share_check(skb, GFP_ATOMIC); - if (skb == NULL) - goto err_free; - - if (unlikely(!pskb_may_pull(skb, VLAN_HLEN))) - goto err_free; - - vhdr = (struct vlan_hdr *)skb->data; - vlan_tci = ntohs(vhdr->h_vlan_TCI); - vlan_id = vlan_tci & VLAN_VID_MASK; - - rcu_read_lock(); - vlan_dev = vlan_find_dev(dev, vlan_id); - - /* If the VLAN device is defined, we use it. - * If not, and the VID is 0, it is a 802.1p packet (not - * really a VLAN), so we will just netif_rx it later to the - * original interface, but with the skb->proto set to the - * wrapped proto: we do nothing here. - */ - - if (!vlan_dev) { - if (vlan_id) { - pr_debug("%s: ERROR: No net_device for VID: %u on dev: %s\n", - __func__, vlan_id, dev->name); - goto err_unlock; - } - rx_stats = NULL; - } else { - skb->dev = vlan_dev; - - rx_stats = this_cpu_ptr(vlan_dev_info(skb->dev)->vlan_pcpu_stats); - - u64_stats_update_begin(&rx_stats->syncp); - rx_stats->rx_packets++; - rx_stats->rx_bytes += skb->len; - - skb->priority = vlan_get_ingress_priority(skb->dev, vlan_tci); - - pr_debug("%s: priority: %u for TCI: %hu\n", - __func__, skb->priority, vlan_tci); - - switch (skb->pkt_type) { - case PACKET_BROADCAST: - /* Yeah, stats collect these together.. */ - /* stats->broadcast ++; // no such counter :-( */ - break; - - case PACKET_MULTICAST: - rx_stats->rx_multicast++; - break; - - case PACKET_OTHERHOST: - /* Our lower layer thinks this is not local, let's make - * sure. - * This allows the VLAN to have a different MAC than the - * underlying device, and still route correctly. - */ - if (!compare_ether_addr(eth_hdr(skb)->h_dest, - skb->dev->dev_addr)) - skb->pkt_type = PACKET_HOST; - break; - default: - break; - } - u64_stats_update_end(&rx_stats->syncp); - } - - skb_pull_rcsum(skb, VLAN_HLEN); - vlan_set_encap_proto(skb, vhdr); - - if (vlan_dev) { - skb = vlan_check_reorder_header(skb); - if (!skb) { - rx_stats->rx_errors++; - goto err_unlock; - } - } - - netif_rx(skb); - - rcu_read_unlock(); - return NET_RX_SUCCESS; - -err_unlock: - rcu_read_unlock(); -err_free: - atomic_long_inc(&dev->rx_dropped); - kfree_skb(skb); - return NET_RX_DROP; -} - static inline u16 vlan_dev_get_egress_qos_mask(struct net_device *dev, struct sk_buff *skb) { diff --git a/net/core/dev.c b/net/core/dev.c index 95897ff3a76f..d1aebf7c6494 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3130,6 +3130,12 @@ another_round: __this_cpu_inc(softnet_data.processed); + if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) { + skb = vlan_untag(skb); + if (unlikely(!skb)) + goto out; + } + #ifdef CONFIG_NET_CLS_ACT if (skb->tc_verd & TC_NCLS) { skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); @@ -3177,7 +3183,7 @@ ncls: ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = NULL; } - if (vlan_hwaccel_do_receive(&skb)) { + if (vlan_do_receive(&skb)) { ret = __netif_receive_skb(skb); goto out; } else if (unlikely(!skb)) -- cgit v1.2.3-71-gd317 From 1aac62671686e6234c91b5f6fc4caaa850419d5d Mon Sep 17 00:00:00 2001 From: MichaÅ‚ MirosÅ‚aw Date: Tue, 12 Apr 2011 04:07:39 +0000 Subject: net: vlan_features comment clarification MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: MichaÅ‚ MirosÅ‚aw Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 09d262415769..cb8178ab3c52 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1035,7 +1035,7 @@ struct net_device { u32 hw_features; /* user-requested features */ u32 wanted_features; - /* VLAN feature mask */ + /* mask of features inheritable by VLAN devices */ u32 vlan_features; /* Net device feature bits; if you change something, -- cgit v1.2.3-71-gd317 From 0bba01695b74fdd2f9286243bb39f88544d81401 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 12 Apr 2011 15:21:04 -0700 Subject: vfs: Re-introduce s_uuid in the superblock Gaah. When commit be85bccaa5aa reverted the export of file system uuid via /proc//mountinfo, it also unintentionally removed the s_uuid field in struct super_block. I didn't mean to do that, since filesystems have been taught to fill it in (and we want to keep it for future re-introduction in the mountinfo file). Stupid of me. This adds it back in. Signed-off-by: Linus Torvalds --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 5b14843af9f9..dbd860af0804 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1403,6 +1403,7 @@ struct super_block { wait_queue_head_t s_wait_unfrozen; char s_id[32]; /* Informational name */ + u8 s_uuid[16]; /* UUID */ void *s_fs_info; /* Filesystem private info */ fmode_t s_mode; -- cgit v1.2.3-71-gd317 From 6139e75f4a413bdc8f366fc11e437347be8abc59 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 12 Apr 2011 19:27:51 -0700 Subject: net: Missing 'inline' in vlan-disabled vlan_untag() Reported-by: Stephen Rothwell Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 998b29930b80..546d9d35fbd4 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -174,7 +174,7 @@ static inline bool vlan_do_receive(struct sk_buff **skb) return false; } -inline struct sk_buff *vlan_untag(struct sk_buff *skb) +static inline struct sk_buff *vlan_untag(struct sk_buff *skb) { return skb; } -- cgit v1.2.3-71-gd317 From ba6a078b77e0dc1309d7e6e2ee034b92ab91f88c Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Tue, 12 Apr 2011 23:13:08 -0700 Subject: Input: add KEY_IMAGES specifically for AL Image Browser Many media center remotes have buttons intended for jumping straight to one type of media browser or another -- commonly, images/photos/pictures, audio/music, television, and movies. At present, remotes with an images or photos or pictures button use any number of different keycodes which sort of maybe fit. I've seen at least KEY_MEDIA, KEY_CAMERA, KEY_GRAPHICSEDITOR and KEY_PRESENTATION. None of those seem quite right. In my mind, KEY_MEDIA should be something more like a media center application launcher (and I'd like to standardize on that for things like the windows media center button on the mce remotes). KEY_CAMERA is used in a lot of webcams, and typically means "take a picture now". KEY_GRAPHICSEDITOR implies an editor, not a browser. KEY_PRESENTATION might be the closest fit here, if you think "photo slide show", but it may well be more intended for "run application in full-screen presentation mode" or to launch something like magicpoint, I dunno. 
And thus, I'd like to have a KEY_IMAGES, which matches the HID Usage AL Image Browser, the meaning of which I think is crystal-clear. I believe AL Audio Browser is already covered by KEY_AUDIO, and AL Movie Browser by KEY_VIDEO, so I'm also adding appropriate comments next to those keys. Signed-off-by: Jarod Wilson Signed-off-by: Dmitry Torokhov --- include/linux/input.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/input.h b/include/linux/input.h index 056ae8a5bd9b..0cc25e4ce2ab 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -553,8 +553,8 @@ struct input_keymap_entry { #define KEY_DVD 0x185 /* Media Select DVD */ #define KEY_AUX 0x186 #define KEY_MP3 0x187 -#define KEY_AUDIO 0x188 -#define KEY_VIDEO 0x189 +#define KEY_AUDIO 0x188 /* AL Audio Browser */ +#define KEY_VIDEO 0x189 /* AL Movie Browser */ #define KEY_DIRECTORY 0x18a #define KEY_LIST 0x18b #define KEY_MEMO 0x18c /* Media Select Messages */ @@ -603,8 +603,9 @@ struct input_keymap_entry { #define KEY_FRAMEFORWARD 0x1b5 #define KEY_CONTEXT_MENU 0x1b6 /* GenDesc - system context menu */ #define KEY_MEDIA_REPEAT 0x1b7 /* Consumer - transport control */ -#define KEY_10CHANNELSUP 0x1b8 /* 10 channels up (10+) */ -#define KEY_10CHANNELSDOWN 0x1b9 /* 10 channels down (10-) */ +#define KEY_10CHANNELSUP 0x1b8 /* 10 channels up (10+) */ +#define KEY_10CHANNELSDOWN 0x1b9 /* 10 channels down (10-) */ +#define KEY_IMAGES 0x1ba /* AL Image Browser */ #define KEY_DEL_EOL 0x1c0 #define KEY_DEL_EOS 0x1c1 -- cgit v1.2.3-71-gd317 From 9fb0f14e31b6101a0cc69a333b43541044f9b0a6 Mon Sep 17 00:00:00 2001 From: Jeff Brown Date: Tue, 12 Apr 2011 23:29:38 -0700 Subject: Input: evdev - indicate buffer overrun with SYN_DROPPED Add a new EV_SYN code, SYN_DROPPED, to inform the client when input events have been dropped from the evdev input buffer due to a buffer overrun. The client should use this event as a hint to reset its state or ignore all following events until the next packet begins. Signed-off-by: Jeff Brown [dtor@mail.ru: Implement Henrik's suggestion and drop old events in case of overflow.] Signed-off-by: Dmitry Torokhov --- Documentation/input/event-codes.txt | 6 ++++++ drivers/input/evdev.c | 33 +++++++++++++++++++++------------ include/linux/input.h | 1 + 3 files changed, 28 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/Documentation/input/event-codes.txt b/Documentation/input/event-codes.txt index f13aee550409..23fcb05175be 100644 --- a/Documentation/input/event-codes.txt +++ b/Documentation/input/event-codes.txt @@ -85,6 +85,12 @@ sent in the evdev event stream. - Used to synchronize and separate touch events. See the multi-touch-protocol.txt document for more information. +* SYN_DROPPED: + - Used to indicate buffer overrun in the evdev client's event queue. + Client should ignore all events up to and including next SYN_REPORT + event and query the device (using EVIOCG* ioctls) to obtain its + current state. + EV_KEY: ---------- EV_KEY events take the form KEY_ or BTN_. 
For example, KEY_A is used diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index 7f42d3a454d2..88d8e4cb419a 100644 --- a/drivers/input/evdev.c +++ b/drivers/input/evdev.c @@ -39,13 +39,13 @@ struct evdev { }; struct evdev_client { - int head; - int tail; + unsigned int head; + unsigned int tail; spinlock_t buffer_lock; /* protects access to buffer, head and tail */ struct fasync_struct *fasync; struct evdev *evdev; struct list_head node; - int bufsize; + unsigned int bufsize; struct input_event buffer[]; }; @@ -55,16 +55,25 @@ static DEFINE_MUTEX(evdev_table_mutex); static void evdev_pass_event(struct evdev_client *client, struct input_event *event) { - /* - * Interrupts are disabled, just acquire the lock. - * Make sure we don't leave with the client buffer - * "empty" by having client->head == client->tail. - */ + /* Interrupts are disabled, just acquire the lock. */ spin_lock(&client->buffer_lock); - do { - client->buffer[client->head++] = *event; - client->head &= client->bufsize - 1; - } while (client->head == client->tail); + + client->buffer[client->head++] = *event; + client->head &= client->bufsize - 1; + + if (unlikely(client->head == client->tail)) { + /* + * This effectively "drops" all unconsumed events, leaving + * EV_SYN/SYN_DROPPED plus the newest event in the queue. + */ + client->tail = (client->head - 2) & (client->bufsize - 1); + + client->buffer[client->tail].time = event->time; + client->buffer[client->tail].type = EV_SYN; + client->buffer[client->tail].code = SYN_DROPPED; + client->buffer[client->tail].value = 0; + } + spin_unlock(&client->buffer_lock); if (event->type == EV_SYN) diff --git a/include/linux/input.h b/include/linux/input.h index 0cc25e4ce2ab..73a8c6ee595b 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -167,6 +167,7 @@ struct input_keymap_entry { #define SYN_REPORT 0 #define SYN_CONFIG 1 #define SYN_MT_REPORT 2 +#define SYN_DROPPED 3 /* * Keys and buttons -- cgit v1.2.3-71-gd317 From edb2fd9524cc2a55fb5a2e878b6e4e83f9e63fd0 Mon Sep 17 00:00:00 2001 From: Antonio Ospite Date: Wed, 13 Apr 2011 09:45:45 +0200 Subject: include/linux/leds-regulator.h: fix syntax in example code Fix struct field initializer syntax in some example code from a comment, this will make copying and pasting the code more straightforward. Signed-off-by: Antonio Ospite Signed-off-by: Jiri Kosina --- include/linux/leds-regulator.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/leds-regulator.h b/include/linux/leds-regulator.h index 5a8eb389aab8..e2337a8c90b0 100644 --- a/include/linux/leds-regulator.h +++ b/include/linux/leds-regulator.h @@ -16,7 +16,7 @@ * Use "vled" as supply id when declaring the regulator consumer: * * static struct regulator_consumer_supply pcap_regulator_VVIB_consumers [] = { - * { .dev_name = "leds-regulator.0", supply = "vled" }, + * { .dev_name = "leds-regulator.0", .supply = "vled" }, * }; * * If you have several regulator driven LEDs, you can append a numerical id to -- cgit v1.2.3-71-gd317 From 91eb7c08c6cb3b8eeba1c61f5753c56dcb77f018 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Wed, 13 Apr 2011 13:51:38 +0200 Subject: netfilter: ipset: SCTP, UDPLITE support added SCTP and UDPLITE port support added to the hash:*port* set types. 
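The common thread is that only the first four bytes of the transport header matter: SCTP carries its source and destination ports there in its own common header, and UDP-Lite deliberately keeps the UDP port layout, which is why a single case can serve both UDP and UDP-Lite. A standalone userspace C sketch of that extraction rule (the sctp_ports struct and the IPPROTO_UDPLITE fallback below are illustrative assumptions, not kernel definitions):

/*
 * Minimal sketch of per-protocol port extraction.  Ports are returned
 * in network byte order, exactly as they sit in the header.
 */
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#ifndef IPPROTO_UDPLITE
#define IPPROTO_UDPLITE 136	/* IANA-assigned protocol number */
#endif

struct sctp_ports {		/* leading fields of the SCTP common header */
	uint16_t source;
	uint16_t dest;
};

static bool get_l4_port(uint8_t proto, const void *l4hdr, bool src,
			uint16_t *port)
{
	switch (proto) {
	case IPPROTO_TCP: {
		const struct tcphdr *th = l4hdr;

		*port = src ? th->source : th->dest;
		return true;
	}
	case IPPROTO_SCTP: {
		const struct sctp_ports *sh = l4hdr;

		*port = src ? sh->source : sh->dest;
		return true;
	}
	case IPPROTO_UDP:
	case IPPROTO_UDPLITE: {	/* UDP-Lite shares the UDP port fields */
		const struct udphdr *uh = l4hdr;

		*port = src ? uh->source : uh->dest;
		return true;
	}
	}
	return false;		/* protocol without port numbers */
}

int main(void)
{
	struct udphdr uh;
	uint16_t port;

	uh.source = htons(5060);
	uh.dest = htons(9);

	if (get_l4_port(IPPROTO_UDPLITE, &uh, true, &port))
		printf("source port %u\n", ntohs(port));
	return 0;
}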
Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- include/linux/netfilter/ipset/ip_set_getport.h | 2 ++ net/netfilter/ipset/ip_set_getport.c | 16 +++++++++++++++- net/netfilter/ipset/ip_set_hash_ipport.c | 2 +- net/netfilter/ipset/ip_set_hash_ipportip.c | 2 +- net/netfilter/ipset/ip_set_hash_ipportnet.c | 2 +- net/netfilter/ipset/ip_set_hash_netport.c | 2 +- 6 files changed, 21 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set_getport.h b/include/linux/netfilter/ipset/ip_set_getport.h index 5aebd170f899..90d09300e954 100644 --- a/include/linux/netfilter/ipset/ip_set_getport.h +++ b/include/linux/netfilter/ipset/ip_set_getport.h @@ -22,7 +22,9 @@ static inline bool ip_set_proto_with_ports(u8 proto) { switch (proto) { case IPPROTO_TCP: + case IPPROTO_SCTP: case IPPROTO_UDP: + case IPPROTO_UDPLITE: return true; } return false; diff --git a/net/netfilter/ipset/ip_set_getport.c b/net/netfilter/ipset/ip_set_getport.c index 8d5227212686..757143b2240a 100644 --- a/net/netfilter/ipset/ip_set_getport.c +++ b/net/netfilter/ipset/ip_set_getport.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -35,7 +36,20 @@ get_port(const struct sk_buff *skb, int protocol, unsigned int protooff, *port = src ? th->source : th->dest; break; } - case IPPROTO_UDP: { + case IPPROTO_SCTP: { + sctp_sctphdr_t _sh; + const sctp_sctphdr_t *sh; + + sh = skb_header_pointer(skb, protooff, sizeof(_sh), &_sh); + if (sh == NULL) + /* No choice either */ + return false; + + *port = src ? sh->source : sh->dest; + break; + } + case IPPROTO_UDP: + case IPPROTO_UDPLITE: { struct udphdr _udph; const struct udphdr *uh; diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index b9214145d357..14281b6b8074 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -491,7 +491,7 @@ static struct ip_set_type hash_ipport_type __read_mostly = { .features = IPSET_TYPE_IP | IPSET_TYPE_PORT, .dimension = IPSET_DIM_TWO, .family = AF_UNSPEC, - .revision = 0, + .revision = 1, .create = hash_ipport_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 4642872df6e1..401c8a2531db 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -509,7 +509,7 @@ static struct ip_set_type hash_ipportip_type __read_mostly = { .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2, .dimension = IPSET_DIM_THREE, .family = AF_UNSPEC, - .revision = 0, + .revision = 1, .create = hash_ipportip_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 2cb84a54b7ad..4743e5402522 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -574,7 +574,7 @@ static struct ip_set_type hash_ipportnet_type __read_mostly = { .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2, .dimension = IPSET_DIM_THREE, .family = AF_UNSPEC, - .revision = 0, + .revision = 1, .create = hash_ipportnet_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 8598676f2a05..d2a40362dd3a 100644 --- 
a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -526,7 +526,7 @@ static struct ip_set_type hash_netport_type __read_mostly = { .features = IPSET_TYPE_IP | IPSET_TYPE_PORT, .dimension = IPSET_DIM_TWO, .family = AF_UNSPEC, - .revision = 0, + .revision = 1, .create = hash_netport_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, -- cgit v1.2.3-71-gd317 From 8b5933c380fc66a6311739f9b36a812383f82141 Mon Sep 17 00:00:00 2001 From: amit salecha Date: Thu, 7 Apr 2011 01:58:42 +0000 Subject: net: ethtool support to configure number of channels Add ethtool support to configure RX, TX and other channels. A combined field in struct ethtool_channels describes a set of channels that handle RX, TX and other work together. Other channels can be used for link interrupts, SR-IOV coordination etc. ETHTOOL_GCHANNELS reports the maximum and current number of RX channels, the maximum and current number of TX channels, the maximum and current number of other channels, and the maximum and current number of combined channels. The number of channels can be changed, up to the corresponding maximum, through the ETHTOOL_SCHANNELS command. Ben Hutchings: o define 'combined' and 'other' types. Most multiqueue drivers pair up RX and TX queues so that most channels combine RX and TX work. o Please could you use a kernel-doc comment to describe the structure. Signed-off-by: Amit Kumar Salecha Reviewed-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/ethtool.h | 36 ++++++++++++++++++++++++++++++++++++ net/core/ethtool.c | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 12cfbd0be2ee..ad22a68c2e5d 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -229,6 +229,34 @@ struct ethtool_ringparam { __u32 tx_pending; }; +/** + * struct ethtool_channels - configuring number of network channel + * @cmd: ETHTOOL_{G,S}CHANNELS + * @max_rx: Read only. Maximum number of receive channel the driver support. + * @max_tx: Read only. Maximum number of transmit channel the driver support. + * @max_other: Read only. Maximum number of other channel the driver support. + * @max_combined: Read only. Maximum number of combined channel the driver + * support. Set of queues RX, TX or other. + * @rx_count: Valid values are in the range 1 to the max_rx. + * @tx_count: Valid values are in the range 1 to the max_tx. + * @other_count: Valid values are in the range 1 to the max_other. + * @combined_count: Valid values are in the range 1 to the max_combined. + * + * This can be used to configure RX, TX and other channels. + */ + +struct ethtool_channels { + __u32 cmd; + __u32 max_rx; + __u32 max_tx; + __u32 max_other; + __u32 max_combined; + __u32 rx_count; + __u32 tx_count; + __u32 other_count; + __u32 combined_count; +}; + /* for configuring link flow control parameters */ struct ethtool_pauseparam { __u32 cmd; /* ETHTOOL_{G,S}PAUSEPARAM */ @@ -818,6 +846,9 @@ bool ethtool_invalid_flags(struct net_device *dev, u32 data, u32 supported); * Returns a negative error code or zero. * @set_rxfh_indir: Set the contents of the RX flow hash indirection table. * Returns a negative error code or zero. + * @get_channels: Get number of channels. + * @set_channels: Set number of channels. Returns a negative error code or + * zero. + * * All operations are optional (i.e. the function pointer may be set * to %NULL) and callers must take this into account.
Callers must @@ -891,6 +922,9 @@ struct ethtool_ops { struct ethtool_rxfh_indir *); int (*set_rxfh_indir)(struct net_device *, const struct ethtool_rxfh_indir *); + void (*get_channels)(struct net_device *, struct ethtool_channels *); + int (*set_channels)(struct net_device *, struct ethtool_channels *); + }; #endif /* __KERNEL__ */ @@ -959,6 +993,8 @@ struct ethtool_ops { #define ETHTOOL_GFEATURES 0x0000003a /* Get device offload settings */ #define ETHTOOL_SFEATURES 0x0000003b /* Change device offload settings */ +#define ETHTOOL_GCHANNELS 0x0000003c /* Get no of channels */ +#define ETHTOOL_SCHANNELS 0x0000003d /* Set no of channels */ /* compatibility with older code */ #define SPARC_ETH_GSET ETHTOOL_GSET diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 43ef09fedd6e..41dee2de13ad 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1446,6 +1446,35 @@ static int ethtool_set_ringparam(struct net_device *dev, void __user *useraddr) return dev->ethtool_ops->set_ringparam(dev, &ringparam); } +static noinline_for_stack int ethtool_get_channels(struct net_device *dev, + void __user *useraddr) +{ + struct ethtool_channels channels = { .cmd = ETHTOOL_GCHANNELS }; + + if (!dev->ethtool_ops->get_channels) + return -EOPNOTSUPP; + + dev->ethtool_ops->get_channels(dev, &channels); + + if (copy_to_user(useraddr, &channels, sizeof(channels))) + return -EFAULT; + return 0; +} + +static noinline_for_stack int ethtool_set_channels(struct net_device *dev, + void __user *useraddr) +{ + struct ethtool_channels channels; + + if (!dev->ethtool_ops->set_channels) + return -EOPNOTSUPP; + + if (copy_from_user(&channels, useraddr, sizeof(channels))) + return -EFAULT; + + return dev->ethtool_ops->set_channels(dev, &channels); +} + static int ethtool_get_pauseparam(struct net_device *dev, void __user *useraddr) { struct ethtool_pauseparam pauseparam = { ETHTOOL_GPAUSEPARAM }; @@ -2007,6 +2036,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_SGRO: rc = ethtool_set_one_feature(dev, useraddr, ethcmd); break; + case ETHTOOL_GCHANNELS: + rc = ethtool_get_channels(dev, useraddr); + break; + case ETHTOOL_SCHANNELS: + rc = ethtool_set_channels(dev, useraddr); + break; default: rc = -EOPNOTSUPP; } -- cgit v1.2.3-71-gd317 From 4d42d417be75d750b82798922b6e775915e11bce Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 13 Apr 2011 14:48:55 -0700 Subject: rndis_host: Poll status before control channel where necessary Some RNDIS devices don't respond on the control channel until polled on the status channel. In particular, this was reported to be the case for the 2Wire HomePortal 1000SW and for some Windows Mobile devices. This is roughly based on a patch by John Carr which is currently applied by Mandriva. Reported-by: Mark Glassberg Signed-off-by: Ben Hutchings Signed-off-by: David S. 
Miller --- drivers/net/usb/rndis_host.c | 39 ++++++++++++++++++++++++++++++++------- include/linux/usb/rndis_host.h | 2 ++ 2 files changed, 34 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/usb/rndis_host.c b/drivers/net/usb/rndis_host.c index 5994a25c56ac..6d6c1da68a36 100644 --- a/drivers/net/usb/rndis_host.c +++ b/drivers/net/usb/rndis_host.c @@ -104,8 +104,10 @@ static void rndis_msg_indicate(struct usbnet *dev, struct rndis_indicate *msg, int rndis_command(struct usbnet *dev, struct rndis_msg_hdr *buf, int buflen) { struct cdc_state *info = (void *) &dev->data; + struct usb_cdc_notification notification; int master_ifnum; int retval; + int partial; unsigned count; __le32 rsp; u32 xid = 0, msg_len, request_id; @@ -133,13 +135,20 @@ int rndis_command(struct usbnet *dev, struct rndis_msg_hdr *buf, int buflen) if (unlikely(retval < 0 || xid == 0)) return retval; - // FIXME Seems like some devices discard responses when - // we time out and cancel our "get response" requests... - // so, this is fragile. Probably need to poll for status. + /* Some devices don't respond on the control channel until + * polled on the status channel, so do that first. */ + if (dev->driver_info->data & RNDIS_DRIVER_DATA_POLL_STATUS) { + retval = usb_interrupt_msg( + dev->udev, + usb_rcvintpipe(dev->udev, + dev->status->desc.bEndpointAddress), + ¬ification, sizeof(notification), &partial, + RNDIS_CONTROL_TIMEOUT_MS); + if (unlikely(retval < 0)) + return retval; + } - /* ignore status endpoint, just poll the control channel; - * the request probably completed immediately - */ + /* Poll the control channel; the request probably completed immediately */ rsp = buf->msg_type | RNDIS_MSG_COMPLETION; for (count = 0; count < 10; count++) { memset(buf, 0, CONTROL_BUFFER_SIZE); @@ -581,17 +590,33 @@ static const struct driver_info rndis_info = { .tx_fixup = rndis_tx_fixup, }; +static const struct driver_info rndis_poll_status_info = { + .description = "RNDIS device (poll status before control)", + .flags = FLAG_ETHER | FLAG_FRAMING_RN | FLAG_NO_SETINT, + .data = RNDIS_DRIVER_DATA_POLL_STATUS, + .bind = rndis_bind, + .unbind = rndis_unbind, + .status = rndis_status, + .rx_fixup = rndis_rx_fixup, + .tx_fixup = rndis_tx_fixup, +}; + /*-------------------------------------------------------------------------*/ static const struct usb_device_id products [] = { { + /* 2Wire HomePortal 1000SW */ + USB_DEVICE_AND_INTERFACE_INFO(0x1630, 0x0042, + USB_CLASS_COMM, 2 /* ACM */, 0x0ff), + .driver_info = (unsigned long) &rndis_poll_status_info, +}, { /* RNDIS is MSFT's un-official variant of CDC ACM */ USB_INTERFACE_INFO(USB_CLASS_COMM, 2 /* ACM */, 0x0ff), .driver_info = (unsigned long) &rndis_info, }, { /* "ActiveSync" is an undocumented variant of RNDIS, used in WM5 */ USB_INTERFACE_INFO(USB_CLASS_MISC, 1, 1), - .driver_info = (unsigned long) &rndis_info, + .driver_info = (unsigned long) &rndis_poll_status_info, }, { /* RNDIS for tethering */ USB_INTERFACE_INFO(USB_CLASS_WIRELESS_CONTROLLER, 1, 3), diff --git a/include/linux/usb/rndis_host.h b/include/linux/usb/rndis_host.h index 05ef52861988..88fceb718c77 100644 --- a/include/linux/usb/rndis_host.h +++ b/include/linux/usb/rndis_host.h @@ -256,6 +256,8 @@ struct rndis_keepalive_c { /* IN (optionally OUT) */ #define FLAG_RNDIS_PHYM_NOT_WIRELESS 0x0001 #define FLAG_RNDIS_PHYM_WIRELESS 0x0002 +/* Flags for driver_info::data */ +#define RNDIS_DRIVER_DATA_POLL_STATUS 1 /* poll status before control */ extern void rndis_status(struct usbnet *dev, 
struct urb *urb); extern int -- cgit v1.2.3-71-gd317 From f1407d5c66240b33d11a7f1a41d55ccf6a9d7647 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 4 Apr 2011 13:44:59 +0900 Subject: usb: renesas_usbhs: Add Renesas USBHS common code Renesas SuperH has USBHS IP which can switch Host / Function. This driver is designed so that Host / Function may dynamically change. This patch add usb/renesas_usbhs and common code for SuperH USBHS. Signed-off-by: Kuninori Morimoto Signed-off-by: Greg Kroah-Hartman --- drivers/Makefile | 1 + drivers/usb/Kconfig | 2 + drivers/usb/renesas_usbhs/Kconfig | 15 + drivers/usb/renesas_usbhs/Makefile | 7 + drivers/usb/renesas_usbhs/common.c | 394 +++++++++++++++++ drivers/usb/renesas_usbhs/common.h | 225 ++++++++++ drivers/usb/renesas_usbhs/mod.c | 261 +++++++++++ drivers/usb/renesas_usbhs/mod.h | 106 +++++ drivers/usb/renesas_usbhs/pipe.c | 880 +++++++++++++++++++++++++++++++++++++ drivers/usb/renesas_usbhs/pipe.h | 105 +++++ include/linux/usb/renesas_usbhs.h | 149 +++++++ 11 files changed, 2145 insertions(+) create mode 100644 drivers/usb/renesas_usbhs/Kconfig create mode 100644 drivers/usb/renesas_usbhs/Makefile create mode 100644 drivers/usb/renesas_usbhs/common.c create mode 100644 drivers/usb/renesas_usbhs/common.h create mode 100644 drivers/usb/renesas_usbhs/mod.c create mode 100644 drivers/usb/renesas_usbhs/mod.h create mode 100644 drivers/usb/renesas_usbhs/pipe.c create mode 100644 drivers/usb/renesas_usbhs/pipe.h create mode 100644 include/linux/usb/renesas_usbhs.h (limited to 'include/linux') diff --git a/drivers/Makefile b/drivers/Makefile index 3f135b6fb014..ad67b7d4c271 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -67,6 +67,7 @@ obj-$(CONFIG_UWB) += uwb/ obj-$(CONFIG_USB_OTG_UTILS) += usb/otg/ obj-$(CONFIG_USB) += usb/ obj-$(CONFIG_USB_MUSB_HDRC) += usb/musb/ +obj-$(CONFIG_USB_RENESAS_USBHS) += usb/renesas_usbhs/ obj-$(CONFIG_PCI) += usb/ obj-$(CONFIG_USB_GADGET) += usb/gadget/ obj-$(CONFIG_SERIO) += input/serio/ diff --git a/drivers/usb/Kconfig b/drivers/usb/Kconfig index 41b6e51188e4..d299906e4f00 100644 --- a/drivers/usb/Kconfig +++ b/drivers/usb/Kconfig @@ -115,6 +115,8 @@ source "drivers/usb/host/Kconfig" source "drivers/usb/musb/Kconfig" +source "drivers/usb/renesas_usbhs/Kconfig" + source "drivers/usb/class/Kconfig" source "drivers/usb/storage/Kconfig" diff --git a/drivers/usb/renesas_usbhs/Kconfig b/drivers/usb/renesas_usbhs/Kconfig new file mode 100644 index 000000000000..481490e5500a --- /dev/null +++ b/drivers/usb/renesas_usbhs/Kconfig @@ -0,0 +1,15 @@ +# +# Renesas USB Controller Drivers +# + +config USB_RENESAS_USBHS + tristate 'Renesas USBHS controller' + default n + help + Renesas USBHS is a discrete USB host and peripheral controller chip + that supports both full and high speed USB 2.0 data transfers. + It has nine or more configurable endpoints, and endpoint zero. + + Say "y" to link the driver statically, or "m" to build a + dynamically linked module called "renesas_usbhs" and force all + gadget drivers to also be dynamically linked. 
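Before the register-level code, it helps to see what a board has to supply: the probe routine later in this patch insists on platform data with get_id() and get_vbus() callbacks plus one memory resource and one IRQ, while hardware_init(), phy_reset() and the pipe/buswait parameters are optional and are defaulted when absent. A rough board-side sketch, assuming the structure and constant names from this patch's include/linux/usb/renesas_usbhs.h; the register base, IRQ number, buswait value and the fixed gadget role are placeholders:

/*
 * Hypothetical board glue for the renesas_usbhs driver.  All resource
 * values and the always-on VBUS / peripheral-only role stand in for
 * whatever the real board provides.
 */
#include <linux/kernel.h>
#include <linux/ioport.h>
#include <linux/platform_device.h>
#include <linux/usb/renesas_usbhs.h>

static int example_usbhs_get_vbus(struct platform_device *pdev)
{
	return 1;			/* VBUS is always present on this board */
}

static int example_usbhs_get_id(struct platform_device *pdev)
{
	return USBHS_GADGET;		/* no ID pin, run as peripheral only */
}

static struct renesas_usbhs_platform_info example_usbhs_info = {
	.platform_callback = {
		.get_vbus	= example_usbhs_get_vbus,
		.get_id		= example_usbhs_get_id,
	},
	.driver_param = {
		.buswait_bwait	= 4,	/* BUSWAIT setting, board dependent */
	},
};

static struct resource example_usbhs_resources[] = {
	{
		.start	= 0xe6890000,	/* placeholder register window */
		.end	= 0xe6890000 + 0x100 - 1,
		.flags	= IORESOURCE_MEM,
	},
	{
		.start	= 100,		/* placeholder IRQ number */
		.end	= 100,
		.flags	= IORESOURCE_IRQ,
	},
};

static struct platform_device example_usbhs_device = {
	.name		= "renesas_usbhs",	/* matches the platform_driver name */
	.id		= -1,
	.dev		= {
		.platform_data	= &example_usbhs_info,
	},
	.resource	= example_usbhs_resources,
	.num_resources	= ARRAY_SIZE(example_usbhs_resources),
};

Registration would then be a plain platform_device_register(&example_usbhs_device) from the board's init code; from there the driver's hotplug notifier picks host or gadget mode from whatever get_id() and get_vbus() report.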
diff --git a/drivers/usb/renesas_usbhs/Makefile b/drivers/usb/renesas_usbhs/Makefile new file mode 100644 index 000000000000..d76f3dd3b9d1 --- /dev/null +++ b/drivers/usb/renesas_usbhs/Makefile @@ -0,0 +1,7 @@ +# +# for Renesas USB +# + +obj-$(CONFIG_USB_RENESAS_USBHS) += renesas_usbhs.o + +renesas_usbhs-y := common.o mod.o pipe.o diff --git a/drivers/usb/renesas_usbhs/common.c b/drivers/usb/renesas_usbhs/common.c new file mode 100644 index 000000000000..d9ad60d1c156 --- /dev/null +++ b/drivers/usb/renesas_usbhs/common.c @@ -0,0 +1,394 @@ +/* + * Renesas USB driver + * + * Copyright (C) 2011 Renesas Solutions Corp. + * Kuninori Morimoto + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ +#include +#include +#include +#include +#include +#include "./common.h" + +/* + * platform call back + * + * renesas usb support platform callback function. + * Below macro call it. + * if platform doesn't have callback, it return 0 (no error) + */ +#define usbhs_platform_call(priv, func, args...)\ + (!(priv) ? -ENODEV : \ + !((priv)->pfunc->func) ? 0 : \ + (priv)->pfunc->func(args)) + +/* + * common functions + */ +u16 usbhs_read(struct usbhs_priv *priv, u32 reg) +{ + return ioread16(priv->base + reg); +} + +void usbhs_write(struct usbhs_priv *priv, u32 reg, u16 data) +{ + iowrite16(data, priv->base + reg); +} + +void usbhs_bset(struct usbhs_priv *priv, u32 reg, u16 mask, u16 data) +{ + u16 val = usbhs_read(priv, reg); + + val &= ~mask; + val |= data & mask; + + usbhs_write(priv, reg, val); +} + +/* + * syscfg functions + */ +void usbhs_sys_clock_ctrl(struct usbhs_priv *priv, int enable) +{ + usbhs_bset(priv, SYSCFG, SCKE, enable ? SCKE : 0); +} + +void usbhs_sys_hispeed_ctrl(struct usbhs_priv *priv, int enable) +{ + usbhs_bset(priv, SYSCFG, HSE, enable ? HSE : 0); +} + +void usbhs_sys_usb_ctrl(struct usbhs_priv *priv, int enable) +{ + usbhs_bset(priv, SYSCFG, USBE, enable ? USBE : 0); +} + +void usbhs_sys_host_ctrl(struct usbhs_priv *priv, int enable) +{ + u16 mask = DCFM | DRPD | DPRPU; + u16 val = DCFM | DRPD; + + /* + * if enable + * + * - select Host mode + * - D+ Line/D- Line Pull-down + */ + usbhs_bset(priv, SYSCFG, mask, enable ? val : 0); +} + +void usbhs_sys_function_ctrl(struct usbhs_priv *priv, int enable) +{ + u16 mask = DCFM | DRPD | DPRPU; + u16 val = DPRPU; + + /* + * if enable + * + * - select Function mode + * - D+ Line Pull-up + */ + usbhs_bset(priv, SYSCFG, mask, enable ? 
val : 0); +} + +/* + * frame functions + */ +int usbhs_frame_get_num(struct usbhs_priv *priv) +{ + return usbhs_read(priv, FRMNUM) & FRNM_MASK; +} + +/* + * local functions + */ +static struct usbhs_priv *usbhsc_pdev_to_priv(struct platform_device *pdev) +{ + return dev_get_drvdata(&pdev->dev); +} + +static void usbhsc_bus_ctrl(struct usbhs_priv *priv, int enable) +{ + int wait = usbhs_get_dparam(priv, buswait_bwait); + u16 data = 0; + + if (enable) { + /* set bus wait if platform have */ + if (wait) + usbhs_bset(priv, BUSWAIT, 0x000F, wait); + } + usbhs_write(priv, DVSTCTR, data); +} + +/* + * platform default param + */ +static u32 usbhsc_default_pipe_type[] = { + USB_ENDPOINT_XFER_CONTROL, + USB_ENDPOINT_XFER_ISOC, + USB_ENDPOINT_XFER_ISOC, + USB_ENDPOINT_XFER_BULK, + USB_ENDPOINT_XFER_BULK, + USB_ENDPOINT_XFER_BULK, + USB_ENDPOINT_XFER_INT, + USB_ENDPOINT_XFER_INT, + USB_ENDPOINT_XFER_INT, + USB_ENDPOINT_XFER_INT, +}; + +/* + * driver callback functions + */ +static void usbhsc_notify_hotplug(struct work_struct *work) +{ + struct usbhs_priv *priv = container_of(work, + struct usbhs_priv, + notify_hotplug_work); + struct platform_device *pdev = usbhs_priv_to_pdev(priv); + struct usbhs_mod *mod = usbhs_mod_get_current(priv); + int id; + int enable; + int ret; + + /* + * get vbus status from platform + */ + enable = usbhs_platform_call(priv, get_vbus, pdev); + + /* + * get id from platform + */ + id = usbhs_platform_call(priv, get_id, pdev); + + if (enable && !mod) { + ret = usbhs_mod_change(priv, id); + if (ret < 0) + return; + + dev_dbg(&pdev->dev, "%s enable\n", __func__); + + /* enable PM */ + pm_runtime_get_sync(&pdev->dev); + + /* USB on */ + usbhs_sys_clock_ctrl(priv, enable); + usbhsc_bus_ctrl(priv, enable); + + /* module start */ + usbhs_mod_call(priv, start, priv); + + } else if (!enable && mod) { + dev_dbg(&pdev->dev, "%s disable\n", __func__); + + /* module stop */ + usbhs_mod_call(priv, stop, priv); + + /* USB off */ + usbhsc_bus_ctrl(priv, enable); + usbhs_sys_clock_ctrl(priv, enable); + + /* disable PM */ + pm_runtime_put_sync(&pdev->dev); + + usbhs_mod_change(priv, -1); + + /* reset phy for next connection */ + usbhs_platform_call(priv, phy_reset, pdev); + } +} + +static int usbhsc_drvcllbck_notify_hotplug(struct platform_device *pdev) +{ + struct usbhs_priv *priv = usbhsc_pdev_to_priv(pdev); + + /* + * This functions will be called in interrupt. 
+ * To make sure safety context, + * use workqueue for usbhs_notify_hotplug + */ + schedule_work(&priv->notify_hotplug_work); + return 0; +} + +/* + * platform functions + */ +static int __devinit usbhs_probe(struct platform_device *pdev) +{ + struct renesas_usbhs_platform_info *info = pdev->dev.platform_data; + struct renesas_usbhs_driver_callback *dfunc; + struct usbhs_priv *priv; + struct resource *res; + unsigned int irq; + int ret; + + /* check platform information */ + if (!info || + !info->platform_callback.get_id || + !info->platform_callback.get_vbus) { + dev_err(&pdev->dev, "no platform information\n"); + return -EINVAL; + } + + /* platform data */ + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + irq = platform_get_irq(pdev, 0); + if (!res || (int)irq <= 0) { + dev_err(&pdev->dev, "Not enough Renesas USB platform resources.\n"); + return -ENODEV; + } + + /* usb private data */ + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) { + dev_err(&pdev->dev, "Could not allocate priv\n"); + return -ENOMEM; + } + + priv->base = ioremap_nocache(res->start, resource_size(res)); + if (!priv->base) { + dev_err(&pdev->dev, "ioremap error.\n"); + ret = -ENOMEM; + goto probe_end_kfree; + } + + /* + * care platform info + */ + priv->pfunc = &info->platform_callback; + priv->dparam = &info->driver_param; + + /* set driver callback functions for platform */ + dfunc = &info->driver_callback; + dfunc->notify_hotplug = usbhsc_drvcllbck_notify_hotplug; + + /* set default param if platform doesn't have */ + if (!priv->dparam->pipe_type) { + priv->dparam->pipe_type = usbhsc_default_pipe_type; + priv->dparam->pipe_size = ARRAY_SIZE(usbhsc_default_pipe_type); + } + + /* + * priv settings + */ + priv->irq = irq; + priv->pdev = pdev; + INIT_WORK(&priv->notify_hotplug_work, usbhsc_notify_hotplug); + spin_lock_init(usbhs_priv_to_lock(priv)); + + /* call pipe and module init */ + ret = usbhs_pipe_probe(priv); + if (ret < 0) + goto probe_end_mod_exit; + + ret = usbhs_mod_probe(priv); + if (ret < 0) + goto probe_end_iounmap; + + /* dev_set_drvdata should be called after usbhs_mod_init */ + dev_set_drvdata(&pdev->dev, priv); + + /* + * deviece reset here because + * USB device might be used in boot loader. + */ + usbhs_sys_clock_ctrl(priv, 0); + + /* + * platform call + * + * USB phy setup might depend on CPU/Board. + * If platform has its callback functions, + * call it here. 
+ */ + ret = usbhs_platform_call(priv, hardware_init, pdev); + if (ret < 0) { + dev_err(&pdev->dev, "platform prove failed.\n"); + goto probe_end_pipe_exit; + } + + /* reset phy for connection */ + usbhs_platform_call(priv, phy_reset, pdev); + + /* + * manual call notify_hotplug for cold plug + */ + pm_runtime_enable(&pdev->dev); + ret = usbhsc_drvcllbck_notify_hotplug(pdev); + if (ret < 0) + goto probe_end_call_remove; + + dev_info(&pdev->dev, "probed\n"); + + return ret; + +probe_end_call_remove: + usbhs_platform_call(priv, hardware_exit, pdev); +probe_end_pipe_exit: + usbhs_pipe_remove(priv); +probe_end_mod_exit: + usbhs_mod_remove(priv); +probe_end_iounmap: + iounmap(priv->base); +probe_end_kfree: + kfree(priv); + + dev_info(&pdev->dev, "probe failed\n"); + + return ret; +} + +static int __devexit usbhs_remove(struct platform_device *pdev) +{ + struct usbhs_priv *priv = usbhsc_pdev_to_priv(pdev); + + dev_dbg(&pdev->dev, "usb remove\n"); + + pm_runtime_disable(&pdev->dev); + + usbhsc_bus_ctrl(priv, 0); + + usbhs_platform_call(priv, hardware_exit, pdev); + usbhs_pipe_remove(priv); + usbhs_mod_remove(priv); + iounmap(priv->base); + kfree(priv); + + return 0; +} + +static struct platform_driver renesas_usbhs_driver = { + .driver = { + .name = "renesas_usbhs", + }, + .probe = usbhs_probe, + .remove = __devexit_p(usbhs_remove), +}; + +static int __init usbhs_init(void) +{ + return platform_driver_register(&renesas_usbhs_driver); +} + +static void __exit usbhs_exit(void) +{ + platform_driver_unregister(&renesas_usbhs_driver); +} + +module_init(usbhs_init); +module_exit(usbhs_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Renesas USB driver"); +MODULE_AUTHOR("Kuninori Morimoto "); diff --git a/drivers/usb/renesas_usbhs/common.h b/drivers/usb/renesas_usbhs/common.h new file mode 100644 index 000000000000..f1a2b62f93f9 --- /dev/null +++ b/drivers/usb/renesas_usbhs/common.h @@ -0,0 +1,225 @@ +/* + * Renesas USB driver + * + * Copyright (C) 2011 Renesas Solutions Corp. + * Kuninori Morimoto + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ +#ifndef RENESAS_USB_DRIVER_H +#define RENESAS_USB_DRIVER_H + +#include +#include + +struct usbhs_priv; + +#include "./mod.h" +#include "./pipe.h" + +/* + * + * register define + * + */ +#define SYSCFG 0x0000 +#define BUSWAIT 0x0002 +#define DVSTCTR 0x0008 +#define CFIFO 0x0014 +#define CFIFOSEL 0x0020 +#define CFIFOCTR 0x0022 +#define INTENB0 0x0030 +#define INTENB1 0x0032 +#define BRDYENB 0x0036 +#define NRDYENB 0x0038 +#define BEMPENB 0x003A +#define INTSTS0 0x0040 +#define INTSTS1 0x0042 +#define BRDYSTS 0x0046 +#define NRDYSTS 0x0048 +#define BEMPSTS 0x004A +#define FRMNUM 0x004C +#define USBREQ 0x0054 /* USB request type register */ +#define USBVAL 0x0056 /* USB request value register */ +#define USBINDX 0x0058 /* USB request index register */ +#define USBLENG 0x005A /* USB request length register */ +#define DCPCFG 0x005C +#define DCPMAXP 0x005E +#define DCPCTR 0x0060 +#define PIPESEL 0x0064 +#define PIPECFG 0x0068 +#define PIPEBUF 0x006A +#define PIPEMAXP 0x006C +#define PIPEPERI 0x006E +#define PIPEnCTR 0x0070 + +/* SYSCFG */ +#define SCKE (1 << 10) /* USB Module Clock Enable */ +#define HSE (1 << 7) /* High-Speed Operation Enable */ +#define DCFM (1 << 6) /* Controller Function Select */ +#define DRPD (1 << 5) /* D+ Line/D- Line Resistance Control */ +#define DPRPU (1 << 4) /* D+ Line Resistance Control */ +#define USBE (1 << 0) /* USB Module Operation Enable */ + +/* DVSTCTR */ +#define EXTLP (1 << 10) /* Controls the EXTLP pin output state */ +#define PWEN (1 << 9) /* Controls the PWEN pin output state */ +#define RHST (0x7) /* Reset Handshake */ +#define RHST_LOW_SPEED 1 /* Low-speed connection */ +#define RHST_FULL_SPEED 2 /* Full-speed connection */ +#define RHST_HIGH_SPEED 3 /* High-speed connection */ + +/* CFIFOSEL */ +#define MBW_32 (0x2 << 10) /* CFIFO Port Access Bit Width */ + +/* CFIFOCTR */ +#define BVAL (1 << 15) /* Buffer Memory Enable Flag */ +#define BCLR (1 << 14) /* CPU buffer clear */ +#define FRDY (1 << 13) /* FIFO Port Ready */ +#define DTLN_MASK (0x0FFF) /* Receive Data Length */ + +/* INTENB0 */ +#define VBSE (1 << 15) /* Enable IRQ VBUS_0 and VBUSIN_0 */ +#define RSME (1 << 14) /* Enable IRQ Resume */ +#define SOFE (1 << 13) /* Enable IRQ Frame Number Update */ +#define DVSE (1 << 12) /* Enable IRQ Device State Transition */ +#define CTRE (1 << 11) /* Enable IRQ Control Stage Transition */ +#define BEMPE (1 << 10) /* Enable IRQ Buffer Empty */ +#define NRDYE (1 << 9) /* Enable IRQ Buffer Not Ready Response */ +#define BRDYE (1 << 8) /* Enable IRQ Buffer Ready */ + +/* INTENB1 */ +#define BCHGE (1 << 14) /* USB Bus Change Interrupt Enable */ +#define DTCHE (1 << 12) /* Disconnection Detect Interrupt Enable */ +#define ATTCHE (1 << 11) /* Connection Detect Interrupt Enable */ +#define EOFERRE (1 << 6) /* EOF Error Detect Interrupt Enable */ +#define SIGNE (1 << 5) /* Setup Transaction Error Interrupt Enable */ +#define SACKE (1 << 4) /* Setup Transaction ACK Interrupt Enable */ + +/* INTSTS0 */ +#define DVST (1 << 12) /* Device State Transition Interrupt Status */ +#define CTRT (1 << 11) /* Control Stage Interrupt Status */ +#define BEMP (1 << 10) /* Buffer Empty Interrupt Status */ +#define BRDY (1 << 8) /* Buffer Ready Interrupt Status */ +#define VBSTS (1 << 7) /* VBUS_0 and VBUSIN_0 Input Status */ +#define VALID (1 << 3) /* USB 
Request Receive */ + +#define DVSQ_MASK (0x3 << 4) /* Device State */ +#define POWER_STATE (0 << 4) +#define DEFAULT_STATE (1 << 4) +#define ADDRESS_STATE (2 << 4) +#define CONFIGURATION_STATE (3 << 4) + +#define CTSQ_MASK (0x7) /* Control Transfer Stage */ +#define IDLE_SETUP_STAGE 0 /* Idle stage or setup stage */ +#define READ_DATA_STAGE 1 /* Control read data stage */ +#define READ_STATUS_STAGE 2 /* Control read status stage */ +#define WRITE_DATA_STAGE 3 /* Control write data stage */ +#define WRITE_STATUS_STAGE 4 /* Control write status stage */ +#define NODATA_STATUS_STAGE 5 /* Control write NoData status stage */ +#define SEQUENCE_ERROR 6 /* Control transfer sequence error */ + +/* PIPECFG */ +/* DCPCFG */ +#define TYPE_NONE (0 << 14) /* Transfer Type */ +#define TYPE_BULK (1 << 14) +#define TYPE_INT (2 << 14) +#define TYPE_ISO (3 << 14) +#define DBLB (1 << 9) /* Double Buffer Mode */ +#define SHTNAK (1 << 7) /* Pipe Disable in Transfer End */ +#define DIR_OUT (1 << 4) /* Transfer Direction */ + +/* PIPEMAXP */ +/* DCPMAXP */ +#define DEVSEL_MASK (0xF << 12) /* Device Select */ +#define DCP_MAXP_MASK (0x7F) +#define PIPE_MAXP_MASK (0x7FF) + +/* PIPEBUF */ +#define BUFSIZE_SHIFT 10 +#define BUFSIZE_MASK (0x1F << BUFSIZE_SHIFT) +#define BUFNMB_MASK (0xFF) + +/* PIPEnCTR */ +/* DCPCTR */ +#define BSTS (1 << 15) /* Buffer Status */ +#define CSSTS (1 << 12) /* CSSTS Status */ +#define SQCLR (1 << 8) /* Toggle Bit Clear */ +#define ACLRM (1 << 9) /* Buffer Auto-Clear Mode */ +#define PBUSY (1 << 5) /* Pipe Busy */ +#define PID_MASK (0x3) /* Response PID */ +#define PID_NAK 0 +#define PID_BUF 1 +#define PID_STALL10 2 +#define PID_STALL11 3 + +#define CCPL (1 << 2) /* Control Transfer End Enable */ + +/* FRMNUM */ +#define FRNM_MASK (0x7FF) + +/* + * struct + */ +struct usbhs_priv { + + void __iomem *base; + unsigned int irq; + + struct renesas_usbhs_platform_callback *pfunc; + struct renesas_usbhs_driver_param *dparam; + + struct work_struct notify_hotplug_work; + struct platform_device *pdev; + + spinlock_t lock; + + /* + * module control + */ + struct usbhs_mod_info mod_info; + + /* + * pipe control + */ + struct usbhs_pipe_info pipe_info; +}; + +/* + * common + */ +u16 usbhs_read(struct usbhs_priv *priv, u32 reg); +void usbhs_write(struct usbhs_priv *priv, u32 reg, u16 data); +void usbhs_bset(struct usbhs_priv *priv, u32 reg, u16 mask, u16 data); + +/* + * sysconfig + */ +void usbhs_sys_clock_ctrl(struct usbhs_priv *priv, int enable); +void usbhs_sys_hispeed_ctrl(struct usbhs_priv *priv, int enable); +void usbhs_sys_usb_ctrl(struct usbhs_priv *priv, int enable); +void usbhs_sys_host_ctrl(struct usbhs_priv *priv, int enable); +void usbhs_sys_function_ctrl(struct usbhs_priv *priv, int enable); + +/* + * frame + */ +int usbhs_frame_get_num(struct usbhs_priv *priv); + +/* + * data + */ +#define usbhs_get_dparam(priv, param) (priv->dparam->param) +#define usbhs_priv_to_pdev(priv) (priv->pdev) +#define usbhs_priv_to_dev(priv) (&priv->pdev->dev) +#define usbhs_priv_to_lock(priv) (&priv->lock) + +#endif /* RENESAS_USB_DRIVER_H */ diff --git a/drivers/usb/renesas_usbhs/mod.c b/drivers/usb/renesas_usbhs/mod.c new file mode 100644 index 000000000000..4a3398484cd7 --- /dev/null +++ b/drivers/usb/renesas_usbhs/mod.c @@ -0,0 +1,261 @@ +/* + * Renesas USB driver + * + * Copyright (C) 2011 Renesas Solutions Corp. 
+ * Kuninori Morimoto + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ +#include + +#include "./common.h" +#include "./mod.h" + +#define usbhs_priv_to_modinfo(priv) (&priv->mod_info) + +/* + * host / gadget functions + * + * renesas_usbhs host/gadget can register itself by below functions. + * these functions are called when probe + * + */ +void usbhs_mod_register(struct usbhs_priv *priv, struct usbhs_mod *mod, int id) +{ + struct usbhs_mod_info *info = usbhs_priv_to_modinfo(priv); + + info->mod[id] = mod; + mod->priv = priv; +} + +struct usbhs_mod *usbhs_mod_get(struct usbhs_priv *priv, int id) +{ + struct usbhs_mod_info *info = usbhs_priv_to_modinfo(priv); + struct usbhs_mod *ret = NULL; + + switch (id) { + case USBHS_HOST: + case USBHS_GADGET: + ret = info->mod[id]; + break; + } + + return ret; +} + +int usbhs_mod_is_host(struct usbhs_priv *priv, struct usbhs_mod *mod) +{ + struct usbhs_mod_info *info = usbhs_priv_to_modinfo(priv); + + if (!mod) + return -EINVAL; + + return info->mod[USBHS_HOST] == mod; +} + +struct usbhs_mod *usbhs_mod_get_current(struct usbhs_priv *priv) +{ + struct usbhs_mod_info *info = usbhs_priv_to_modinfo(priv); + + return info->curt; +} + +int usbhs_mod_change(struct usbhs_priv *priv, int id) +{ + struct usbhs_mod_info *info = usbhs_priv_to_modinfo(priv); + struct usbhs_mod *mod = NULL; + int ret = 0; + + /* id < 0 mean no current */ + switch (id) { + case USBHS_HOST: + case USBHS_GADGET: + mod = info->mod[id]; + break; + default: + ret = -EINVAL; + } + info->curt = mod; + + return ret; +} + +static irqreturn_t usbhs_interrupt(int irq, void *data); +int usbhs_mod_probe(struct usbhs_priv *priv) +{ + struct device *dev = usbhs_priv_to_dev(priv); + int ret; + + /* irq settings */ + ret = request_irq(priv->irq, usbhs_interrupt, + IRQF_DISABLED, dev_name(dev), priv); + if (ret) + dev_err(dev, "irq request err\n"); + + return ret; +} + +void usbhs_mod_remove(struct usbhs_priv *priv) +{ + free_irq(priv->irq, priv); +} + +/* + * status functions + */ +int usbhs_status_get_usb_speed(struct usbhs_irq_state *irq_state) +{ + switch (irq_state->dvstctr & RHST) { + case RHST_LOW_SPEED: + return USB_SPEED_LOW; + case RHST_FULL_SPEED: + return USB_SPEED_FULL; + case RHST_HIGH_SPEED: + return USB_SPEED_HIGH; + } + + return USB_SPEED_UNKNOWN; +} + +int usbhs_status_get_device_state(struct usbhs_irq_state *irq_state) +{ + int state = irq_state->intsts0 & DVSQ_MASK; + + switch (state) { + case POWER_STATE: + case DEFAULT_STATE: + case ADDRESS_STATE: + case CONFIGURATION_STATE: + return state; + } + + return -EIO; +} + +int usbhs_status_get_ctrl_stage(struct usbhs_irq_state *irq_state) +{ + /* + * return value + * + * IDLE_SETUP_STAGE + * READ_DATA_STAGE + * READ_STATUS_STAGE + * WRITE_DATA_STAGE + * WRITE_STATUS_STAGE + * NODATA_STATUS_STAGE + * SEQUENCE_ERROR + */ + return (int)irq_state->intsts0 & CTSQ_MASK; +} + +static void usbhs_status_get_each_irq(struct usbhs_priv *priv, + struct usbhs_irq_state *state) +{ + struct usbhs_mod *mod = usbhs_mod_get_current(priv); + + state->intsts0 = usbhs_read(priv, INTSTS0); + state->intsts1 = usbhs_read(priv, INTSTS1); + 
+ state->brdysts = usbhs_read(priv, BRDYSTS); + state->nrdysts = usbhs_read(priv, NRDYSTS); + state->bempsts = usbhs_read(priv, BEMPSTS); + + state->dvstctr = usbhs_read(priv, DVSTCTR); + + /* mask */ + state->bempsts &= mod->irq_bempsts; + state->brdysts &= mod->irq_brdysts; +} + +/* + * interrupt + */ +#define INTSTS0_MAGIC 0xF800 /* acknowledge magical interrupt sources */ +#define INTSTS1_MAGIC 0xA870 /* acknowledge magical interrupt sources */ +static irqreturn_t usbhs_interrupt(int irq, void *data) +{ + struct usbhs_priv *priv = data; + struct usbhs_irq_state irq_state; + + usbhs_status_get_each_irq(priv, &irq_state); + + /* + * clear interrupt + * + * The hardware is _very_ picky to clear interrupt bit. + * Especially INTSTS0_MAGIC, INTSTS1_MAGIC value. + * + * see + * "Operation" + * - "Control Transfer (DCP)" + * - Function :: VALID bit should 0 + */ + usbhs_write(priv, INTSTS0, ~irq_state.intsts0 & INTSTS0_MAGIC); + usbhs_write(priv, INTSTS1, ~irq_state.intsts1 & INTSTS1_MAGIC); + + usbhs_write(priv, BRDYSTS, 0); + usbhs_write(priv, NRDYSTS, 0); + usbhs_write(priv, BEMPSTS, 0); + + /* + * call irq callback functions + * see also + * usbhs_irq_setting_update + */ + if (irq_state.intsts0 & DVST) + usbhs_mod_call(priv, irq_dev_state, priv, &irq_state); + + if (irq_state.intsts0 & CTRT) + usbhs_mod_call(priv, irq_ctrl_stage, priv, &irq_state); + + if (irq_state.intsts0 & BEMP) + usbhs_mod_call(priv, irq_empty, priv, &irq_state); + + if (irq_state.intsts0 & BRDY) + usbhs_mod_call(priv, irq_ready, priv, &irq_state); + + return IRQ_HANDLED; +} + +void usbhs_irq_callback_update(struct usbhs_priv *priv, struct usbhs_mod *mod) +{ + u16 intenb0 = 0; + + usbhs_write(priv, INTENB0, 0); + + usbhs_write(priv, BEMPENB, 0); + usbhs_write(priv, BRDYENB, 0); + + /* + * see also + * usbhs_interrupt + */ + + /* + * it don't enable DVSE (intenb0) here + * but "mod->irq_dev_state" will be called. + */ + + if (mod->irq_ctrl_stage) + intenb0 |= CTRE; + + if (mod->irq_empty && mod->irq_bempsts) { + usbhs_write(priv, BEMPENB, mod->irq_bempsts); + intenb0 |= BEMPE; + } + + if (mod->irq_ready && mod->irq_brdysts) { + usbhs_write(priv, BRDYENB, mod->irq_brdysts); + intenb0 |= BRDYE; + } + + usbhs_write(priv, INTENB0, intenb0); +} diff --git a/drivers/usb/renesas_usbhs/mod.h b/drivers/usb/renesas_usbhs/mod.h new file mode 100644 index 000000000000..bd873d5b432a --- /dev/null +++ b/drivers/usb/renesas_usbhs/mod.h @@ -0,0 +1,106 @@ +/* + * Renesas USB driver + * + * Copyright (C) 2011 Renesas Solutions Corp. + * Kuninori Morimoto + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ +#ifndef RENESAS_USB_MOD_H +#define RENESAS_USB_MOD_H + +#include +#include +#include "./common.h" + +/* + * struct + */ +struct usbhs_irq_state { + u16 intsts0; + u16 intsts1; + u16 brdysts; + u16 nrdysts; + u16 bempsts; + u16 dvstctr; +}; + +struct usbhs_mod { + char *name; + + /* + * entry point from common.c + */ + int (*start)(struct usbhs_priv *priv); + int (*stop)(struct usbhs_priv *priv); + + /* INTSTS0 :: DVST (DVSQ) */ + int (*irq_dev_state)(struct usbhs_priv *priv, + struct usbhs_irq_state *irq_state); + + /* INTSTS0 :: CTRT (CTSQ) */ + int (*irq_ctrl_stage)(struct usbhs_priv *priv, + struct usbhs_irq_state *irq_state); + + /* INTSTS0 :: BEMP */ + /* BEMPSTS */ + int (*irq_empty)(struct usbhs_priv *priv, + struct usbhs_irq_state *irq_state); + u16 irq_bempsts; + + /* INTSTS0 :: BRDY */ + /* BRDYSTS */ + int (*irq_ready)(struct usbhs_priv *priv, + struct usbhs_irq_state *irq_state); + u16 irq_brdysts; + + struct usbhs_priv *priv; +}; + +struct usbhs_mod_info { + struct usbhs_mod *mod[USBHS_MAX]; + struct usbhs_mod *curt; /* current mod */ +}; + +/* + * for host/gadget module + */ +struct usbhs_mod *usbhs_mod_get(struct usbhs_priv *priv, int id); +struct usbhs_mod *usbhs_mod_get_current(struct usbhs_priv *priv); +void usbhs_mod_register(struct usbhs_priv *priv, struct usbhs_mod *usb, int id); +int usbhs_mod_is_host(struct usbhs_priv *priv, struct usbhs_mod *mod); +int usbhs_mod_change(struct usbhs_priv *priv, int id); +int usbhs_mod_probe(struct usbhs_priv *priv); +void usbhs_mod_remove(struct usbhs_priv *priv); + +/* + * status functions + */ +int usbhs_status_get_usb_speed(struct usbhs_irq_state *irq_state); +int usbhs_status_get_device_state(struct usbhs_irq_state *irq_state); +int usbhs_status_get_ctrl_stage(struct usbhs_irq_state *irq_state); + +/* + * callback functions + */ +void usbhs_irq_callback_update(struct usbhs_priv *priv, struct usbhs_mod *mod); + + +#define usbhs_mod_call(priv, func, param...) \ + ({ \ + struct usbhs_mod *mod; \ + mod = usbhs_mod_get_current(priv); \ + !mod ? -ENODEV : \ + !mod->func ? 0 : \ + mod->func(param); \ + }) + +#endif /* RENESAS_USB_MOD_H */ diff --git a/drivers/usb/renesas_usbhs/pipe.c b/drivers/usb/renesas_usbhs/pipe.c new file mode 100644 index 000000000000..b7a9137f599b --- /dev/null +++ b/drivers/usb/renesas_usbhs/pipe.c @@ -0,0 +1,880 @@ +/* + * Renesas USB driver + * + * Copyright (C) 2011 Renesas Solutions Corp. + * Kuninori Morimoto + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ +#include +#include +#include +#include "./common.h" +#include "./pipe.h" + +/* + * macros + */ +#define usbhsp_priv_to_pipeinfo(pr) (&(pr)->pipe_info) +#define usbhsp_pipe_to_priv(p) ((p)->priv) + +#define usbhsp_addr_offset(p) ((usbhs_pipe_number(p) - 1) * 2) + +#define usbhsp_is_dcp(p) ((p)->priv->pipe_info.pipe == (p)) + +#define usbhsp_flags_set(p, f) ((p)->flags |= USBHS_PIPE_FLAGS_##f) +#define usbhsp_flags_clr(p, f) ((p)->flags &= ~USBHS_PIPE_FLAGS_##f) +#define usbhsp_flags_has(p, f) ((p)->flags & USBHS_PIPE_FLAGS_##f) +#define usbhsp_flags_init(p) do {(p)->flags = 0; } while (0) + +#define usbhsp_type(p) ((p)->pipe_type) +#define usbhsp_type_is(p, t) ((p)->pipe_type == t) + +/* + * for debug + */ +static char *usbhsp_pipe_name[] = { + [USB_ENDPOINT_XFER_CONTROL] = "DCP", + [USB_ENDPOINT_XFER_BULK] = "BULK", + [USB_ENDPOINT_XFER_INT] = "INT", + [USB_ENDPOINT_XFER_ISOC] = "ISO", +}; + +/* + * usb request functions + */ +void usbhs_usbreq_get_val(struct usbhs_priv *priv, struct usb_ctrlrequest *req) +{ + u16 val; + + val = usbhs_read(priv, USBREQ); + req->bRequest = (val >> 8) & 0xFF; + req->bRequestType = (val >> 0) & 0xFF; + + req->wValue = usbhs_read(priv, USBVAL); + req->wIndex = usbhs_read(priv, USBINDX); + req->wLength = usbhs_read(priv, USBLENG); +} + +void usbhs_usbreq_set_val(struct usbhs_priv *priv, struct usb_ctrlrequest *req) +{ + usbhs_write(priv, USBREQ, (req->bRequest << 8) | req->bRequestType); + usbhs_write(priv, USBVAL, req->wValue); + usbhs_write(priv, USBINDX, req->wIndex); + usbhs_write(priv, USBLENG, req->wLength); +} + +/* + * DCPCTR/PIPEnCTR functions + */ +static void usbhsp_pipectrl_set(struct usbhs_pipe *pipe, u16 mask, u16 val) +{ + struct usbhs_priv *priv = usbhsp_pipe_to_priv(pipe); + int offset = usbhsp_addr_offset(pipe); + + if (usbhsp_is_dcp(pipe)) + usbhs_bset(priv, DCPCTR, mask, val); + else + usbhs_bset(priv, PIPEnCTR + offset, mask, val); +} + +static u16 usbhsp_pipectrl_get(struct usbhs_pipe *pipe) +{ + struct usbhs_priv *priv = usbhsp_pipe_to_priv(pipe); + int offset = usbhsp_addr_offset(pipe); + + if (usbhsp_is_dcp(pipe)) + return usbhs_read(priv, DCPCTR); + else + return usbhs_read(priv, PIPEnCTR + offset); +} + +/* + * DCP/PIPE functions + */ +static void __usbhsp_pipe_xxx_set(struct usbhs_pipe *pipe, + u16 dcp_reg, u16 pipe_reg, + u16 mask, u16 val) +{ + struct usbhs_priv *priv = usbhsp_pipe_to_priv(pipe); + + if (usbhsp_is_dcp(pipe)) + usbhs_bset(priv, dcp_reg, mask, val); + else + usbhs_bset(priv, pipe_reg, mask, val); +} + +static u16 __usbhsp_pipe_xxx_get(struct usbhs_pipe *pipe, + u16 dcp_reg, u16 pipe_reg) +{ + struct usbhs_priv *priv = usbhsp_pipe_to_priv(pipe); + + if (usbhsp_is_dcp(pipe)) + return usbhs_read(priv, dcp_reg); + else + return usbhs_read(priv, pipe_reg); +} + +/* + * DCPCFG/PIPECFG functions + */ +static void usbhsp_pipe_cfg_set(struct usbhs_pipe *pipe, u16 mask, u16 val) +{ + __usbhsp_pipe_xxx_set(pipe, DCPCFG, PIPECFG, mask, val); +} + +/* + * PIPEBUF + */ +static void usbhsp_pipe_buf_set(struct usbhs_pipe *pipe, u16 mask, u16 val) +{ + if (usbhsp_is_dcp(pipe)) + return; + + __usbhsp_pipe_xxx_set(pipe, 0, PIPEBUF, mask, val); +} + +/* + * DCPMAXP/PIPEMAXP + */ +static void usbhsp_pipe_maxp_set(struct usbhs_pipe *pipe, u16 mask, u16 val) +{ + __usbhsp_pipe_xxx_set(pipe, DCPMAXP, PIPEMAXP, mask, 
val); +} + +static u16 usbhsp_pipe_maxp_get(struct usbhs_pipe *pipe) +{ + return __usbhsp_pipe_xxx_get(pipe, DCPMAXP, PIPEMAXP); +} + +/* + * pipe control functions + */ +static void usbhsp_pipe_select(struct usbhs_pipe *pipe) +{ + struct usbhs_priv *priv = usbhsp_pipe_to_priv(pipe); + + /* + * On pipe, this is necessary before + * accesses to below registers. + * + * PIPESEL : usbhsp_pipe_select + * PIPECFG : usbhsp_pipe_cfg_xxx + * PIPEBUF : usbhsp_pipe_buf_xxx + * PIPEMAXP : usbhsp_pipe_maxp_xxx + * PIPEPERI + */ + + /* + * if pipe is dcp, no pipe is selected. + * it is no problem, because dcp have its register + */ + usbhs_write(priv, PIPESEL, 0xF & usbhs_pipe_number(pipe)); +} + +static int usbhsp_pipe_barrier(struct usbhs_pipe *pipe) +{ + struct usbhs_priv *priv = usbhsp_pipe_to_priv(pipe); + struct device *dev = usbhs_priv_to_dev(priv); + int timeout = 1024; + u16 val; + + /* + * make sure.... + * + * Modify these bits when CSSTS = 0, PID = NAK, and no pipe number is + * specified by the CURPIPE bits. + * When changing the setting of this bit after changing + * the PID bits for the selected pipe from BUF to NAK, + * check that CSSTS = 0 and PBUSY = 0. + */ + + /* + * CURPIPE bit = 0 + * + * see also + * "Operation" + * - "Pipe Control" + * - "Pipe Control Registers Switching Procedure" + */ + usbhs_write(priv, CFIFOSEL, 0); + + do { + val = usbhsp_pipectrl_get(pipe); + val &= CSSTS | PID_MASK; + if (!val) + return 0; + + udelay(10); + + } while (timeout--); + + /* + * force NAK + */ + timeout = 1024; + usbhs_fifo_disable(pipe); + do { + val = usbhsp_pipectrl_get(pipe); + val &= PBUSY; + if (!val) + return 0; + + } while (timeout--); + + dev_err(dev, "pipe barrier failed\n"); + + return -EBUSY; +} + +static int usbhsp_pipe_is_accessible(struct usbhs_pipe *pipe) +{ + u16 val; + + val = usbhsp_pipectrl_get(pipe); + if (val & BSTS) + return 0; + + return -EBUSY; +} + +/* + * PID ctrl + */ +static void __usbhsp_pid_try_nak_if_stall(struct usbhs_pipe *pipe) +{ + u16 pid = usbhsp_pipectrl_get(pipe); + + pid &= PID_MASK; + + /* + * see + * "Pipe n Control Register" - "PID" + */ + switch (pid) { + case PID_STALL11: + usbhsp_pipectrl_set(pipe, PID_MASK, PID_STALL10); + /* fall-through */ + case PID_STALL10: + usbhsp_pipectrl_set(pipe, PID_MASK, PID_NAK); + } +} + +void usbhs_fifo_disable(struct usbhs_pipe *pipe) +{ + /* see "Pipe n Control Register" - "PID" */ + __usbhsp_pid_try_nak_if_stall(pipe); + + usbhsp_pipectrl_set(pipe, PID_MASK, PID_NAK); +} + +void usbhs_fifo_enable(struct usbhs_pipe *pipe) +{ + /* see "Pipe n Control Register" - "PID" */ + __usbhsp_pid_try_nak_if_stall(pipe); + + usbhsp_pipectrl_set(pipe, PID_MASK, PID_BUF); +} + +void usbhs_fifo_stall(struct usbhs_pipe *pipe) +{ + u16 pid = usbhsp_pipectrl_get(pipe); + + pid &= PID_MASK; + + /* + * see + * "Pipe n Control Register" - "PID" + */ + switch (pid) { + case PID_NAK: + usbhsp_pipectrl_set(pipe, PID_MASK, PID_STALL10); + break; + case PID_BUF: + usbhsp_pipectrl_set(pipe, PID_MASK, PID_STALL11); + break; + } +} + +/* + * CFIFO ctrl + */ +void usbhs_fifo_send_terminator(struct usbhs_pipe *pipe) +{ + struct usbhs_priv *priv = usbhsp_pipe_to_priv(pipe); + + usbhs_bset(priv, CFIFOCTR, BVAL, BVAL); +} + +static void usbhsp_fifo_clear(struct usbhs_pipe *pipe) +{ + struct usbhs_priv *priv = usbhsp_pipe_to_priv(pipe); + + usbhs_write(priv, CFIFOCTR, BCLR); +} + +static int usbhsp_fifo_barrier(struct usbhs_priv *priv) +{ + int timeout = 1024; + + do { + /* The FIFO port is accessible */ + if (usbhs_read(priv, CFIFOCTR) & FRDY) + 
return 0; + + udelay(10); + } while (timeout--); + + return -EBUSY; +} + +static int usbhsp_fifo_rcv_len(struct usbhs_priv *priv) +{ + return usbhs_read(priv, CFIFOCTR) & DTLN_MASK; +} + +static int usbhsp_fifo_select(struct usbhs_pipe *pipe, int write) +{ + struct usbhs_priv *priv = usbhsp_pipe_to_priv(pipe); + struct device *dev = usbhs_priv_to_dev(priv); + int timeout = 1024; + u16 mask = ((1 << 5) | 0xF); /* mask of ISEL | CURPIPE */ + u16 base = usbhs_pipe_number(pipe); /* CURPIPE */ + + if (usbhsp_is_dcp(pipe)) + base |= (1 == write) << 5; /* ISEL */ + + /* "base" will be used below */ + usbhs_write(priv, CFIFOSEL, base | MBW_32); + + /* check ISEL and CURPIPE value */ + while (timeout--) { + if (base == (mask & usbhs_read(priv, CFIFOSEL))) + return 0; + udelay(10); + } + + dev_err(dev, "fifo select error\n"); + + return -EIO; +} + +int usbhs_fifo_prepare_write(struct usbhs_pipe *pipe) +{ + int ret; + + ret = usbhsp_fifo_select(pipe, 1); + if (ret < 0) + return ret; + + usbhsp_fifo_clear(pipe); + + return ret; +} + +int usbhs_fifo_write(struct usbhs_pipe *pipe, u8 *buf, int len) +{ + struct usbhs_priv *priv = usbhsp_pipe_to_priv(pipe); + void __iomem *addr = priv->base + CFIFO; + int maxp = usbhs_pipe_get_maxpacket(pipe); + int total_len; + int i, ret; + + ret = usbhsp_pipe_is_accessible(pipe); + if (ret < 0) + return ret; + + ret = usbhs_fifo_prepare_write(pipe); + if (ret < 0) + return ret; + + ret = usbhsp_fifo_barrier(priv); + if (ret < 0) + return ret; + + len = min(len, maxp); + total_len = len; + + /* + * FIXME + * + * 32-bit access only + */ + if (len >= 4 && + !((unsigned long)buf & 0x03)) { + iowrite32_rep(addr, buf, len / 4); + len %= 4; + buf += total_len - len; + } + + /* the rest operation */ + for (i = 0; i < len; i++) + iowrite8(buf[i], addr + (0x03 - (i & 0x03))); + + if (total_len < maxp) + usbhs_fifo_send_terminator(pipe); + + return total_len; +} + +int usbhs_fifo_prepare_read(struct usbhs_pipe *pipe) +{ + int ret; + + /* + * select pipe and enable it to prepare packet receive + */ + ret = usbhsp_fifo_select(pipe, 0); + if (ret < 0) + return ret; + + usbhs_fifo_enable(pipe); + + return ret; +} + +int usbhs_fifo_read(struct usbhs_pipe *pipe, u8 *buf, int len) +{ + struct usbhs_priv *priv = usbhsp_pipe_to_priv(pipe); + void __iomem *addr = priv->base + CFIFO; + int rcv_len; + int i, ret; + int total_len; + u32 data = 0; + + ret = usbhsp_fifo_select(pipe, 0); + if (ret < 0) + return ret; + + ret = usbhsp_fifo_barrier(priv); + if (ret < 0) + return ret; + + rcv_len = usbhsp_fifo_rcv_len(priv); + + /* + * Buffer clear if Zero-Length packet + * + * see + * "Operation" - "FIFO Buffer Memory" - "FIFO Port Function" + */ + if (0 == rcv_len) { + usbhsp_fifo_clear(pipe); + return 0; + } + + len = min(rcv_len, len); + total_len = len; + + /* + * FIXME + * + * 32-bit access only + */ + if (len >= 4 && + !((unsigned long)buf & 0x03)) { + ioread32_rep(addr, buf, len / 4); + len %= 4; + buf += rcv_len - len; + } + + /* the rest operation */ + for (i = 0; i < len; i++) { + if (!(i & 0x03)) + data = ioread32(addr); + + buf[i] = (data >> ((i & 0x03) * 8)) & 0xff; + } + + return total_len; +} + +/* + * pipe setup + */ +static int usbhsp_possible_double_buffer(struct usbhs_pipe *pipe) +{ + /* + * only ISO / BULK pipe can use double buffer + */ + if (usbhsp_type_is(pipe, USB_ENDPOINT_XFER_BULK) || + usbhsp_type_is(pipe, USB_ENDPOINT_XFER_ISOC)) + return 1; + + return 0; +} + +static u16 usbhsp_setup_pipecfg(struct usbhs_pipe *pipe, + const struct usb_endpoint_descriptor *desc, + int 
is_host) +{ + u16 type = 0; + u16 bfre = 0; + u16 dblb = 0; + u16 cntmd = 0; + u16 dir = 0; + u16 epnum = 0; + u16 shtnak = 0; + u16 type_array[] = { + [USB_ENDPOINT_XFER_BULK] = TYPE_BULK, + [USB_ENDPOINT_XFER_INT] = TYPE_INT, + [USB_ENDPOINT_XFER_ISOC] = TYPE_ISO, + }; + int is_double = usbhsp_possible_double_buffer(pipe); + + if (usbhsp_is_dcp(pipe)) + return -EINVAL; + + /* + * PIPECFG + * + * see + * - "Register Descriptions" - "PIPECFG" register + * - "Features" - "Pipe configuration" + * - "Operation" - "Pipe Control" + */ + + /* TYPE */ + type = type_array[usbhsp_type(pipe)]; + + /* BFRE */ + if (usbhsp_type_is(pipe, USB_ENDPOINT_XFER_ISOC) || + usbhsp_type_is(pipe, USB_ENDPOINT_XFER_BULK)) + bfre = 0; /* FIXME */ + + /* DBLB */ + if (usbhsp_type_is(pipe, USB_ENDPOINT_XFER_ISOC) || + usbhsp_type_is(pipe, USB_ENDPOINT_XFER_BULK)) + dblb = (is_double) ? DBLB : 0; + + /* CNTMD */ + if (usbhsp_type_is(pipe, USB_ENDPOINT_XFER_BULK)) + cntmd = 0; /* FIXME */ + + /* DIR */ + if (usb_endpoint_dir_in(desc)) + usbhsp_flags_set(pipe, IS_DIR_IN); + + if ((is_host && usb_endpoint_dir_out(desc)) || + (!is_host && usb_endpoint_dir_in(desc))) + dir |= DIR_OUT; + + /* SHTNAK */ + if (usbhsp_type_is(pipe, USB_ENDPOINT_XFER_BULK) && + !dir) + shtnak = SHTNAK; + + /* EPNUM */ + epnum = 0xF & usb_endpoint_num(desc); + + return type | + bfre | + dblb | + cntmd | + dir | + shtnak | + epnum; +} + +static u16 usbhsp_setup_pipemaxp(struct usbhs_pipe *pipe, + const struct usb_endpoint_descriptor *desc, + int is_host) +{ + /* host should set DEVSEL */ + + /* reutn MXPS */ + return PIPE_MAXP_MASK & le16_to_cpu(desc->wMaxPacketSize); +} + +static u16 usbhsp_setup_pipebuff(struct usbhs_pipe *pipe, + const struct usb_endpoint_descriptor *desc, + int is_host) +{ + struct usbhs_priv *priv = usbhsp_pipe_to_priv(pipe); + struct usbhs_pipe_info *info = usbhsp_priv_to_pipeinfo(priv); + struct device *dev = usbhs_priv_to_dev(priv); + int pipe_num = usbhs_pipe_number(pipe); + int is_double = usbhsp_possible_double_buffer(pipe); + u16 buff_size; + u16 bufnmb; + u16 bufnmb_cnt; + + /* + * PIPEBUF + * + * see + * - "Register Descriptions" - "PIPEBUF" register + * - "Features" - "Pipe configuration" + * - "Operation" - "FIFO Buffer Memory" + * - "Operation" - "Pipe Control" + * + * ex) if pipe6 - pipe9 are USB_ENDPOINT_XFER_INT (SH7724) + * + * BUFNMB: PIPE + * 0: pipe0 (DCP 256byte) + * 1: - + * 2: - + * 3: - + * 4: pipe6 (INT 64byte) + * 5: pipe7 (INT 64byte) + * 6: pipe8 (INT 64byte) + * 7: pipe9 (INT 64byte) + * 8 - xx: free (for BULK, ISOC) + */ + + /* + * FIXME + * + * it doesn't have good buffer allocator + * + * DCP : 256 byte + * BULK: 512 byte + * INT : 64 byte + * ISOC: 512 byte + */ + if (usbhsp_type_is(pipe, USB_ENDPOINT_XFER_CONTROL)) + buff_size = 256; + else if (usbhsp_type_is(pipe, USB_ENDPOINT_XFER_INT)) + buff_size = 64; + else + buff_size = 512; + + /* change buff_size to register value */ + bufnmb_cnt = (buff_size / 64) - 1; + + /* BUFNMB has been reserved for INT pipe + * see above */ + if (usbhsp_type_is(pipe, USB_ENDPOINT_XFER_INT)) { + bufnmb = pipe_num - 2; + } else { + bufnmb = info->bufnmb_last; + info->bufnmb_last += bufnmb_cnt + 1; + + /* + * double buffer + */ + if (is_double) + info->bufnmb_last += bufnmb_cnt + 1; + } + + dev_dbg(dev, "pipe : %d : buff_size 0x%x: bufnmb 0x%x\n", + pipe_num, buff_size, bufnmb); + + return (0x1f & bufnmb_cnt) << 10 | + (0xff & bufnmb) << 0; +} + +/* + * pipe control + */ +int usbhs_pipe_get_maxpacket(struct usbhs_pipe *pipe) +{ + u16 mask = usbhsp_is_dcp(pipe) ? 
DCP_MAXP_MASK : PIPE_MAXP_MASK; + + usbhsp_pipe_select(pipe); + + return (int)(usbhsp_pipe_maxp_get(pipe) & mask); +} + +int usbhs_pipe_is_dir_in(struct usbhs_pipe *pipe) +{ + return usbhsp_flags_has(pipe, IS_DIR_IN); +} + +void usbhs_pipe_clear_sequence(struct usbhs_pipe *pipe) +{ + usbhsp_pipectrl_set(pipe, SQCLR, SQCLR); +} + +static struct usbhs_pipe *usbhsp_get_pipe(struct usbhs_priv *priv, u32 type) +{ + struct usbhs_pipe *pos, *pipe; + int i; + + /* + * find target pipe + */ + pipe = NULL; + usbhs_for_each_pipe_with_dcp(pos, priv, i) { + if (!usbhsp_type_is(pos, type)) + continue; + if (usbhsp_flags_has(pos, IS_USED)) + continue; + + pipe = pos; + break; + } + + if (!pipe) + return NULL; + + /* + * initialize pipe flags + */ + usbhsp_flags_init(pipe); + usbhsp_flags_set(pipe, IS_USED); + + return pipe; +} + +void usbhs_pipe_init(struct usbhs_priv *priv) +{ + struct usbhs_pipe_info *info = usbhsp_priv_to_pipeinfo(priv); + struct usbhs_pipe *pipe; + int i; + + /* + * FIXME + * + * driver needs good allocator. + * + * find first free buffer area (BULK, ISOC) + * (DCP, INT area is fixed) + * + * buffer number 0 - 3 have been reserved for DCP + * see + * usbhsp_to_bufnmb + */ + info->bufnmb_last = 4; + usbhs_for_each_pipe_with_dcp(pipe, priv, i) { + if (usbhsp_type_is(pipe, USB_ENDPOINT_XFER_INT)) + info->bufnmb_last++; + + usbhsp_flags_init(pipe); + pipe->mod_private = NULL; + } +} + +struct usbhs_pipe *usbhs_pipe_malloc(struct usbhs_priv *priv, + const struct usb_endpoint_descriptor *desc) +{ + struct device *dev = usbhs_priv_to_dev(priv); + struct usbhs_mod *mod = usbhs_mod_get_current(priv); + struct usbhs_pipe *pipe; + int is_host = usbhs_mod_is_host(priv, mod); + int ret; + u16 pipecfg, pipebuf, pipemaxp; + + pipe = usbhsp_get_pipe(priv, usb_endpoint_type(desc)); + if (!pipe) + return NULL; + + usbhs_fifo_disable(pipe); + + /* make sure pipe is not busy */ + ret = usbhsp_pipe_barrier(pipe); + if (ret < 0) { + dev_err(dev, "pipe setup failed %d\n", usbhs_pipe_number(pipe)); + return NULL; + } + + pipecfg = usbhsp_setup_pipecfg(pipe, desc, is_host); + pipebuf = usbhsp_setup_pipebuff(pipe, desc, is_host); + pipemaxp = usbhsp_setup_pipemaxp(pipe, desc, is_host); + + /* buffer clear + * see PIPECFG :: BFRE */ + usbhsp_pipectrl_set(pipe, ACLRM, ACLRM); + usbhsp_pipectrl_set(pipe, ACLRM, 0); + + usbhsp_pipe_select(pipe); + usbhsp_pipe_cfg_set(pipe, 0xFFFF, pipecfg); + usbhsp_pipe_buf_set(pipe, 0xFFFF, pipebuf); + usbhsp_pipe_maxp_set(pipe, 0xFFFF, pipemaxp); + + usbhs_pipe_clear_sequence(pipe); + + dev_dbg(dev, "enable pipe %d : %s (%s)\n", + usbhs_pipe_number(pipe), + usbhsp_pipe_name[usb_endpoint_type(desc)], + usbhs_pipe_is_dir_in(pipe) ? 
"in" : "out"); + + return pipe; +} + +/* + * dcp control + */ +struct usbhs_pipe *usbhs_dcp_malloc(struct usbhs_priv *priv) +{ + struct usbhs_pipe *pipe; + + pipe = usbhsp_get_pipe(priv, USB_ENDPOINT_XFER_CONTROL); + if (!pipe) + return NULL; + + /* + * dcpcfg : default + * dcpmaxp : default + * pipebuf : nothing to do + */ + + usbhsp_pipe_select(pipe); + usbhs_pipe_clear_sequence(pipe); + + return pipe; +} + +void usbhs_dcp_control_transfer_done(struct usbhs_pipe *pipe) +{ + WARN_ON(!usbhsp_is_dcp(pipe)); + + usbhs_fifo_enable(pipe); + usbhsp_pipectrl_set(pipe, CCPL, CCPL); +} + + +/* + * pipe module function + */ +int usbhs_pipe_probe(struct usbhs_priv *priv) +{ + struct usbhs_pipe_info *info = usbhsp_priv_to_pipeinfo(priv); + struct usbhs_pipe *pipe; + struct device *dev = usbhs_priv_to_dev(priv); + u32 *pipe_type = usbhs_get_dparam(priv, pipe_type); + int pipe_size = usbhs_get_dparam(priv, pipe_size); + int i; + + /* This driver expects 1st pipe is DCP */ + if (pipe_type[0] != USB_ENDPOINT_XFER_CONTROL) { + dev_err(dev, "1st PIPE is not DCP\n"); + return -EINVAL; + } + + info->pipe = kzalloc(sizeof(struct usbhs_pipe) * pipe_size, GFP_KERNEL); + if (!info->pipe) { + dev_err(dev, "Could not allocate pipe\n"); + return -ENOMEM; + } + + info->size = pipe_size; + + /* + * init pipe + */ + usbhs_for_each_pipe_with_dcp(pipe, priv, i) { + pipe->priv = priv; + usbhsp_type(pipe) = pipe_type[i] & USB_ENDPOINT_XFERTYPE_MASK; + + dev_dbg(dev, "pipe %x\t: %s\n", + i, usbhsp_pipe_name[pipe_type[i]]); + } + + return 0; +} + +void usbhs_pipe_remove(struct usbhs_priv *priv) +{ + struct usbhs_pipe_info *info = usbhsp_priv_to_pipeinfo(priv); + + kfree(info->pipe); +} diff --git a/drivers/usb/renesas_usbhs/pipe.h b/drivers/usb/renesas_usbhs/pipe.h new file mode 100644 index 000000000000..4a60dcef9676 --- /dev/null +++ b/drivers/usb/renesas_usbhs/pipe.h @@ -0,0 +1,105 @@ +/* + * Renesas USB driver + * + * Copyright (C) 2011 Renesas Solutions Corp. + * Kuninori Morimoto + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ +#ifndef RENESAS_USB_PIPE_H +#define RENESAS_USB_PIPE_H + +#include "./common.h" + +/* + * struct + */ +struct usbhs_pipe { + u32 pipe_type; /* USB_ENDPOINT_XFER_xxx */ + + struct usbhs_priv *priv; + + u32 flags; +#define USBHS_PIPE_FLAGS_IS_USED (1 << 0) +#define USBHS_PIPE_FLAGS_IS_DIR_IN (1 << 1) + + void *mod_private; +}; + +struct usbhs_pipe_info { + struct usbhs_pipe *pipe; + int size; /* array size of "pipe" */ + int bufnmb_last; /* FIXME : driver needs good allocator */ +}; + +/* + * pipe list + */ +#define __usbhs_for_each_pipe(start, pos, info, i) \ + for (i = start, pos = (info)->pipe; \ + i < (info)->size; \ + i++, pos = (info)->pipe + i) + +#define usbhs_for_each_pipe(pos, priv, i) \ + __usbhs_for_each_pipe(1, pos, &((priv)->pipe_info), i) + +#define usbhs_for_each_pipe_with_dcp(pos, priv, i) \ + __usbhs_for_each_pipe(0, pos, &((priv)->pipe_info), i) + +/* + * pipe module probe / remove + */ +int usbhs_pipe_probe(struct usbhs_priv *priv); +void usbhs_pipe_remove(struct usbhs_priv *priv); + +/* + * cfifo + */ +int usbhs_fifo_write(struct usbhs_pipe *pipe, u8 *buf, int len); +int usbhs_fifo_read(struct usbhs_pipe *pipe, u8 *buf, int len); +int usbhs_fifo_prepare_write(struct usbhs_pipe *pipe); +int usbhs_fifo_prepare_read(struct usbhs_pipe *pipe); + +void usbhs_fifo_enable(struct usbhs_pipe *pipe); +void usbhs_fifo_disable(struct usbhs_pipe *pipe); +void usbhs_fifo_stall(struct usbhs_pipe *pipe); + +void usbhs_fifo_send_terminator(struct usbhs_pipe *pipe); + + +/* + * usb request + */ +void usbhs_usbreq_get_val(struct usbhs_priv *priv, struct usb_ctrlrequest *req); +void usbhs_usbreq_set_val(struct usbhs_priv *priv, struct usb_ctrlrequest *req); + +/* + * pipe control + */ +struct usbhs_pipe +*usbhs_pipe_malloc(struct usbhs_priv *priv, + const struct usb_endpoint_descriptor *desc); + +int usbhs_pipe_is_dir_in(struct usbhs_pipe *pipe); +void usbhs_pipe_init(struct usbhs_priv *priv); +int usbhs_pipe_get_maxpacket(struct usbhs_pipe *pipe); +void usbhs_pipe_clear_sequence(struct usbhs_pipe *pipe); + +#define usbhs_pipe_number(p) (((u32)(p) - (u32)(p)->priv->pipe_info.pipe) / \ + sizeof(struct usbhs_pipe)) + +/* + * dcp control + */ +struct usbhs_pipe *usbhs_dcp_malloc(struct usbhs_priv *priv); +void usbhs_dcp_control_transfer_done(struct usbhs_pipe *pipe); + +#endif /* RENESAS_USB_PIPE_H */ diff --git a/include/linux/usb/renesas_usbhs.h b/include/linux/usb/renesas_usbhs.h new file mode 100644 index 000000000000..565bca3aa440 --- /dev/null +++ b/include/linux/usb/renesas_usbhs.h @@ -0,0 +1,149 @@ +/* + * Renesas USB + * + * Copyright (C) 2011 Renesas Solutions Corp. + * Kuninori Morimoto + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ +#ifndef RENESAS_USB_H +#define RENESAS_USB_H +#include +#include + +/* + * module type + * + * it will be return value from get_id + */ +enum { + USBHS_HOST = 0, + USBHS_GADGET, + USBHS_MAX, +}; + +/* + * callback functions table for driver + * + * These functions are called from platform for driver. + * Callback function's pointer will be set before + * renesas_usbhs_platform_callback :: hardware_init was called + */ +struct renesas_usbhs_driver_callback { + int (*notify_hotplug)(struct platform_device *pdev); +}; + +/* + * callback functions for platform + * + * These functions are called from driver for platform + */ +struct renesas_usbhs_platform_callback { + + /* + * option: + * + * Hardware init function for platform. + * it is called when driver was probed. + */ + int (*hardware_init)(struct platform_device *pdev); + + /* + * option: + * + * Hardware exit function for platform. + * it is called when driver was removed + */ + void (*hardware_exit)(struct platform_device *pdev); + + /* + * option: + * + * Phy reset for platform + */ + void (*phy_reset)(struct platform_device *pdev); + + /* + * get USB ID function + * - USBHS_HOST + * - USBHS_GADGET + */ + int (*get_id)(struct platform_device *pdev); + + /* + * get VBUS status function. + */ + int (*get_vbus)(struct platform_device *pdev); +}; + +/* + * parameters for renesas usbhs + * + * some register needs USB chip specific parameters. + * This struct show it to driver + */ +struct renesas_usbhs_driver_param { + /* + * pipe settings + */ + u32 *pipe_type; /* array of USB_ENDPOINT_XFER_xxx (from ep0) */ + int pipe_size; /* pipe_type array size */ + + /* + * option: + * + * for BUSWAIT :: BWAIT + * */ + int buswait_bwait; +}; + +/* + * option: + * + * platform information for renesas_usbhs driver. + */ +struct renesas_usbhs_platform_info { + /* + * option: + * + * platform set these functions before + * call platform_add_devices if needed + */ + struct renesas_usbhs_platform_callback platform_callback; + + /* + * driver set these callback functions pointer. + * platform can use it on callback functions + */ + struct renesas_usbhs_driver_callback driver_callback; + + /* + * option: + * + * driver use these param for some register + */ + struct renesas_usbhs_driver_param driver_param; +}; + +/* + * macro for platform + */ +#define renesas_usbhs_get_info(pdev)\ + ((struct renesas_usbhs_platform_info *)(pdev)->dev.platform_data) + +#define renesas_usbhs_call_notify_hotplug(pdev) \ + ({ \ + struct renesas_usbhs_driver_callback *dc; \ + dc = &(renesas_usbhs_get_info(pdev)->driver_callback); \ + if (dc) \ + dc->notify_hotplug(pdev); \ + }) +#endif /* RENESAS_USB_H */ -- cgit v1.2.3-71-gd317 From 3ab810f19d71f4083be44b41770bcd784ff82e51 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 1 Apr 2011 11:24:30 -0700 Subject: usb gadget: fix all Section mismatch warnings Fix 41 occurrences of this type of Section mismatch warning in g_mass_storage, g_serial, g_cdc, g_multi, g_nokia, g_ether, g_ffs: (the 75 number reported earlier contained some duplicates.) WARNING: drivers/usb/gadget/g_mass_storage.o(.text+0x687a): Section mismatch in reference from the function fsg_bind() to the function .devinit.text:usb_ep_autoconfig() The function fsg_bind() references the function __devinit usb_ep_autoconfig(). 
This is often because fsg_bind lacks a __devinit annotation or the annotation of usb_ep_autoconfig is wrong. Also remove __devinit from usb_ep_autoconfig_reset() to prevent possible section mismatch problems with it. Signed-off-by: Randy Dunlap Cc: Alan Stern Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/gadget.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index e538172c0f64..dd1571db55e7 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -890,8 +890,8 @@ static inline void usb_free_descriptors(struct usb_descriptor_header **v) /* utility wrapping a simple endpoint selection policy */ extern struct usb_ep *usb_ep_autoconfig(struct usb_gadget *, - struct usb_endpoint_descriptor *) __devinit; + struct usb_endpoint_descriptor *); -extern void usb_ep_autoconfig_reset(struct usb_gadget *) __devinit; +extern void usb_ep_autoconfig_reset(struct usb_gadget *); #endif /* __LINUX_USB_GADGET_H */ -- cgit v1.2.3-71-gd317 From c326de88b8ac7ed1cd1027017ba6079dbe91be49 Mon Sep 17 00:00:00 2001 From: "Mathieu J. Poirier" Date: Wed, 13 Apr 2011 17:13:00 -0700 Subject: net: allow shifted access in smsc911x V2 This is a revised patch that permits a shifted access to the LAN9221 registers. More specifically: It adds a shift parameter in the platform_data. It introduces an ops in smsc911x_data. A choice of access function to use at run-time. Four new shifted access function. Signed-off-by: Mathieu Poirier Signed-off-by: Alessandro Rubini Signed-off-by: David S. Miller --- drivers/net/smsc911x.c | 156 ++++++++++++++++++++++++++++++++++++++++++++--- include/linux/smsc911x.h | 1 + 2 files changed, 150 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/smsc911x.c b/drivers/net/smsc911x.c index b8faab7780da..c6d47d10590c 100644 --- a/drivers/net/smsc911x.c +++ b/drivers/net/smsc911x.c @@ -71,6 +71,17 @@ static int debug = 3; module_param(debug, int, 0); MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); +struct smsc911x_data; + +struct smsc911x_ops { + u32 (*reg_read)(struct smsc911x_data *pdata, u32 reg); + void (*reg_write)(struct smsc911x_data *pdata, u32 reg, u32 val); + void (*rx_readfifo)(struct smsc911x_data *pdata, + unsigned int *buf, unsigned int wordcount); + void (*tx_writefifo)(struct smsc911x_data *pdata, + unsigned int *buf, unsigned int wordcount); +}; + struct smsc911x_data { void __iomem *ioaddr; @@ -118,8 +129,14 @@ struct smsc911x_data { unsigned int clear_bits_mask; unsigned int hashhi; unsigned int hashlo; + + /* register access functions */ + const struct smsc911x_ops *ops; }; +/* Easy access to information */ +#define __smsc_shift(pdata, reg) ((reg) << ((pdata)->config.shift)) + static inline u32 __smsc911x_reg_read(struct smsc911x_data *pdata, u32 reg) { if (pdata->config.flags & SMSC911X_USE_32BIT) @@ -133,13 +150,29 @@ static inline u32 __smsc911x_reg_read(struct smsc911x_data *pdata, u32 reg) return 0; } +static inline u32 +__smsc911x_reg_read_shift(struct smsc911x_data *pdata, u32 reg) +{ + if (pdata->config.flags & SMSC911X_USE_32BIT) + return readl(pdata->ioaddr + __smsc_shift(pdata, reg)); + + if (pdata->config.flags & SMSC911X_USE_16BIT) + return (readw(pdata->ioaddr + + __smsc_shift(pdata, reg)) & 0xFFFF) | + ((readw(pdata->ioaddr + + __smsc_shift(pdata, reg + 2)) & 0xFFFF) << 16); + + BUG(); + return 0; +} + static inline u32 smsc911x_reg_read(struct smsc911x_data *pdata, u32 reg) { u32 data; 
unsigned long flags; spin_lock_irqsave(&pdata->dev_lock, flags); - data = __smsc911x_reg_read(pdata, reg); + data = pdata->ops->reg_read(pdata, reg); spin_unlock_irqrestore(&pdata->dev_lock, flags); return data; @@ -162,13 +195,32 @@ static inline void __smsc911x_reg_write(struct smsc911x_data *pdata, u32 reg, BUG(); } +static inline void +__smsc911x_reg_write_shift(struct smsc911x_data *pdata, u32 reg, u32 val) +{ + if (pdata->config.flags & SMSC911X_USE_32BIT) { + writel(val, pdata->ioaddr + __smsc_shift(pdata, reg)); + return; + } + + if (pdata->config.flags & SMSC911X_USE_16BIT) { + writew(val & 0xFFFF, + pdata->ioaddr + __smsc_shift(pdata, reg)); + writew((val >> 16) & 0xFFFF, + pdata->ioaddr + __smsc_shift(pdata, reg + 2)); + return; + } + + BUG(); +} + static inline void smsc911x_reg_write(struct smsc911x_data *pdata, u32 reg, u32 val) { unsigned long flags; spin_lock_irqsave(&pdata->dev_lock, flags); - __smsc911x_reg_write(pdata, reg, val); + pdata->ops->reg_write(pdata, reg, val); spin_unlock_irqrestore(&pdata->dev_lock, flags); } @@ -204,6 +256,40 @@ out: spin_unlock_irqrestore(&pdata->dev_lock, flags); } +/* Writes a packet to the TX_DATA_FIFO - shifted version */ +static inline void +smsc911x_tx_writefifo_shift(struct smsc911x_data *pdata, unsigned int *buf, + unsigned int wordcount) +{ + unsigned long flags; + + spin_lock_irqsave(&pdata->dev_lock, flags); + + if (pdata->config.flags & SMSC911X_SWAP_FIFO) { + while (wordcount--) + __smsc911x_reg_write_shift(pdata, TX_DATA_FIFO, + swab32(*buf++)); + goto out; + } + + if (pdata->config.flags & SMSC911X_USE_32BIT) { + writesl(pdata->ioaddr + __smsc_shift(pdata, + TX_DATA_FIFO), buf, wordcount); + goto out; + } + + if (pdata->config.flags & SMSC911X_USE_16BIT) { + while (wordcount--) + __smsc911x_reg_write_shift(pdata, + TX_DATA_FIFO, *buf++); + goto out; + } + + BUG(); +out: + spin_unlock_irqrestore(&pdata->dev_lock, flags); +} + /* Reads a packet out of the RX_DATA_FIFO */ static inline void smsc911x_rx_readfifo(struct smsc911x_data *pdata, unsigned int *buf, @@ -236,6 +322,40 @@ out: spin_unlock_irqrestore(&pdata->dev_lock, flags); } +/* Reads a packet out of the RX_DATA_FIFO - shifted version */ +static inline void +smsc911x_rx_readfifo_shift(struct smsc911x_data *pdata, unsigned int *buf, + unsigned int wordcount) +{ + unsigned long flags; + + spin_lock_irqsave(&pdata->dev_lock, flags); + + if (pdata->config.flags & SMSC911X_SWAP_FIFO) { + while (wordcount--) + *buf++ = swab32(__smsc911x_reg_read_shift(pdata, + RX_DATA_FIFO)); + goto out; + } + + if (pdata->config.flags & SMSC911X_USE_32BIT) { + readsl(pdata->ioaddr + __smsc_shift(pdata, + RX_DATA_FIFO), buf, wordcount); + goto out; + } + + if (pdata->config.flags & SMSC911X_USE_16BIT) { + while (wordcount--) + *buf++ = __smsc911x_reg_read_shift(pdata, + RX_DATA_FIFO); + goto out; + } + + BUG(); +out: + spin_unlock_irqrestore(&pdata->dev_lock, flags); +} + /* waits for MAC not busy, with timeout. 
Only called by smsc911x_mac_read * and smsc911x_mac_write, so assumes mac_lock is held */ static int smsc911x_mac_complete(struct smsc911x_data *pdata) @@ -500,7 +620,7 @@ static int smsc911x_phy_check_loopbackpkt(struct smsc911x_data *pdata) wrsz += (u32)((ulong)pdata->loopback_tx_pkt & 0x3); wrsz >>= 2; - smsc911x_tx_writefifo(pdata, (unsigned int *)bufp, wrsz); + pdata->ops->tx_writefifo(pdata, (unsigned int *)bufp, wrsz); /* Wait till transmit is done */ i = 60; @@ -544,7 +664,7 @@ static int smsc911x_phy_check_loopbackpkt(struct smsc911x_data *pdata) rdsz += (u32)((ulong)pdata->loopback_rx_pkt & 0x3); rdsz >>= 2; - smsc911x_rx_readfifo(pdata, (unsigned int *)bufp, rdsz); + pdata->ops->rx_readfifo(pdata, (unsigned int *)bufp, rdsz); if (pktlength != (MIN_PACKET_SIZE + 4)) { SMSC_WARN(pdata, hw, "Unexpected packet size " @@ -1046,8 +1166,8 @@ static int smsc911x_poll(struct napi_struct *napi, int budget) /* Align IP on 16B boundary */ skb_reserve(skb, NET_IP_ALIGN); skb_put(skb, pktlength - 4); - smsc911x_rx_readfifo(pdata, (unsigned int *)skb->head, - pktwords); + pdata->ops->rx_readfifo(pdata, + (unsigned int *)skb->head, pktwords); skb->protocol = eth_type_trans(skb, dev); skb_checksum_none_assert(skb); netif_receive_skb(skb); @@ -1351,7 +1471,7 @@ static int smsc911x_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) wrsz += (u32)((ulong)skb->data & 0x3); wrsz >>= 2; - smsc911x_tx_writefifo(pdata, (unsigned int *)bufp, wrsz); + pdata->ops->tx_writefifo(pdata, (unsigned int *)bufp, wrsz); freespace -= (skb->len + 32); dev_kfree_skb(skb); @@ -1957,6 +2077,22 @@ static int __devexit smsc911x_drv_remove(struct platform_device *pdev) return 0; } +/* standard register acces */ +static const struct smsc911x_ops standard_smsc911x_ops = { + .reg_read = __smsc911x_reg_read, + .reg_write = __smsc911x_reg_write, + .rx_readfifo = smsc911x_rx_readfifo, + .tx_writefifo = smsc911x_tx_writefifo, +}; + +/* shifted register access */ +static const struct smsc911x_ops shifted_smsc911x_ops = { + .reg_read = __smsc911x_reg_read_shift, + .reg_write = __smsc911x_reg_write_shift, + .rx_readfifo = smsc911x_rx_readfifo_shift, + .tx_writefifo = smsc911x_tx_writefifo_shift, +}; + static int __devinit smsc911x_drv_probe(struct platform_device *pdev) { struct net_device *dev; @@ -2026,6 +2162,12 @@ static int __devinit smsc911x_drv_probe(struct platform_device *pdev) goto out_free_netdev_2; } + /* assume standard, non-shifted, access to HW registers */ + pdata->ops = &standard_smsc911x_ops; + /* apply the right access if shifting is needed */ + if (config->shift) + pdata->ops = &shifted_smsc911x_ops; + retval = smsc911x_init(dev); if (retval < 0) goto out_unmap_io_3; diff --git a/include/linux/smsc911x.h b/include/linux/smsc911x.h index 7144e8aa1e41..4dde70e74822 100644 --- a/include/linux/smsc911x.h +++ b/include/linux/smsc911x.h @@ -29,6 +29,7 @@ struct smsc911x_platform_config { unsigned int irq_polarity; unsigned int irq_type; unsigned int flags; + unsigned int shift; phy_interface_t phy_interface; unsigned char mac[6]; }; -- cgit v1.2.3-71-gd317 From 184748cc50b2dceb8287f9fb657eda48ff8fcfe7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Apr 2011 17:23:39 +0200 Subject: sched: Provide scheduler_ipi() callback in response to smp_send_reschedule() For future rework of try_to_wake_up() we'd like to push part of that function onto the CPU the task is actually going to run on. In order to do so we need a generic callback from the existing scheduler IPI. 
This patch introduces such a generic callback: scheduler_ipi() and implements it as a NOP. BenH notes: PowerPC might use this IPI on offline CPUs under rare conditions! Acked-by: Russell King Acked-by: Martin Schwidefsky Acked-by: Chris Metcalf Acked-by: Jesper Nilsson Acked-by: Benjamin Herrenschmidt Signed-off-by: Ralf Baechle Reviewed-by: Frank Rowand Cc: Mike Galbraith Cc: Nick Piggin Cc: Linus Torvalds Cc: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20110405152728.744338123@chello.nl --- arch/alpha/kernel/smp.c | 3 +-- arch/arm/kernel/smp.c | 5 +---- arch/blackfin/mach-common/smp.c | 3 +++ arch/cris/arch-v32/kernel/smp.c | 13 ++++++++----- arch/ia64/kernel/irq_ia64.c | 2 ++ arch/ia64/xen/irq_xen.c | 10 +++++++++- arch/m32r/kernel/smp.c | 4 +--- arch/mips/cavium-octeon/smp.c | 2 ++ arch/mips/kernel/smtc.c | 2 +- arch/mips/mti-malta/malta-int.c | 2 ++ arch/mips/pmc-sierra/yosemite/smp.c | 4 ++++ arch/mips/sgi-ip27/ip27-irq.c | 2 ++ arch/mips/sibyte/bcm1480/smp.c | 7 +++---- arch/mips/sibyte/sb1250/smp.c | 7 +++---- arch/mn10300/kernel/smp.c | 5 +---- arch/parisc/kernel/smp.c | 5 +---- arch/powerpc/kernel/smp.c | 4 ++-- arch/s390/kernel/smp.c | 6 +++--- arch/sh/kernel/smp.c | 2 ++ arch/sparc/kernel/smp_32.c | 4 +++- arch/sparc/kernel/smp_64.c | 1 + arch/tile/kernel/smp.c | 6 +----- arch/um/kernel/smp.c | 2 +- arch/x86/kernel/smp.c | 5 ++--- arch/x86/xen/smp.c | 5 ++--- include/linux/sched.h | 2 ++ 26 files changed, 63 insertions(+), 50 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index 42aa078a5e4d..5a621c6d22ab 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -585,8 +585,7 @@ handle_ipi(struct pt_regs *regs) switch (which) { case IPI_RESCHEDULE: - /* Reschedule callback. Everything to be done - is done by the interrupt return path. 
*/ + scheduler_ipi(); break; case IPI_CALL_FUNC: diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 8fe05ad932e4..7a561eb731ea 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -560,10 +560,7 @@ asmlinkage void __exception_irq_entry do_IPI(int ipinr, struct pt_regs *regs) break; case IPI_RESCHEDULE: - /* - * nothing more to do - eveything is - * done on the interrupt return path - */ + scheduler_ipi(); break; case IPI_CALL_FUNC: diff --git a/arch/blackfin/mach-common/smp.c b/arch/blackfin/mach-common/smp.c index 6e17a265c4d3..326bb86f4d29 100644 --- a/arch/blackfin/mach-common/smp.c +++ b/arch/blackfin/mach-common/smp.c @@ -164,6 +164,9 @@ static irqreturn_t ipi_handler_int1(int irq, void *dev_instance) while (msg_queue->count) { msg = &msg_queue->ipi_message[msg_queue->head]; switch (msg->type) { + case BFIN_IPI_RESCHEDULE: + scheduler_ipi(); + break; case BFIN_IPI_CALL_FUNC: spin_unlock_irqrestore(&msg_queue->lock, flags); ipi_call_function(cpu, msg); diff --git a/arch/cris/arch-v32/kernel/smp.c b/arch/cris/arch-v32/kernel/smp.c index 4c9e3e1ba5d1..66cc75657e2f 100644 --- a/arch/cris/arch-v32/kernel/smp.c +++ b/arch/cris/arch-v32/kernel/smp.c @@ -342,15 +342,18 @@ irqreturn_t crisv32_ipi_interrupt(int irq, void *dev_id) ipi = REG_RD(intr_vect, irq_regs[smp_processor_id()], rw_ipi); + if (ipi.vector & IPI_SCHEDULE) { + scheduler_ipi(); + } if (ipi.vector & IPI_CALL) { - func(info); + func(info); } if (ipi.vector & IPI_FLUSH_TLB) { - if (flush_mm == FLUSH_ALL) - __flush_tlb_all(); - else if (flush_vma == FLUSH_ALL) + if (flush_mm == FLUSH_ALL) + __flush_tlb_all(); + else if (flush_vma == FLUSH_ALL) __flush_tlb_mm(flush_mm); - else + else __flush_tlb_page(flush_vma, flush_addr); } diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c index 5b704740f160..782c3a357f24 100644 --- a/arch/ia64/kernel/irq_ia64.c +++ b/arch/ia64/kernel/irq_ia64.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -496,6 +497,7 @@ ia64_handle_irq (ia64_vector vector, struct pt_regs *regs) smp_local_flush_tlb(); kstat_incr_irqs_this_cpu(irq, desc); } else if (unlikely(IS_RESCHEDULE(vector))) { + scheduler_ipi(); kstat_incr_irqs_this_cpu(irq, desc); } else { ia64_setreg(_IA64_REG_CR_TPR, vector); diff --git a/arch/ia64/xen/irq_xen.c b/arch/ia64/xen/irq_xen.c index 108bb858acf2..b279e142c633 100644 --- a/arch/ia64/xen/irq_xen.c +++ b/arch/ia64/xen/irq_xen.c @@ -92,6 +92,8 @@ static unsigned short saved_irq_cnt; static int xen_slab_ready; #ifdef CONFIG_SMP +#include + /* Dummy stub. Though we may check XEN_RESCHEDULE_VECTOR before __do_IRQ, * it ends up to issue several memory accesses upon percpu data and * thus adds unnecessary traffic to other paths. @@ -99,7 +101,13 @@ static int xen_slab_ready; static irqreturn_t xen_dummy_handler(int irq, void *dev_id) { + return IRQ_HANDLED; +} +static irqreturn_t +xen_resched_handler(int irq, void *dev_id) +{ + scheduler_ipi(); return IRQ_HANDLED; } @@ -110,7 +118,7 @@ static struct irqaction xen_ipi_irqaction = { }; static struct irqaction xen_resched_irqaction = { - .handler = xen_dummy_handler, + .handler = xen_resched_handler, .flags = IRQF_DISABLED, .name = "resched" }; diff --git a/arch/m32r/kernel/smp.c b/arch/m32r/kernel/smp.c index 31cef20b2996..fc10b39893d4 100644 --- a/arch/m32r/kernel/smp.c +++ b/arch/m32r/kernel/smp.c @@ -122,8 +122,6 @@ void smp_send_reschedule(int cpu_id) * * Description: This routine executes on CPU which received * 'RESCHEDULE_IPI'. 
- * Rescheduling is processed at the exit of interrupt - * operation. * * Born on Date: 2002.02.05 * @@ -138,7 +136,7 @@ void smp_send_reschedule(int cpu_id) *==========================================================================*/ void smp_reschedule_interrupt(void) { - /* nothing to do */ + scheduler_ipi(); } /*==========================================================================* diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c index ba78b21cc8d0..76923eeb58b9 100644 --- a/arch/mips/cavium-octeon/smp.c +++ b/arch/mips/cavium-octeon/smp.c @@ -44,6 +44,8 @@ static irqreturn_t mailbox_interrupt(int irq, void *dev_id) if (action & SMP_CALL_FUNCTION) smp_call_function_interrupt(); + if (action & SMP_RESCHEDULE_YOURSELF) + scheduler_ipi(); /* Check if we've been told to flush the icache */ if (action & SMP_ICACHE_FLUSH) diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c index 5a88cc4ccd5a..cedac4633741 100644 --- a/arch/mips/kernel/smtc.c +++ b/arch/mips/kernel/smtc.c @@ -929,7 +929,7 @@ static void post_direct_ipi(int cpu, struct smtc_ipi *pipi) static void ipi_resched_interrupt(void) { - /* Return from interrupt should be enough to cause scheduler check */ + scheduler_ipi(); } static void ipi_call_interrupt(void) diff --git a/arch/mips/mti-malta/malta-int.c b/arch/mips/mti-malta/malta-int.c index 9027061f0ead..7d93e6fbfa5a 100644 --- a/arch/mips/mti-malta/malta-int.c +++ b/arch/mips/mti-malta/malta-int.c @@ -309,6 +309,8 @@ static void ipi_call_dispatch(void) static irqreturn_t ipi_resched_interrupt(int irq, void *dev_id) { + scheduler_ipi(); + return IRQ_HANDLED; } diff --git a/arch/mips/pmc-sierra/yosemite/smp.c b/arch/mips/pmc-sierra/yosemite/smp.c index efc9e889b349..2608752898c0 100644 --- a/arch/mips/pmc-sierra/yosemite/smp.c +++ b/arch/mips/pmc-sierra/yosemite/smp.c @@ -55,6 +55,8 @@ void titan_mailbox_irq(void) if (status & 0x2) smp_call_function_interrupt(); + if (status & 0x4) + scheduler_ipi(); break; case 1: @@ -63,6 +65,8 @@ void titan_mailbox_irq(void) if (status & 0x2) smp_call_function_interrupt(); + if (status & 0x4) + scheduler_ipi(); break; } } diff --git a/arch/mips/sgi-ip27/ip27-irq.c b/arch/mips/sgi-ip27/ip27-irq.c index 0a04603d577c..b18b04e48577 100644 --- a/arch/mips/sgi-ip27/ip27-irq.c +++ b/arch/mips/sgi-ip27/ip27-irq.c @@ -147,8 +147,10 @@ static void ip27_do_irq_mask0(void) #ifdef CONFIG_SMP if (pend0 & (1UL << CPU_RESCHED_A_IRQ)) { LOCAL_HUB_CLR_INTR(CPU_RESCHED_A_IRQ); + scheduler_ipi(); } else if (pend0 & (1UL << CPU_RESCHED_B_IRQ)) { LOCAL_HUB_CLR_INTR(CPU_RESCHED_B_IRQ); + scheduler_ipi(); } else if (pend0 & (1UL << CPU_CALL_A_IRQ)) { LOCAL_HUB_CLR_INTR(CPU_CALL_A_IRQ); smp_call_function_interrupt(); diff --git a/arch/mips/sibyte/bcm1480/smp.c b/arch/mips/sibyte/bcm1480/smp.c index 47b347c992ea..d667875be564 100644 --- a/arch/mips/sibyte/bcm1480/smp.c +++ b/arch/mips/sibyte/bcm1480/smp.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -189,10 +190,8 @@ void bcm1480_mailbox_interrupt(void) /* Clear the mailbox to clear the interrupt */ __raw_writeq(((u64)action)<<48, mailbox_0_clear_regs[cpu]); - /* - * Nothing to do for SMP_RESCHEDULE_YOURSELF; returning from the - * interrupt will do the reschedule for us - */ + if (action & SMP_RESCHEDULE_YOURSELF) + scheduler_ipi(); if (action & SMP_CALL_FUNCTION) smp_call_function_interrupt(); diff --git a/arch/mips/sibyte/sb1250/smp.c b/arch/mips/sibyte/sb1250/smp.c index c00a5cb1128d..38e7f6bd7922 100644 --- a/arch/mips/sibyte/sb1250/smp.c +++ 
b/arch/mips/sibyte/sb1250/smp.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -177,10 +178,8 @@ void sb1250_mailbox_interrupt(void) /* Clear the mailbox to clear the interrupt */ ____raw_writeq(((u64)action) << 48, mailbox_clear_regs[cpu]); - /* - * Nothing to do for SMP_RESCHEDULE_YOURSELF; returning from the - * interrupt will do the reschedule for us - */ + if (action & SMP_RESCHEDULE_YOURSELF) + scheduler_ipi(); if (action & SMP_CALL_FUNCTION) smp_call_function_interrupt(); diff --git a/arch/mn10300/kernel/smp.c b/arch/mn10300/kernel/smp.c index 226c826a2194..83fb27912231 100644 --- a/arch/mn10300/kernel/smp.c +++ b/arch/mn10300/kernel/smp.c @@ -494,14 +494,11 @@ void smp_send_stop(void) * @irq: The interrupt number. * @dev_id: The device ID. * - * We need do nothing here, since the scheduling will be effected on our way - * back through entry.S. - * * Returns IRQ_HANDLED to indicate we handled the interrupt successfully. */ static irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id) { - /* do nothing */ + scheduler_ipi(); return IRQ_HANDLED; } diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index 69d63d354ef0..828305f19cff 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -155,10 +155,7 @@ ipi_interrupt(int irq, void *dev_id) case IPI_RESCHEDULE: smp_debug(100, KERN_DEBUG "CPU%d IPI_RESCHEDULE\n", this_cpu); - /* - * Reschedule callback. Everything to be - * done is done by the interrupt return path. - */ + scheduler_ipi(); break; case IPI_CALL_FUNC: diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index cbdbb14be4b0..9f9c204bef69 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -116,7 +116,7 @@ void smp_message_recv(int msg) generic_smp_call_function_interrupt(); break; case PPC_MSG_RESCHEDULE: - /* we notice need_resched on exit */ + scheduler_ipi(); break; case PPC_MSG_CALL_FUNC_SINGLE: generic_smp_call_function_single_interrupt(); @@ -146,7 +146,7 @@ static irqreturn_t call_function_action(int irq, void *data) static irqreturn_t reschedule_action(int irq, void *data) { - /* we just need the return path side effect of checking need_resched */ + scheduler_ipi(); return IRQ_HANDLED; } diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 63a97db83f96..63c7d9ff220d 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -165,12 +165,12 @@ static void do_ext_call_interrupt(unsigned int ext_int_code, kstat_cpu(smp_processor_id()).irqs[EXTINT_IPI]++; /* * handle bit signal external calls - * - * For the ec_schedule signal we have to do nothing. All the work - * is done automatically when we return from the interrupt. 
*/ bits = xchg(&S390_lowcore.ext_call_fast, 0); + if (test_bit(ec_schedule, &bits)) + scheduler_ipi(); + if (test_bit(ec_call_function, &bits)) generic_smp_call_function_interrupt(); diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c index 509b36b45115..6207561ea34a 100644 --- a/arch/sh/kernel/smp.c +++ b/arch/sh/kernel/smp.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -323,6 +324,7 @@ void smp_message_recv(unsigned int msg) generic_smp_call_function_interrupt(); break; case SMP_MSG_RESCHEDULE: + scheduler_ipi(); break; case SMP_MSG_FUNCTION_SINGLE: generic_smp_call_function_single_interrupt(); diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c index 91c10fb70858..f95690c167b6 100644 --- a/arch/sparc/kernel/smp_32.c +++ b/arch/sparc/kernel/smp_32.c @@ -125,7 +125,9 @@ struct linux_prom_registers smp_penguin_ctable __cpuinitdata = { 0 }; void smp_send_reschedule(int cpu) { - /* See sparc64 */ + /* + * XXX missing reschedule IPI, see scheduler_ipi() + */ } void smp_send_stop(void) diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 3e94a8c23238..9478da7fdb3e 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1368,6 +1368,7 @@ void smp_send_reschedule(int cpu) void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs) { clear_softint(1 << irq); + scheduler_ipi(); } /* This is a nop because we capture all other cpus diff --git a/arch/tile/kernel/smp.c b/arch/tile/kernel/smp.c index a4293102ef81..c52224d5ed45 100644 --- a/arch/tile/kernel/smp.c +++ b/arch/tile/kernel/smp.c @@ -189,12 +189,8 @@ void flush_icache_range(unsigned long start, unsigned long end) /* Called when smp_send_reschedule() triggers IRQ_RESCHEDULE. */ static irqreturn_t handle_reschedule_ipi(int irq, void *token) { - /* - * Nothing to do here; when we return from interrupt, the - * rescheduling will occur there. But do bump the interrupt - * profiler count in the meantime. - */ __get_cpu_var(irq_stat).irq_resched_count++; + scheduler_ipi(); return IRQ_HANDLED; } diff --git a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c index 106bf27e2a9a..eefb107d2d73 100644 --- a/arch/um/kernel/smp.c +++ b/arch/um/kernel/smp.c @@ -173,7 +173,7 @@ void IPI_handler(int cpu) break; case 'R': - set_tsk_need_resched(current); + scheduler_ipi(); break; case 'S': diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 513deac7228d..013e7eba83bb 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -194,14 +194,13 @@ static void native_stop_other_cpus(int wait) } /* - * Reschedule call back. Nothing to do, - * all the work is done automatically when - * we return from the interrupt. + * Reschedule call back. */ void smp_reschedule_interrupt(struct pt_regs *regs) { ack_APIC_irq(); inc_irq_stat(irq_resched_count); + scheduler_ipi(); /* * KVM uses this interrupt to force a cpu out of guest mode */ diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 30612441ed99..762b46ab14d5 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -46,13 +46,12 @@ static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id); static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id); /* - * Reschedule call back. Nothing to do, - * all the work is done automatically when - * we return from the interrupt. + * Reschedule call back. 
*/ static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id) { inc_irq_stat(irq_resched_count); + scheduler_ipi(); return IRQ_HANDLED; } diff --git a/include/linux/sched.h b/include/linux/sched.h index 4ec2c027e92c..758e27afcda5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2189,8 +2189,10 @@ extern void set_task_comm(struct task_struct *tsk, char *from); extern char *get_task_comm(char *to, struct task_struct *tsk); #ifdef CONFIG_SMP +static inline void scheduler_ipi(void) { } extern unsigned long wait_task_inactive(struct task_struct *, long match_state); #else +static inline void scheduler_ipi(void) { } static inline unsigned long wait_task_inactive(struct task_struct *p, long match_state) { -- cgit v1.2.3-71-gd317 From 3ca7a440da394808571dad32d33d3bc0389982e6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Apr 2011 17:23:40 +0200 Subject: sched: Always provide p->on_cpu Always provide p->on_cpu so that we can determine if its on a cpu without having to lock the rq. Reviewed-by: Frank Rowand Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Nick Piggin Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/20110405152728.785452014@chello.nl Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 +--- kernel/sched.c | 46 +++++++++++++++++++++++++++++----------------- 2 files changed, 30 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 758e27afcda5..3435837e89ff 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1200,9 +1200,7 @@ struct task_struct { int lock_depth; /* BKL lock depth */ #ifdef CONFIG_SMP -#ifdef __ARCH_WANT_UNLOCKED_CTXSW - int oncpu; -#endif + int on_cpu; #endif int prio, static_prio, normal_prio; diff --git a/kernel/sched.c b/kernel/sched.c index a187c3fe027b..cd2593e1a3ec 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -838,18 +838,39 @@ static inline int task_current(struct rq *rq, struct task_struct *p) return rq->curr == p; } -#ifndef __ARCH_WANT_UNLOCKED_CTXSW static inline int task_running(struct rq *rq, struct task_struct *p) { +#ifdef CONFIG_SMP + return p->on_cpu; +#else return task_current(rq, p); +#endif } +#ifndef __ARCH_WANT_UNLOCKED_CTXSW static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) { +#ifdef CONFIG_SMP + /* + * We can optimise this out completely for !SMP, because the + * SMP rebalancing from interrupt is the only thing that cares + * here. + */ + next->on_cpu = 1; +#endif } static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) { +#ifdef CONFIG_SMP + /* + * After ->on_cpu is cleared, the task can be moved to a different CPU. + * We must ensure this doesn't happen until the switch is completely + * finished. + */ + smp_wmb(); + prev->on_cpu = 0; +#endif #ifdef CONFIG_DEBUG_SPINLOCK /* this is a valid case when another task releases the spinlock */ rq->lock.owner = current; @@ -865,15 +886,6 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) } #else /* __ARCH_WANT_UNLOCKED_CTXSW */ -static inline int task_running(struct rq *rq, struct task_struct *p) -{ -#ifdef CONFIG_SMP - return p->oncpu; -#else - return task_current(rq, p); -#endif -} - static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) { #ifdef CONFIG_SMP @@ -882,7 +894,7 @@ static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) * SMP rebalancing from interrupt is the only thing that cares * here. 
*/ - next->oncpu = 1; + next->on_cpu = 1; #endif #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW raw_spin_unlock_irq(&rq->lock); @@ -895,12 +907,12 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) { #ifdef CONFIG_SMP /* - * After ->oncpu is cleared, the task can be moved to a different CPU. + * After ->on_cpu is cleared, the task can be moved to a different CPU. * We must ensure this doesn't happen until the switch is completely * finished. */ smp_wmb(); - prev->oncpu = 0; + prev->on_cpu = 0; #endif #ifndef __ARCH_WANT_INTERRUPTS_ON_CTXSW local_irq_enable(); @@ -2686,8 +2698,8 @@ void sched_fork(struct task_struct *p, int clone_flags) if (likely(sched_info_on())) memset(&p->sched_info, 0, sizeof(p->sched_info)); #endif -#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) - p->oncpu = 0; +#if defined(CONFIG_SMP) + p->on_cpu = 0; #endif #ifdef CONFIG_PREEMPT /* Want to start with kernel preemption disabled. */ @@ -5776,8 +5788,8 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) rcu_read_unlock(); rq->curr = rq->idle = idle; -#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) - idle->oncpu = 1; +#if defined(CONFIG_SMP) + idle->on_cpu = 1; #endif raw_spin_unlock_irqrestore(&rq->lock, flags); -- cgit v1.2.3-71-gd317 From c6eb3dda25892f1f974f5420f63e6721aab02f6f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Apr 2011 17:23:41 +0200 Subject: mutex: Use p->on_cpu for the adaptive spin Since we now have p->on_cpu unconditionally available, use it to re-implement mutex_spin_on_owner. Requested-by: Thomas Gleixner Reviewed-by: Frank Rowand Cc: Mike Galbraith Cc: Nick Piggin Cc: Linus Torvalds Cc: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20110405152728.826338173@chello.nl --- include/linux/mutex.h | 2 +- include/linux/sched.h | 2 +- kernel/mutex-debug.c | 2 +- kernel/mutex-debug.h | 2 +- kernel/mutex.c | 2 +- kernel/mutex.h | 2 +- kernel/sched.c | 83 ++++++++++++++++++++------------------------------- 7 files changed, 39 insertions(+), 56 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 94b48bd40dd7..c75471db576e 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -51,7 +51,7 @@ struct mutex { spinlock_t wait_lock; struct list_head wait_list; #if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_SMP) - struct thread_info *owner; + struct task_struct *owner; #endif #ifdef CONFIG_DEBUG_MUTEXES const char *name; diff --git a/include/linux/sched.h b/include/linux/sched.h index 3435837e89ff..173850479e2c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -360,7 +360,7 @@ extern signed long schedule_timeout_interruptible(signed long timeout); extern signed long schedule_timeout_killable(signed long timeout); extern signed long schedule_timeout_uninterruptible(signed long timeout); asmlinkage void schedule(void); -extern int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner); +extern int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner); struct nsproxy; struct user_namespace; diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c index ec815a960b5d..73da83aff418 100644 --- a/kernel/mutex-debug.c +++ b/kernel/mutex-debug.c @@ -75,7 +75,7 @@ void debug_mutex_unlock(struct mutex *lock) return; DEBUG_LOCKS_WARN_ON(lock->magic != lock); - DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info()); + DEBUG_LOCKS_WARN_ON(lock->owner != current); 
DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); mutex_clear_owner(lock); } diff --git a/kernel/mutex-debug.h b/kernel/mutex-debug.h index 57d527a16f9d..0799fd3e4cfa 100644 --- a/kernel/mutex-debug.h +++ b/kernel/mutex-debug.h @@ -29,7 +29,7 @@ extern void debug_mutex_init(struct mutex *lock, const char *name, static inline void mutex_set_owner(struct mutex *lock) { - lock->owner = current_thread_info(); + lock->owner = current; } static inline void mutex_clear_owner(struct mutex *lock) diff --git a/kernel/mutex.c b/kernel/mutex.c index c4195fa98900..fe4706cb0c5b 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -160,7 +160,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, */ for (;;) { - struct thread_info *owner; + struct task_struct *owner; /* * If we own the BKL, then don't spin. The owner of diff --git a/kernel/mutex.h b/kernel/mutex.h index 67578ca48f94..4115fbf83b12 100644 --- a/kernel/mutex.h +++ b/kernel/mutex.h @@ -19,7 +19,7 @@ #ifdef CONFIG_SMP static inline void mutex_set_owner(struct mutex *lock) { - lock->owner = current_thread_info(); + lock->owner = current; } static inline void mutex_clear_owner(struct mutex *lock) diff --git a/kernel/sched.c b/kernel/sched.c index cd2593e1a3ec..55cc50323ce1 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4173,70 +4173,53 @@ need_resched: EXPORT_SYMBOL(schedule); #ifdef CONFIG_MUTEX_SPIN_ON_OWNER -/* - * Look out! "owner" is an entirely speculative pointer - * access and not reliable. - */ -int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner) -{ - unsigned int cpu; - struct rq *rq; - if (!sched_feat(OWNER_SPIN)) - return 0; +static inline bool owner_running(struct mutex *lock, struct task_struct *owner) +{ + bool ret = false; -#ifdef CONFIG_DEBUG_PAGEALLOC - /* - * Need to access the cpu field knowing that - * DEBUG_PAGEALLOC could have unmapped it if - * the mutex owner just released it and exited. - */ - if (probe_kernel_address(&owner->cpu, cpu)) - return 0; -#else - cpu = owner->cpu; -#endif + rcu_read_lock(); + if (lock->owner != owner) + goto fail; /* - * Even if the access succeeded (likely case), - * the cpu field may no longer be valid. + * Ensure we emit the owner->on_cpu, dereference _after_ checking + * lock->owner still matches owner, if that fails, owner might + * point to free()d memory, if it still matches, the rcu_read_lock() + * ensures the memory stays valid. */ - if (cpu >= nr_cpumask_bits) - return 0; + barrier(); - /* - * We need to validate that we can do a - * get_cpu() and that we have the percpu area. - */ - if (!cpu_online(cpu)) - return 0; + ret = owner->on_cpu; +fail: + rcu_read_unlock(); - rq = cpu_rq(cpu); + return ret; +} - for (;;) { - /* - * Owner changed, break to re-assess state. - */ - if (lock->owner != owner) { - /* - * If the lock has switched to a different owner, - * we likely have heavy contention. Return 0 to quit - * optimistic spinning and not contend further: - */ - if (lock->owner) - return 0; - break; - } +/* + * Look out! "owner" is an entirely speculative pointer + * access and not reliable. + */ +int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner) +{ + if (!sched_feat(OWNER_SPIN)) + return 0; - /* - * Is that owner really running on that cpu? 
- */ - if (task_thread_info(rq->curr) != owner || need_resched()) + while (owner_running(lock, owner)) { + if (need_resched()) return 0; arch_mutex_cpu_relax(); } + /* + * If the owner changed to another task there is likely + * heavy contention, stop spinning. + */ + if (lock->owner) + return 0; + return 1; } #endif -- cgit v1.2.3-71-gd317 From fd2f4419b4cbe8fe90796df9617c355762afd6a4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Apr 2011 17:23:44 +0200 Subject: sched: Provide p->on_rq Provide a generic p->on_rq because the p->se.on_rq semantics are unfavourable for lockless wakeups but needed for sched_fair. In particular, p->on_rq is only cleared when we actually dequeue the task in schedule() and not on any random dequeue as done by things like __migrate_task() and __sched_setscheduler(). This also allows us to remove p->se usage from !sched_fair code. Reviewed-by: Frank Rowand Cc: Mike Galbraith Cc: Nick Piggin Cc: Linus Torvalds Cc: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20110405152728.949545047@chello.nl --- include/linux/sched.h | 1 + kernel/sched.c | 38 ++++++++++++++++++++------------------ kernel/sched_debug.c | 2 +- kernel/sched_rt.c | 16 ++++++++-------- kernel/sched_stoptask.c | 2 +- 5 files changed, 31 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 173850479e2c..b33a700652dc 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1202,6 +1202,7 @@ struct task_struct { #ifdef CONFIG_SMP int on_cpu; #endif + int on_rq; int prio, static_prio, normal_prio; unsigned int rt_priority; diff --git a/kernel/sched.c b/kernel/sched.c index 4481638f9178..dece28e505c9 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1785,7 +1785,6 @@ static void enqueue_task(struct rq *rq, struct task_struct *p, int flags) update_rq_clock(rq); sched_info_queued(p); p->sched_class->enqueue_task(rq, p, flags); - p->se.on_rq = 1; } static void dequeue_task(struct rq *rq, struct task_struct *p, int flags) @@ -1793,7 +1792,6 @@ static void dequeue_task(struct rq *rq, struct task_struct *p, int flags) update_rq_clock(rq); sched_info_dequeued(p); p->sched_class->dequeue_task(rq, p, flags); - p->se.on_rq = 0; } /* @@ -2128,7 +2126,7 @@ static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) * A queue event has occurred, and we're going to schedule. In * this case, we can save a useless back to back clock update. */ - if (rq->curr->se.on_rq && test_tsk_need_resched(rq->curr)) + if (rq->curr->on_rq && test_tsk_need_resched(rq->curr)) rq->skip_clock_update = 1; } @@ -2203,7 +2201,7 @@ static bool migrate_task(struct task_struct *p, struct rq *rq) * If the task is not on a runqueue (and not running), then * the next wake-up will properly place the task. 
*/ - return p->se.on_rq || task_running(rq, p); + return p->on_rq || task_running(rq, p); } /* @@ -2263,7 +2261,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state) rq = task_rq_lock(p, &flags); trace_sched_wait_task(p); running = task_running(rq, p); - on_rq = p->se.on_rq; + on_rq = p->on_rq; ncsw = 0; if (!match_state || p->state == match_state) ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ @@ -2444,6 +2442,7 @@ ttwu_stat(struct rq *rq, struct task_struct *p, int cpu, int wake_flags) static void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags) { activate_task(rq, p, en_flags); + p->on_rq = 1; /* if a worker is waking up, notify workqueue */ if (p->flags & PF_WQ_WORKER) @@ -2506,7 +2505,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, cpu = task_cpu(p); - if (p->se.on_rq) + if (p->on_rq) goto out_running; orig_cpu = cpu; @@ -2583,7 +2582,7 @@ static void try_to_wake_up_local(struct task_struct *p) if (!(p->state & TASK_NORMAL)) return; - if (!p->se.on_rq) + if (!p->on_rq) ttwu_activate(rq, p, ENQUEUE_WAKEUP); ttwu_post_activation(p, rq, 0); @@ -2620,19 +2619,21 @@ int wake_up_state(struct task_struct *p, unsigned int state) */ static void __sched_fork(struct task_struct *p) { + p->on_rq = 0; + + p->se.on_rq = 0; p->se.exec_start = 0; p->se.sum_exec_runtime = 0; p->se.prev_sum_exec_runtime = 0; p->se.nr_migrations = 0; p->se.vruntime = 0; + INIT_LIST_HEAD(&p->se.group_node); #ifdef CONFIG_SCHEDSTATS memset(&p->se.statistics, 0, sizeof(p->se.statistics)); #endif INIT_LIST_HEAD(&p->rt.run_list); - p->se.on_rq = 0; - INIT_LIST_HEAD(&p->se.group_node); #ifdef CONFIG_PREEMPT_NOTIFIERS INIT_HLIST_HEAD(&p->preempt_notifiers); @@ -2750,6 +2751,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) rq = task_rq_lock(p, &flags); activate_task(rq, p, 0); + p->on_rq = 1; trace_sched_wakeup_new(p, true); check_preempt_curr(rq, p, WF_FORK); #ifdef CONFIG_SMP @@ -4051,7 +4053,7 @@ static inline void schedule_debug(struct task_struct *prev) static void put_prev_task(struct rq *rq, struct task_struct *prev) { - if (prev->se.on_rq) + if (prev->on_rq) update_rq_clock(rq); prev->sched_class->put_prev_task(rq, prev); } @@ -4126,7 +4128,9 @@ need_resched: if (to_wakeup) try_to_wake_up_local(to_wakeup); } + deactivate_task(rq, prev, DEQUEUE_SLEEP); + prev->on_rq = 0; /* * If we are going to sleep and we have plugged IO queued, make @@ -4695,7 +4699,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio) trace_sched_pi_setprio(p, prio); oldprio = p->prio; prev_class = p->sched_class; - on_rq = p->se.on_rq; + on_rq = p->on_rq; running = task_current(rq, p); if (on_rq) dequeue_task(rq, p, 0); @@ -4743,7 +4747,7 @@ void set_user_nice(struct task_struct *p, long nice) p->static_prio = NICE_TO_PRIO(nice); goto out_unlock; } - on_rq = p->se.on_rq; + on_rq = p->on_rq; if (on_rq) dequeue_task(rq, p, 0); @@ -4877,8 +4881,6 @@ static struct task_struct *find_process_by_pid(pid_t pid) static void __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio) { - BUG_ON(p->se.on_rq); - p->policy = policy; p->rt_priority = prio; p->normal_prio = normal_prio(p); @@ -5044,7 +5046,7 @@ recheck: raw_spin_unlock_irqrestore(&p->pi_lock, flags); goto recheck; } - on_rq = p->se.on_rq; + on_rq = p->on_rq; running = task_current(rq, p); if (on_rq) deactivate_task(rq, p, 0); @@ -5965,7 +5967,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) * If we're not on a rq, the next wake-up will ensure we're * 
placed properly. */ - if (p->se.on_rq) { + if (p->on_rq) { deactivate_task(rq_src, p, 0); set_task_cpu(p, dest_cpu); activate_task(rq_dest, p, 0); @@ -8339,7 +8341,7 @@ static void normalize_task(struct rq *rq, struct task_struct *p) int old_prio = p->prio; int on_rq; - on_rq = p->se.on_rq; + on_rq = p->on_rq; if (on_rq) deactivate_task(rq, p, 0); __setscheduler(rq, p, SCHED_NORMAL, 0); @@ -8682,7 +8684,7 @@ void sched_move_task(struct task_struct *tsk) rq = task_rq_lock(tsk, &flags); running = task_current(rq, tsk); - on_rq = tsk->se.on_rq; + on_rq = tsk->on_rq; if (on_rq) dequeue_task(rq, tsk, 0); diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 7bacd83a4158..3669bec6e130 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -152,7 +152,7 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu) read_lock_irqsave(&tasklist_lock, flags); do_each_thread(g, p) { - if (!p->se.on_rq || task_cpu(p) != rq_cpu) + if (!p->on_rq || task_cpu(p) != rq_cpu) continue; print_task(m, rq, p); diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index e7cebdc65f82..9ca4f5f879c4 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -1136,7 +1136,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p) * The previous task needs to be made eligible for pushing * if it is still active */ - if (p->se.on_rq && p->rt.nr_cpus_allowed > 1) + if (on_rt_rq(&p->rt) && p->rt.nr_cpus_allowed > 1) enqueue_pushable_task(rq, p); } @@ -1287,7 +1287,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) !cpumask_test_cpu(lowest_rq->cpu, &task->cpus_allowed) || task_running(rq, task) || - !task->se.on_rq)) { + !task->on_rq)) { raw_spin_unlock(&lowest_rq->lock); lowest_rq = NULL; @@ -1321,7 +1321,7 @@ static struct task_struct *pick_next_pushable_task(struct rq *rq) BUG_ON(task_current(rq, p)); BUG_ON(p->rt.nr_cpus_allowed <= 1); - BUG_ON(!p->se.on_rq); + BUG_ON(!p->on_rq); BUG_ON(!rt_task(p)); return p; @@ -1467,7 +1467,7 @@ static int pull_rt_task(struct rq *this_rq) */ if (p && (p->prio < this_rq->rt.highest_prio.curr)) { WARN_ON(p == src_rq->curr); - WARN_ON(!p->se.on_rq); + WARN_ON(!p->on_rq); /* * There's a chance that p is higher in priority @@ -1538,7 +1538,7 @@ static void set_cpus_allowed_rt(struct task_struct *p, * Update the migration status of the RQ if we have an RT task * which is running AND changing its weight value. */ - if (p->se.on_rq && (weight != p->rt.nr_cpus_allowed)) { + if (p->on_rq && (weight != p->rt.nr_cpus_allowed)) { struct rq *rq = task_rq(p); if (!task_current(rq, p)) { @@ -1608,7 +1608,7 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p) * we may need to handle the pulling of RT tasks * now. */ - if (p->se.on_rq && !rq->rt.rt_nr_running) + if (p->on_rq && !rq->rt.rt_nr_running) pull_rt_task(rq); } @@ -1638,7 +1638,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p) * If that current running task is also an RT task * then see if we can move to another run queue. 
*/ - if (p->se.on_rq && rq->curr != p) { + if (p->on_rq && rq->curr != p) { #ifdef CONFIG_SMP if (rq->rt.overloaded && push_rt_task(rq) && /* Don't resched if we changed runqueues */ @@ -1657,7 +1657,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p) static void prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio) { - if (!p->se.on_rq) + if (!p->on_rq) return; if (rq->curr == p) { diff --git a/kernel/sched_stoptask.c b/kernel/sched_stoptask.c index 1ba2bd40fdac..f607de42e6fc 100644 --- a/kernel/sched_stoptask.c +++ b/kernel/sched_stoptask.c @@ -26,7 +26,7 @@ static struct task_struct *pick_next_task_stop(struct rq *rq) { struct task_struct *stop = rq->stop; - if (stop && stop->se.on_rq) + if (stop && stop->on_rq) return stop; return NULL; -- cgit v1.2.3-71-gd317 From 7608dec2ce2004c234339bef8c8074e5e601d0e9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Apr 2011 17:23:46 +0200 Subject: sched: Drop the rq argument to sched_class::select_task_rq() In preparation of calling select_task_rq() without rq->lock held, drop the dependency on the rq argument. Reviewed-by: Frank Rowand Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Nick Piggin Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/20110405152729.031077745@chello.nl Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 +-- kernel/sched.c | 20 +++++++++++--------- kernel/sched_fair.c | 2 +- kernel/sched_idletask.c | 2 +- kernel/sched_rt.c | 38 ++++++++++++++++++++++++++------------ kernel/sched_stoptask.c | 3 +-- 6 files changed, 41 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index b33a700652dc..ff4e2f9c24a7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1067,8 +1067,7 @@ struct sched_class { void (*put_prev_task) (struct rq *rq, struct task_struct *p); #ifdef CONFIG_SMP - int (*select_task_rq)(struct rq *rq, struct task_struct *p, - int sd_flag, int flags); + int (*select_task_rq)(struct task_struct *p, int sd_flag, int flags); void (*pre_schedule) (struct rq *this_rq, struct task_struct *task); void (*post_schedule) (struct rq *this_rq); diff --git a/kernel/sched.c b/kernel/sched.c index d398f2f0a3c9..d4b815d345b3 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2195,13 +2195,15 @@ static int migration_cpu_stop(void *data); * The task's runqueue lock must be held. * Returns true if you have to wait for migration thread. */ -static bool migrate_task(struct task_struct *p, struct rq *rq) +static bool need_migrate_task(struct task_struct *p) { /* * If the task is not on a runqueue (and not running), then * the next wake-up will properly place the task. */ - return p->on_rq || task_running(rq, p); + bool running = p->on_rq || p->on_cpu; + smp_rmb(); /* finish_lock_switch() */ + return running; } /* @@ -2376,9 +2378,9 @@ static int select_fallback_rq(int cpu, struct task_struct *p) * The caller (fork, wakeup) owns p->pi_lock, ->cpus_allowed is stable. 
*/ static inline -int select_task_rq(struct rq *rq, struct task_struct *p, int sd_flags, int wake_flags) +int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags) { - int cpu = p->sched_class->select_task_rq(rq, p, sd_flags, wake_flags); + int cpu = p->sched_class->select_task_rq(p, sd_flags, wake_flags); /* * In order not to call set_task_cpu() on a blocking task we need @@ -2533,7 +2535,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, en_flags |= ENQUEUE_WAKING; } - cpu = select_task_rq(rq, p, SD_BALANCE_WAKE, wake_flags); + cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags); if (cpu != orig_cpu) set_task_cpu(p, cpu); __task_rq_unlock(rq); @@ -2744,7 +2746,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) * We set TASK_WAKING so that select_task_rq() can drop rq->lock * without people poking at ->cpus_allowed. */ - cpu = select_task_rq(rq, p, SD_BALANCE_FORK, 0); + cpu = select_task_rq(p, SD_BALANCE_FORK, 0); set_task_cpu(p, cpu); p->state = TASK_RUNNING; @@ -3474,7 +3476,7 @@ void sched_exec(void) int dest_cpu; rq = task_rq_lock(p, &flags); - dest_cpu = p->sched_class->select_task_rq(rq, p, SD_BALANCE_EXEC, 0); + dest_cpu = p->sched_class->select_task_rq(p, SD_BALANCE_EXEC, 0); if (dest_cpu == smp_processor_id()) goto unlock; @@ -3482,7 +3484,7 @@ void sched_exec(void) * select_task_rq() can race against ->cpus_allowed */ if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed) && - likely(cpu_active(dest_cpu)) && migrate_task(p, rq)) { + likely(cpu_active(dest_cpu)) && need_migrate_task(p)) { struct migration_arg arg = { p, dest_cpu }; task_rq_unlock(rq, &flags); @@ -5911,7 +5913,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) goto out; dest_cpu = cpumask_any_and(cpu_active_mask, new_mask); - if (migrate_task(p, rq)) { + if (need_migrate_task(p)) { struct migration_arg arg = { p, dest_cpu }; /* Need help from migration thread: drop lock and wait. */ __task_rq_unlock(rq); diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 4ee50f0af8d1..96b2c95ac356 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1657,7 +1657,7 @@ static int select_idle_sibling(struct task_struct *p, int target) * preempt must be disabled. 
*/ static int -select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_flags) +select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags) { struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL; int cpu = smp_processor_id(); diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c index a776a6396427..0a51882534ea 100644 --- a/kernel/sched_idletask.c +++ b/kernel/sched_idletask.c @@ -7,7 +7,7 @@ #ifdef CONFIG_SMP static int -select_task_rq_idle(struct rq *rq, struct task_struct *p, int sd_flag, int flags) +select_task_rq_idle(struct task_struct *p, int sd_flag, int flags) { return task_cpu(p); /* IDLE tasks as never migrated */ } diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 9ca4f5f879c4..19ecb3127379 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -977,13 +977,23 @@ static void yield_task_rt(struct rq *rq) static int find_lowest_rq(struct task_struct *task); static int -select_task_rq_rt(struct rq *rq, struct task_struct *p, int sd_flag, int flags) +select_task_rq_rt(struct task_struct *p, int sd_flag, int flags) { + struct task_struct *curr; + struct rq *rq; + int cpu; + if (sd_flag != SD_BALANCE_WAKE) return smp_processor_id(); + cpu = task_cpu(p); + rq = cpu_rq(cpu); + + rcu_read_lock(); + curr = ACCESS_ONCE(rq->curr); /* unlocked access */ + /* - * If the current task is an RT task, then + * If the current task on @p's runqueue is an RT task, then * try to see if we can wake this RT task up on another * runqueue. Otherwise simply start this RT task * on its current runqueue. @@ -997,21 +1007,25 @@ select_task_rq_rt(struct rq *rq, struct task_struct *p, int sd_flag, int flags) * lock? * * For equal prio tasks, we just let the scheduler sort it out. + * + * Otherwise, just let it ride on the affined RQ and the + * post-schedule router will push the preempted task away + * + * This test is optimistic, if we get it wrong the load-balancer + * will have to sort it out. */ - if (unlikely(rt_task(rq->curr)) && - (rq->curr->rt.nr_cpus_allowed < 2 || - rq->curr->prio < p->prio) && + if (curr && unlikely(rt_task(curr)) && + (curr->rt.nr_cpus_allowed < 2 || + curr->prio < p->prio) && (p->rt.nr_cpus_allowed > 1)) { - int cpu = find_lowest_rq(p); + int target = find_lowest_rq(p); - return (cpu == -1) ? task_cpu(p) : cpu; + if (target != -1) + cpu = target; } + rcu_read_unlock(); - /* - * Otherwise, just let it ride on the affined RQ and the - * post-schedule router will push the preempted task away - */ - return task_cpu(p); + return cpu; } static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) diff --git a/kernel/sched_stoptask.c b/kernel/sched_stoptask.c index f607de42e6fc..6f437632afab 100644 --- a/kernel/sched_stoptask.c +++ b/kernel/sched_stoptask.c @@ -9,8 +9,7 @@ #ifdef CONFIG_SMP static int -select_task_rq_stop(struct rq *rq, struct task_struct *p, - int sd_flag, int flags) +select_task_rq_stop(struct task_struct *p, int sd_flag, int flags) { return task_cpu(p); /* stop tasks as never migrate */ } -- cgit v1.2.3-71-gd317 From 74f8e4b2335de45485b8d5b31a504747f13c8070 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Apr 2011 17:23:47 +0200 Subject: sched: Remove rq argument to sched_class::task_waking() In preparation of calling this without rq->lock held, remove the dependency on the rq argument. 
Reviewed-by: Frank Rowand Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Nick Piggin Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/20110405152729.071474242@chello.nl Signed-off-by: Ingo Molnar --- include/linux/sched.h | 10 +++++++--- kernel/sched.c | 2 +- kernel/sched_fair.c | 4 +++- 3 files changed, 11 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index ff4e2f9c24a7..7f5732f8c618 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1048,8 +1048,12 @@ struct sched_domain; #define WF_FORK 0x02 /* child wakeup after fork */ #define ENQUEUE_WAKEUP 1 -#define ENQUEUE_WAKING 2 -#define ENQUEUE_HEAD 4 +#define ENQUEUE_HEAD 2 +#ifdef CONFIG_SMP +#define ENQUEUE_WAKING 4 /* sched_class::task_waking was called */ +#else +#define ENQUEUE_WAKING 0 +#endif #define DEQUEUE_SLEEP 1 @@ -1071,7 +1075,7 @@ struct sched_class { void (*pre_schedule) (struct rq *this_rq, struct task_struct *task); void (*post_schedule) (struct rq *this_rq); - void (*task_waking) (struct rq *this_rq, struct task_struct *task); + void (*task_waking) (struct task_struct *task); void (*task_woken) (struct rq *this_rq, struct task_struct *task); void (*set_cpus_allowed)(struct task_struct *p, diff --git a/kernel/sched.c b/kernel/sched.c index d4b815d345b3..46f42cac4eb1 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2531,7 +2531,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, p->state = TASK_WAKING; if (p->sched_class->task_waking) { - p->sched_class->task_waking(rq, p); + p->sched_class->task_waking(p); en_flags |= ENQUEUE_WAKING; } diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 96b2c95ac356..ad4c414f456d 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1372,11 +1372,13 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) #ifdef CONFIG_SMP -static void task_waking_fair(struct rq *rq, struct task_struct *p) +static void task_waking_fair(struct task_struct *p) { struct sched_entity *se = &p->se; struct cfs_rq *cfs_rq = cfs_rq_of(se); + lockdep_assert_held(&task_rq(p)->lock); + se->vruntime -= cfs_rq->min_vruntime; } -- cgit v1.2.3-71-gd317 From a8e4f2eaecc9bfa4954adf79a04f4f22fddd829c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Apr 2011 17:23:49 +0200 Subject: sched: Delay task_contributes_to_load() In prepratation of having to call task_contributes_to_load() without holding rq->lock, we need to store the result until we do and can update the rq accounting accordingly. 
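Condensed to its essence before the full hunks below, the pattern is: sample the predicate while p->state is still stable, store the result in the new per-task bit, and apply the nr_uninterruptible accounting only once the target runqueue lock is actually held. A sketch, abridged from the diff that follows rather than new code:

	/* wakeup path, before the target rq->lock is taken */
	p->sched_contributes_to_load = !!task_contributes_to_load(p);
	p->state = TASK_WAKING;

	/* ... later, with the target rq locked */
	if (p->sched_contributes_to_load)
		rq->nr_uninterruptible--;
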
Reviewed-by: Frank Rowand Cc: Mike Galbraith Cc: Nick Piggin Cc: Linus Torvalds Cc: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20110405152729.151523907@chello.nl --- include/linux/sched.h | 1 + kernel/sched.c | 16 ++++------------ 2 files changed, 5 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 7f5732f8c618..25c50317ddc1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1273,6 +1273,7 @@ struct task_struct { /* Revert to default priority/policy when forking */ unsigned sched_reset_on_fork:1; + unsigned sched_contributes_to_load:1; pid_t pid; pid_t tgid; diff --git a/kernel/sched.c b/kernel/sched.c index 7a5eb2620785..fd32b78c123c 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2519,18 +2519,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, if (unlikely(task_running(rq, p))) goto out_activate; - /* - * In order to handle concurrent wakeups and release the rq->lock - * we put the task in TASK_WAKING state. - * - * First fix up the nr_uninterruptible count: - */ - if (task_contributes_to_load(p)) { - if (likely(cpu_online(orig_cpu))) - rq->nr_uninterruptible--; - else - this_rq()->nr_uninterruptible--; - } + p->sched_contributes_to_load = !!task_contributes_to_load(p); p->state = TASK_WAKING; if (p->sched_class->task_waking) { @@ -2555,6 +2544,9 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, WARN_ON(task_cpu(p) != cpu); WARN_ON(p->state != TASK_WAKING); + if (p->sched_contributes_to_load) + rq->nr_uninterruptible--; + out_activate: #endif /* CONFIG_SMP */ ttwu_activate(rq, p, en_flags); -- cgit v1.2.3-71-gd317 From 317f394160e9beb97d19a84c39b7e5eb3d7815a8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Apr 2011 17:23:58 +0200 Subject: sched: Move the second half of ttwu() to the remote cpu Now that we've removed the rq->lock requirement from the first part of ttwu() and can compute placement without holding any rq->lock, ensure we execute the second half of ttwu() on the actual cpu we want the task to run on. This avoids having to take rq->lock and doing the task enqueue remotely, saving lots on cacheline transfers. 
As measured using: http://oss.oracle.com/~mason/sembench.c $ for i in /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor ; do echo performance > $i; done $ echo 4096 32000 64 128 > /proc/sys/kernel/sem $ ./sembench -t 2048 -w 1900 -o 0 unpatched: run time 30 seconds 647278 worker burns per second patched: run time 30 seconds 816715 worker burns per second Reviewed-by: Frank Rowand Cc: Mike Galbraith Cc: Nick Piggin Cc: Linus Torvalds Cc: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20110405152729.515897185@chello.nl --- include/linux/sched.h | 3 ++- init/Kconfig | 5 +++++ kernel/sched.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++ kernel/sched_features.h | 6 ++++++ 4 files changed, 69 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 25c50317ddc1..e09dafa6e149 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1203,6 +1203,7 @@ struct task_struct { int lock_depth; /* BKL lock depth */ #ifdef CONFIG_SMP + struct task_struct *wake_entry; int on_cpu; #endif int on_rq; @@ -2192,7 +2193,7 @@ extern void set_task_comm(struct task_struct *tsk, char *from); extern char *get_task_comm(char *to, struct task_struct *tsk); #ifdef CONFIG_SMP -static inline void scheduler_ipi(void) { } +void scheduler_ipi(void); extern unsigned long wait_task_inactive(struct task_struct *, long match_state); #else static inline void scheduler_ipi(void) { } diff --git a/init/Kconfig b/init/Kconfig index 56240e724d9a..32745bfe059e 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -827,6 +827,11 @@ config SCHED_AUTOGROUP desktop applications. Task group autogeneration is currently based upon task session. +config SCHED_TTWU_QUEUE + bool + depends on !SPARC32 + default y + config MM_OWNER bool diff --git a/kernel/sched.c b/kernel/sched.c index 7d8b85fcdf06..9e3ede120e81 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -556,6 +556,10 @@ struct rq { unsigned int ttwu_count; unsigned int ttwu_local; #endif + +#ifdef CONFIG_SMP + struct task_struct *wake_list; +#endif }; static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); @@ -2516,10 +2520,61 @@ static int ttwu_remote(struct task_struct *p, int wake_flags) return ret; } +#ifdef CONFIG_SMP +static void sched_ttwu_pending(void) +{ + struct rq *rq = this_rq(); + struct task_struct *list = xchg(&rq->wake_list, NULL); + + if (!list) + return; + + raw_spin_lock(&rq->lock); + + while (list) { + struct task_struct *p = list; + list = list->wake_entry; + ttwu_do_activate(rq, p, 0); + } + + raw_spin_unlock(&rq->lock); +} + +void scheduler_ipi(void) +{ + sched_ttwu_pending(); +} + +static void ttwu_queue_remote(struct task_struct *p, int cpu) +{ + struct rq *rq = cpu_rq(cpu); + struct task_struct *next = rq->wake_list; + + for (;;) { + struct task_struct *old = next; + + p->wake_entry = next; + next = cmpxchg(&rq->wake_list, old, p); + if (next == old) + break; + } + + if (!next) + smp_send_reschedule(cpu); +} +#endif + static void ttwu_queue(struct task_struct *p, int cpu) { struct rq *rq = cpu_rq(cpu); +#if defined(CONFIG_SMP) && defined(CONFIG_SCHED_TTWU_QUEUE) + if (sched_feat(TTWU_QUEUE) && cpu != smp_processor_id()) { + ttwu_queue_remote(p, cpu); + return; + } +#endif + raw_spin_lock(&rq->lock); ttwu_do_activate(rq, p, 0); raw_spin_unlock(&rq->lock); @@ -6331,6 +6386,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) #ifdef CONFIG_HOTPLUG_CPU case CPU_DYING: + sched_ttwu_pending(); /* 
Update our root-domain */ raw_spin_lock_irqsave(&rq->lock, flags); if (rq->rd) { diff --git a/kernel/sched_features.h b/kernel/sched_features.h index 68e69acc29b9..be40f7371ee1 100644 --- a/kernel/sched_features.h +++ b/kernel/sched_features.h @@ -64,3 +64,9 @@ SCHED_FEAT(OWNER_SPIN, 1) * Decrement CPU power based on irq activity */ SCHED_FEAT(NONIRQ_POWER, 1) + +/* + * Queue remote wakeups on the target CPU and process them + * using the scheduler IPI. Reduces rq->lock contention/bounces. + */ +SCHED_FEAT(TTWU_QUEUE, 1) -- cgit v1.2.3-71-gd317 From 38a2f37258f9e2ae3f6e4241e01088be8dfaf4e9 Mon Sep 17 00:00:00 2001 From: huajun li Date: Wed, 13 Apr 2011 15:43:32 +0000 Subject: usbnet: Fix up 'FLAG_POINTTOPOINT' and 'FLAG_MULTI_PACKET' overlaps. USB tethering does not work anymore since 2.6.39-rc2, but it's okay in -rc1. The root cause is the new added mask code 'FLAG_POINTTOPOINT' overlaps 'FLAG_MULTI_PACKET' in include/linux/usb/usbnet.h, this causes logic issue in rx_process(). This patch cleans up the overlap. Reported-and-Tested-by: Gottfried Haider Signed-off-by: Huajun Li Signed-off-by: David S. Miller --- include/linux/usb/usbnet.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 3c7329b8ea0e..0e1855079fbb 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -103,8 +103,8 @@ struct driver_info { * Indicates to usbnet, that USB driver accumulates multiple IP packets. * Affects statistic (counters) and short packet handling. */ -#define FLAG_MULTI_PACKET 0x1000 -#define FLAG_RX_ASSEMBLE 0x2000 /* rx packets may span >1 frames */ +#define FLAG_MULTI_PACKET 0x2000 +#define FLAG_RX_ASSEMBLE 0x4000 /* rx packets may span >1 frames */ /* init device ... can sleep, or cause probe() failure */ int (*bind)(struct usbnet *, struct usb_interface *); -- cgit v1.2.3-71-gd317 From 67954fe95705a8ff80335964bd7e621d13fbc499 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 14 Apr 2011 15:21:52 -0700 Subject: memcg: fix mem_cgroup_rotate_reclaimable_page() commit 3f58a8294333 ("move memcg reclaimable page into tail of inactive list") added inline keyword twice in its prototype. CC arch/x86/kernel/asm-offsets.s In file included from include/linux/swap.h:8, from include/linux/suspend.h:4, from arch/x86/kernel/asm-offsets.c:12: include/linux/memcontrol.h:220: error: duplicate `inline' Signed-off-by: Eric Dumazet Reviewed-by: Minchan Kim Cc: Balbir Singh Cc: KAMEZAWA Hiroyuki Cc: KOSAKI Motohiro Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 5a5ce7055839..5e9840f50980 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -216,7 +216,7 @@ static inline void mem_cgroup_del_lru_list(struct page *page, int lru) return ; } -static inline inline void mem_cgroup_rotate_reclaimable_page(struct page *page) +static inline void mem_cgroup_rotate_reclaimable_page(struct page *page) { return ; } -- cgit v1.2.3-71-gd317 From 81ab4201fb7d91d6b0cd9ad5b4b16776e4bed145 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 14 Apr 2011 15:22:06 -0700 Subject: mm: add VM counters for transparent hugepages I found it difficult to make sense of transparent huge pages without having any counters for its actions. 
Add some counters to vmstat for allocation of transparent hugepages and fallback to smaller pages. Optional patch, but useful for development and understanding the system. Contains improvements from Andrea Arcangeli and Johannes Weiner [akpm@linux-foundation.org: coding-style fixes] [hannes@cmpxchg.org: fix vmstat_text[] entries] Signed-off-by: Andi Kleen Acked-by: Andrea Arcangeli Reviewed-by: KAMEZAWA Hiroyuki Signed-off-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmstat.h | 7 +++++++ mm/huge_memory.c | 25 +++++++++++++++++++++---- mm/vmstat.c | 9 +++++++++ 3 files changed, 37 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 461c0119664f..2b3831b58aa4 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -58,6 +58,13 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, UNEVICTABLE_PGCLEARED, /* on COW, page truncate */ UNEVICTABLE_PGSTRANDED, /* unable to isolate on unlock */ UNEVICTABLE_MLOCKFREED, +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + THP_FAULT_ALLOC, + THP_FAULT_FALLBACK, + THP_COLLAPSE_ALLOC, + THP_COLLAPSE_ALLOC_FAILED, + THP_SPLIT, +#endif NR_VM_EVENT_ITEMS }; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 0a619e0e2e0b..1722683bde23 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -680,8 +680,11 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, return VM_FAULT_OOM; page = alloc_hugepage_vma(transparent_hugepage_defrag(vma), vma, haddr, numa_node_id(), 0); - if (unlikely(!page)) + if (unlikely(!page)) { + count_vm_event(THP_FAULT_FALLBACK); goto out; + } + count_vm_event(THP_FAULT_ALLOC); if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) { put_page(page); goto out; @@ -909,11 +912,13 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, new_page = NULL; if (unlikely(!new_page)) { + count_vm_event(THP_FAULT_FALLBACK); ret = do_huge_pmd_wp_page_fallback(mm, vma, address, pmd, orig_pmd, page, haddr); put_page(page); goto out; } + count_vm_event(THP_FAULT_ALLOC); if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) { put_page(new_page); @@ -1390,6 +1395,7 @@ int split_huge_page(struct page *page) BUG_ON(!PageSwapBacked(page)); __split_huge_page(page, anon_vma); + count_vm_event(THP_SPLIT); BUG_ON(PageCompound(page)); out_unlock: @@ -1784,9 +1790,11 @@ static void collapse_huge_page(struct mm_struct *mm, node, __GFP_OTHER_NODE); if (unlikely(!new_page)) { up_read(&mm->mmap_sem); + count_vm_event(THP_COLLAPSE_ALLOC_FAILED); *hpage = ERR_PTR(-ENOMEM); return; } + count_vm_event(THP_COLLAPSE_ALLOC); if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) { up_read(&mm->mmap_sem); put_page(new_page); @@ -2151,8 +2159,11 @@ static void khugepaged_do_scan(struct page **hpage) #ifndef CONFIG_NUMA if (!*hpage) { *hpage = alloc_hugepage(khugepaged_defrag()); - if (unlikely(!*hpage)) + if (unlikely(!*hpage)) { + count_vm_event(THP_COLLAPSE_ALLOC_FAILED); break; + } + count_vm_event(THP_COLLAPSE_ALLOC); } #else if (IS_ERR(*hpage)) @@ -2192,8 +2203,11 @@ static struct page *khugepaged_alloc_hugepage(void) do { hpage = alloc_hugepage(khugepaged_defrag()); - if (!hpage) + if (!hpage) { + count_vm_event(THP_COLLAPSE_ALLOC_FAILED); khugepaged_alloc_sleep(); + } else + count_vm_event(THP_COLLAPSE_ALLOC); } while (unlikely(!hpage) && likely(khugepaged_enabled())); return hpage; @@ -2210,8 +2224,11 @@ static void khugepaged_loop(void) while 
(likely(khugepaged_enabled())) { #ifndef CONFIG_NUMA hpage = khugepaged_alloc_hugepage(); - if (unlikely(!hpage)) + if (unlikely(!hpage)) { + count_vm_event(THP_COLLAPSE_ALLOC_FAILED); break; + } + count_vm_event(THP_COLLAPSE_ALLOC); #else if (IS_ERR(hpage)) { khugepaged_alloc_sleep(); diff --git a/mm/vmstat.c b/mm/vmstat.c index 8cb0f0a703e5..897ea9e88238 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -948,7 +948,16 @@ static const char * const vmstat_text[] = { "unevictable_pgs_cleared", "unevictable_pgs_stranded", "unevictable_pgs_mlockfreed", + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + "thp_fault_alloc", + "thp_fault_fallback", + "thp_collapse_alloc", + "thp_collapse_alloc_failed", + "thp_split", #endif + +#endif /* CONFIG_VM_EVENTS_COUNTERS */ }; static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, -- cgit v1.2.3-71-gd317 From 4471a675dfc7ca676c165079e91c712b09dc9ce4 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Thu, 14 Apr 2011 15:22:09 -0700 Subject: brk: COMPAT_BRK: fix detection of randomized brk 5520e89 ("brk: fix min_brk lower bound computation for COMPAT_BRK") tried to get the whole logic of brk randomization for legacy (libc5-based) applications finally right. It turns out that the way to detect whether brk has actually been randomized in the end or not introduced by that patch still doesn't work for those binaries, as reported by Geert: : /sbin/init from my old m68k ramdisk exists prematurely. : : Before the patch: : : | brk(0x80005c8e) = 0x80006000 : : After the patch: : : | brk(0x80005c8e) = 0x80005c8e : : Old libc5 considers brk() to have failed if the return value is not : identical to the requested value. I don't like it, but currently see no better option than a bit flag in task_struct to catch the CONFIG_COMPAT_BRK && randomize_va_space == 2 case. 
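The strace lines above hinge on the legacy calling convention: libc5-era code treats the raw brk syscall as failed unless the kernel hands back exactly the requested address. A rough userspace illustration of that convention (a sketch for clarity, not code taken from libc5):

	#include <unistd.h>
	#include <sys/syscall.h>

	/* Legacy-style success check: the kernel returns the resulting break;
	 * anything other than the requested address counts as failure. */
	static int legacy_brk(void *addr)
	{
		return ((void *)syscall(SYS_brk, addr) == addr) ? 0 : -1;
	}
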
Signed-off-by: Jiri Kosina Tested-by: Geert Uytterhoeven Reported-by: Geert Uytterhoeven Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_elf.c | 6 +++++- include/linux/sched.h | 3 +++ mm/mmap.c | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index f34078d702d3..303983fabfd6 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -941,9 +941,13 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) current->mm->start_stack = bprm->p; #ifdef arch_randomize_brk - if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) + if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) { current->mm->brk = current->mm->start_brk = arch_randomize_brk(current->mm); +#ifdef CONFIG_COMPAT_BRK + current->brk_randomized = 1; +#endif + } #endif if (current->personality & MMAP_PAGE_ZERO) { diff --git a/include/linux/sched.h b/include/linux/sched.h index 4ec2c027e92c..18d63cea2848 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1254,6 +1254,9 @@ struct task_struct { #endif struct mm_struct *mm, *active_mm; +#ifdef CONFIG_COMPAT_BRK + unsigned brk_randomized:1; +#endif #if defined(SPLIT_RSS_COUNTING) struct task_rss_stat rss_stat; #endif diff --git a/mm/mmap.c b/mm/mmap.c index 8c05e5b43b69..e27e0cf0de03 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -259,7 +259,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) * randomize_va_space to 2, which will still cause mm->start_brk * to be arbitrarily shifted */ - if (mm->start_brk > PAGE_ALIGN(mm->end_data)) + if (current->brk_randomized) min_brk = mm->start_brk; else min_brk = mm->end_data; -- cgit v1.2.3-71-gd317 From 13209c2a52afa691ca19e7e6ebd22d4034bdfeed Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Thu, 14 Apr 2011 15:22:14 -0700 Subject: RapidIO: add IDT CPS-1432 switch definitions Signed-off-by: Alexandre Bounine Cc: Matt Porter Cc: Kumar Gala Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/switches/idt_gen2.c | 1 + include/linux/rio_ids.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/drivers/rapidio/switches/idt_gen2.c b/drivers/rapidio/switches/idt_gen2.c index 095016a9dec1..ac2701b22e71 100644 --- a/drivers/rapidio/switches/idt_gen2.c +++ b/drivers/rapidio/switches/idt_gen2.c @@ -418,3 +418,4 @@ DECLARE_RIO_SWITCH_INIT(RIO_VID_IDT, RIO_DID_IDTCPS1848, idtg2_switch_init); DECLARE_RIO_SWITCH_INIT(RIO_VID_IDT, RIO_DID_IDTCPS1616, idtg2_switch_init); DECLARE_RIO_SWITCH_INIT(RIO_VID_IDT, RIO_DID_IDTVPS1616, idtg2_switch_init); DECLARE_RIO_SWITCH_INIT(RIO_VID_IDT, RIO_DID_IDTSPS1616, idtg2_switch_init); +DECLARE_RIO_SWITCH_INIT(RIO_VID_IDT, RIO_DID_IDTCPS1432, idtg2_switch_init); diff --git a/include/linux/rio_ids.h b/include/linux/rio_ids.h index 7410d3365e2a..0cee0152aca9 100644 --- a/include/linux/rio_ids.h +++ b/include/linux/rio_ids.h @@ -35,6 +35,7 @@ #define RIO_DID_IDTCPS6Q 0x035f #define RIO_DID_IDTCPS10Q 0x035e #define RIO_DID_IDTCPS1848 0x0374 +#define RIO_DID_IDTCPS1432 0x0375 #define RIO_DID_IDTCPS1616 0x0379 #define RIO_DID_IDTVPS1616 0x0377 #define RIO_DID_IDTSPS1616 0x0378 -- cgit v1.2.3-71-gd317 From 59f9996555542f901f2d01ccab5c0612c8c5c480 Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Thu, 14 Apr 2011 15:22:14 -0700 Subject: RapidIO/mpc85xx: fix possible mport registration problems Fix a possible problem with mport registration left non-cleared after fsl_rio_setup() exits on link error. 
Abort mport initialization if registration failed. This patch is applicable to 2.6.39-rc1 only. The problem does not exist for earlier versions. Signed-off-by: Alexandre Bounine Cc: Kumar Gala Cc: Matt Porter Cc: Li Yang Cc: Thomas Moll Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/sysdev/fsl_rio.c | 4 +++- drivers/rapidio/rio.c | 5 +++-- include/linux/rio.h | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c index 14232d57369c..49798532b477 100644 --- a/arch/powerpc/sysdev/fsl_rio.c +++ b/arch/powerpc/sysdev/fsl_rio.c @@ -1457,7 +1457,6 @@ int fsl_rio_setup(struct platform_device *dev) port->ops = ops; port->priv = priv; port->phys_efptr = 0x100; - rio_register_mport(port); priv->regs_win = ioremap(regs.start, regs.end - regs.start + 1); rio_regs_win = priv->regs_win; @@ -1504,6 +1503,9 @@ int fsl_rio_setup(struct platform_device *dev) dev_info(&dev->dev, "RapidIO Common Transport System size: %d\n", port->sys_size ? 65536 : 256); + if (rio_register_mport(port)) + goto err; + if (port->host_deviceid >= 0) out_be32(priv->regs_win + RIO_GCCSR, RIO_PORT_GEN_HOST | RIO_PORT_GEN_MASTER | RIO_PORT_GEN_DISCOVERED); diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c index c29719cacbca..86c9a091a2ff 100644 --- a/drivers/rapidio/rio.c +++ b/drivers/rapidio/rio.c @@ -1171,16 +1171,17 @@ static int rio_hdid_setup(char *str) __setup("riohdid=", rio_hdid_setup); -void rio_register_mport(struct rio_mport *port) +int rio_register_mport(struct rio_mport *port) { if (next_portid >= RIO_MAX_MPORTS) { pr_err("RIO: reached specified max number of mports\n"); - return; + return 1; } port->id = next_portid++; port->host_deviceid = rio_get_hdid(port->id); list_add_tail(&port->node, &rio_mports); + return 0; } EXPORT_SYMBOL_GPL(rio_local_get_device_id); diff --git a/include/linux/rio.h b/include/linux/rio.h index 4e37a7cfa726..4d50611112ba 100644 --- a/include/linux/rio.h +++ b/include/linux/rio.h @@ -396,7 +396,7 @@ union rio_pw_msg { }; /* Architecture and hardware-specific functions */ -extern void rio_register_mport(struct rio_mport *); +extern int rio_register_mport(struct rio_mport *); extern int rio_open_inb_mbox(struct rio_mport *, void *, int, int); extern void rio_close_inb_mbox(struct rio_mport *, int); extern int rio_open_outb_mbox(struct rio_mport *, void *, int, int); -- cgit v1.2.3-71-gd317 From fce55922f5299a04c0a56b170a141fab34f13465 Mon Sep 17 00:00:00 2001 From: "Allan, Bruce W" Date: Wed, 13 Apr 2011 13:09:10 +0000 Subject: ethtool: allow custom interval for physical identification When physical identification of an adapter is done by toggling the mechanism on and off through software utilizing the set_phys_id operation, it is done with a fixed duration for both on and off states. Some drivers may want to set a custom duration for the on/off intervals. This patch changes the API so the return code from the driver's entry point when it is called with ETHTOOL_ID_ACTIVE can specify the frequency at which to cycle the on/off states, and updates the drivers that have already been converted to use the new set_phys_id and use the synchronous method for identifying an adapter. The physical identification frequency set in the updated drivers is based on how it was done prior to the introduction of set_phys_id. Compile tested only. Also fixes a compiler warning in sfc. 
v2: drivers do not return -EINVAL for ETHOOL_ID_ACTIVE v3: fold patchset into single patch and cleanup per Ben's feedback Signed-off-by: Bruce Allan Cc: Ben Hutchings Cc: Sathya Perla Cc: Subbu Seetharaman Cc: Ajit Khaparde Cc: Michael Chan Cc: Eilon Greenstein Cc: Divy Le Ray Cc: Don Fry Cc: Jon Mason Cc: Solarflare linux maintainers Cc: Steve Hodgson Cc: Stephen Hemminger Cc: Matt Carlson Acked-by: Jon Mason Acked-by: Ben Hutchings Signed-off-by: David S. Miller --- drivers/net/benet/be_ethtool.c | 2 +- drivers/net/bnx2.c | 2 +- drivers/net/bnx2x/bnx2x_ethtool.c | 2 +- drivers/net/cxgb3/cxgb3_main.c | 2 +- drivers/net/ewrk3.c | 2 +- drivers/net/niu.c | 2 +- drivers/net/pcnet32.c | 2 +- drivers/net/s2io.c | 2 +- drivers/net/sfc/ethtool.c | 6 +++--- drivers/net/skge.c | 2 +- drivers/net/sky2.c | 2 +- drivers/net/tg3.c | 2 +- include/linux/ethtool.h | 6 ++++-- net/core/ethtool.c | 31 ++++++++++++++++--------------- 14 files changed, 34 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/benet/be_ethtool.c b/drivers/net/benet/be_ethtool.c index 96f5502e0ef7..80226e4801f3 100644 --- a/drivers/net/benet/be_ethtool.c +++ b/drivers/net/benet/be_ethtool.c @@ -516,7 +516,7 @@ be_set_phys_id(struct net_device *netdev, case ETHTOOL_ID_ACTIVE: be_cmd_get_beacon_state(adapter, adapter->hba_port_num, &adapter->beacon_state); - return -EINVAL; + return 1; /* cycle on/off once per second */ case ETHTOOL_ID_ON: be_cmd_set_beacon_state(adapter, adapter->hba_port_num, 0, 0, diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index 0a52079bafef..bf729ee6acbd 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -7473,7 +7473,7 @@ bnx2_set_phys_id(struct net_device *dev, enum ethtool_phys_id_state state) bp->leds_save = REG_RD(bp, BNX2_MISC_CFG); REG_WR(bp, BNX2_MISC_CFG, BNX2_MISC_CFG_LEDMODE_MAC); - return -EINVAL; + return 1; /* cycle on/off once per second */ case ETHTOOL_ID_ON: REG_WR(bp, BNX2_EMAC_LED, BNX2_EMAC_LED_OVERRIDE | diff --git a/drivers/net/bnx2x/bnx2x_ethtool.c b/drivers/net/bnx2x/bnx2x_ethtool.c index ad7d91e499f4..0a5e88d6ba2c 100644 --- a/drivers/net/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/bnx2x/bnx2x_ethtool.c @@ -2025,7 +2025,7 @@ static int bnx2x_set_phys_id(struct net_device *dev, switch (state) { case ETHTOOL_ID_ACTIVE: - return -EINVAL; + return 1; /* cycle on/off once per second */ case ETHTOOL_ID_ON: bnx2x_set_led(&bp->link_params, &bp->link_vars, diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c index 802c7a7c3b25..a087e0691dce 100644 --- a/drivers/net/cxgb3/cxgb3_main.c +++ b/drivers/net/cxgb3/cxgb3_main.c @@ -1757,7 +1757,7 @@ static int set_phys_id(struct net_device *dev, switch (state) { case ETHTOOL_ID_ACTIVE: - return -EINVAL; + return 1; /* cycle on/off once per second */ case ETHTOOL_ID_OFF: t3_set_reg_field(adapter, A_T3DBG_GPIO_EN, F_GPIO0_OUT_VAL, 0); diff --git a/drivers/net/ewrk3.c b/drivers/net/ewrk3.c index c7ce4438e923..17b6027d8be8 100644 --- a/drivers/net/ewrk3.c +++ b/drivers/net/ewrk3.c @@ -1618,7 +1618,7 @@ static int ewrk3_set_phys_id(struct net_device *dev, /* Prevent ISR from twiddling the LED */ lp->led_mask = 0; spin_unlock_irq(&lp->hw_lock); - return -EINVAL; + return 2; /* cycle on/off twice per second */ case ETHTOOL_ID_ON: cr = inb(EWRK3_CR); diff --git a/drivers/net/niu.c b/drivers/net/niu.c index 3fa1e9cdb4a8..ea2272f0f37e 100644 --- a/drivers/net/niu.c +++ b/drivers/net/niu.c @@ -7896,7 +7896,7 @@ static int niu_set_phys_id(struct net_device *dev, switch (state) { case 
ETHTOOL_ID_ACTIVE: np->orig_led_state = niu_led_state_save(np); - return -EINVAL; + return 1; /* cycle on/off once per second */ case ETHTOOL_ID_ON: niu_force_led(np, 1); diff --git a/drivers/net/pcnet32.c b/drivers/net/pcnet32.c index e89afb929740..0a1efbae1bc0 100644 --- a/drivers/net/pcnet32.c +++ b/drivers/net/pcnet32.c @@ -1038,7 +1038,7 @@ static int pcnet32_set_phys_id(struct net_device *dev, for (i = 4; i < 8; i++) lp->save_regs[i - 4] = a->read_bcr(ioaddr, i); spin_unlock_irqrestore(&lp->lock, flags); - return -EINVAL; + return 2; /* cycle on/off twice per second */ case ETHTOOL_ID_ON: case ETHTOOL_ID_OFF: diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c index 2d5cc6142c04..2302d9743744 100644 --- a/drivers/net/s2io.c +++ b/drivers/net/s2io.c @@ -5541,7 +5541,7 @@ static int s2io_ethtool_set_led(struct net_device *dev, switch (state) { case ETHTOOL_ID_ACTIVE: sp->adapt_ctrl_org = readq(&bar0->gpio_control); - return -EINVAL; + return 1; /* cycle on/off once per second */ case ETHTOOL_ID_ON: s2io_set_led(sp, true); diff --git a/drivers/net/sfc/ethtool.c b/drivers/net/sfc/ethtool.c index 644f7c1d6e7b..5d8468fc5804 100644 --- a/drivers/net/sfc/ethtool.c +++ b/drivers/net/sfc/ethtool.c @@ -182,7 +182,7 @@ static int efx_ethtool_phys_id(struct net_device *net_dev, enum ethtool_phys_id_state state) { struct efx_nic *efx = netdev_priv(net_dev); - enum efx_led_mode mode; + enum efx_led_mode mode = EFX_LED_DEFAULT; switch (state) { case ETHTOOL_ID_ON: @@ -194,8 +194,8 @@ static int efx_ethtool_phys_id(struct net_device *net_dev, case ETHTOOL_ID_INACTIVE: mode = EFX_LED_DEFAULT; break; - default: - return -EINVAL; + case ETHTOOL_ID_ACTIVE: + return 1; /* cycle on/off once per second */ } efx->type->set_id_led(efx, mode); diff --git a/drivers/net/skge.c b/drivers/net/skge.c index 310dcbce2519..176d784cbb54 100644 --- a/drivers/net/skge.c +++ b/drivers/net/skge.c @@ -753,7 +753,7 @@ static int skge_set_phys_id(struct net_device *dev, switch (state) { case ETHTOOL_ID_ACTIVE: - return -EINVAL; + return 2; /* cycle on/off twice per second */ case ETHTOOL_ID_ON: skge_led(skge, LED_MODE_TST); diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c index a4b8fe564eb0..c8d045114c66 100644 --- a/drivers/net/sky2.c +++ b/drivers/net/sky2.c @@ -3813,7 +3813,7 @@ static int sky2_set_phys_id(struct net_device *dev, switch (state) { case ETHTOOL_ID_ACTIVE: - return -EINVAL; + return 1; /* cycle on/off once per second */ case ETHTOOL_ID_INACTIVE: sky2_led(sky2, MO_LED_NORM); break; diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 10fa476fede3..9915734ac3e9 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -10384,7 +10384,7 @@ static int tg3_set_phys_id(struct net_device *dev, switch (state) { case ETHTOOL_ID_ACTIVE: - return -EINVAL; + return 1; /* cycle on/off once per second */ case ETHTOOL_ID_ON: tw32(MAC_LED_CTRL, LED_CTRL_LNKLED_OVERRIDE | diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index ad22a68c2e5d..9de31274341d 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -798,8 +798,10 @@ bool ethtool_invalid_flags(struct net_device *dev, u32 data, u32 supported); * attached to it. The implementation may update the indicator * asynchronously or synchronously, but in either case it must return * quickly. It is initially called with the argument %ETHTOOL_ID_ACTIVE, - * and must either activate asynchronous updates or return -%EINVAL. 
- * If it returns -%EINVAL then it will be called again at intervals with + * and must either activate asynchronous updates and return zero, return + * a negative error or return a positive frequency for synchronous + * indication (e.g. 1 for one on/off cycle per second). If it returns + * a frequency then it will be called again at intervals with the * argument %ETHTOOL_ID_ON or %ETHTOOL_ID_OFF and should set the state of * the indicator accordingly. Finally, it is called with the argument * %ETHTOOL_ID_INACTIVE and must deactivate the indicator. Returns a diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 41dee2de13ad..13d79f5a86e5 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1669,7 +1669,7 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) return dev->ethtool_ops->phys_id(dev, id.data); rc = dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_ACTIVE); - if (rc && rc != -EINVAL) + if (rc < 0) return rc; /* Drop the RTNL lock while waiting, but prevent reentry or @@ -1684,21 +1684,22 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) schedule_timeout_interruptible( id.data ? (id.data * HZ) : MAX_SCHEDULE_TIMEOUT); } else { - /* Driver expects to be called periodically */ + /* Driver expects to be called at twice the frequency in rc */ + int n = rc * 2, i, interval = HZ / n; + + /* Count down seconds */ do { - rtnl_lock(); - rc = dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_ON); - rtnl_unlock(); - if (rc) - break; - schedule_timeout_interruptible(HZ / 2); - - rtnl_lock(); - rc = dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_OFF); - rtnl_unlock(); - if (rc) - break; - schedule_timeout_interruptible(HZ / 2); + /* Count down iterations per second */ + i = n; + do { + rtnl_lock(); + rc = dev->ethtool_ops->set_phys_id(dev, + (i & 1) ? ETHTOOL_ID_OFF : ETHTOOL_ID_ON); + rtnl_unlock(); + if (rc) + break; + schedule_timeout_interruptible(interval); + } while (!signal_pending(current) && --i != 0); } while (!signal_pending(current) && (id.data == 0 || --id.data != 0)); } -- cgit v1.2.3-71-gd317 From 88b996cd0652280cc9b9fc70008fda15f14175e1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 15 Apr 2011 15:20:10 +0200 Subject: block: cleanup the block plug helper functions It's a bit of a mess currently. task->plug is being cleared and reset in __blk_finish_plug(), and blk_finish_plug() is testing for a NULL plug which cannot happen even from schedule() anymore since it uses blk_needs_flush_plug() to determine whether to call into this function at all. So get rid of some of the cruft. 
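For context, the caller-side pattern these helpers serve is unchanged by the cleanup. A minimal sketch of a submitter batching requests behind a plug (hypothetical helper, bios assumed to be prepared elsewhere, error handling omitted):

	#include <linux/fs.h>
	#include <linux/bio.h>
	#include <linux/blkdev.h>

	/* Batch a set of already-built bios behind one plug. */
	static void submit_batch(struct bio **bios, int nr)
	{
		struct blk_plug plug;
		int i;

		blk_start_plug(&plug);		/* installs the plug in current->plug */
		for (i = 0; i < nr; i++)
			submit_bio(WRITE, bios[i]);
		blk_finish_plug(&plug);		/* flushes the plug list, clears current->plug */
	}

With the cleanup above, the explicit blk_finish_plug() call is what flushes the list via blk_flush_plug_list(); schedule() only gets involved for tasks that block with a non-empty plug.
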
Signed-off-by: Jens Axboe --- block/blk-core.c | 24 ++++++------------------ include/linux/blkdev.h | 6 +++--- 2 files changed, 9 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index 36b1a7559f94..b598fa7720d4 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2671,7 +2671,7 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth) q->unplugged_fn(q); } -static void flush_plug_list(struct blk_plug *plug) +void blk_flush_plug_list(struct blk_plug *plug) { struct request_queue *q; unsigned long flags; @@ -2733,28 +2733,16 @@ static void flush_plug_list(struct blk_plug *plug) local_irq_restore(flags); } - -static void __blk_finish_plug(struct task_struct *tsk, struct blk_plug *plug) -{ - flush_plug_list(plug); - - if (plug == tsk->plug) - tsk->plug = NULL; -} +EXPORT_SYMBOL(blk_flush_plug_list); void blk_finish_plug(struct blk_plug *plug) { - if (plug) - __blk_finish_plug(current, plug); -} -EXPORT_SYMBOL(blk_finish_plug); + blk_flush_plug_list(plug); -void __blk_flush_plug(struct task_struct *tsk, struct blk_plug *plug) -{ - __blk_finish_plug(tsk, plug); - tsk->plug = plug; + if (plug == current->plug) + current->plug = NULL; } -EXPORT_SYMBOL(__blk_flush_plug); +EXPORT_SYMBOL(blk_finish_plug); int __init blk_dev_init(void) { diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c07ffafac5d4..ffe48ff318f9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -865,14 +865,14 @@ struct blk_plug { extern void blk_start_plug(struct blk_plug *); extern void blk_finish_plug(struct blk_plug *); -extern void __blk_flush_plug(struct task_struct *, struct blk_plug *); +extern void blk_flush_plug_list(struct blk_plug *); static inline void blk_flush_plug(struct task_struct *tsk) { struct blk_plug *plug = tsk->plug; - if (unlikely(plug)) - __blk_flush_plug(tsk, plug); + if (plug) + blk_flush_plug_list(plug); } static inline bool blk_needs_flush_plug(struct task_struct *tsk) -- cgit v1.2.3-71-gd317 From f6603783f9f099bf7a83b3f6c689bbbf74f0e96e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 15 Apr 2011 15:49:07 +0200 Subject: block: only force kblockd unplugging from the schedule() path For the explicit unplugging, we'd prefer to kick things off immediately and not pay the penalty of the latency to switch to kblockd. So let blk_finish_plug() do the run inline, while the implicit-on-schedule-out unplug will punt to kblockd. 
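A condensed view of the two entry points after this change, rewrapped here for illustration from the hunks below, shows how the new bool selects the behaviour:

	void blk_finish_plug(struct blk_plug *plug)
	{
		blk_flush_plug_list(plug, false);	/* explicit unplug: dispatch inline */
		if (plug == current->plug)
			current->plug = NULL;
	}

	static inline void blk_flush_plug(struct task_struct *tsk)
	{
		struct blk_plug *plug = tsk->plug;

		if (plug)
			blk_flush_plug_list(plug, true);	/* schedule() path: punt to kblockd */
	}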
Signed-off-by: Jens Axboe --- block/blk-core.c | 13 +++++++------ include/linux/blkdev.h | 4 ++-- 2 files changed, 9 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index b598fa7720d4..3c8121072507 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2662,16 +2662,17 @@ static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b) return !(rqa->q <= rqb->q); } -static void queue_unplugged(struct request_queue *q, unsigned int depth) +static void queue_unplugged(struct request_queue *q, unsigned int depth, + bool force_kblockd) { trace_block_unplug_io(q, depth); - __blk_run_queue(q, true); + __blk_run_queue(q, force_kblockd); if (q->unplugged_fn) q->unplugged_fn(q); } -void blk_flush_plug_list(struct blk_plug *plug) +void blk_flush_plug_list(struct blk_plug *plug, bool force_kblockd) { struct request_queue *q; unsigned long flags; @@ -2706,7 +2707,7 @@ void blk_flush_plug_list(struct blk_plug *plug) BUG_ON(!rq->q); if (rq->q != q) { if (q) { - queue_unplugged(q, depth); + queue_unplugged(q, depth, force_kblockd); spin_unlock(q->queue_lock); } q = rq->q; @@ -2727,7 +2728,7 @@ void blk_flush_plug_list(struct blk_plug *plug) } if (q) { - queue_unplugged(q, depth); + queue_unplugged(q, depth, force_kblockd); spin_unlock(q->queue_lock); } @@ -2737,7 +2738,7 @@ EXPORT_SYMBOL(blk_flush_plug_list); void blk_finish_plug(struct blk_plug *plug) { - blk_flush_plug_list(plug); + blk_flush_plug_list(plug, false); if (plug == current->plug) current->plug = NULL; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ffe48ff318f9..1c76506fcf11 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -865,14 +865,14 @@ struct blk_plug { extern void blk_start_plug(struct blk_plug *); extern void blk_finish_plug(struct blk_plug *); -extern void blk_flush_plug_list(struct blk_plug *); +extern void blk_flush_plug_list(struct blk_plug *, bool); static inline void blk_flush_plug(struct task_struct *tsk) { struct blk_plug *plug = tsk->plug; if (plug) - blk_flush_plug_list(plug); + blk_flush_plug_list(plug, true); } static inline bool blk_needs_flush_plug(struct task_struct *tsk) -- cgit v1.2.3-71-gd317 From a237c1c5bc5dc5c76a21be922dca4826f3eca8ca Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 16 Apr 2011 13:27:55 +0200 Subject: block: let io_schedule() flush the plug inline Linus correctly observes that the most important dispatch cases are now done from kblockd, this isn't ideal for latency reasons. The original reason for switching dispatches out-of-line was to avoid too deep a stack, so by _only_ letting the "accidental" flush directly in schedule() be guarded by offload to kblockd, we should be able to get the best of both worlds. So add a blk_schedule_flush_plug() that offloads to kblockd, and only use that from the schedule() path. 
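After this change the division of labour looks roughly like this (simplified from the hunks below):

	blk_flush_plug(tsk);		/* ordinary callers: flush the plug inline for lowest latency */
	blk_schedule_flush_plug(tsk);	/* schedule() only: flush via kblockd to keep the stack shallow */

	/* kernel/sched.c, just before switching away with plugged I/O pending */
	if (blk_needs_flush_plug(prev)) {
		raw_spin_unlock(&rq->lock);
		blk_schedule_flush_plug(prev);
		raw_spin_lock(&rq->lock);
	}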
Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 13 +++++++++++++ kernel/sched.c | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1c76506fcf11..ec0357d8c4a5 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -871,6 +871,14 @@ static inline void blk_flush_plug(struct task_struct *tsk) { struct blk_plug *plug = tsk->plug; + if (plug) + blk_flush_plug_list(plug, false); +} + +static inline void blk_schedule_flush_plug(struct task_struct *tsk) +{ + struct blk_plug *plug = tsk->plug; + if (plug) blk_flush_plug_list(plug, true); } @@ -1317,6 +1325,11 @@ static inline void blk_flush_plug(struct task_struct *task) { } +static inline void blk_schedule_flush_plug(struct task_struct *task) +{ +} + + static inline bool blk_needs_flush_plug(struct task_struct *tsk) { return false; diff --git a/kernel/sched.c b/kernel/sched.c index a187c3fe027b..312f8b95c2d4 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4118,7 +4118,7 @@ need_resched: */ if (blk_needs_flush_plug(prev)) { raw_spin_unlock(&rq->lock); - blk_flush_plug(prev); + blk_schedule_flush_plug(prev); raw_spin_lock(&rq->lock); } } -- cgit v1.2.3-71-gd317 From 048c9374a749a27f16493cea033fa4a8ff492356 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 18 Apr 2011 09:52:22 +0200 Subject: block: Enhance new plugging support to support general callbacks md/raid requires an unplug callback, but as it does not uses requests the current code cannot provide one. So allow arbitrary callbacks to be attached to the blk_plug. Signed-off-by: NeilBrown Signed-off-by: Jens Axboe --- block/blk-core.c | 20 ++++++++++++++++++++ include/linux/blkdev.h | 7 ++++++- 2 files changed, 26 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index 78b7b0cb7216..77edf0512338 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2638,6 +2638,7 @@ void blk_start_plug(struct blk_plug *plug) plug->magic = PLUG_MAGIC; INIT_LIST_HEAD(&plug->list); + INIT_LIST_HEAD(&plug->cb_list); plug->should_sort = 0; /* @@ -2678,6 +2679,24 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth, q->unplugged_fn(q); } +static void flush_plug_callbacks(struct blk_plug *plug) +{ + LIST_HEAD(callbacks); + + if (list_empty(&plug->cb_list)) + return; + + list_splice_init(&plug->cb_list, &callbacks); + + while (!list_empty(&callbacks)) { + struct blk_plug_cb *cb = list_first_entry(&callbacks, + struct blk_plug_cb, + list); + list_del(&cb->list); + cb->callback(cb); + } +} + void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) { struct request_queue *q; @@ -2688,6 +2707,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) BUG_ON(plug->magic != PLUG_MAGIC); + flush_plug_callbacks(plug); if (list_empty(&plug->list)) return; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ec0357d8c4a5..f3f7879391a7 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -860,8 +860,13 @@ extern void blk_put_queue(struct request_queue *); struct blk_plug { unsigned long magic; struct list_head list; + struct list_head cb_list; unsigned int should_sort; }; +struct blk_plug_cb { + struct list_head list; + void (*callback)(struct blk_plug_cb *); +}; extern void blk_start_plug(struct blk_plug *); extern void blk_finish_plug(struct blk_plug *); @@ -887,7 +892,7 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk) { struct blk_plug *plug = 
tsk->plug; - return plug && !list_empty(&plug->list); + return plug && (!list_empty(&plug->list) || !list_empty(&plug->cb_list)); } /* -- cgit v1.2.3-71-gd317 From b4cb290e0a7d19235bd075c2ad4d60dbab0bac15 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 18 Apr 2011 09:54:05 +0200 Subject: Revert "block: add callback function for unplug notification" MD can't use this since it really requires us to be able to keep more than a single piece of state for the unplug. Commit 048c9374 added the required support for MD, so get rid of this now unused code. This reverts commit f75664570d8b75469cc468f23c2b27220984983b. Conflicts: block/blk-core.c Signed-off-by: Jens Axboe --- block/blk-core.c | 3 --- block/blk-settings.c | 16 ---------------- include/linux/blkdev.h | 3 --- 3 files changed, 22 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index 77edf0512338..09b262811fff 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2674,9 +2674,6 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth, { trace_block_unplug(q, depth, !from_schedule); __blk_run_queue(q, from_schedule); - - if (q->unplugged_fn) - q->unplugged_fn(q); } static void flush_plug_callbacks(struct blk_plug *plug) diff --git a/block/blk-settings.c b/block/blk-settings.c index eb949045bb12..1fa769293597 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -790,22 +790,6 @@ void blk_queue_flush(struct request_queue *q, unsigned int flush) } EXPORT_SYMBOL_GPL(blk_queue_flush); -/** - * blk_queue_unplugged - register a callback for an unplug event - * @q: the request queue for the device - * @fn: the function to call - * - * Some stacked drivers may need to know when IO is dispatched on an - * unplug event. By registrering a callback here, they will be notified - * when someone flushes their on-stack queue plug. The function will be - * called with the queue lock held. 
- */ -void blk_queue_unplugged(struct request_queue *q, unplugged_fn *fn) -{ - q->unplugged_fn = fn; -} -EXPORT_SYMBOL(blk_queue_unplugged); - static int __init blk_settings_init(void) { blk_max_low_pfn = max_low_pfn - 1; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f3f7879391a7..3448d89297e8 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -196,7 +196,6 @@ typedef void (request_fn_proc) (struct request_queue *q); typedef int (make_request_fn) (struct request_queue *q, struct bio *bio); typedef int (prep_rq_fn) (struct request_queue *, struct request *); typedef void (unprep_rq_fn) (struct request_queue *, struct request *); -typedef void (unplugged_fn) (struct request_queue *); struct bio_vec; struct bvec_merge_data { @@ -284,7 +283,6 @@ struct request_queue rq_timed_out_fn *rq_timed_out_fn; dma_drain_needed_fn *dma_drain_needed; lld_busy_fn *lld_busy_fn; - unplugged_fn *unplugged_fn; /* * Dispatch queue sorting @@ -843,7 +841,6 @@ extern void blk_queue_dma_alignment(struct request_queue *, int); extern void blk_queue_update_dma_alignment(struct request_queue *, int); extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); -extern void blk_queue_unplugged(struct request_queue *, unplugged_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern void blk_queue_flush(struct request_queue *q, unsigned int flush); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); -- cgit v1.2.3-71-gd317 From af1db72d8b340f97ad12b60175afdef43e6f0e60 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 18 Apr 2011 18:25:41 +1000 Subject: md/dm - remove remains of plug_fn callback. Now that unplugging is done differently, the unplug_fn callback is never called, so it can be completely discarded. Signed-off-by: NeilBrown --- drivers/md/dm-raid.c | 8 -------- include/linux/device-mapper.h | 1 - 2 files changed, 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 5ef136cdba91..e5d8904fc8f6 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -390,13 +390,6 @@ static int raid_is_congested(struct dm_target_callbacks *cb, int bits) return md_raid5_congested(&rs->md, bits); } -static void raid_unplug(struct dm_target_callbacks *cb) -{ - struct raid_set *rs = container_of(cb, struct raid_set, callbacks); - - md_raid5_kick_device(rs->md.private); -} - /* * Construct a RAID4/5/6 mapping: * Args: @@ -487,7 +480,6 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv) } rs->callbacks.congested_fn = raid_is_congested; - rs->callbacks.unplug_fn = raid_unplug; dm_table_add_target_callbacks(ti->table, &rs->callbacks); return 0; diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index e2768834f397..32a4423710f5 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -197,7 +197,6 @@ struct dm_target { struct dm_target_callbacks { struct list_head list; int (*congested_fn) (struct dm_target_callbacks *, int); - void (*unplug_fn)(struct dm_target_callbacks *); }; int dm_register_target(struct target_type *t); -- cgit v1.2.3-71-gd317 From 1791f881435fab951939ad700e947b66c062e083 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Wed, 30 Mar 2011 15:24:21 +0200 Subject: posix clocks: Replace mutex with reader/writer semaphore A dynamic posix clock is protected from asynchronous removal by a mutex. 
However, using a mutex has the unwanted effect that a long running clock operation in one process will unnecessarily block other processes. For example, one process might call read() to get an external time stamp coming in at one pulse per second. A second process calling clock_gettime would have to wait for almost a whole second. This patch fixes the issue by using a reader/writer semaphore instead of a mutex. Signed-off-by: Richard Cochran Cc: John Stultz Link: http://lkml.kernel.org/r/%3C20110330132421.GA31771%40riccoc20.at.omicron.at%3E Signed-off-by: Thomas Gleixner --- include/linux/posix-clock.h | 5 +++-- kernel/time/posix-clock.c | 24 +++++++++--------------- 2 files changed, 12 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/posix-clock.h b/include/linux/posix-clock.h index 369e19d3750b..7f1183dcd119 100644 --- a/include/linux/posix-clock.h +++ b/include/linux/posix-clock.h @@ -24,6 +24,7 @@ #include #include #include +#include struct posix_clock; @@ -104,7 +105,7 @@ struct posix_clock_operations { * @ops: Functional interface to the clock * @cdev: Character device instance for this clock * @kref: Reference count. - * @mutex: Protects the 'zombie' field from concurrent access. + * @rwsem: Protects the 'zombie' field from concurrent access. * @zombie: If 'zombie' is true, then the hardware has disappeared. * @release: A function to free the structure when the reference count reaches * zero. May be NULL if structure is statically allocated. @@ -117,7 +118,7 @@ struct posix_clock { struct posix_clock_operations ops; struct cdev cdev; struct kref kref; - struct mutex mutex; + struct rw_semaphore rwsem; bool zombie; void (*release)(struct posix_clock *clk); }; diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c index 25028dd4fa18..c340ca658f37 100644 --- a/kernel/time/posix-clock.c +++ b/kernel/time/posix-clock.c @@ -19,7 +19,6 @@ */ #include #include -#include #include #include #include @@ -34,19 +33,19 @@ static struct posix_clock *get_posix_clock(struct file *fp) { struct posix_clock *clk = fp->private_data; - mutex_lock(&clk->mutex); + down_read(&clk->rwsem); if (!clk->zombie) return clk; - mutex_unlock(&clk->mutex); + up_read(&clk->rwsem); return NULL; } static void put_posix_clock(struct posix_clock *clk) { - mutex_unlock(&clk->mutex); + up_read(&clk->rwsem); } static ssize_t posix_clock_read(struct file *fp, char __user *buf, @@ -156,7 +155,7 @@ static int posix_clock_open(struct inode *inode, struct file *fp) struct posix_clock *clk = container_of(inode->i_cdev, struct posix_clock, cdev); - mutex_lock(&clk->mutex); + down_read(&clk->rwsem); if (clk->zombie) { err = -ENODEV; @@ -172,7 +171,7 @@ static int posix_clock_open(struct inode *inode, struct file *fp) fp->private_data = clk; } out: - mutex_unlock(&clk->mutex); + up_read(&clk->rwsem); return err; } @@ -211,25 +210,20 @@ int posix_clock_register(struct posix_clock *clk, dev_t devid) int err; kref_init(&clk->kref); - mutex_init(&clk->mutex); + init_rwsem(&clk->rwsem); cdev_init(&clk->cdev, &posix_clock_file_operations); clk->cdev.owner = clk->ops.owner; err = cdev_add(&clk->cdev, devid, 1); - if (err) - goto no_cdev; return err; -no_cdev: - mutex_destroy(&clk->mutex); - return err; } EXPORT_SYMBOL_GPL(posix_clock_register); static void delete_clock(struct kref *kref) { struct posix_clock *clk = container_of(kref, struct posix_clock, kref); - mutex_destroy(&clk->mutex); + if (clk->release) clk->release(clk); } @@ -238,9 +232,9 @@ void posix_clock_unregister(struct 
posix_clock *clk) { cdev_del(&clk->cdev); - mutex_lock(&clk->mutex); + down_write(&clk->rwsem); clk->zombie = true; - mutex_unlock(&clk->mutex); + up_write(&clk->rwsem); kref_put(&clk->kref, delete_clock); } -- cgit v1.2.3-71-gd317 From 24ecfbe27f65563909b14492afda2f1c21f7c044 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 18 Apr 2011 11:41:33 +0200 Subject: block: add blk_run_queue_async Instead of overloading __blk_run_queue to force an offload to kblockd add a new blk_run_queue_async helper to do it explicitly. I've kept the blk_queue_stopped check for now, but I suspect it's not needed as the check we do when the workqueue items runs should be enough. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-core.c | 36 ++++++++++++++++++++++++------------ block/blk-exec.c | 2 +- block/blk-flush.c | 4 ++-- block/blk.h | 1 + block/cfq-iosched.c | 6 +++--- block/elevator.c | 4 ++-- drivers/scsi/scsi_lib.c | 2 +- drivers/scsi/scsi_transport_fc.c | 2 +- include/linux/blkdev.h | 2 +- 9 files changed, 36 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index e2bacfa46cc3..5fa3dd2705c6 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -204,7 +204,7 @@ static void blk_delay_work(struct work_struct *work) q = container_of(work, struct request_queue, delay_work.work); spin_lock_irq(q->queue_lock); - __blk_run_queue(q, false); + __blk_run_queue(q); spin_unlock_irq(q->queue_lock); } @@ -239,7 +239,7 @@ void blk_start_queue(struct request_queue *q) WARN_ON(!irqs_disabled()); queue_flag_clear(QUEUE_FLAG_STOPPED, q); - __blk_run_queue(q, false); + __blk_run_queue(q); } EXPORT_SYMBOL(blk_start_queue); @@ -296,11 +296,9 @@ EXPORT_SYMBOL(blk_sync_queue); * * Description: * See @blk_run_queue. This variant must be called with the queue lock - * held and interrupts disabled. If force_kblockd is true, then it is - * safe to call this without holding the queue lock. - * + * held and interrupts disabled. */ -void __blk_run_queue(struct request_queue *q, bool force_kblockd) +void __blk_run_queue(struct request_queue *q) { if (unlikely(blk_queue_stopped(q))) return; @@ -309,7 +307,7 @@ void __blk_run_queue(struct request_queue *q, bool force_kblockd) * Only recurse once to avoid overrunning the stack, let the unplug * handling reinvoke the handler shortly if we already got there. */ - if (!force_kblockd && !queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) { + if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) { q->request_fn(q); queue_flag_clear(QUEUE_FLAG_REENTER, q); } else @@ -317,6 +315,20 @@ void __blk_run_queue(struct request_queue *q, bool force_kblockd) } EXPORT_SYMBOL(__blk_run_queue); +/** + * blk_run_queue_async - run a single device queue in workqueue context + * @q: The queue to run + * + * Description: + * Tells kblockd to perform the equivalent of @blk_run_queue on behalf + * of us. 
+ */ +void blk_run_queue_async(struct request_queue *q) +{ + if (likely(!blk_queue_stopped(q))) + queue_delayed_work(kblockd_workqueue, &q->delay_work, 0); +} + /** * blk_run_queue - run a single device queue * @q: The queue to run @@ -330,7 +342,7 @@ void blk_run_queue(struct request_queue *q) unsigned long flags; spin_lock_irqsave(q->queue_lock, flags); - __blk_run_queue(q, false); + __blk_run_queue(q); spin_unlock_irqrestore(q->queue_lock, flags); } EXPORT_SYMBOL(blk_run_queue); @@ -979,7 +991,7 @@ void blk_insert_request(struct request_queue *q, struct request *rq, blk_queue_end_tag(q, rq); add_acct_request(q, rq, where); - __blk_run_queue(q, false); + __blk_run_queue(q); spin_unlock_irqrestore(q->queue_lock, flags); } EXPORT_SYMBOL(blk_insert_request); @@ -1323,7 +1335,7 @@ get_rq: } else { spin_lock_irq(q->queue_lock); add_acct_request(q, req, where); - __blk_run_queue(q, false); + __blk_run_queue(q); out_unlock: spin_unlock_irq(q->queue_lock); } @@ -2684,9 +2696,9 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth, */ if (from_schedule) { spin_unlock(q->queue_lock); - __blk_run_queue(q, true); + blk_run_queue_async(q); } else { - __blk_run_queue(q, false); + __blk_run_queue(q); spin_unlock(q->queue_lock); } diff --git a/block/blk-exec.c b/block/blk-exec.c index 7482b7fa863b..81e31819a597 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c @@ -55,7 +55,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, WARN_ON(irqs_disabled()); spin_lock_irq(q->queue_lock); __elv_add_request(q, rq, where); - __blk_run_queue(q, false); + __blk_run_queue(q); /* the queue is stopped so it won't be plugged+unplugged */ if (rq->cmd_type == REQ_TYPE_PM_RESUME) q->request_fn(q); diff --git a/block/blk-flush.c b/block/blk-flush.c index eba4a2790c6c..6c9b5e189e62 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -218,7 +218,7 @@ static void flush_end_io(struct request *flush_rq, int error) * request_fn may confuse the driver. Always use kblockd. */ if (queued) - __blk_run_queue(q, true); + blk_run_queue_async(q); } /** @@ -274,7 +274,7 @@ static void flush_data_end_io(struct request *rq, int error) * the comment in flush_end_io(). 
*/ if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error)) - __blk_run_queue(q, true); + blk_run_queue_async(q); } /** diff --git a/block/blk.h b/block/blk.h index 61263463e38e..c9df8fc3c999 100644 --- a/block/blk.h +++ b/block/blk.h @@ -22,6 +22,7 @@ void blk_rq_timed_out_timer(unsigned long data); void blk_delete_timer(struct request *); void blk_add_timer(struct request *); void __generic_unplug_device(struct request_queue *); +void blk_run_queue_async(struct request_queue *q); /* * Internal atomic flags for request handling diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 3be881ec95ad..46b0a1d1d925 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -3368,7 +3368,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, cfqd->busy_queues > 1) { cfq_del_timer(cfqd, cfqq); cfq_clear_cfqq_wait_request(cfqq); - __blk_run_queue(cfqd->queue, false); + __blk_run_queue(cfqd->queue); } else { cfq_blkiocg_update_idle_time_stats( &cfqq->cfqg->blkg); @@ -3383,7 +3383,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, * this new queue is RT and the current one is BE */ cfq_preempt_queue(cfqd, cfqq); - __blk_run_queue(cfqd->queue, false); + __blk_run_queue(cfqd->queue); } } @@ -3743,7 +3743,7 @@ static void cfq_kick_queue(struct work_struct *work) struct request_queue *q = cfqd->queue; spin_lock_irq(q->queue_lock); - __blk_run_queue(cfqd->queue, false); + __blk_run_queue(cfqd->queue); spin_unlock_irq(q->queue_lock); } diff --git a/block/elevator.c b/block/elevator.c index 0cdb4e7ebab4..6f6abc08bb56 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -642,7 +642,7 @@ void elv_quiesce_start(struct request_queue *q) */ elv_drain_elevator(q); while (q->rq.elvpriv) { - __blk_run_queue(q, false); + __blk_run_queue(q); spin_unlock_irq(q->queue_lock); msleep(10); spin_lock_irq(q->queue_lock); @@ -695,7 +695,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where) * with anything. There's no point in delaying queue * processing. 
*/ - __blk_run_queue(q, false); + __blk_run_queue(q); break; case ELEVATOR_INSERT_SORT_MERGE: diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 6d5c7ff43f5b..ab55c2fa7ce2 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -443,7 +443,7 @@ static void scsi_run_queue(struct request_queue *q) &sdev->request_queue->queue_flags); if (flagset) queue_flag_set(QUEUE_FLAG_REENTER, sdev->request_queue); - __blk_run_queue(sdev->request_queue, false); + __blk_run_queue(sdev->request_queue); if (flagset) queue_flag_clear(QUEUE_FLAG_REENTER, sdev->request_queue); spin_unlock(sdev->request_queue->queue_lock); diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index fdf3fa639056..28c33506e4ad 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -3829,7 +3829,7 @@ fc_bsg_goose_queue(struct fc_rport *rport) !test_bit(QUEUE_FLAG_REENTER, &rport->rqst_q->queue_flags); if (flagset) queue_flag_set(QUEUE_FLAG_REENTER, rport->rqst_q); - __blk_run_queue(rport->rqst_q, false); + __blk_run_queue(rport->rqst_q); if (flagset) queue_flag_clear(QUEUE_FLAG_REENTER, rport->rqst_q); spin_unlock_irqrestore(rport->rqst_q->queue_lock, flags); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 3448d89297e8..cbbfd98ad4a3 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -697,7 +697,7 @@ extern void blk_start_queue(struct request_queue *q); extern void blk_stop_queue(struct request_queue *q); extern void blk_sync_queue(struct request_queue *q); extern void __blk_stop_queue(struct request_queue *q); -extern void __blk_run_queue(struct request_queue *q, bool force_kblockd); +extern void __blk_run_queue(struct request_queue *q); extern void blk_run_queue(struct request_queue *); extern int blk_rq_map_user(struct request_queue *, struct request *, struct rq_map_data *, void __user *, unsigned long, -- cgit v1.2.3-71-gd317 From 80b4895aa4578e9372d76cd4063f82d0c3994d77 Mon Sep 17 00:00:00 2001 From: Jeff Brown Date: Mon, 18 Apr 2011 10:08:02 -0700 Subject: Input: estimate number of events per packet Calculate a default based on the number of ABS axes, REL axes, and MT slots for the device during input device registration. 
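As a worked example of the estimate (a hypothetical device, not one from the patch): with dev->mtsize = 10 and only ABS_MT_POSITION_X, ABS_MT_POSITION_Y, ABS_X and ABS_Y set in absbit, and no REL axes, the routine starts at mt_slots + 1 = 11 (one SYN_MT_REPORT per slot plus the closing SYN_REPORT), adds mt_slots events for each MT axis and one for each ordinary axis:

	events = (10 + 1) + 2 * 10 + 2 = 33

so hint_events_per_packet defaults to 33 unless the driver has already set it.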
Signed-off-by: Jeff Brown Reviewed-by: Henrik Rydberg Signed-off-by: Dmitry Torokhov --- drivers/input/input.c | 40 ++++++++++++++++++++++++++++++++++++++++ include/linux/input/mt.h | 6 ++++++ 2 files changed, 46 insertions(+) (limited to 'include/linux') diff --git a/drivers/input/input.c b/drivers/input/input.c index d6e8bd8a851c..ebbceedc92f4 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -1746,6 +1746,42 @@ void input_set_capability(struct input_dev *dev, unsigned int type, unsigned int } EXPORT_SYMBOL(input_set_capability); +static unsigned int input_estimate_events_per_packet(struct input_dev *dev) +{ + int mt_slots; + int i; + unsigned int events; + + if (dev->mtsize) { + mt_slots = dev->mtsize; + } else if (test_bit(ABS_MT_TRACKING_ID, dev->absbit)) { + mt_slots = dev->absinfo[ABS_MT_TRACKING_ID].maximum - + dev->absinfo[ABS_MT_TRACKING_ID].minimum + 1, + clamp(mt_slots, 2, 32); + } else if (test_bit(ABS_MT_POSITION_X, dev->absbit)) { + mt_slots = 2; + } else { + mt_slots = 0; + } + + events = mt_slots + 1; /* count SYN_MT_REPORT and SYN_REPORT */ + + for (i = 0; i < ABS_CNT; i++) { + if (test_bit(i, dev->absbit)) { + if (input_is_mt_axis(i)) + events += mt_slots; + else + events++; + } + } + + for (i = 0; i < REL_CNT; i++) + if (test_bit(i, dev->relbit)) + events++; + + return events; +} + #define INPUT_CLEANSE_BITMASK(dev, type, bits) \ do { \ if (!test_bit(EV_##type, dev->evbit)) \ @@ -1793,6 +1829,10 @@ int input_register_device(struct input_dev *dev) /* Make sure that bitmasks not mentioned in dev->evbit are clean. */ input_cleanse_bitmasks(dev); + if (!dev->hint_events_per_packet) + dev->hint_events_per_packet = + input_estimate_events_per_packet(dev); + /* * If delay and period are pre-set by the driver, then autorepeating * is handled by the driver itself and we don't do it in input.c. diff --git a/include/linux/input/mt.h b/include/linux/input/mt.h index b3ac06a4435d..318bb82325a6 100644 --- a/include/linux/input/mt.h +++ b/include/linux/input/mt.h @@ -48,6 +48,12 @@ static inline void input_mt_slot(struct input_dev *dev, int slot) input_event(dev, EV_ABS, ABS_MT_SLOT, slot); } +static inline bool input_is_mt_axis(int axis) +{ + return axis == ABS_MT_SLOT || + (axis >= ABS_MT_FIRST && axis <= ABS_MT_LAST); +} + void input_mt_report_slot_state(struct input_dev *dev, unsigned int tool_type, bool active); -- cgit v1.2.3-71-gd317 From c78193e9c7bcbf25b8237ad0dec82f805c4ea69b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 18 Apr 2011 10:35:30 -0700 Subject: next_pidmap: fix overflow condition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit next_pidmap() just quietly accepted whatever 'last' pid that was passed in, which is not all that safe when one of the users is /proc. Admittedly the proc code should do some sanity checking on the range (and that will be the next commit), but that doesn't mean that the helper functions should just do that pidmap pointer arithmetic without checking the range of its arguments. So clamp 'last' to PID_MAX_LIMIT. The fact that we then do "last+1" doesn't really matter, the for-loop does check against the end of the pidmap array properly (it's only the actual pointer arithmetic overflow case we need to worry about, and going one bit beyond isn't going to overflow). [ Use PID_MAX_LIMIT rather than pid_max as per Eric Biederman ] Reported-by: Tavis Ormandy Analyzed-by: Robert ÅšwiÄ™cki Cc: Eric W. 
Biederman Cc: Pavel Emelyanov Signed-off-by: Linus Torvalds --- include/linux/pid.h | 2 +- kernel/pid.c | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pid.h b/include/linux/pid.h index 31afb7ecbe1f..cdced84261d7 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -117,7 +117,7 @@ extern struct pid *find_vpid(int nr); */ extern struct pid *find_get_pid(int nr); extern struct pid *find_ge_pid(int nr, struct pid_namespace *); -int next_pidmap(struct pid_namespace *pid_ns, int last); +int next_pidmap(struct pid_namespace *pid_ns, unsigned int last); extern struct pid *alloc_pid(struct pid_namespace *ns); extern void free_pid(struct pid *pid); diff --git a/kernel/pid.c b/kernel/pid.c index 02f221274265..57a8346a270e 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -217,11 +217,14 @@ static int alloc_pidmap(struct pid_namespace *pid_ns) return -1; } -int next_pidmap(struct pid_namespace *pid_ns, int last) +int next_pidmap(struct pid_namespace *pid_ns, unsigned int last) { int offset; struct pidmap *map, *end; + if (last >= PID_MAX_LIMIT) + return -1; + offset = (last + 1) & BITS_PER_PAGE_MASK; map = &pid_ns->pidmap[(last + 1)/BITS_PER_PAGE]; end = &pid_ns->pidmap[PIDMAP_ENTRIES]; -- cgit v1.2.3-71-gd317 From 468f86134ee515234afe5c5b3f39f266c50e61a5 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 18 Apr 2011 15:57:32 -0400 Subject: NFSv4.1: Don't update sequence number if rpc_task is not sent If we fail to contact the gss upcall program, then no message will be sent to the server. The client still updated the sequence number, however, and this lead to NFS4ERR_SEQ_MISMATCH for the next several RPC calls. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 4 ++-- include/linux/sunrpc/sched.h | 2 ++ net/sunrpc/xprt.c | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b8e1ac69b743..e7e2077eebd9 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -444,8 +444,8 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res * if (res->sr_status == 1) res->sr_status = NFS_OK; - /* -ERESTARTSYS can result in skipping nfs41_sequence_setup */ - if (!res->sr_slot) + /* don't increment the sequence number if the task wasn't sent */ + if (!RPC_WAS_SENT(task)) goto out; /* Check the SEQUENCE operation status */ diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index d81db8012c63..3b94f804b852 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -127,6 +127,7 @@ struct rpc_task_setup { #define RPC_TASK_KILLED 0x0100 /* task was killed */ #define RPC_TASK_SOFT 0x0200 /* Use soft timeouts */ #define RPC_TASK_SOFTCONN 0x0400 /* Fail if can't connect */ +#define RPC_TASK_SENT 0x0800 /* message was sent */ #define RPC_IS_ASYNC(t) ((t)->tk_flags & RPC_TASK_ASYNC) #define RPC_IS_SWAPPER(t) ((t)->tk_flags & RPC_TASK_SWAPPER) @@ -134,6 +135,7 @@ struct rpc_task_setup { #define RPC_ASSASSINATED(t) ((t)->tk_flags & RPC_TASK_KILLED) #define RPC_IS_SOFT(t) ((t)->tk_flags & RPC_TASK_SOFT) #define RPC_IS_SOFTCONN(t) ((t)->tk_flags & RPC_TASK_SOFTCONN) +#define RPC_WAS_SENT(t) ((t)->tk_flags & RPC_TASK_SENT) #define RPC_TASK_RUNNING 0 #define RPC_TASK_QUEUED 1 diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 9494c3767356..ce5eb68a9664 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -906,6 +906,7 @@ void xprt_transmit(struct 
rpc_task *task) } dprintk("RPC: %5u xmit complete\n", task->tk_pid); + task->tk_flags |= RPC_TASK_SENT; spin_lock_bh(&xprt->transport_lock); xprt->ops->set_retrans_timeout(task); -- cgit v1.2.3-71-gd317 From c21e6beba8835d09bb80e34961430b13e60381c5 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 19 Apr 2011 13:32:46 +0200 Subject: block: get rid of QUEUE_FLAG_REENTER We are currently using this flag to check whether it's safe to call into ->request_fn(). If it is set, we punt to kblockd. But we get a lot of false positives and excessive punts to kblockd, which hurts performance. The only real abuser of this infrastructure is SCSI. So export the async queue run and convert SCSI over to use that. There's room for improvement in that SCSI need not always use the async call, but this fixes our performance issue and they can fix that up in due time. Signed-off-by: Jens Axboe --- block/blk-core.c | 11 ++--------- block/blk.h | 1 - drivers/scsi/scsi_lib.c | 17 +---------------- drivers/scsi/scsi_transport_fc.c | 19 ++++--------------- include/linux/blkdev.h | 26 +++++++++++++------------- 5 files changed, 20 insertions(+), 54 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index 580eee5743e5..40725b9091f1 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -303,15 +303,7 @@ void __blk_run_queue(struct request_queue *q) if (unlikely(blk_queue_stopped(q))) return; - /* - * Only recurse once to avoid overrunning the stack, let the unplug - * handling reinvoke the handler shortly if we already got there. - */ - if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) { - q->request_fn(q); - queue_flag_clear(QUEUE_FLAG_REENTER, q); - } else - queue_delayed_work(kblockd_workqueue, &q->delay_work, 0); + q->request_fn(q); } EXPORT_SYMBOL(__blk_run_queue); @@ -328,6 +320,7 @@ void blk_run_queue_async(struct request_queue *q) if (likely(!blk_queue_stopped(q))) queue_delayed_work(kblockd_workqueue, &q->delay_work, 0); } +EXPORT_SYMBOL(blk_run_queue_async); /** * blk_run_queue - run a single device queue diff --git a/block/blk.h b/block/blk.h index c9df8fc3c999..61263463e38e 100644 --- a/block/blk.h +++ b/block/blk.h @@ -22,7 +22,6 @@ void blk_rq_timed_out_timer(unsigned long data); void blk_delete_timer(struct request *); void blk_add_timer(struct request *); void __generic_unplug_device(struct request_queue *); -void blk_run_queue_async(struct request_queue *q); /* * Internal atomic flags for request handling diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index ab55c2fa7ce2..e9901b8f8443 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -411,8 +411,6 @@ static void scsi_run_queue(struct request_queue *q) list_splice_init(&shost->starved_list, &starved_list); while (!list_empty(&starved_list)) { - int flagset; - /* * As long as shost is accepting commands and we have * starved queues, call blk_run_queue. 
scsi_request_fn @@ -435,20 +433,7 @@ static void scsi_run_queue(struct request_queue *q) continue; } - spin_unlock(shost->host_lock); - - spin_lock(sdev->request_queue->queue_lock); - flagset = test_bit(QUEUE_FLAG_REENTER, &q->queue_flags) && - !test_bit(QUEUE_FLAG_REENTER, - &sdev->request_queue->queue_flags); - if (flagset) - queue_flag_set(QUEUE_FLAG_REENTER, sdev->request_queue); - __blk_run_queue(sdev->request_queue); - if (flagset) - queue_flag_clear(QUEUE_FLAG_REENTER, sdev->request_queue); - spin_unlock(sdev->request_queue->queue_lock); - - spin_lock(shost->host_lock); + blk_run_queue_async(sdev->request_queue); } /* put any unprocessed entries back */ list_splice(&starved_list, &shost->starved_list); diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index 28c33506e4ad..815069d13f9b 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -3816,28 +3816,17 @@ fail_host_msg: static void fc_bsg_goose_queue(struct fc_rport *rport) { - int flagset; - unsigned long flags; - if (!rport->rqst_q) return; + /* + * This get/put dance makes no sense + */ get_device(&rport->dev); - - spin_lock_irqsave(rport->rqst_q->queue_lock, flags); - flagset = test_bit(QUEUE_FLAG_REENTER, &rport->rqst_q->queue_flags) && - !test_bit(QUEUE_FLAG_REENTER, &rport->rqst_q->queue_flags); - if (flagset) - queue_flag_set(QUEUE_FLAG_REENTER, rport->rqst_q); - __blk_run_queue(rport->rqst_q); - if (flagset) - queue_flag_clear(QUEUE_FLAG_REENTER, rport->rqst_q); - spin_unlock_irqrestore(rport->rqst_q->queue_lock, flags); - + blk_run_queue_async(rport->rqst_q); put_device(&rport->dev); } - /** * fc_bsg_rport_dispatch - process rport bsg requests and dispatch to LLDD * @q: rport request queue diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index cbbfd98ad4a3..2ad95fa1d130 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -388,20 +388,19 @@ struct request_queue #define QUEUE_FLAG_SYNCFULL 3 /* read queue has been filled */ #define QUEUE_FLAG_ASYNCFULL 4 /* write queue has been filled */ #define QUEUE_FLAG_DEAD 5 /* queue being torn down */ -#define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */ -#define QUEUE_FLAG_ELVSWITCH 7 /* don't use elevator, just do FIFO */ -#define QUEUE_FLAG_BIDI 8 /* queue supports bidi requests */ -#define QUEUE_FLAG_NOMERGES 9 /* disable merge attempts */ -#define QUEUE_FLAG_SAME_COMP 10 /* force complete on same CPU */ -#define QUEUE_FLAG_FAIL_IO 11 /* fake timeout */ -#define QUEUE_FLAG_STACKABLE 12 /* supports request stacking */ -#define QUEUE_FLAG_NONROT 13 /* non-rotational device (SSD) */ +#define QUEUE_FLAG_ELVSWITCH 6 /* don't use elevator, just do FIFO */ +#define QUEUE_FLAG_BIDI 7 /* queue supports bidi requests */ +#define QUEUE_FLAG_NOMERGES 8 /* disable merge attempts */ +#define QUEUE_FLAG_SAME_COMP 9 /* force complete on same CPU */ +#define QUEUE_FLAG_FAIL_IO 10 /* fake timeout */ +#define QUEUE_FLAG_STACKABLE 11 /* supports request stacking */ +#define QUEUE_FLAG_NONROT 12 /* non-rotational device (SSD) */ #define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */ -#define QUEUE_FLAG_IO_STAT 15 /* do IO stats */ -#define QUEUE_FLAG_DISCARD 16 /* supports DISCARD */ -#define QUEUE_FLAG_NOXMERGES 17 /* No extended merges */ -#define QUEUE_FLAG_ADD_RANDOM 18 /* Contributes to random pool */ -#define QUEUE_FLAG_SECDISCARD 19 /* supports SECDISCARD */ +#define QUEUE_FLAG_IO_STAT 13 /* do IO stats */ +#define QUEUE_FLAG_DISCARD 14 /* supports DISCARD */ +#define QUEUE_FLAG_NOXMERGES 
15 /* No extended merges */ +#define QUEUE_FLAG_ADD_RANDOM 16 /* Contributes to random pool */ +#define QUEUE_FLAG_SECDISCARD 17 /* supports SECDISCARD */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_STACKABLE) | \ @@ -699,6 +698,7 @@ extern void blk_sync_queue(struct request_queue *q); extern void __blk_stop_queue(struct request_queue *q); extern void __blk_run_queue(struct request_queue *q); extern void blk_run_queue(struct request_queue *); +extern void blk_run_queue_async(struct request_queue *q); extern int blk_rq_map_user(struct request_queue *, struct request *, struct rq_map_data *, void __user *, unsigned long, gfp_t); -- cgit v1.2.3-71-gd317 From 62c7d085e1f2a1f2b4d89560551eff18d703b3b1 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Thu, 10 Mar 2011 16:42:47 +0200 Subject: wl12xx: add new board_tcxo_clock element to the platform data This new value is a new type of clock setting that is used by wl128x chipsets. Signed-off-by: Luciano Coelho --- include/linux/wl12xx.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/wl12xx.h b/include/linux/wl12xx.h index bebb8efea0a6..eb8aacab8d4e 100644 --- a/include/linux/wl12xx.h +++ b/include/linux/wl12xx.h @@ -24,7 +24,7 @@ #ifndef _LINUX_WL12XX_H #define _LINUX_WL12XX_H -/* The board reference clock values */ +/* Reference clock values */ enum { WL12XX_REFCLOCK_19 = 0, /* 19.2 MHz */ WL12XX_REFCLOCK_26 = 1, /* 26 MHz */ @@ -32,12 +32,25 @@ enum { WL12XX_REFCLOCK_54 = 3, /* 54 MHz */ }; +/* TCXO clock values */ +enum { + WL12XX_TCXOCLOCK_19_2 = 0, /* 19.2MHz */ + WL12XX_TCXOCLOCK_26 = 1, /* 26 MHz */ + WL12XX_TCXOCLOCK_38_4 = 2, /* 38.4MHz */ + WL12XX_TCXOCLOCK_52 = 3, /* 52 MHz */ + WL12XX_TCXOCLOCK_16_368 = 4, /* 16.368 MHz */ + WL12XX_TCXOCLOCK_32_736 = 5, /* 32.736 MHz */ + WL12XX_TCXOCLOCK_16_8 = 6, /* 16.8 MHz */ + WL12XX_TCXOCLOCK_33_6 = 7, /* 33.6 MHz */ +}; + struct wl12xx_platform_data { void (*set_power)(bool enable); /* SDIO only: IRQ number if WLAN_IRQ line is used, 0 for SDIO IRQs */ int irq; bool use_eeprom; int board_ref_clock; + int board_tcxo_clock; }; #ifdef CONFIG_WL12XX_PLATFORM_DATA -- cgit v1.2.3-71-gd317 From d29633b40e6afc6b4276a4e381bc532cc84be104 Mon Sep 17 00:00:00 2001 From: Ido Yariv Date: Thu, 31 Mar 2011 10:06:57 +0200 Subject: wl12xx: Clean up and fix the 128x boot sequence Clean up the boot sequence code & fix the following issues: 1. Always read the registers' values and set the relevant bits instead of zeroing all other bits 2. Handle cases where wl1271_top_reg_read returns an error 3. Verify that the HW can detect the selected clock source 4. Remove 128x PG10 initialization code 5. Configure the MCS PLL to work in HP mode Signed-off-by: Ido Yariv Reviewed-by: Luciano Coelho Signed-off-by: Luciano Coelho --- drivers/net/wireless/wl12xx/boot.c | 235 ++++++++++++++++++------------------- drivers/net/wireless/wl12xx/boot.h | 1 + include/linux/wl12xx.h | 10 +- 3 files changed, 123 insertions(+), 123 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/wireless/wl12xx/boot.c b/drivers/net/wireless/wl12xx/boot.c index 34bf2fe47dc7..b5ec2c2b6f78 100644 --- a/drivers/net/wireless/wl12xx/boot.c +++ b/drivers/net/wireless/wl12xx/boot.c @@ -523,137 +523,137 @@ static void wl1271_boot_hw_version(struct wl1271 *wl) wl->quirks |= WL12XX_QUIRK_END_OF_TRANSACTION; } -/* - * WL128x has two clocks input - TCXO and FREF. 
- * TCXO is the main clock of the device, while FREF is used to sync - * between the GPS and the cellular modem. - * In cases where TCXO is 32.736MHz or 16.368MHz, the FREF will be used - * as the WLAN/BT main clock. - */ -static int wl128x_switch_fref(struct wl1271 *wl, bool *is_ref_clk) +static int wl128x_switch_tcxo_to_fref(struct wl1271 *wl) { - u16 sys_clk_cfg_val; + u16 spare_reg; - /* if working on XTAL-only mode go directly to TCXO TO FREF SWITCH */ - if ((wl->ref_clock == CONF_REF_CLK_38_4_M_XTAL) || - (wl->ref_clock == CONF_REF_CLK_26_M_XTAL)) - return true; + /* Mask bits [2] & [8:4] in the sys_clk_cfg register */ + spare_reg = wl1271_top_reg_read(wl, WL_SPARE_REG); + if (spare_reg == 0xFFFF) + return -EFAULT; + spare_reg |= (BIT(3) | BIT(5) | BIT(6)); + wl1271_top_reg_write(wl, WL_SPARE_REG, spare_reg); - /* Read clock source FREF or TCXO */ - sys_clk_cfg_val = wl1271_top_reg_read(wl, SYS_CLK_CFG_REG); + /* Enable FREF_CLK_REQ & mux MCS and coex PLLs to FREF */ + wl1271_top_reg_write(wl, SYS_CLK_CFG_REG, + WL_CLK_REQ_TYPE_PG2 | MCS_PLL_CLK_SEL_FREF); - if (sys_clk_cfg_val & PRCM_CM_EN_MUX_WLAN_FREF) { - /* if bit 3 is set - working with FREF clock */ - wl1271_debug(DEBUG_BOOT, "working with FREF clock, skip" - " to FREF"); + /* Delay execution for 15msec, to let the HW settle */ + mdelay(15); - *is_ref_clk = true; - } else { - /* if bit 3 is clear - working with TCXO clock */ - wl1271_debug(DEBUG_BOOT, "working with TCXO clock"); - - /* TCXO to FREF switch, check TXCO clock config */ - if ((wl->tcxo_clock != WL12XX_TCXOCLOCK_16_368) && - (wl->tcxo_clock != WL12XX_TCXOCLOCK_32_736)) { - /* - * not 16.368Mhz and not 32.736Mhz - skip to - * configure ELP stage - */ - wl1271_debug(DEBUG_BOOT, "NEW PLL ALGO:" - " TcxoRefClk=%d - not 16.368Mhz and not" - " 32.736Mhz - skip to configure ELP" - " stage", wl->tcxo_clock); - - *is_ref_clk = false; - } else { - wl1271_debug(DEBUG_BOOT, "NEW PLL ALGO:" - "TcxoRefClk=%d - 16.368Mhz or 32.736Mhz" - " - TCXO to FREF switch", - wl->tcxo_clock); + return 0; +} - return true; - } - } +static bool wl128x_is_tcxo_valid(struct wl1271 *wl) +{ + u16 tcxo_detection; + + tcxo_detection = wl1271_top_reg_read(wl, TCXO_CLK_DETECT_REG); + if (tcxo_detection & TCXO_DET_FAILED) + return false; - return false; + return true; } -static int wl128x_boot_clk(struct wl1271 *wl, bool *is_ref_clk) +static bool wl128x_is_fref_valid(struct wl1271 *wl) { - if (wl128x_switch_fref(wl, is_ref_clk)) { - wl1271_debug(DEBUG_BOOT, "XTAL-only mode go directly to" - " TCXO TO FREF SWITCH"); - /* TCXO to FREF switch - for PG2.0 */ - wl1271_top_reg_write(wl, WL_SPARE_REG, - WL_SPARE_MASK_8526); - - wl1271_top_reg_write(wl, SYS_CLK_CFG_REG, - WL_CLK_REQ_TYPE_PG2 | MCS_PLL_CLK_SEL_FREF); - - *is_ref_clk = true; - mdelay(15); - } + u16 fref_detection; - /* Set bit 2 in spare register to avoid illegal access */ - wl1271_top_reg_write(wl, WL_SPARE_REG, WL_SPARE_VAL); + fref_detection = wl1271_top_reg_read(wl, FREF_CLK_DETECT_REG); + if (fref_detection & FREF_CLK_DETECT_FAIL) + return false; - /* working with TCXO clock */ - if ((*is_ref_clk == false) && - ((wl->tcxo_clock == WL12XX_TCXOCLOCK_16_8) || - (wl->tcxo_clock == WL12XX_TCXOCLOCK_33_6))) { - wl1271_debug(DEBUG_BOOT, "16_8_M or 33_6_M TCXO detected"); + return true; +} - /* Manually Configure MCS PLL settings PG2.0 Only */ - wl1271_top_reg_write(wl, MCS_PLL_M_REG, MCS_PLL_M_REG_VAL); - wl1271_top_reg_write(wl, MCS_PLL_N_REG, MCS_PLL_N_REG_VAL); - wl1271_top_reg_write(wl, MCS_PLL_CONFIG_REG, - MCS_PLL_CONFIG_REG_VAL); - } else { 
- int pll_config; - u16 mcs_pll_config_val; +static int wl128x_manually_configure_mcs_pll(struct wl1271 *wl) +{ + wl1271_top_reg_write(wl, MCS_PLL_M_REG, MCS_PLL_M_REG_VAL); + wl1271_top_reg_write(wl, MCS_PLL_N_REG, MCS_PLL_N_REG_VAL); + wl1271_top_reg_write(wl, MCS_PLL_CONFIG_REG, MCS_PLL_CONFIG_REG_VAL); - /* - * Configure MCS PLL settings to FREF Freq - * Set the values that determine the time elapse since the PLL's - * get their enable signal until the lock indication is set - */ - wl1271_top_reg_write(wl, PLL_LOCK_COUNTERS_REG, - PLL_LOCK_COUNTERS_COEX | PLL_LOCK_COUNTERS_MCS); + return 0; +} - mcs_pll_config_val = wl1271_top_reg_read(wl, - MCS_PLL_CONFIG_REG); - /* - * Set the MCS PLL input frequency value according to the - * reference clock value detected/read - */ - if (*is_ref_clk == false) { - if ((wl->tcxo_clock == WL12XX_TCXOCLOCK_19_2) || - (wl->tcxo_clock == WL12XX_TCXOCLOCK_38_4)) - pll_config = 1; - else if ((wl->tcxo_clock == WL12XX_TCXOCLOCK_26) - || - (wl->tcxo_clock == WL12XX_TCXOCLOCK_52)) - pll_config = 2; - else - return -EINVAL; - } else { - if ((wl->ref_clock == CONF_REF_CLK_19_2_E) || - (wl->ref_clock == CONF_REF_CLK_38_4_E)) - pll_config = 1; - else if ((wl->ref_clock == CONF_REF_CLK_26_E) || - (wl->ref_clock == CONF_REF_CLK_52_E)) - pll_config = 2; - else - return -EINVAL; - } +static int wl128x_configure_mcs_pll(struct wl1271 *wl, int clk) +{ + u16 spare_reg; + u16 pll_config; + u8 input_freq; + + /* Mask bits [3:1] in the sys_clk_cfg register */ + spare_reg = wl1271_top_reg_read(wl, WL_SPARE_REG); + if (spare_reg == 0xFFFF) + return -EFAULT; + spare_reg |= BIT(2); + wl1271_top_reg_write(wl, WL_SPARE_REG, spare_reg); + + /* Handle special cases of the TCXO clock */ + if (wl->tcxo_clock == WL12XX_TCXOCLOCK_16_8 || + wl->tcxo_clock == WL12XX_TCXOCLOCK_33_6) + return wl128x_manually_configure_mcs_pll(wl); + + /* Set the input frequency according to the selected clock source */ + input_freq = (clk & 1) + 1; + + pll_config = wl1271_top_reg_read(wl, MCS_PLL_CONFIG_REG); + if (pll_config == 0xFFFF) + return -EFAULT; + pll_config |= (input_freq << MCS_SEL_IN_FREQ_SHIFT); + pll_config |= MCS_PLL_ENABLE_HP; + wl1271_top_reg_write(wl, MCS_PLL_CONFIG_REG, pll_config); - mcs_pll_config_val |= (pll_config << (MCS_SEL_IN_FREQ_SHIFT)) & - (MCS_SEL_IN_FREQ_MASK); - wl1271_top_reg_write(wl, MCS_PLL_CONFIG_REG, - mcs_pll_config_val); + return 0; +} + +/* + * WL128x has two clocks input - TCXO and FREF. + * TCXO is the main clock of the device, while FREF is used to sync + * between the GPS and the cellular modem. + * In cases where TCXO is 32.736MHz or 16.368MHz, the FREF will be used + * as the WLAN/BT main clock. 
+ */ +static int wl128x_boot_clk(struct wl1271 *wl, int *selected_clock) +{ + u16 sys_clk_cfg; + + /* For XTAL-only modes, FREF will be used after switching from TCXO */ + if (wl->ref_clock == WL12XX_REFCLOCK_26_XTAL || + wl->ref_clock == WL12XX_REFCLOCK_38_XTAL) { + if (!wl128x_switch_tcxo_to_fref(wl)) + return -EINVAL; + goto fref_clk; } - return 0; + /* Query the HW, to determine which clock source we should use */ + sys_clk_cfg = wl1271_top_reg_read(wl, SYS_CLK_CFG_REG); + if (sys_clk_cfg == 0xFFFF) + return -EINVAL; + if (sys_clk_cfg & PRCM_CM_EN_MUX_WLAN_FREF) + goto fref_clk; + + /* If TCXO is either 32.736MHz or 16.368MHz, switch to FREF */ + if (wl->tcxo_clock == WL12XX_TCXOCLOCK_16_368 || + wl->tcxo_clock == WL12XX_TCXOCLOCK_32_736) { + if (!wl128x_switch_tcxo_to_fref(wl)) + return -EINVAL; + goto fref_clk; + } + + /* TCXO clock is selected */ + if (!wl128x_is_tcxo_valid(wl)) + return -EINVAL; + *selected_clock = wl->tcxo_clock; + goto config_mcs_pll; + +fref_clk: + /* FREF clock is selected */ + if (!wl128x_is_fref_valid(wl)) + return -EINVAL; + *selected_clock = wl->ref_clock; + +config_mcs_pll: + return wl128x_configure_mcs_pll(wl, *selected_clock); } static int wl127x_boot_clk(struct wl1271 *wl) @@ -713,10 +713,10 @@ int wl1271_load_firmware(struct wl1271 *wl) { int ret = 0; u32 tmp, clk; - bool is_ref_clk = false; + int selected_clock = -1; if (wl->chip.id == CHIP_ID_1283_PG20) { - ret = wl128x_boot_clk(wl, &is_ref_clk); + ret = wl128x_boot_clk(wl, &selected_clock); if (ret < 0) goto out; } else { @@ -741,10 +741,7 @@ int wl1271_load_firmware(struct wl1271 *wl) wl1271_debug(DEBUG_BOOT, "clk2 0x%x", clk); if (wl->chip.id == CHIP_ID_1283_PG20) { - if (is_ref_clk == false) - clk |= ((wl->tcxo_clock & 0x3) << 1) << 4; - else - clk |= ((wl->ref_clock & 0x3) << 1) << 4; + clk |= ((selected_clock & 0x3) << 1) << 4; } else { clk |= (wl->ref_clock << 1) << 4; } diff --git a/drivers/net/wireless/wl12xx/boot.h b/drivers/net/wireless/wl12xx/boot.h index 1f5ee31dc0b1..d9de64ac1442 100644 --- a/drivers/net/wireless/wl12xx/boot.h +++ b/drivers/net/wireless/wl12xx/boot.h @@ -107,6 +107,7 @@ struct wl1271_static_data { #define MCS_SEL_IN_FREQ_MASK 0x0070 #define MCS_SEL_IN_FREQ_SHIFT 4 #define MCS_PLL_CONFIG_REG_VAL 0x73 +#define MCS_PLL_ENABLE_HP (BIT(0) | BIT(1)) #define MCS_PLL_M_REG 0xD94 #define MCS_PLL_N_REG 0xD96 diff --git a/include/linux/wl12xx.h b/include/linux/wl12xx.h index eb8aacab8d4e..c1a743ea7470 100644 --- a/include/linux/wl12xx.h +++ b/include/linux/wl12xx.h @@ -26,10 +26,12 @@ /* Reference clock values */ enum { - WL12XX_REFCLOCK_19 = 0, /* 19.2 MHz */ - WL12XX_REFCLOCK_26 = 1, /* 26 MHz */ - WL12XX_REFCLOCK_38 = 2, /* 38.4 MHz */ - WL12XX_REFCLOCK_54 = 3, /* 54 MHz */ + WL12XX_REFCLOCK_19 = 0, /* 19.2 MHz */ + WL12XX_REFCLOCK_26 = 1, /* 26 MHz */ + WL12XX_REFCLOCK_38 = 2, /* 38.4 MHz */ + WL12XX_REFCLOCK_52 = 3, /* 52 MHz */ + WL12XX_REFCLOCK_38_XTAL = 4, /* 38.4 MHz, XTAL */ + WL12XX_REFCLOCK_26_XTAL = 5, /* 26 MHz, XTAL */ }; /* TCXO clock values */ -- cgit v1.2.3-71-gd317 From 341b7cde6ccc60672fcd7fc84dd24a1b7c0b8d94 Mon Sep 17 00:00:00 2001 From: Ido Yariv Date: Thu, 31 Mar 2011 10:07:01 +0200 Subject: wl12xx: Handle platforms without level trigger interrupts Some platforms are incapable of triggering on level interrupts. Add a platform quirks member in the platform data structure, as well as an edge interrupt quirk which can be set on such platforms. 
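For a board that can only do edge-triggered interrupts, the platform data would be populated along these lines (a sketch only; EXAMPLE_WLAN_IRQ and the clock choice are made up for illustration):

	static struct wl12xx_platform_data example_wlan_data = {
		.irq			= EXAMPLE_WLAN_IRQ,	/* board-specific GPIO IRQ number */
		.board_ref_clock	= WL12XX_REFCLOCK_26,
		.platform_quirks	= WL12XX_PLATFORM_QUIRK_EDGE_IRQ,
	};

	/* from the board init code, before the driver probes */
	wl12xx_set_platform_data(&example_wlan_data);

With the quirk set, the driver requests the interrupt with IRQF_TRIGGER_RISING rather than IRQF_TRIGGER_HIGH | IRQF_ONESHOT, as described next.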
When the interrupt is requested with IRQF_TRIGGER_RISING, IRQF_ONESHOT cannot be used, as we might miss interrupts that occur after the FW status is cleared and before the threaded interrupt handler exits. Moreover, when IRQF_ONESHOT is not set, iterating more than once in the threaded interrupt handler introduces a few race conditions between this handler and the hardirq handler. Currently this is worked around by limiting the loop to one iteration only. This workaround has an impact on performance. To remove to this restriction, the race conditions will need to be addressed. Signed-off-by: Ido Yariv Signed-off-by: Luciano Coelho --- drivers/net/wireless/wl12xx/main.c | 9 +++++++++ drivers/net/wireless/wl12xx/sdio.c | 9 ++++++++- drivers/net/wireless/wl12xx/spi.c | 9 ++++++++- drivers/net/wireless/wl12xx/wl12xx.h | 3 +++ include/linux/wl12xx.h | 4 ++++ 5 files changed, 32 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/wireless/wl12xx/main.c b/drivers/net/wireless/wl12xx/main.c index 1feb9551ef8f..7126506611c1 100644 --- a/drivers/net/wireless/wl12xx/main.c +++ b/drivers/net/wireless/wl12xx/main.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "wl12xx.h" #include "wl12xx_80211.h" @@ -719,6 +720,13 @@ irqreturn_t wl1271_irq(int irq, void *cookie) set_bit(WL1271_FLAG_TX_PENDING, &wl->flags); cancel_work_sync(&wl->tx_work); + /* + * In case edge triggered interrupt must be used, we cannot iterate + * more than once without introducing race conditions with the hardirq. + */ + if (wl->platform_quirks & WL12XX_PLATFORM_QUIRK_EDGE_IRQ) + loopcount = 1; + mutex_lock(&wl->mutex); wl1271_debug(DEBUG_IRQ, "IRQ work"); @@ -3648,6 +3656,7 @@ struct ieee80211_hw *wl1271_alloc_hw(void) wl->ap_ps_map = 0; wl->ap_fw_ps_map = 0; wl->quirks = 0; + wl->platform_quirks = 0; memset(wl->tx_frames_map, 0, sizeof(wl->tx_frames_map)); for (i = 0; i < ACX_TX_DESCRIPTORS; i++) diff --git a/drivers/net/wireless/wl12xx/sdio.c b/drivers/net/wireless/wl12xx/sdio.c index 8246e9de4306..bcd4ad7ba90d 100644 --- a/drivers/net/wireless/wl12xx/sdio.c +++ b/drivers/net/wireless/wl12xx/sdio.c @@ -220,6 +220,7 @@ static int __devinit wl1271_probe(struct sdio_func *func, struct ieee80211_hw *hw; const struct wl12xx_platform_data *wlan_data; struct wl1271 *wl; + unsigned long irqflags; int ret; /* We are only able to handle the wlan function */ @@ -251,9 +252,15 @@ static int __devinit wl1271_probe(struct sdio_func *func, wl->irq = wlan_data->irq; wl->ref_clock = wlan_data->board_ref_clock; wl->tcxo_clock = wlan_data->board_tcxo_clock; + wl->platform_quirks = wlan_data->platform_quirks; + + if (wl->platform_quirks & WL12XX_PLATFORM_QUIRK_EDGE_IRQ) + irqflags = IRQF_TRIGGER_RISING; + else + irqflags = IRQF_TRIGGER_HIGH | IRQF_ONESHOT; ret = request_threaded_irq(wl->irq, wl1271_hardirq, wl1271_irq, - IRQF_TRIGGER_HIGH | IRQF_ONESHOT, + irqflags, DRIVER_NAME, wl); if (ret < 0) { wl1271_error("request_irq() failed: %d", ret); diff --git a/drivers/net/wireless/wl12xx/spi.c b/drivers/net/wireless/wl12xx/spi.c index 7b82b5f0e490..51662bb68019 100644 --- a/drivers/net/wireless/wl12xx/spi.c +++ b/drivers/net/wireless/wl12xx/spi.c @@ -364,6 +364,7 @@ static int __devinit wl1271_probe(struct spi_device *spi) struct wl12xx_platform_data *pdata; struct ieee80211_hw *hw; struct wl1271 *wl; + unsigned long irqflags; int ret; pdata = spi->dev.platform_data; @@ -402,6 +403,12 @@ static int __devinit wl1271_probe(struct spi_device *spi) wl->ref_clock = pdata->board_ref_clock; wl->tcxo_clock = 
pdata->board_tcxo_clock; + wl->platform_quirks = pdata->platform_quirks; + + if (wl->platform_quirks & WL12XX_PLATFORM_QUIRK_EDGE_IRQ) + irqflags = IRQF_TRIGGER_RISING; + else + irqflags = IRQF_TRIGGER_HIGH | IRQF_ONESHOT; wl->irq = spi->irq; if (wl->irq < 0) { @@ -411,7 +418,7 @@ static int __devinit wl1271_probe(struct spi_device *spi) } ret = request_threaded_irq(wl->irq, wl1271_hardirq, wl1271_irq, - IRQF_TRIGGER_HIGH | IRQF_ONESHOT, + irqflags, DRIVER_NAME, wl); if (ret < 0) { wl1271_error("request_irq() failed: %d", ret); diff --git a/drivers/net/wireless/wl12xx/wl12xx.h b/drivers/net/wireless/wl12xx/wl12xx.h index 9ccbcfdd0802..fb2b79fa42b4 100644 --- a/drivers/net/wireless/wl12xx/wl12xx.h +++ b/drivers/net/wireless/wl12xx/wl12xx.h @@ -579,6 +579,9 @@ struct wl1271 { /* Quirks of specific hardware revisions */ unsigned int quirks; + + /* Platform limitations */ + unsigned int platform_quirks; }; struct wl1271_station { diff --git a/include/linux/wl12xx.h b/include/linux/wl12xx.h index c1a743ea7470..4b697395326e 100644 --- a/include/linux/wl12xx.h +++ b/include/linux/wl12xx.h @@ -53,8 +53,12 @@ struct wl12xx_platform_data { bool use_eeprom; int board_ref_clock; int board_tcxo_clock; + unsigned long platform_quirks; }; +/* Platform does not support level trigger interrupts */ +#define WL12XX_PLATFORM_QUIRK_EDGE_IRQ BIT(0) + #ifdef CONFIG_WL12XX_PLATFORM_DATA int wl12xx_set_platform_data(const struct wl12xx_platform_data *data); -- cgit v1.2.3-71-gd317 From d924de09cac6e18bdfbe9461a2ab2adeb36e77b0 Mon Sep 17 00:00:00 2001 From: Michael Jones Date: Tue, 29 Mar 2011 05:19:06 -0300 Subject: [media] v4l: add V4L2_PIX_FMT_Y12 format Y12 is a grey-scale format with a depth of 12 bits per pixel stored in 16-bit words. Signed-off-by: Michael Jones Acked-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab --- Documentation/DocBook/media-entities.tmpl | 1 + Documentation/DocBook/v4l/pixfmt-y12.xml | 79 +++++++++++++++++++++++++++++++ Documentation/DocBook/v4l/pixfmt.xml | 1 + include/linux/videodev2.h | 1 + 4 files changed, 82 insertions(+) create mode 100644 Documentation/DocBook/v4l/pixfmt-y12.xml (limited to 'include/linux') diff --git a/Documentation/DocBook/media-entities.tmpl b/Documentation/DocBook/media-entities.tmpl index 5d259c632cdf..fea63b45471a 100644 --- a/Documentation/DocBook/media-entities.tmpl +++ b/Documentation/DocBook/media-entities.tmpl @@ -294,6 +294,7 @@ + diff --git a/Documentation/DocBook/v4l/pixfmt-y12.xml b/Documentation/DocBook/v4l/pixfmt-y12.xml new file mode 100644 index 000000000000..ff417b858cc9 --- /dev/null +++ b/Documentation/DocBook/v4l/pixfmt-y12.xml @@ -0,0 +1,79 @@ + + + V4L2_PIX_FMT_Y12 ('Y12 ') + &manvol; + + + V4L2_PIX_FMT_Y12 + Grey-scale image + + + Description + + This is a grey-scale image with a depth of 12 bits per pixel. Pixels +are stored in 16-bit words with unused high bits padded with 0. The least +significant byte is stored at lower memory addresses (little-endian). + + + <constant>V4L2_PIX_FMT_Y12</constant> 4 × 4 +pixel image + + + Byte Order. + Each cell is one byte. 
+ + + + + + start + 0: + Y'00low + Y'00high + Y'01low + Y'01high + Y'02low + Y'02high + Y'03low + Y'03high + + + start + 8: + Y'10low + Y'10high + Y'11low + Y'11high + Y'12low + Y'12high + Y'13low + Y'13high + + + start + 16: + Y'20low + Y'20high + Y'21low + Y'21high + Y'22low + Y'22high + Y'23low + Y'23high + + + start + 24: + Y'30low + Y'30high + Y'31low + Y'31high + Y'32low + Y'32high + Y'33low + Y'33high + + + + + + + + + diff --git a/Documentation/DocBook/v4l/pixfmt.xml b/Documentation/DocBook/v4l/pixfmt.xml index c6fdcbbd1b41..40af4beb48b9 100644 --- a/Documentation/DocBook/v4l/pixfmt.xml +++ b/Documentation/DocBook/v4l/pixfmt.xml @@ -696,6 +696,7 @@ information. &sub-packed-yuv; &sub-grey; &sub-y10; + &sub-y12; &sub-y16; &sub-yuyv; &sub-uyvy; diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h index aa6c393b7ae9..be82c8ead1af 100644 --- a/include/linux/videodev2.h +++ b/include/linux/videodev2.h @@ -308,6 +308,7 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_Y4 v4l2_fourcc('Y', '0', '4', ' ') /* 4 Greyscale */ #define V4L2_PIX_FMT_Y6 v4l2_fourcc('Y', '0', '6', ' ') /* 6 Greyscale */ #define V4L2_PIX_FMT_Y10 v4l2_fourcc('Y', '1', '0', ' ') /* 10 Greyscale */ +#define V4L2_PIX_FMT_Y12 v4l2_fourcc('Y', '1', '2', ' ') /* 12 Greyscale */ #define V4L2_PIX_FMT_Y16 v4l2_fourcc('Y', '1', '6', ' ') /* 16 Greyscale */ /* Palette formats */ -- cgit v1.2.3-71-gd317 From cbbc69a4a98081740f0e3d7717fbfa0b584b983d Mon Sep 17 00:00:00 2001 From: Michael Jones Date: Tue, 29 Mar 2011 05:19:07 -0300 Subject: [media] media: add missing 8-bit bayer formats and Y12 8-bit SGBRG and SRGGB media bus formats are missing, as well as the 12-bit grey format. Add them. Signed-off-by: Michael Jones Acked-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab --- Documentation/DocBook/v4l/subdev-formats.xml | 59 ++++++++++++++++++++++++++++ include/linux/v4l2-mediabus.h | 7 +++- 2 files changed, 64 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/Documentation/DocBook/v4l/subdev-formats.xml b/Documentation/DocBook/v4l/subdev-formats.xml index 7041127d6dfc..d7ccd25edcc1 100644 --- a/Documentation/DocBook/v4l/subdev-formats.xml +++ b/Documentation/DocBook/v4l/subdev-formats.xml @@ -456,6 +456,23 @@ b1 b0 + + V4L2_MBUS_FMT_SGBRG8_1X8 + 0x3013 + + - + - + - + - + g7 + g6 + g5 + g4 + g3 + g2 + g1 + g0 + V4L2_MBUS_FMT_SGRBG8_1X8 0x3002 @@ -473,6 +490,23 @@ g1 g0 + + V4L2_MBUS_FMT_SRGGB8_1X8 + 0x3014 + + - + - + - + - + r7 + r6 + r5 + r4 + r3 + r2 + r1 + r0 + V4L2_MBUS_FMT_SBGGR10_DPCM8_1X8 0x300b @@ -2159,6 +2193,31 @@ u1 u0 + + V4L2_MBUS_FMT_Y12_1X12 + 0x2013 + + - + - + - + - + - + - + - + - + y11 + y10 + y9 + y8 + y7 + y6 + y5 + y4 + y3 + y2 + y1 + y0 + V4L2_MBUS_FMT_UYVY8_1X16 0x200f diff --git a/include/linux/v4l2-mediabus.h b/include/linux/v4l2-mediabus.h index 7054a7a8065e..de5c15921025 100644 --- a/include/linux/v4l2-mediabus.h +++ b/include/linux/v4l2-mediabus.h @@ -47,7 +47,7 @@ enum v4l2_mbus_pixelcode { V4L2_MBUS_FMT_RGB565_2X8_BE = 0x1007, V4L2_MBUS_FMT_RGB565_2X8_LE = 0x1008, - /* YUV (including grey) - next is 0x2013 */ + /* YUV (including grey) - next is 0x2014 */ V4L2_MBUS_FMT_Y8_1X8 = 0x2001, V4L2_MBUS_FMT_UYVY8_1_5X8 = 0x2002, V4L2_MBUS_FMT_VYUY8_1_5X8 = 0x2003, @@ -60,6 +60,7 @@ enum v4l2_mbus_pixelcode { V4L2_MBUS_FMT_Y10_1X10 = 0x200a, V4L2_MBUS_FMT_YUYV10_2X10 = 0x200b, V4L2_MBUS_FMT_YVYU10_2X10 = 0x200c, + V4L2_MBUS_FMT_Y12_1X12 = 0x2013, V4L2_MBUS_FMT_UYVY8_1X16 = 0x200f, V4L2_MBUS_FMT_VYUY8_1X16 = 0x2010, V4L2_MBUS_FMT_YUYV8_1X16 = 0x2011, @@ -67,9 
+68,11 @@ enum v4l2_mbus_pixelcode { V4L2_MBUS_FMT_YUYV10_1X20 = 0x200d, V4L2_MBUS_FMT_YVYU10_1X20 = 0x200e, - /* Bayer - next is 0x3013 */ + /* Bayer - next is 0x3015 */ V4L2_MBUS_FMT_SBGGR8_1X8 = 0x3001, + V4L2_MBUS_FMT_SGBRG8_1X8 = 0x3013, V4L2_MBUS_FMT_SGRBG8_1X8 = 0x3002, + V4L2_MBUS_FMT_SRGGB8_1X8 = 0x3014, V4L2_MBUS_FMT_SBGGR10_DPCM8_1X8 = 0x300b, V4L2_MBUS_FMT_SGBRG10_DPCM8_1X8 = 0x300c, V4L2_MBUS_FMT_SGRBG10_DPCM8_1X8 = 0x3009, -- cgit v1.2.3-71-gd317 From cbc6a6ed0900aed789b5ca77192845f2f987af70 Mon Sep 17 00:00:00 2001 From: Antonio Ospite Date: Wed, 13 Apr 2011 21:40:45 +0200 Subject: rfkill: Regulator consumer driver for rfkill Add a regulator consumer driver for rfkill to enable controlling radio transmitters connected to voltage regulators using the regulator framework. A new "vrfkill" virtual supply is provided to use in platform code. Signed-off-by: Guiming Zhuo Signed-off-by: Antonio Ospite Reviewed-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/rfkill-regulator.h | 48 ++++++++++++ net/rfkill/Kconfig | 11 +++ net/rfkill/Makefile | 1 + net/rfkill/rfkill-regulator.c | 164 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 224 insertions(+) create mode 100644 include/linux/rfkill-regulator.h create mode 100644 net/rfkill/rfkill-regulator.c (limited to 'include/linux') diff --git a/include/linux/rfkill-regulator.h b/include/linux/rfkill-regulator.h new file mode 100644 index 000000000000..aca36bc83315 --- /dev/null +++ b/include/linux/rfkill-regulator.h @@ -0,0 +1,48 @@ +/* + * rfkill-regulator.c - Regulator consumer driver for rfkill + * + * Copyright (C) 2009 Guiming Zhuo + * Copyright (C) 2011 Antonio Ospite + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#ifndef __LINUX_RFKILL_REGULATOR_H +#define __LINUX_RFKILL_REGULATOR_H + +/* + * Use "vrfkill" as supply id when declaring the regulator consumer: + * + * static struct regulator_consumer_supply pcap_regulator_V6_consumers [] = { + * { .dev_name = "rfkill-regulator.0", .supply = "vrfkill" }, + * }; + * + * If you have several regulator driven rfkill, you can append a numerical id to + * .dev_name as done above, and use the same id when declaring the platform + * device: + * + * static struct rfkill_regulator_platform_data ezx_rfkill_bt_data = { + * .name = "ezx-bluetooth", + * .type = RFKILL_TYPE_BLUETOOTH, + * }; + * + * static struct platform_device a910_rfkill = { + * .name = "rfkill-regulator", + * .id = 0, + * .dev = { + * .platform_data = &ezx_rfkill_bt_data, + * }, + * }; + */ + +#include + +struct rfkill_regulator_platform_data { + char *name; /* the name for the rfkill switch */ + enum rfkill_type type; /* the type as specified in rfkill.h */ +}; + +#endif /* __LINUX_RFKILL_REGULATOR_H */ diff --git a/net/rfkill/Kconfig b/net/rfkill/Kconfig index 7fce6dfd2180..48464ca13b24 100644 --- a/net/rfkill/Kconfig +++ b/net/rfkill/Kconfig @@ -22,3 +22,14 @@ config RFKILL_INPUT depends on RFKILL depends on INPUT = y || RFKILL = INPUT default y if !EXPERT + +config RFKILL_REGULATOR + tristate "Generic rfkill regulator driver" + depends on RFKILL || !RFKILL + depends on REGULATOR + help + This options enable controlling radio transmitters connected to + voltage regulator using the regulator framework. + + To compile this driver as a module, choose M here: the module will + be called rfkill-regulator. 
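Purely as a reading aid (it restates what the new header documents), a board might glue a Bluetooth enable regulator to this driver roughly as follows; all board_* names are invented for the example:

#include <linux/init.h>
#include <linux/platform_device.h>
#include <linux/regulator/machine.h>
#include <linux/rfkill.h>
#include <linux/rfkill-regulator.h>

/* hooked into the board's struct regulator_init_data for the BT enable rail */
static struct regulator_consumer_supply board_vrfkill_consumers[] = {
	{ .dev_name = "rfkill-regulator.0", .supply = "vrfkill" },
};

static struct rfkill_regulator_platform_data board_bt_rfkill_data = {
	.name = "board-bluetooth",
	.type = RFKILL_TYPE_BLUETOOTH,
};

static struct platform_device board_bt_rfkill = {
	.name	= "rfkill-regulator",
	.id	= 0,
	.dev	= {
		.platform_data = &board_bt_rfkill_data,
	},
};

static void __init board_rfkill_init(void)
{
	platform_device_register(&board_bt_rfkill);
}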
diff --git a/net/rfkill/Makefile b/net/rfkill/Makefile index 662105352691..d9a5a58ffd8c 100644 --- a/net/rfkill/Makefile +++ b/net/rfkill/Makefile @@ -5,3 +5,4 @@ rfkill-y += core.o rfkill-$(CONFIG_RFKILL_INPUT) += input.o obj-$(CONFIG_RFKILL) += rfkill.o +obj-$(CONFIG_RFKILL_REGULATOR) += rfkill-regulator.o diff --git a/net/rfkill/rfkill-regulator.c b/net/rfkill/rfkill-regulator.c new file mode 100644 index 000000000000..18dc512a10f3 --- /dev/null +++ b/net/rfkill/rfkill-regulator.c @@ -0,0 +1,164 @@ +/* + * rfkill-regulator.c - Regulator consumer driver for rfkill + * + * Copyright (C) 2009 Guiming Zhuo + * Copyright (C) 2011 Antonio Ospite + * + * Implementation inspired by leds-regulator driver. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +struct rfkill_regulator_data { + struct rfkill *rf_kill; + bool reg_enabled; + + struct regulator *vcc; +}; + +static int rfkill_regulator_set_block(void *data, bool blocked) +{ + struct rfkill_regulator_data *rfkill_data = data; + + pr_debug("%s: blocked: %d\n", __func__, blocked); + + if (blocked) { + if (rfkill_data->reg_enabled) { + regulator_disable(rfkill_data->vcc); + rfkill_data->reg_enabled = 0; + } + } else { + if (!rfkill_data->reg_enabled) { + regulator_enable(rfkill_data->vcc); + rfkill_data->reg_enabled = 1; + } + } + + pr_debug("%s: regulator_is_enabled after set_block: %d\n", __func__, + regulator_is_enabled(rfkill_data->vcc)); + + return 0; +} + +struct rfkill_ops rfkill_regulator_ops = { + .set_block = rfkill_regulator_set_block, +}; + +static int __devinit rfkill_regulator_probe(struct platform_device *pdev) +{ + struct rfkill_regulator_platform_data *pdata = pdev->dev.platform_data; + struct rfkill_regulator_data *rfkill_data; + struct regulator *vcc; + struct rfkill *rf_kill; + int ret = 0; + + if (pdata == NULL) { + dev_err(&pdev->dev, "no platform data\n"); + return -ENODEV; + } + + if (pdata->name == NULL || pdata->type == 0) { + dev_err(&pdev->dev, "invalid name or type in platform data\n"); + return -EINVAL; + } + + vcc = regulator_get_exclusive(&pdev->dev, "vrfkill"); + if (IS_ERR(vcc)) { + dev_err(&pdev->dev, "Cannot get vcc for %s\n", pdata->name); + ret = PTR_ERR(vcc); + goto out; + } + + rfkill_data = kzalloc(sizeof(*rfkill_data), GFP_KERNEL); + if (rfkill_data == NULL) { + ret = -ENOMEM; + goto err_data_alloc; + } + + rf_kill = rfkill_alloc(pdata->name, &pdev->dev, + pdata->type, + &rfkill_regulator_ops, rfkill_data); + if (rf_kill == NULL) { + dev_err(&pdev->dev, "Cannot alloc rfkill device\n"); + ret = -ENOMEM; + goto err_rfkill_alloc; + } + + if (regulator_is_enabled(vcc)) { + dev_dbg(&pdev->dev, "Regulator already enabled\n"); + rfkill_data->reg_enabled = 1; + } + rfkill_data->vcc = vcc; + rfkill_data->rf_kill = rf_kill; + + ret = rfkill_register(rf_kill); + if (ret) { + dev_err(&pdev->dev, "Cannot register rfkill device\n"); + goto err_rfkill_register; + } + + platform_set_drvdata(pdev, rfkill_data); + dev_info(&pdev->dev, "%s initialized\n", pdata->name); + + return 0; + +err_rfkill_register: + rfkill_destroy(rf_kill); +err_rfkill_alloc: + kfree(rfkill_data); +err_data_alloc: + regulator_put(vcc); +out: + return ret; +} + +static int __devexit rfkill_regulator_remove(struct platform_device *pdev) +{ + struct rfkill_regulator_data *rfkill_data = platform_get_drvdata(pdev); + struct 
rfkill *rf_kill = rfkill_data->rf_kill; + + rfkill_unregister(rf_kill); + rfkill_destroy(rf_kill); + regulator_put(rfkill_data->vcc); + kfree(rfkill_data); + + return 0; +} + +static struct platform_driver rfkill_regulator_driver = { + .probe = rfkill_regulator_probe, + .remove = __devexit_p(rfkill_regulator_remove), + .driver = { + .name = "rfkill-regulator", + .owner = THIS_MODULE, + }, +}; + +static int __init rfkill_regulator_init(void) +{ + return platform_driver_register(&rfkill_regulator_driver); +} +module_init(rfkill_regulator_init); + +static void __exit rfkill_regulator_exit(void) +{ + platform_driver_unregister(&rfkill_regulator_driver); +} +module_exit(rfkill_regulator_exit); + +MODULE_AUTHOR("Guiming Zhuo "); +MODULE_AUTHOR("Antonio Ospite "); +MODULE_DESCRIPTION("Regulator consumer driver for rfkill"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:rfkill-regulator"); -- cgit v1.2.3-71-gd317 From 6716671d8c1c07a8072098764d1b7cbfef7412ad Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 23 Mar 2011 10:48:35 +0100 Subject: TTY: introduce deinit helpers for proper ldisc shutdown Introduce deinitialize_tty_struct which should be called after initialize_tty_struct and before successfull tty_ldisc_setup. It calls tty_ldisc_deinit which is opposite of tty_ldisc_init. It only puts a reference to ldisc and assigns NULL to tty->ldisc. It will be used to shut down ldisc when tty_release cannot be called yet. Signed-off-by: Jiri Slaby Cc: Alan Cox Cc: Julian Anastasov Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_io.c | 14 ++++++++++++++ drivers/tty/tty_ldisc.c | 13 +++++++++++++ include/linux/tty.h | 2 ++ 3 files changed, 29 insertions(+) (limited to 'include/linux') diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 026bf2f6f5f2..f5dd23520fe3 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -2886,6 +2886,20 @@ void initialize_tty_struct(struct tty_struct *tty, tty->dev = tty_get_device(tty); } +/** + * deinitialize_tty_struct + * @tty: tty to deinitialize + * + * This subroutine deinitializes a tty structure that has been newly + * allocated but tty_release cannot be called on that yet. + * + * Locking: none - tty in question must not be exposed at this point + */ +void deinitialize_tty_struct(struct tty_struct *tty) +{ + tty_ldisc_deinit(tty); +} + /** * tty_put_char - write one character to a tty * @tty: tty diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c index e19e13647116..5d01d32e2cf0 100644 --- a/drivers/tty/tty_ldisc.c +++ b/drivers/tty/tty_ldisc.c @@ -956,6 +956,19 @@ void tty_ldisc_init(struct tty_struct *tty) tty_ldisc_assign(tty, ld); } +/** + * tty_ldisc_init - ldisc cleanup for new tty + * @tty: tty that was allocated recently + * + * The tty structure must not becompletely set up (tty_ldisc_setup) when + * this call is made. + */ +void tty_ldisc_deinit(struct tty_struct *tty) +{ + put_ldisc(tty->ldisc); + tty_ldisc_assign(tty, NULL); +} + void tty_ldisc_begin(void) { /* Setup the default TTY line discipline. 
*/ diff --git a/include/linux/tty.h b/include/linux/tty.h index 9f469c700550..4db4ca79f895 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -472,6 +472,7 @@ extern int tty_add_file(struct tty_struct *tty, struct file *file); extern void free_tty_struct(struct tty_struct *tty); extern void initialize_tty_struct(struct tty_struct *tty, struct tty_driver *driver, int idx); +extern void deinitialize_tty_struct(struct tty_struct *tty); extern struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx, int first_ok); extern int tty_release(struct inode *inode, struct file *filp); @@ -525,6 +526,7 @@ extern int tty_set_ldisc(struct tty_struct *tty, int ldisc); extern int tty_ldisc_setup(struct tty_struct *tty, struct tty_struct *o_tty); extern void tty_ldisc_release(struct tty_struct *tty, struct tty_struct *o_tty); extern void tty_ldisc_init(struct tty_struct *tty); +extern void tty_ldisc_deinit(struct tty_struct *tty); extern void tty_ldisc_begin(void); /* This last one is just for the tty layer internals and shouldn't be used elsewhere */ extern void tty_ldisc_enable(struct tty_struct *tty); -- cgit v1.2.3-71-gd317 From bcdd323b893ad3c9b7ef26b5e4a0bef974238501 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Wed, 16 Mar 2011 15:59:35 +0200 Subject: device: add dev_WARN_ONCE it's quite useful to print the device name on the stack dump caused by WARN(), but there are other cases where we might want to use WARN_ONCE. Introduce a helper similar to dev_WARN() for that case too. Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index ab8dfc095709..d4840511e877 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -742,13 +742,17 @@ do { \ #endif /* - * dev_WARN() acts like dev_printk(), but with the key difference + * dev_WARN*() acts like dev_printk(), but with the key difference * of using a WARN/WARN_ON to get the message out, including the * file/line information and a backtrace. */ #define dev_WARN(dev, format, arg...) \ WARN(1, "Device: %s\n" format, dev_driver_string(dev), ## arg); +#define dev_WARN_ONCE(dev, condition, format, arg...) \ + WARN_ONCE(condition, "Device %s\n" format, \ + dev_driver_string(dev), ## arg) + /* Create alias, so I can be autoloaded. */ #define MODULE_ALIAS_CHARDEV(major,minor) \ MODULE_ALIAS("char-major-" __stringify(major) "-" __stringify(minor)) -- cgit v1.2.3-71-gd317 From aed65af1cc2f6fc9ded5a8158f1405a02cf6d2ff Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 28 Mar 2011 09:12:52 -0700 Subject: drivers: make device_type const The device_type structure does not contain data that changes during usage and should be const. This allows devices to declare the struct const. I have patches to change all the subsystems, but need the infra structure change first. 
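To sketch what those follow-up subsystem conversions look like (the example_dev names are invented), a driver can now keep its device_type in rodata and still assign it to dev->type:

#include <linux/device.h>
#include <linux/slab.h>

struct example_dev {
	struct device dev;
};

static void example_dev_release(struct device *dev)
{
	kfree(container_of(dev, struct example_dev, dev));
}

/* may now be const, since dev->type is a pointer to const */
static const struct device_type example_dev_type = {
	.name		= "example",
	.release	= example_dev_release,
};

static void example_dev_setup(struct example_dev *edev)
{
	device_initialize(&edev->dev);
	edev->dev.type = &example_dev_type;
}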
Signed-off-by: Stephen Hemminger Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 4 ++-- include/linux/device.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/core.c b/drivers/base/core.c index 81b78ede37c4..fb8130ceea10 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -400,7 +400,7 @@ static void device_remove_groups(struct device *dev, static int device_add_attrs(struct device *dev) { struct class *class = dev->class; - struct device_type *type = dev->type; + const struct device_type *type = dev->type; int error; if (class) { @@ -440,7 +440,7 @@ static int device_add_attrs(struct device *dev) static void device_remove_attrs(struct device *dev) { struct class *class = dev->class; - struct device_type *type = dev->type; + const struct device_type *type = dev->type; device_remove_groups(dev, dev->groups); diff --git a/include/linux/device.h b/include/linux/device.h index d4840511e877..350ceda4de97 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -408,7 +408,7 @@ struct device { struct kobject kobj; const char *init_name; /* initial name of the device */ - struct device_type *type; + const struct device_type *type; struct mutex mutex; /* mutex to synchronize calls to * its driver. -- cgit v1.2.3-71-gd317 From 0b2e9a8e10ad2d191e5c37e77f1ce23e148e7a0b Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 14 Apr 2011 22:31:57 +0000 Subject: of: Export of_irq_find_parent() We have platform code that needs to find a node's interrupt parent, so export of_irq_find_parent() so we can use it. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt --- drivers/of/irq.c | 2 +- include/linux/of_irq.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/of/irq.c b/drivers/of/irq.c index 75b0d3cb7676..9f689f1da0fc 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -56,7 +56,7 @@ EXPORT_SYMBOL_GPL(irq_of_parse_and_map); * Returns a pointer to the interrupt parent node, or NULL if the interrupt * parent could not be determined. */ -static struct device_node *of_irq_find_parent(struct device_node *child) +struct device_node *of_irq_find_parent(struct device_node *child) { struct device_node *p; const __be32 *parp; diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h index 109e013b1772..e6955f5d1f08 100644 --- a/include/linux/of_irq.h +++ b/include/linux/of_irq.h @@ -68,6 +68,7 @@ extern int of_irq_to_resource(struct device_node *dev, int index, extern int of_irq_count(struct device_node *dev); extern int of_irq_to_resource_table(struct device_node *dev, struct resource *res, int nr_irqs); +extern struct device_node *of_irq_find_parent(struct device_node *child); #endif /* CONFIG_OF_IRQ */ #endif /* CONFIG_OF */ -- cgit v1.2.3-71-gd317 From 8c9e80ed276fc4b9c9fadf29d8bf6b3576112f1a Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 21 Apr 2011 17:23:19 -0700 Subject: SECURITY: Move exec_permission RCU checks into security modules Right now all RCU walks fall back to reference walk when CONFIG_SECURITY is enabled, even though just the standard capability module is active. This is because security_inode_exec_permission unconditionally fails RCU walks. Move this decision to the low level security module. This requires passing the RCU flags down the security hook. 
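The per-module pattern, sketched here with an invented example_check_access() helper, is to keep the cheap checks and punt back to ref-walk only when the remaining work (auditing, anything that may block) cannot run under RCU:

#include <linux/fs.h>
#include <linux/security.h>

/* placeholder for the module-specific access decision */
static int example_check_access(struct inode *inode, int mask);

static int example_inode_permission(struct inode *inode, int mask, unsigned flags)
{
	if (!mask)
		return 0;

	/*
	 * Auditing and blocking work cannot run in RCU-walk mode;
	 * -ECHILD tells the VFS to retry this lookup in ref-walk mode.
	 */
	if (flags & IPERM_FLAG_RCU)
		return -ECHILD;

	return example_check_access(inode, mask);
}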
This way at least the capability module and a few easy cases in selinux/smack work with RCU walks with CONFIG_SECURITY=y Signed-off-by: Andi Kleen Acked-by: Eric Paris Signed-off-by: Linus Torvalds --- include/linux/security.h | 2 +- security/capability.c | 2 +- security/security.c | 6 ++---- security/selinux/hooks.c | 6 +++++- security/smack/smack_lsm.c | 6 +++++- 5 files changed, 14 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index ca02f1716736..8ce59ef3e5af 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1456,7 +1456,7 @@ struct security_operations { struct inode *new_dir, struct dentry *new_dentry); int (*inode_readlink) (struct dentry *dentry); int (*inode_follow_link) (struct dentry *dentry, struct nameidata *nd); - int (*inode_permission) (struct inode *inode, int mask); + int (*inode_permission) (struct inode *inode, int mask, unsigned flags); int (*inode_setattr) (struct dentry *dentry, struct iattr *attr); int (*inode_getattr) (struct vfsmount *mnt, struct dentry *dentry); int (*inode_setxattr) (struct dentry *dentry, const char *name, diff --git a/security/capability.c b/security/capability.c index 2984ea4f776f..bbb51156261b 100644 --- a/security/capability.c +++ b/security/capability.c @@ -181,7 +181,7 @@ static int cap_inode_follow_link(struct dentry *dentry, return 0; } -static int cap_inode_permission(struct inode *inode, int mask) +static int cap_inode_permission(struct inode *inode, int mask, unsigned flags) { return 0; } diff --git a/security/security.c b/security/security.c index 101142369db4..4ba6d4cc061f 100644 --- a/security/security.c +++ b/security/security.c @@ -518,16 +518,14 @@ int security_inode_permission(struct inode *inode, int mask) { if (unlikely(IS_PRIVATE(inode))) return 0; - return security_ops->inode_permission(inode, mask); + return security_ops->inode_permission(inode, mask, 0); } int security_inode_exec_permission(struct inode *inode, unsigned int flags) { if (unlikely(IS_PRIVATE(inode))) return 0; - if (flags) - return -ECHILD; - return security_ops->inode_permission(inode, MAY_EXEC); + return security_ops->inode_permission(inode, MAY_EXEC, flags); } int security_inode_setattr(struct dentry *dentry, struct iattr *attr) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index f9c3764e4859..a73f4e463774 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2635,7 +2635,7 @@ static int selinux_inode_follow_link(struct dentry *dentry, struct nameidata *na return dentry_has_perm(cred, NULL, dentry, FILE__READ); } -static int selinux_inode_permission(struct inode *inode, int mask) +static int selinux_inode_permission(struct inode *inode, int mask, unsigned flags) { const struct cred *cred = current_cred(); struct common_audit_data ad; @@ -2649,6 +2649,10 @@ static int selinux_inode_permission(struct inode *inode, int mask) if (!mask) return 0; + /* May be droppable after audit */ + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + COMMON_AUDIT_DATA_INIT(&ad, FS); ad.u.fs.inode = inode; diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index c6f8fcadae07..400a5d5cde61 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -686,7 +686,7 @@ static int smack_inode_rename(struct inode *old_inode, * * Returns 0 if access is permitted, -EACCES otherwise */ -static int smack_inode_permission(struct inode *inode, int mask) +static int smack_inode_permission(struct inode *inode, int mask, 
unsigned flags) { struct smk_audit_info ad; @@ -696,6 +696,10 @@ static int smack_inode_permission(struct inode *inode, int mask) */ if (mask == 0) return 0; + + /* May be droppable after audit */ + if (flags & IPERM_FLAG_RCU) + return -ECHILD; smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); smk_ad_setfield_u_fs_inode(&ad, inode); return smk_curacc(smk_of_inode(inode), mask, &ad); -- cgit v1.2.3-71-gd317 From 764b0c4b3256ad4431cb52eaf99c0abe6df0a085 Mon Sep 17 00:00:00 2001 From: Pavan Savoy Date: Fri, 8 Apr 2011 04:57:42 -0500 Subject: drivers:misc:ti-st: handle delayed tty receive When certain technologies shutdown their interface without waiting for the acknowledgement from the chip. The receive_buf from the TTY would be invoked a while after the relevant technology is unregistered. This patch introduces a new flag "is_registered" which maintains the state of protocols BT, FM or GPS and thereby removes the need to clear the protocol data from ST when protocols gets unregistered. This fixes corner cases when HCI RESET is sent down from bluetooth stack and the receive_buf is called from tty after 250ms before which bluetooth would have unregistered from the system. OR - when FM application decides to close down the device without sending a power-off FM command resulting in some RDS data or interrupt data coming in after the driver is unregistered. Signed-off-by: Pavan Savoy Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ti-st/st_core.c | 23 +++++++++++++---------- include/linux/ti_wilink_st.h | 3 ++- 2 files changed, 15 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/misc/ti-st/st_core.c b/drivers/misc/ti-st/st_core.c index 486117f72c9f..f91f82eabda7 100644 --- a/drivers/misc/ti-st/st_core.c +++ b/drivers/misc/ti-st/st_core.c @@ -43,13 +43,15 @@ static void add_channel_to_table(struct st_data_s *st_gdata, pr_info("%s: id %d\n", __func__, new_proto->chnl_id); /* list now has the channel id as index itself */ st_gdata->list[new_proto->chnl_id] = new_proto; + st_gdata->is_registered[new_proto->chnl_id] = true; } static void remove_channel_from_table(struct st_data_s *st_gdata, struct st_proto_s *proto) { pr_info("%s: id %d\n", __func__, proto->chnl_id); - st_gdata->list[proto->chnl_id] = NULL; +/* st_gdata->list[proto->chnl_id] = NULL; */ + st_gdata->is_registered[proto->chnl_id] = false; } /* @@ -104,7 +106,7 @@ void st_send_frame(unsigned char chnl_id, struct st_data_s *st_gdata) if (unlikely (st_gdata == NULL || st_gdata->rx_skb == NULL - || st_gdata->list[chnl_id] == NULL)) { + || st_gdata->is_registered[chnl_id] == false)) { pr_err("chnl_id %d not registered, no data to send?", chnl_id); kfree_skb(st_gdata->rx_skb); @@ -141,14 +143,15 @@ void st_reg_complete(struct st_data_s *st_gdata, char err) unsigned char i = 0; pr_info(" %s ", __func__); for (i = 0; i < ST_MAX_CHANNELS; i++) { - if (likely(st_gdata != NULL && st_gdata->list[i] != NULL && - st_gdata->list[i]->reg_complete_cb != NULL)) { + if (likely(st_gdata != NULL && + st_gdata->is_registered[i] == true && + st_gdata->list[i]->reg_complete_cb != NULL)) { st_gdata->list[i]->reg_complete_cb (st_gdata->list[i]->priv_data, err); pr_info("protocol %d's cb sent %d\n", i, err); if (err) { /* cleanup registered protocol */ st_gdata->protos_registered--; - st_gdata->list[i] = NULL; + st_gdata->is_registered[i] = false; } } } @@ -475,9 +478,9 @@ void kim_st_list_protocols(struct st_data_s *st_gdata, void *buf) { seq_printf(buf, "[%d]\nBT=%c\nFM=%c\nGPS=%c\n", st_gdata->protos_registered, - 
st_gdata->list[0x04] != NULL ? 'R' : 'U', - st_gdata->list[0x08] != NULL ? 'R' : 'U', - st_gdata->list[0x09] != NULL ? 'R' : 'U'); + st_gdata->is_registered[0x04] == true ? 'R' : 'U', + st_gdata->is_registered[0x08] == true ? 'R' : 'U', + st_gdata->is_registered[0x09] == true ? 'R' : 'U'); } /********************************************************************/ @@ -504,7 +507,7 @@ long st_register(struct st_proto_s *new_proto) return -EPROTONOSUPPORT; } - if (st_gdata->list[new_proto->chnl_id] != NULL) { + if (st_gdata->is_registered[new_proto->chnl_id] == true) { pr_err("chnl_id %d already registered", new_proto->chnl_id); return -EALREADY; } @@ -563,7 +566,7 @@ long st_register(struct st_proto_s *new_proto) /* check for already registered once more, * since the above check is old */ - if (st_gdata->list[new_proto->chnl_id] != NULL) { + if (st_gdata->is_registered[new_proto->chnl_id] == true) { pr_err(" proto %d already registered ", new_proto->chnl_id); return -EALREADY; diff --git a/include/linux/ti_wilink_st.h b/include/linux/ti_wilink_st.h index 7071ec5d0118..b004e557caa9 100644 --- a/include/linux/ti_wilink_st.h +++ b/include/linux/ti_wilink_st.h @@ -140,12 +140,12 @@ extern long st_unregister(struct st_proto_s *); */ struct st_data_s { unsigned long st_state; - struct tty_struct *tty; struct sk_buff *tx_skb; #define ST_TX_SENDING 1 #define ST_TX_WAKEUP 2 unsigned long tx_state; struct st_proto_s *list[ST_MAX_CHANNELS]; + bool is_registered[ST_MAX_CHANNELS]; unsigned long rx_state; unsigned long rx_count; struct sk_buff *rx_skb; @@ -155,6 +155,7 @@ struct st_data_s { unsigned char protos_registered; unsigned long ll_state; void *kim_data; + struct tty_struct *tty; }; /* -- cgit v1.2.3-71-gd317 From c8705082404823a5bb3e02a32ba0764399b9e6f2 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Wed, 20 Apr 2011 09:44:46 +0200 Subject: driver core: let dev_set_drvdata return int instead of void as it can fail MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before commit b402843 (Driver core: move dev_get/set_drvdata to drivers/base/dd.c) calling dev_set_drvdata with dev=NULL was an unchecked error. After some discussion about what to return in this case removing the check (and so producing a null pointer exception) seems fine. 
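With that, a caller that cares can propagate the allocation failure instead of oopsing later; a minimal hypothetical user (example_probe() and struct example_priv are made up) would look like:

#include <linux/device.h>
#include <linux/slab.h>

struct example_priv {
	int state;
};

static int example_probe(struct device *dev)
{
	struct example_priv *priv;
	int ret;

	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
	if (!priv)
		return -ENOMEM;

	/* may now fail if device_private_init() cannot allocate dev->p */
	ret = dev_set_drvdata(dev, priv);
	if (ret) {
		kfree(priv);
		return ret;
	}

	return 0;
}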
Signed-off-by: Uwe Kleine-König Signed-off-by: Greg Kroah-Hartman --- drivers/base/dd.c | 7 +++---- include/linux/device.h | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 7e9219b02796..e3a3eff1dacc 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -413,17 +413,16 @@ void *dev_get_drvdata(const struct device *dev) } EXPORT_SYMBOL(dev_get_drvdata); -void dev_set_drvdata(struct device *dev, void *data) +int dev_set_drvdata(struct device *dev, void *data) { int error; - if (!dev) - return; if (!dev->p) { error = device_private_init(dev); if (error) - return; + return error; } dev->p->driver_data = data; + return 0; } EXPORT_SYMBOL(dev_set_drvdata); diff --git a/include/linux/device.h b/include/linux/device.h index 350ceda4de97..2215d013ca96 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -557,7 +557,7 @@ extern int device_move(struct device *dev, struct device *new_parent, extern const char *device_get_devnode(struct device *dev, mode_t *mode, const char **tmp); extern void *dev_get_drvdata(const struct device *dev); -extern void dev_set_drvdata(struct device *dev, void *data); +extern int dev_set_drvdata(struct device *dev, void *data); /* * Root device objects for grouping under /sys/devices -- cgit v1.2.3-71-gd317 From b1c43f82c5aa265442f82dba31ce985ebb7aa71c Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Mon, 21 Mar 2011 12:25:08 +0200 Subject: tty: make receive_buf() return the amout of bytes received it makes it simpler to keep track of the amount of bytes received and simplifies how flush_to_ldisc counts the remaining bytes. It also fixes a bug of lost bytes on n_tty when flushing too many bytes via the USB serial gadget driver. 
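A minimal sketch of a converted line discipline (example_process() is a placeholder) shows the contract: receive_buf() reports how many bytes it actually consumed, and flush_to_ldisc() advances the buffer by that amount, rescheduling itself if nothing was taken:

#include <linux/module.h>
#include <linux/tty.h>
#include <linux/tty_ldisc.h>

/* placeholder consumer of the received bytes */
static void example_process(void *priv, const unsigned char *cp,
			    char *fp, int count);

static unsigned int example_receive_buf(struct tty_struct *tty,
		const unsigned char *cp, char *fp, int count)
{
	void *priv = tty->disc_data;

	if (!priv)
		return -ENODEV;	/* negative errno, as the converted drivers return */

	/* this sketch always consumes the whole buffer */
	example_process(priv, cp, fp, count);

	return count;
}

static struct tty_ldisc_ops example_ldisc_ops = {
	.owner		= THIS_MODULE,
	.magic		= TTY_LDISC_MAGIC,
	.name		= "example",
	.receive_buf	= example_receive_buf,
};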
Tested-by: Stefan Bigler Tested-by: Toby Gray Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/hci_ldisc.c | 12 +++++--- drivers/input/serio/serport.c | 10 +++++-- drivers/isdn/gigaset/ser-gigaset.c | 8 +++-- drivers/misc/ti-st/st_core.c | 6 ++-- drivers/net/caif/caif_serial.c | 6 ++-- drivers/net/can/slcan.c | 9 ++++-- drivers/net/hamradio/6pack.c | 8 +++-- drivers/net/hamradio/mkiss.c | 11 ++++--- drivers/net/irda/irtty-sir.c | 16 +++++----- drivers/net/ppp_async.c | 6 ++-- drivers/net/ppp_synctty.c | 6 ++-- drivers/net/slip.c | 11 ++++--- drivers/net/wan/x25_asy.c | 7 +++-- drivers/tty/n_gsm.c | 6 ++-- drivers/tty/n_hdlc.c | 18 ++++++----- drivers/tty/n_r3964.c | 10 ++++--- drivers/tty/n_tty.c | 61 +++++++++----------------------------- drivers/tty/tty_buffer.c | 15 ++++++---- drivers/tty/vt/selection.c | 3 +- include/linux/tty_ldisc.h | 9 +++--- 20 files changed, 125 insertions(+), 113 deletions(-) (limited to 'include/linux') diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index 48ad2a7ab080..0d4da5e14ba0 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -357,22 +357,26 @@ static void hci_uart_tty_wakeup(struct tty_struct *tty) * * Return Value: None */ -static void hci_uart_tty_receive(struct tty_struct *tty, const u8 *data, char *flags, int count) +static unsigned int hci_uart_tty_receive(struct tty_struct *tty, + const u8 *data, char *flags, int count) { struct hci_uart *hu = (void *)tty->disc_data; + int received; if (!hu || tty != hu->tty) - return; + return -ENODEV; if (!test_bit(HCI_UART_PROTO_SET, &hu->flags)) - return; + return -EINVAL; spin_lock(&hu->rx_lock); - hu->proto->recv(hu, (void *) data, count); + received = hu->proto->recv(hu, (void *) data, count); hu->hdev->stat.byte_rx += count; spin_unlock(&hu->rx_lock); tty_unthrottle(tty); + + return received; } static int hci_uart_register_dev(struct hci_uart *hu) diff --git a/drivers/input/serio/serport.c b/drivers/input/serio/serport.c index 8755f5f3ad37..f3698967edf6 100644 --- a/drivers/input/serio/serport.c +++ b/drivers/input/serio/serport.c @@ -120,17 +120,21 @@ static void serport_ldisc_close(struct tty_struct *tty) * 'interrupt' routine. */ -static void serport_ldisc_receive(struct tty_struct *tty, const unsigned char *cp, char *fp, int count) +static unsigned int serport_ldisc_receive(struct tty_struct *tty, + const unsigned char *cp, char *fp, int count) { struct serport *serport = (struct serport*) tty->disc_data; unsigned long flags; unsigned int ch_flags; + int ret = 0; int i; spin_lock_irqsave(&serport->lock, flags); - if (!test_bit(SERPORT_ACTIVE, &serport->flags)) + if (!test_bit(SERPORT_ACTIVE, &serport->flags)) { + ret = -EINVAL; goto out; + } for (i = 0; i < count; i++) { switch (fp[i]) { @@ -152,6 +156,8 @@ static void serport_ldisc_receive(struct tty_struct *tty, const unsigned char *c out: spin_unlock_irqrestore(&serport->lock, flags); + + return ret == 0 ? 
count : ret; } /* diff --git a/drivers/isdn/gigaset/ser-gigaset.c b/drivers/isdn/gigaset/ser-gigaset.c index 86a5c4f7775e..1d44d470897c 100644 --- a/drivers/isdn/gigaset/ser-gigaset.c +++ b/drivers/isdn/gigaset/ser-gigaset.c @@ -674,7 +674,7 @@ gigaset_tty_ioctl(struct tty_struct *tty, struct file *file, * cflags buffer containing error flags for received characters (ignored) * count number of received characters */ -static void +static unsigned int gigaset_tty_receive(struct tty_struct *tty, const unsigned char *buf, char *cflags, int count) { @@ -683,12 +683,12 @@ gigaset_tty_receive(struct tty_struct *tty, const unsigned char *buf, struct inbuf_t *inbuf; if (!cs) - return; + return -ENODEV; inbuf = cs->inbuf; if (!inbuf) { dev_err(cs->dev, "%s: no inbuf\n", __func__); cs_put(cs); - return; + return -EINVAL; } tail = inbuf->tail; @@ -725,6 +725,8 @@ gigaset_tty_receive(struct tty_struct *tty, const unsigned char *buf, gig_dbg(DEBUG_INTR, "%s-->BH", __func__); gigaset_schedule_event(cs); cs_put(cs); + + return count; } /* diff --git a/drivers/misc/ti-st/st_core.c b/drivers/misc/ti-st/st_core.c index 486117f72c9f..cb98a7da98ef 100644 --- a/drivers/misc/ti-st/st_core.c +++ b/drivers/misc/ti-st/st_core.c @@ -744,8 +744,8 @@ static void st_tty_close(struct tty_struct *tty) pr_debug("%s: done ", __func__); } -static void st_tty_receive(struct tty_struct *tty, const unsigned char *data, - char *tty_flags, int count) +static unsigned int st_tty_receive(struct tty_struct *tty, + const unsigned char *data, char *tty_flags, int count) { #ifdef VERBOSE print_hex_dump(KERN_DEBUG, ">in>", DUMP_PREFIX_NONE, @@ -758,6 +758,8 @@ static void st_tty_receive(struct tty_struct *tty, const unsigned char *data, */ st_recv(tty->disc_data, data, count); pr_debug("done %s", __func__); + + return count; } /* wake-up function called in from the TTY layer diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c index 3df0c0f8b8bf..73c7e03617ec 100644 --- a/drivers/net/caif/caif_serial.c +++ b/drivers/net/caif/caif_serial.c @@ -167,8 +167,8 @@ static inline void debugfs_tx(struct ser_device *ser, const u8 *data, int size) #endif -static void ldisc_receive(struct tty_struct *tty, const u8 *data, - char *flags, int count) +static unsigned int ldisc_receive(struct tty_struct *tty, + const u8 *data, char *flags, int count) { struct sk_buff *skb = NULL; struct ser_device *ser; @@ -215,6 +215,8 @@ static void ldisc_receive(struct tty_struct *tty, const u8 *data, } else ++ser->dev->stats.rx_dropped; update_tty_status(ser); + + return count; } static int handle_tx(struct ser_device *ser) diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c index b423965a78d1..c600954998d5 100644 --- a/drivers/net/can/slcan.c +++ b/drivers/net/can/slcan.c @@ -425,16 +425,17 @@ static void slc_setup(struct net_device *dev) * in parallel */ -static void slcan_receive_buf(struct tty_struct *tty, +static unsigned int slcan_receive_buf(struct tty_struct *tty, const unsigned char *cp, char *fp, int count) { struct slcan *sl = (struct slcan *) tty->disc_data; + int bytes = count; if (!sl || sl->magic != SLCAN_MAGIC || !netif_running(sl->dev)) - return; + return -ENODEV; /* Read the characters out of the buffer */ - while (count--) { + while (bytes--) { if (fp && *fp++) { if (!test_and_set_bit(SLF_ERROR, &sl->flags)) sl->dev->stats.rx_errors++; @@ -443,6 +444,8 @@ static void slcan_receive_buf(struct tty_struct *tty, } slcan_unesc(sl, *cp++); } + + return count; } /************************************ diff --git 
a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index 3e5d0b6b6516..992089639ea4 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -456,7 +456,7 @@ out: * a block of 6pack data has been received, which can now be decapsulated * and sent on to some IP layer for further processing. */ -static void sixpack_receive_buf(struct tty_struct *tty, +static unsigned int sixpack_receive_buf(struct tty_struct *tty, const unsigned char *cp, char *fp, int count) { struct sixpack *sp; @@ -464,11 +464,11 @@ static void sixpack_receive_buf(struct tty_struct *tty, int count1; if (!count) - return; + return 0; sp = sp_get(tty); if (!sp) - return; + return -ENODEV; memcpy(buf, cp, count < sizeof(buf) ? count : sizeof(buf)); @@ -487,6 +487,8 @@ static void sixpack_receive_buf(struct tty_struct *tty, sp_put(sp); tty_unthrottle(tty); + + return count1; } /* diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c index 4c628393c8b1..0e4f23531140 100644 --- a/drivers/net/hamradio/mkiss.c +++ b/drivers/net/hamradio/mkiss.c @@ -923,13 +923,14 @@ static long mkiss_compat_ioctl(struct tty_struct *tty, struct file *file, * a block of data has been received, which can now be decapsulated * and sent on to the AX.25 layer for further processing. */ -static void mkiss_receive_buf(struct tty_struct *tty, const unsigned char *cp, - char *fp, int count) +static unsigned int mkiss_receive_buf(struct tty_struct *tty, + const unsigned char *cp, char *fp, int count) { struct mkiss *ax = mkiss_get(tty); + int bytes = count; if (!ax) - return; + return -ENODEV; /* * Argh! mtu change time! - costs us the packet part received @@ -939,7 +940,7 @@ static void mkiss_receive_buf(struct tty_struct *tty, const unsigned char *cp, ax_changedmtu(ax); /* Read the characters out of the buffer */ - while (count--) { + while (bytes--) { if (fp != NULL && *fp++) { if (!test_and_set_bit(AXF_ERROR, &ax->flags)) ax->dev->stats.rx_errors++; @@ -952,6 +953,8 @@ static void mkiss_receive_buf(struct tty_struct *tty, const unsigned char *cp, mkiss_put(ax); tty_unthrottle(tty); + + return count; } /* diff --git a/drivers/net/irda/irtty-sir.c b/drivers/net/irda/irtty-sir.c index 3352b2443e58..035861d8acb1 100644 --- a/drivers/net/irda/irtty-sir.c +++ b/drivers/net/irda/irtty-sir.c @@ -216,23 +216,23 @@ static int irtty_do_write(struct sir_dev *dev, const unsigned char *ptr, size_t * usbserial: urb-complete-interrupt / softint */ -static void irtty_receive_buf(struct tty_struct *tty, const unsigned char *cp, - char *fp, int count) +static unsigned int irtty_receive_buf(struct tty_struct *tty, + const unsigned char *cp, char *fp, int count) { struct sir_dev *dev; struct sirtty_cb *priv = tty->disc_data; int i; - IRDA_ASSERT(priv != NULL, return;); - IRDA_ASSERT(priv->magic == IRTTY_MAGIC, return;); + IRDA_ASSERT(priv != NULL, return -ENODEV;); + IRDA_ASSERT(priv->magic == IRTTY_MAGIC, return -EINVAL;); if (unlikely(count==0)) /* yes, this happens */ - return; + return 0; dev = priv->dev; if (!dev) { IRDA_WARNING("%s(), not ready yet!\n", __func__); - return; + return -ENODEV; } for (i = 0; i < count; i++) { @@ -242,11 +242,13 @@ static void irtty_receive_buf(struct tty_struct *tty, const unsigned char *cp, if (fp && *fp++) { IRDA_DEBUG(0, "Framing or parity error!\n"); sirdev_receive(dev, NULL, 0); /* notify sir_dev (updating stats) */ - return; + return -EINVAL; } } sirdev_receive(dev, cp, count); + + return count; } /* diff --git a/drivers/net/ppp_async.c b/drivers/net/ppp_async.c index 
a1b82c9c67d2..53872d7d7382 100644 --- a/drivers/net/ppp_async.c +++ b/drivers/net/ppp_async.c @@ -340,7 +340,7 @@ ppp_asynctty_poll(struct tty_struct *tty, struct file *file, poll_table *wait) } /* May sleep, don't call from interrupt level or with interrupts disabled */ -static void +static unsigned int ppp_asynctty_receive(struct tty_struct *tty, const unsigned char *buf, char *cflags, int count) { @@ -348,7 +348,7 @@ ppp_asynctty_receive(struct tty_struct *tty, const unsigned char *buf, unsigned long flags; if (!ap) - return; + return -ENODEV; spin_lock_irqsave(&ap->recv_lock, flags); ppp_async_input(ap, buf, cflags, count); spin_unlock_irqrestore(&ap->recv_lock, flags); @@ -356,6 +356,8 @@ ppp_asynctty_receive(struct tty_struct *tty, const unsigned char *buf, tasklet_schedule(&ap->tsk); ap_put(ap); tty_unthrottle(tty); + + return count; } static void diff --git a/drivers/net/ppp_synctty.c b/drivers/net/ppp_synctty.c index 2573f525f11c..0815790a5cf9 100644 --- a/drivers/net/ppp_synctty.c +++ b/drivers/net/ppp_synctty.c @@ -381,7 +381,7 @@ ppp_sync_poll(struct tty_struct *tty, struct file *file, poll_table *wait) } /* May sleep, don't call from interrupt level or with interrupts disabled */ -static void +static unsigned int ppp_sync_receive(struct tty_struct *tty, const unsigned char *buf, char *cflags, int count) { @@ -389,7 +389,7 @@ ppp_sync_receive(struct tty_struct *tty, const unsigned char *buf, unsigned long flags; if (!ap) - return; + return -ENODEV; spin_lock_irqsave(&ap->recv_lock, flags); ppp_sync_input(ap, buf, cflags, count); spin_unlock_irqrestore(&ap->recv_lock, flags); @@ -397,6 +397,8 @@ ppp_sync_receive(struct tty_struct *tty, const unsigned char *buf, tasklet_schedule(&ap->tsk); sp_put(ap); tty_unthrottle(tty); + + return count; } static void diff --git a/drivers/net/slip.c b/drivers/net/slip.c index 86cbb9ea2f26..86718d358395 100644 --- a/drivers/net/slip.c +++ b/drivers/net/slip.c @@ -670,16 +670,17 @@ static void sl_setup(struct net_device *dev) * in parallel */ -static void slip_receive_buf(struct tty_struct *tty, const unsigned char *cp, - char *fp, int count) +static unsigned int slip_receive_buf(struct tty_struct *tty, + const unsigned char *cp, char *fp, int count) { struct slip *sl = tty->disc_data; + int bytes = count; if (!sl || sl->magic != SLIP_MAGIC || !netif_running(sl->dev)) - return; + return -ENODEV; /* Read the characters out of the buffer */ - while (count--) { + while (bytes--) { if (fp && *fp++) { if (!test_and_set_bit(SLF_ERROR, &sl->flags)) sl->dev->stats.rx_errors++; @@ -693,6 +694,8 @@ static void slip_receive_buf(struct tty_struct *tty, const unsigned char *cp, #endif slip_unesc(sl, *cp++); } + + return count; } /************************************ diff --git a/drivers/net/wan/x25_asy.c b/drivers/net/wan/x25_asy.c index 24297b274cd4..40398bf7d036 100644 --- a/drivers/net/wan/x25_asy.c +++ b/drivers/net/wan/x25_asy.c @@ -517,17 +517,18 @@ static int x25_asy_close(struct net_device *dev) * and sent on to some IP layer for further processing. 
*/ -static void x25_asy_receive_buf(struct tty_struct *tty, +static unsigned int x25_asy_receive_buf(struct tty_struct *tty, const unsigned char *cp, char *fp, int count) { struct x25_asy *sl = tty->disc_data; + int bytes = count; if (!sl || sl->magic != X25_ASY_MAGIC || !netif_running(sl->dev)) return; /* Read the characters out of the buffer */ - while (count--) { + while (bytes--) { if (fp && *fp++) { if (!test_and_set_bit(SLF_ERROR, &sl->flags)) sl->dev->stats.rx_errors++; @@ -536,6 +537,8 @@ static void x25_asy_receive_buf(struct tty_struct *tty, } x25_asy_unesc(sl, *cp++); } + + return count; } /* diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index 47f8cdb207f1..6abc73598847 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -2138,8 +2138,8 @@ static void gsmld_detach_gsm(struct tty_struct *tty, struct gsm_mux *gsm) gsm->tty = NULL; } -static void gsmld_receive_buf(struct tty_struct *tty, const unsigned char *cp, - char *fp, int count) +static unsigned int gsmld_receive_buf(struct tty_struct *tty, + const unsigned char *cp, char *fp, int count) { struct gsm_mux *gsm = tty->disc_data; const unsigned char *dp; @@ -2173,6 +2173,8 @@ static void gsmld_receive_buf(struct tty_struct *tty, const unsigned char *cp, } /* FASYNC if needed ? */ /* If clogged call tty_throttle(tty); */ + + return count; } /** diff --git a/drivers/tty/n_hdlc.c b/drivers/tty/n_hdlc.c index cea56033b34c..cac666314aef 100644 --- a/drivers/tty/n_hdlc.c +++ b/drivers/tty/n_hdlc.c @@ -188,8 +188,8 @@ static unsigned int n_hdlc_tty_poll(struct tty_struct *tty, struct file *filp, poll_table *wait); static int n_hdlc_tty_open(struct tty_struct *tty); static void n_hdlc_tty_close(struct tty_struct *tty); -static void n_hdlc_tty_receive(struct tty_struct *tty, const __u8 *cp, - char *fp, int count); +static unsigned int n_hdlc_tty_receive(struct tty_struct *tty, + const __u8 *cp, char *fp, int count); static void n_hdlc_tty_wakeup(struct tty_struct *tty); #define bset(p,b) ((p)[(b) >> 5] |= (1 << ((b) & 0x1f))) @@ -509,8 +509,8 @@ static void n_hdlc_tty_wakeup(struct tty_struct *tty) * Called by tty low level driver when receive data is available. Data is * interpreted as one HDLC frame. 
*/ -static void n_hdlc_tty_receive(struct tty_struct *tty, const __u8 *data, - char *flags, int count) +static unsigned int n_hdlc_tty_receive(struct tty_struct *tty, + const __u8 *data, char *flags, int count) { register struct n_hdlc *n_hdlc = tty2n_hdlc (tty); register struct n_hdlc_buf *buf; @@ -521,20 +521,20 @@ static void n_hdlc_tty_receive(struct tty_struct *tty, const __u8 *data, /* This can happen if stuff comes in on the backup tty */ if (!n_hdlc || tty != n_hdlc->tty) - return; + return -ENODEV; /* verify line is using HDLC discipline */ if (n_hdlc->magic != HDLC_MAGIC) { printk("%s(%d) line not using HDLC discipline\n", __FILE__,__LINE__); - return; + return -EINVAL; } if ( count>maxframe ) { if (debuglevel >= DEBUG_LEVEL_INFO) printk("%s(%d) rx count>maxframesize, data discarded\n", __FILE__,__LINE__); - return; + return -EINVAL; } /* get a free HDLC buffer */ @@ -550,7 +550,7 @@ static void n_hdlc_tty_receive(struct tty_struct *tty, const __u8 *data, if (debuglevel >= DEBUG_LEVEL_INFO) printk("%s(%d) no more rx buffers, data discarded\n", __FILE__,__LINE__); - return; + return -EINVAL; } /* copy received data to HDLC buffer */ @@ -565,6 +565,8 @@ static void n_hdlc_tty_receive(struct tty_struct *tty, const __u8 *data, if (n_hdlc->tty->fasync != NULL) kill_fasync (&n_hdlc->tty->fasync, SIGIO, POLL_IN); + return count; + } /* end of n_hdlc_tty_receive() */ /** diff --git a/drivers/tty/n_r3964.c b/drivers/tty/n_r3964.c index 5c6c31459a2f..a4bc39c21a43 100644 --- a/drivers/tty/n_r3964.c +++ b/drivers/tty/n_r3964.c @@ -139,8 +139,8 @@ static int r3964_ioctl(struct tty_struct *tty, struct file *file, static void r3964_set_termios(struct tty_struct *tty, struct ktermios *old); static unsigned int r3964_poll(struct tty_struct *tty, struct file *file, struct poll_table_struct *wait); -static void r3964_receive_buf(struct tty_struct *tty, const unsigned char *cp, - char *fp, int count); +static unsigned int r3964_receive_buf(struct tty_struct *tty, + const unsigned char *cp, char *fp, int count); static struct tty_ldisc_ops tty_ldisc_N_R3964 = { .owner = THIS_MODULE, @@ -1239,8 +1239,8 @@ static unsigned int r3964_poll(struct tty_struct *tty, struct file *file, return result; } -static void r3964_receive_buf(struct tty_struct *tty, const unsigned char *cp, - char *fp, int count) +static unsigned int r3964_receive_buf(struct tty_struct *tty, + const unsigned char *cp, char *fp, int count) { struct r3964_info *pInfo = tty->disc_data; const unsigned char *p; @@ -1257,6 +1257,8 @@ static void r3964_receive_buf(struct tty_struct *tty, const unsigned char *cp, } } + + return count; } MODULE_LICENSE("GPL"); diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index 0ad32888091c..95d0a9c2dd13 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -81,38 +81,6 @@ static inline int tty_put_user(struct tty_struct *tty, unsigned char x, return put_user(x, ptr); } -/** - * n_tty_set__room - receive space - * @tty: terminal - * - * Called by the driver to find out how much data it is - * permitted to feed to the line discipline without any being lost - * and thus to manage flow control. Not serialized. Answers for the - * "instant". - */ - -static void n_tty_set_room(struct tty_struct *tty) -{ - /* tty->read_cnt is not read locked ? */ - int left = N_TTY_BUF_SIZE - tty->read_cnt - 1; - int old_left; - - /* - * If we are doing input canonicalization, and there are no - * pending newlines, let characters through without limit, so - * that erase characters will be handled. 
Other excess - * characters will be beeped. - */ - if (left <= 0) - left = tty->icanon && !tty->canon_data; - old_left = tty->receive_room; - tty->receive_room = left; - - /* Did this open up the receive buffer? We may need to flip */ - if (left && !old_left) - schedule_work(&tty->buf.work); -} - static void put_tty_queue_nolock(unsigned char c, struct tty_struct *tty) { if (tty->read_cnt < N_TTY_BUF_SIZE) { @@ -184,7 +152,6 @@ static void reset_buffer_flags(struct tty_struct *tty) tty->canon_head = tty->canon_data = tty->erasing = 0; memset(&tty->read_flags, 0, sizeof tty->read_flags); - n_tty_set_room(tty); check_unthrottle(tty); } @@ -1360,17 +1327,19 @@ static void n_tty_write_wakeup(struct tty_struct *tty) * calls one at a time and in order (or using flush_to_ldisc) */ -static void n_tty_receive_buf(struct tty_struct *tty, const unsigned char *cp, - char *fp, int count) +static unsigned int n_tty_receive_buf(struct tty_struct *tty, + const unsigned char *cp, char *fp, int count) { const unsigned char *p; char *f, flags = TTY_NORMAL; int i; char buf[64]; unsigned long cpuflags; + int left; + int ret = 0; if (!tty->read_buf) - return; + return 0; if (tty->real_raw) { spin_lock_irqsave(&tty->read_lock, cpuflags); @@ -1380,6 +1349,7 @@ static void n_tty_receive_buf(struct tty_struct *tty, const unsigned char *cp, memcpy(tty->read_buf + tty->read_head, cp, i); tty->read_head = (tty->read_head + i) & (N_TTY_BUF_SIZE-1); tty->read_cnt += i; + ret += i; cp += i; count -= i; @@ -1389,8 +1359,10 @@ static void n_tty_receive_buf(struct tty_struct *tty, const unsigned char *cp, memcpy(tty->read_buf + tty->read_head, cp, i); tty->read_head = (tty->read_head + i) & (N_TTY_BUF_SIZE-1); tty->read_cnt += i; + ret += i; spin_unlock_irqrestore(&tty->read_lock, cpuflags); } else { + ret = count; for (i = count, p = cp, f = fp; i; i--, p++) { if (f) flags = *f++; @@ -1418,8 +1390,6 @@ static void n_tty_receive_buf(struct tty_struct *tty, const unsigned char *cp, tty->ops->flush_chars(tty); } - n_tty_set_room(tty); - if ((!tty->icanon && (tty->read_cnt >= tty->minimum_to_wake)) || L_EXTPROC(tty)) { kill_fasync(&tty->fasync, SIGIO, POLL_IN); @@ -1432,8 +1402,12 @@ static void n_tty_receive_buf(struct tty_struct *tty, const unsigned char *cp, * mode. We don't want to throttle the driver if we're in * canonical mode and don't have a newline yet! */ - if (tty->receive_room < TTY_THRESHOLD_THROTTLE) + left = N_TTY_BUF_SIZE - tty->read_cnt - 1; + + if (left < TTY_THRESHOLD_THROTTLE) tty_throttle(tty); + + return ret; } int is_ignored(int sig) @@ -1477,7 +1451,6 @@ static void n_tty_set_termios(struct tty_struct *tty, struct ktermios *old) if (test_bit(TTY_HW_COOK_IN, &tty->flags)) { tty->raw = 1; tty->real_raw = 1; - n_tty_set_room(tty); return; } if (I_ISTRIP(tty) || I_IUCLC(tty) || I_IGNCR(tty) || @@ -1530,7 +1503,6 @@ static void n_tty_set_termios(struct tty_struct *tty, struct ktermios *old) else tty->real_raw = 0; } - n_tty_set_room(tty); /* The termios change make the tty ready for I/O */ wake_up_interruptible(&tty->write_wait); wake_up_interruptible(&tty->read_wait); @@ -1812,8 +1784,6 @@ do_it_again: retval = -ERESTARTSYS; break; } - /* FIXME: does n_tty_set_room need locking ? */ - n_tty_set_room(tty); timeout = schedule_timeout(timeout); continue; } @@ -1885,10 +1855,8 @@ do_it_again: * longer than TTY_THRESHOLD_UNTHROTTLE in canonical mode, * we won't get any more characters. 
*/ - if (n_tty_chars_in_buffer(tty) <= TTY_THRESHOLD_UNTHROTTLE) { - n_tty_set_room(tty); + if (n_tty_chars_in_buffer(tty) <= TTY_THRESHOLD_UNTHROTTLE) check_unthrottle(tty); - } if (b - buf >= minimum) break; @@ -1910,7 +1878,6 @@ do_it_again: } else if (test_and_clear_bit(TTY_PUSH, &tty->flags)) goto do_it_again; - n_tty_set_room(tty); return retval; } diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c index f1a7918d71aa..46de2e075dac 100644 --- a/drivers/tty/tty_buffer.c +++ b/drivers/tty/tty_buffer.c @@ -416,6 +416,7 @@ static void flush_to_ldisc(struct work_struct *work) struct tty_buffer *head, *tail = tty->buf.tail; int seen_tail = 0; while ((head = tty->buf.head) != NULL) { + int copied; int count; char *char_buf; unsigned char *flag_buf; @@ -442,17 +443,19 @@ static void flush_to_ldisc(struct work_struct *work) line discipline as we want to empty the queue */ if (test_bit(TTY_FLUSHPENDING, &tty->flags)) break; - if (!tty->receive_room || seen_tail) - break; - if (count > tty->receive_room) - count = tty->receive_room; char_buf = head->char_buf_ptr + head->read; flag_buf = head->flag_buf_ptr + head->read; - head->read += count; spin_unlock_irqrestore(&tty->buf.lock, flags); - disc->ops->receive_buf(tty, char_buf, + copied = disc->ops->receive_buf(tty, char_buf, flag_buf, count); spin_lock_irqsave(&tty->buf.lock, flags); + + head->read += copied; + + if (copied == 0 || seen_tail) { + schedule_work(&tty->buf.work); + break; + } } clear_bit(TTY_FLUSHING, &tty->flags); } diff --git a/drivers/tty/vt/selection.c b/drivers/tty/vt/selection.c index fb864e7fcd13..67b1d0d7c8ac 100644 --- a/drivers/tty/vt/selection.c +++ b/drivers/tty/vt/selection.c @@ -332,8 +332,7 @@ int paste_selection(struct tty_struct *tty) continue; } count = sel_buffer_lth - pasted; - count = min(count, tty->receive_room); - tty->ldisc->ops->receive_buf(tty, sel_buffer + pasted, + count = tty->ldisc->ops->receive_buf(tty, sel_buffer + pasted, NULL, count); pasted += count; } diff --git a/include/linux/tty_ldisc.h b/include/linux/tty_ldisc.h index ff7dc08696a8..5b07792ccb46 100644 --- a/include/linux/tty_ldisc.h +++ b/include/linux/tty_ldisc.h @@ -76,7 +76,7 @@ * tty device. It is solely the responsibility of the line * discipline to handle poll requests. * - * void (*receive_buf)(struct tty_struct *, const unsigned char *cp, + * unsigned int (*receive_buf)(struct tty_struct *, const unsigned char *cp, * char *fp, int count); * * This function is called by the low-level tty driver to send @@ -84,7 +84,8 @@ * processing. is a pointer to the buffer of input * character received by the device. is a pointer to a * pointer of flag bytes which indicate whether a character was - * received with a parity error, etc. + * received with a parity error, etc. Returns the amount of bytes + * received. * * void (*write_wakeup)(struct tty_struct *); * @@ -140,8 +141,8 @@ struct tty_ldisc_ops { /* * The following routines are called from below. 
*/ - void (*receive_buf)(struct tty_struct *, const unsigned char *cp, - char *fp, int count); + unsigned int (*receive_buf)(struct tty_struct *, + const unsigned char *cp, char *fp, int count); void (*write_wakeup)(struct tty_struct *); void (*dcd_change)(struct tty_struct *, unsigned int, struct pps_event_time *); -- cgit v1.2.3-71-gd317 From 0911f124bf55357803d53197cc1ae5479f5e37e2 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 10 Apr 2011 11:01:51 +0200 Subject: genirq: Forgotten updates/deletions after removal of compat code commit 0c6f8a8b917ad361319c8ace3e9f28e69bfdb4c1 ("genirq: Remove compat code") removed the compat code, but forgot to update some references in comments and delete some of its documentation. Signed-off-by: Geert Uytterhoeven Link: http://lkml.kernel.org/r/%3C1302426113-13808-1-git-send-email-geert%40linux-m68k.org%3E Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 19 +------------------ include/linux/irqdesc.h | 2 +- 2 files changed, 2 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 09a308072f56..a71dd18639fb 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -53,7 +53,7 @@ typedef void (*irq_preflow_handler_t)(struct irq_data *data); * Bits which can be modified via irq_set/clear/modify_status_flags() * IRQ_LEVEL - Interrupt is level type. Will be also * updated in the code when the above trigger - * bits are modified via set_irq_type() + * bits are modified via irq_set_irq_type() * IRQ_PER_CPU - Mark an interrupt PER_CPU. Will protect * it from affinity setting * IRQ_NOPROBE - Interrupt cannot be probed by autoprobing @@ -261,23 +261,6 @@ static inline void irqd_clr_chained_irq_inprogress(struct irq_data *d) * struct irq_chip - hardware interrupt chip descriptor * * @name: name for /proc/interrupts - * @startup: deprecated, replaced by irq_startup - * @shutdown: deprecated, replaced by irq_shutdown - * @enable: deprecated, replaced by irq_enable - * @disable: deprecated, replaced by irq_disable - * @ack: deprecated, replaced by irq_ack - * @mask: deprecated, replaced by irq_mask - * @mask_ack: deprecated, replaced by irq_mask_ack - * @unmask: deprecated, replaced by irq_unmask - * @eoi: deprecated, replaced by irq_eoi - * @end: deprecated, will go away with __do_IRQ() - * @set_affinity: deprecated, replaced by irq_set_affinity - * @retrigger: deprecated, replaced by irq_retrigger - * @set_type: deprecated, replaced by irq_set_type - * @set_wake: deprecated, replaced by irq_wake - * @bus_lock: deprecated, replaced by irq_bus_lock - * @bus_sync_unlock: deprecated, replaced by irq_bus_sync_unlock - * * @irq_startup: start up the interrupt (defaults to ->enable if NULL) * @irq_shutdown: shut down the interrupt (defaults to ->disable if NULL) * @irq_enable: enable the interrupt (defaults to chip->unmask if NULL) diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index a082905b5ebe..8e1dc8ea5471 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -21,7 +21,7 @@ struct timer_rand_state; * @status: status information * @core_internal_state__do_not_mess_with_it: core internal status information * @depth: disable-depth, for nested irq_disable() calls - * @wake_depth: enable depth, for multiple set_irq_wake() callers + * @wake_depth: enable depth, for multiple irq_set_irq_wake() callers * @irq_count: stats field to detect stalled irqs * @last_unhandled: aging timer for unhandled count * @irqs_unhandled: stats field for spurious unhandled 
interrupts -- cgit v1.2.3-71-gd317 From 770767787c23040dc152e7ae230597ff55b39470 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 10 Apr 2011 11:01:52 +0200 Subject: genirq: irq_desc: Document preflow_handler and affinity_hint [ tglx: Filled in the FIXME place holders ] Signed-off-by: Geert Uytterhoeven Link: http://lkml.kernel.org/r/%3C1302426113-13808-2-git-send-email-geert%40linux-m68k.org%3E Signed-off-by: Thomas Gleixner --- include/linux/irqdesc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 8e1dc8ea5471..c70b1aa4b93a 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -16,7 +16,8 @@ struct timer_rand_state; * @irq_data: per irq and chip data passed down to chip functions * @timer_rand_state: pointer to timer rand state struct * @kstat_irqs: irq stats per cpu - * @handle_irq: highlevel irq-events handler [if NULL, __do_IRQ()] + * @handle_irq: highlevel irq-events handler + * @preflow_handler: handler called before the flow handler (currently used by sparc) * @action: the irq action chain * @status: status information * @core_internal_state__do_not_mess_with_it: core internal status information @@ -26,6 +27,7 @@ struct timer_rand_state; * @last_unhandled: aging timer for unhandled count * @irqs_unhandled: stats field for spurious unhandled interrupts * @lock: locking for SMP + * @affinity_hint: hint to user space for preferred irq affinity * @affinity_notify: context for notification of affinity changes * @pending_mask: pending rebalanced interrupts * @threads_oneshot: bitfield to handle shared oneshot threads -- cgit v1.2.3-71-gd317 From 7f1b1244e159a8490d7fb13667c6cb7e1e75046b Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Thu, 7 Apr 2011 06:01:44 +0900 Subject: genirq: Support per-IRQ thread disabling. This adds support for disabling threading on a per-IRQ basis via the IRQ status instead of the IRQ flow, which is necessary for interrupts that don't follow the natural IRQ flow channels, such as those that are virtually created. The new APIs added are simply: irq_set_thread() irq_set_nothread() which follow the rest of the IRQ status routines. Chained handlers also have IRQ_NOTHREAD set on them automatically, making the lack of threading explicit rather than implicit. Subsequently, the nothread flag can be viewed through the standard genirq debugging facilities. 
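As a minimal illustration (not part of the patch itself), a driver that services a virtually created demux interrupt might use the new calls roughly as follows; the handler, function names and device name are invented for this sketch:

#include <linux/irq.h>
#include <linux/interrupt.h>

static irqreturn_t example_demux_handler(int irq, void *dev_id)
{
	/* must run in hard interrupt context, never in a forced thread */
	return IRQ_HANDLED;
}

static int example_setup_virq(unsigned int virq)
{
	/* opt this virtually created IRQ out of (forced) threading */
	irq_set_nothread(virq);

	return request_irq(virq, example_demux_handler, 0,
			   "example-demux", NULL);
}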
[ tglx: Fixed cleanup fallout ] Signed-off-by: Paul Mundt Link: http://lkml.kernel.org/r/%3C20110406210135.GF18426%40linux-sh.org%3E Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 14 +++++++++++++- kernel/irq/chip.c | 1 + kernel/irq/debug.h | 1 + kernel/irq/manage.c | 3 ++- kernel/irq/settings.h | 17 +++++++++++++++++ 5 files changed, 34 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index a71dd18639fb..39c23786c1db 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -59,6 +59,7 @@ typedef void (*irq_preflow_handler_t)(struct irq_data *data); * IRQ_NOPROBE - Interrupt cannot be probed by autoprobing * IRQ_NOREQUEST - Interrupt cannot be requested via * request_irq() + * IRQ_NOTHREAD - Interrupt cannot be threaded * IRQ_NOAUTOEN - Interrupt is not automatically enabled in * request/setup_irq() * IRQ_NO_BALANCING - Interrupt cannot be balanced (affinity set) @@ -85,6 +86,7 @@ enum { IRQ_NO_BALANCING = (1 << 13), IRQ_MOVE_PCNTXT = (1 << 14), IRQ_NESTED_THREAD = (1 << 15), + IRQ_NOTHREAD = (1 << 16), }; #define IRQF_MODIFY_MASK \ @@ -422,7 +424,7 @@ irq_set_handler(unsigned int irq, irq_flow_handler_t handle) /* * Set a highlevel chained flow handler for a given IRQ. * (a chained handler is automatically enabled and set to - * IRQ_NOREQUEST and IRQ_NOPROBE) + * IRQ_NOREQUEST, IRQ_NOPROBE, and IRQ_NOTHREAD) */ static inline void irq_set_chained_handler(unsigned int irq, irq_flow_handler_t handle) @@ -452,6 +454,16 @@ static inline void irq_set_probe(unsigned int irq) irq_modify_status(irq, IRQ_NOPROBE, 0); } +static inline void irq_set_nothread(unsigned int irq) +{ + irq_modify_status(irq, 0, IRQ_NOTHREAD); +} + +static inline void irq_set_thread(unsigned int irq) +{ + irq_modify_status(irq, IRQ_NOTHREAD, 0); +} + static inline void irq_set_nested_thread(unsigned int irq, bool nest) { if (nest) diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 4af1e2b244cb..52d856d513ff 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -573,6 +573,7 @@ __irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, if (handle != handle_bad_irq && is_chained) { irq_settings_set_noprobe(desc); irq_settings_set_norequest(desc); + irq_settings_set_nothread(desc); irq_startup(desc); } out: diff --git a/kernel/irq/debug.h b/kernel/irq/debug.h index 306cba37e9a5..97a8bfadc88a 100644 --- a/kernel/irq/debug.h +++ b/kernel/irq/debug.h @@ -27,6 +27,7 @@ static inline void print_irq_desc(unsigned int irq, struct irq_desc *desc) P(IRQ_PER_CPU); P(IRQ_NOPROBE); P(IRQ_NOREQUEST); + P(IRQ_NOTHREAD); P(IRQ_NOAUTOEN); PS(IRQS_AUTODETECT); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 07c1611f3899..f7ce0021e1c4 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -900,7 +900,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) */ new->handler = irq_nested_primary_handler; } else { - irq_setup_forced_threading(new); + if (irq_settings_can_thread(desc)) + irq_setup_forced_threading(new); } /* diff --git a/kernel/irq/settings.h b/kernel/irq/settings.h index 0d91730b6330..f1667833d444 100644 --- a/kernel/irq/settings.h +++ b/kernel/irq/settings.h @@ -8,6 +8,7 @@ enum { _IRQ_LEVEL = IRQ_LEVEL, _IRQ_NOPROBE = IRQ_NOPROBE, _IRQ_NOREQUEST = IRQ_NOREQUEST, + _IRQ_NOTHREAD = IRQ_NOTHREAD, _IRQ_NOAUTOEN = IRQ_NOAUTOEN, _IRQ_MOVE_PCNTXT = IRQ_MOVE_PCNTXT, _IRQ_NO_BALANCING = IRQ_NO_BALANCING, @@ -20,6 +21,7 @@ enum { #define IRQ_LEVEL GOT_YOU_MORON #define IRQ_NOPROBE 
GOT_YOU_MORON #define IRQ_NOREQUEST GOT_YOU_MORON +#define IRQ_NOTHREAD GOT_YOU_MORON #define IRQ_NOAUTOEN GOT_YOU_MORON #define IRQ_NESTED_THREAD GOT_YOU_MORON #undef IRQF_MODIFY_MASK @@ -94,6 +96,21 @@ static inline void irq_settings_set_norequest(struct irq_desc *desc) desc->status_use_accessors |= _IRQ_NOREQUEST; } +static inline bool irq_settings_can_thread(struct irq_desc *desc) +{ + return !(desc->status_use_accessors & _IRQ_NOTHREAD); +} + +static inline void irq_settings_clr_nothread(struct irq_desc *desc) +{ + desc->status_use_accessors &= ~_IRQ_NOTHREAD; +} + +static inline void irq_settings_set_nothread(struct irq_desc *desc) +{ + desc->status_use_accessors |= _IRQ_NOTHREAD; +} + static inline bool irq_settings_can_probe(struct irq_desc *desc) { return !(desc->status_use_accessors & _IRQ_NOPROBE); -- cgit v1.2.3-71-gd317 From 7d8280624797bbe2f5170bd3c85c75a8c9c74242 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 3 Apr 2011 11:42:53 +0200 Subject: genirq: Implement a generic interrupt chip Implement a generic interrupt chip, which is configurable and is able to handle the most common irq chip implementations. Signed-off-by: Thomas Gleixner Cc: linux-arm-kernel@lists.infradead.org Tested-by: H Hartley Sweeten Tested-by: Tony Lindgren Tested-by; Kevin Hilman --- include/linux/irq.h | 135 ++++++++++++++++++++++++ kernel/irq/Makefile | 1 + kernel/irq/generic-chip.c | 261 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 397 insertions(+) create mode 100644 kernel/irq/generic-chip.c (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 39c23786c1db..2ba2f1216790 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -568,6 +568,141 @@ static inline int irq_reserve_irq(unsigned int irq) return irq_reserve_irqs(irq, 1); } +#ifndef irq_reg_writel +# define irq_reg_writel(val, addr) writel(val, addr) +#endif +#ifndef irq_reg_readl +# define irq_reg_readl(addr) readl(addr) +#endif + +/** + * struct irq_chip_regs - register offsets for struct irq_gci + * @enable: Enable register offset to reg_base + * @disable: Disable register offset to reg_base + * @mask: Mask register offset to reg_base + * @ack: Ack register offset to reg_base + * @eoi: Eoi register offset to reg_base + * @type: Type configuration register offset to reg_base + * @polarity: Polarity configuration register offset to reg_base + */ +struct irq_chip_regs { + unsigned long enable; + unsigned long disable; + unsigned long mask; + unsigned long ack; + unsigned long eoi; + unsigned long type; + unsigned long polarity; +}; + +/** + * struct irq_chip_type - Generic interrupt chip instance for a flow type + * @chip: The real interrupt chip which provides the callbacks + * @regs: Register offsets for this chip + * @handler: Flow handler associated with this chip + * @type: Chip can handle these flow types + * + * A irq_generic_chip can have several instances of irq_chip_type when + * it requires different functions and register offsets for different + * flow types. 
+ */ +struct irq_chip_type { + struct irq_chip chip; + struct irq_chip_regs regs; + irq_flow_handler_t handler; + u32 type; +}; + +/** + * struct irq_chip_generic - Generic irq chip data structure + * @lock: Lock to protect register and cache data access + * @reg_base: Register base address (virtual) + * @irq_base: Interrupt base nr for this chip + * @irq_cnt: Number of interrupts handled by this chip + * @mask_cache: Cached mask register + * @type_cache: Cached type register + * @polarity_cache: Cached polarity register + * @wake_enabled: Interrupt can wakeup from suspend + * @wake_active: Interrupt is marked as an wakeup from suspend source + * @num_ct: Number of available irq_chip_type instances (usually 1) + * @private: Private data for non generic chip callbacks + * @chip_types: Array of interrupt irq_chip_types + * + * Note, that irq_chip_generic can have multiple irq_chip_type + * implementations which can be associated to a particular irq line of + * an irq_chip_generic instance. That allows to share and protect + * state in an irq_chip_generic instance when we need to implement + * different flow mechanisms (level/edge) for it. + */ +struct irq_chip_generic { + raw_spinlock_t lock; + void __iomem *reg_base; + unsigned int irq_base; + unsigned int irq_cnt; + u32 mask_cache; + u32 type_cache; + u32 polarity_cache; + u32 wake_enabled; + u32 wake_active; + unsigned int num_ct; + void *private; + struct irq_chip_type chip_types[0]; +}; + +/** + * enum irq_gc_flags - Initialization flags for generic irq chips + * @IRQ_GC_INIT_MASK_CACHE: Initialize the mask_cache by reading mask reg + * @IRQ_GC_INIT_NESTED_LOCK: Set the lock class of the irqs to nested for + * irq chips which need to call irq_set_wake() on + * the parent irq. Usually GPIO implementations + */ +enum irq_gc_flags { + IRQ_GC_INIT_MASK_CACHE = 1 << 0, + IRQ_GC_INIT_NESTED_LOCK = 1 << 1, +}; + +/* Generic chip callback functions */ +void irq_gc_noop(struct irq_data *d); +void irq_gc_mask_disable_reg(struct irq_data *d); +void irq_gc_mask_set_bit(struct irq_data *d); +void irq_gc_mask_clr_bit(struct irq_data *d); +void irq_gc_unmask_enable_reg(struct irq_data *d); +void irq_gc_ack(struct irq_data *d); +void irq_gc_mask_disable_reg_and_ack(struct irq_data *d); +void irq_gc_eoi(struct irq_data *d); +int irq_gc_set_wake(struct irq_data *d, unsigned int on); + +/* Setup functions for irq_chip_generic */ +struct irq_chip_generic * +irq_alloc_generic_chip(const char *name, int nr_ct, unsigned int irq_base, + void __iomem *reg_base, irq_flow_handler_t handler); +void irq_setup_generic_chip(struct irq_chip_generic *gc, u32 msk, + enum irq_gc_flags flags, unsigned int clr, + unsigned int set); +int irq_setup_alt_chip(struct irq_data *d, unsigned int type); + +static inline struct irq_chip_type *irq_data_get_chip_type(struct irq_data *d) +{ + return container_of(d->chip, struct irq_chip_type, chip); +} + +#define IRQ_MSK(n) (u32)((n) < 32 ? 
((1 << (n)) - 1) : UINT_MAX) + +#ifdef CONFIG_SMP +static inline void irq_gc_lock(struct irq_chip_generic *gc) +{ + raw_spin_lock(&gc->lock); +} + +static inline void irq_gc_unlock(struct irq_chip_generic *gc) +{ + raw_spin_unlock(&gc->lock); +} +#else +static inline void irq_gc_lock(struct irq_chip_generic *gc) { } +static inline void irq_gc_unlock(struct irq_chip_generic *gc) { } +#endif + #endif /* CONFIG_GENERIC_HARDIRQS */ #endif /* !CONFIG_S390 */ diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile index 54329cd7b3ee..e7a13bd3316a 100644 --- a/kernel/irq/Makefile +++ b/kernel/irq/Makefile @@ -1,5 +1,6 @@ obj-y := irqdesc.o handle.o manage.o spurious.o resend.o chip.o dummychip.o devres.o +obj-y += generic-chip.o obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c new file mode 100644 index 000000000000..eb23e5924260 --- /dev/null +++ b/kernel/irq/generic-chip.c @@ -0,0 +1,261 @@ +/* + * Library implementing the most common irq chip callback functions + * + * Copyright (C) 2011, Thomas Gleixner + */ +#include +#include +#include +#include +#include + +#include "internals.h" + +static inline struct irq_chip_regs *cur_regs(struct irq_data *d) +{ + return &container_of(d->chip, struct irq_chip_type, chip)->regs; +} + +/** + * irq_gc_noop - NOOP function + * @d: irq_data + */ +void irq_gc_noop(struct irq_data *d) +{ +} + +/** + * irq_gc_mask_disable_reg - Mask chip via disable register + * @d: irq_data + * + * Chip has separate enable/disable registers instead of a single mask + * register. + */ +void irq_gc_mask_disable_reg(struct irq_data *d) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + u32 mask = 1 << (d->irq - gc->irq_base); + + irq_gc_lock(gc); + irq_reg_writel(mask, gc->reg_base + cur_regs(d)->disable); + gc->mask_cache &= ~mask; + irq_gc_unlock(gc); +} + +/** + * irq_gc_mask_set_mask_bit - Mask chip via setting bit in mask register + * @d: irq_data + * + * Chip has a single mask register. Values of this register are cached + * and protected by gc->lock + */ +void irq_gc_mask_set_bit(struct irq_data *d) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + u32 mask = 1 << (d->irq - gc->irq_base); + + irq_gc_lock(gc); + gc->mask_cache |= mask; + irq_reg_writel(gc->mask_cache, gc->reg_base + cur_regs(d)->mask); + irq_gc_unlock(gc); +} + +/** + * irq_gc_mask_set_mask_bit - Mask chip via clearing bit in mask register + * @d: irq_data + * + * Chip has a single mask register. Values of this register are cached + * and protected by gc->lock + */ +void irq_gc_mask_clr_bit(struct irq_data *d) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + u32 mask = 1 << (d->irq - gc->irq_base); + + irq_gc_lock(gc); + gc->mask_cache &= ~mask; + irq_reg_writel(gc->mask_cache, gc->reg_base + cur_regs(d)->mask); + irq_gc_unlock(gc); +} + +/** + * irq_gc_unmask_enable_reg - Unmask chip via enable register + * @d: irq_data + * + * Chip has separate enable/disable registers instead of a single mask + * register. 
+ */ +void irq_gc_unmask_enable_reg(struct irq_data *d) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + u32 mask = 1 << (d->irq - gc->irq_base); + + irq_gc_lock(gc); + irq_reg_writel(mask, gc->reg_base + cur_regs(d)->enable); + gc->mask_cache |= mask; + irq_gc_unlock(gc); +} + +/** + * irq_gc_ack - Ack pending interrupt + * @d: irq_data + */ +void irq_gc_ack(struct irq_data *d) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + u32 mask = 1 << (d->irq - gc->irq_base); + + irq_gc_lock(gc); + irq_reg_writel(mask, gc->reg_base + cur_regs(d)->ack); + irq_gc_unlock(gc); +} + +/** + * irq_gc_mask_disable_reg_and_ack- Mask and ack pending interrupt + * @d: irq_data + */ +void irq_gc_mask_disable_reg_and_ack(struct irq_data *d) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + u32 mask = 1 << (d->irq - gc->irq_base); + + irq_gc_lock(gc); + irq_reg_writel(mask, gc->reg_base + cur_regs(d)->mask); + irq_reg_writel(mask, gc->reg_base + cur_regs(d)->ack); + irq_gc_unlock(gc); +} + +/** + * irq_gc_eoi - EOI interrupt + * @d: irq_data + */ +void irq_gc_eoi(struct irq_data *d) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + u32 mask = 1 << (d->irq - gc->irq_base); + + irq_gc_lock(gc); + irq_reg_writel(mask, gc->reg_base + cur_regs(d)->eoi); + irq_gc_unlock(gc); +} + +/** + * irq_gc_set_wake - Set/clr wake bit for an interrupt + * @d: irq_data + * + * For chips where the wake from suspend functionality is not + * configured in a separate register and the wakeup active state is + * just stored in a bitmask. + */ +int irq_gc_set_wake(struct irq_data *d, unsigned int on) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + u32 mask = 1 << (d->irq - gc->irq_base); + + if (!(mask & gc->wake_enabled)) + return -EINVAL; + + irq_gc_lock(gc); + if (on) + gc->wake_active |= mask; + else + gc->wake_active &= ~mask; + irq_gc_unlock(gc); + return 0; +} + +/** + * irq_alloc_generic_chip - Allocate a generic chip and initialize it + * @name: Name of the irq chip + * @num_ct: Number of irq_chip_type instances associated with this + * @irq_base: Interrupt base nr for this chip + * @reg_base: Register base address (virtual) + * @handler: Default flow handler associated with this chip + * + * Returns an initialized irq_chip_generic structure. The chip defaults + * to the primary (index 0) irq_chip_type and @handler + */ +struct irq_chip_generic * +irq_alloc_generic_chip(const char *name, int num_ct, unsigned int irq_base, + void __iomem *reg_base, irq_flow_handler_t handler) +{ + struct irq_chip_generic *gc; + unsigned long sz = sizeof(*gc) + num_ct * sizeof(struct irq_chip_type); + + gc = kzalloc(sz, GFP_KERNEL); + if (gc) { + raw_spin_lock_init(&gc->lock); + gc->num_ct = num_ct; + gc->irq_base = irq_base; + gc->reg_base = reg_base; + gc->chip_types->chip.name = name; + gc->chip_types->handler = handler; + } + return gc; +} + +/* + * Separate lockdep class for interrupt chip which can nest irq_desc + * lock. + */ +static struct lock_class_key irq_nested_lock_class; + +/** + * irq_setup_generic_chip - Setup a range of interrupts with a generic chip + * @gc: Generic irq chip holding all data + * @msk: Bitmask holding the irqs to initialize relative to gc->irq_base + * @flags: Flags for initialization + * @clr: IRQ_* bits to clear + * @set: IRQ_* bits to set + * + * Set up max. 32 interrupts starting from gc->irq_base. Note, this + * initializes all interrupts to the primary irq_chip_type and its + * associated handler. 
+ */ +void irq_setup_generic_chip(struct irq_chip_generic *gc, u32 msk, + enum irq_gc_flags flags, unsigned int clr, + unsigned int set) +{ + struct irq_chip_type *ct = gc->chip_types; + unsigned int i; + + /* Init mask cache ? */ + if (flags & IRQ_GC_INIT_MASK_CACHE) + gc->mask_cache = irq_reg_readl(gc->reg_base + ct->regs.mask); + + for (i = gc->irq_base; msk; msk >>= 1, i++) { + if (!msk & 0x01) + continue; + + if (flags & IRQ_GC_INIT_NESTED_LOCK) + irq_set_lockdep_class(i, &irq_nested_lock_class); + + irq_set_chip_and_handler(i, &ct->chip, ct->handler); + irq_set_chip_data(i, gc); + irq_modify_status(i, clr, set); + } + gc->irq_cnt = i - gc->irq_base; +} + +/** + * irq_setup_alt_chip - Switch to alternative chip + * @d: irq_data for this interrupt + * @type Flow type to be initialized + * + * Only to be called from chip->irq_set_type() callbacks. + */ +int irq_setup_alt_chip(struct irq_data *d, unsigned int type) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + struct irq_chip_type *ct = gc->chip_types; + unsigned int i; + + for (i = 0; i < gc->num_ct; i++, ct++) { + if (ct->type & type) { + d->chip = &ct->chip; + irq_data_to_desc(d)->handle_irq = ct->handler; + return 0; + } + } + return -EINVAL; +} -- cgit v1.2.3-71-gd317 From cfefd21e693dca791bf9ecfc9dd3794facad533c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 Apr 2011 22:36:08 +0200 Subject: genirq: Add chip suspend and resume callbacks These callbacks are only called in the syscore suspend/resume code on interrupt chips which have been registered via the generic irq chip mechanism. Calling those callbacks per irq would be rather icky, but with the generic irq chip mechanism we can call this per registered chip. Signed-off-by: Thomas Gleixner Cc: linux-arm-kernel@lists.infradead.org --- include/linux/irq.h | 11 ++++++ kernel/irq/generic-chip.c | 93 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 104 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 2ba2f1216790..8b4538446636 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -280,6 +280,9 @@ static inline void irqd_clr_chained_irq_inprogress(struct irq_data *d) * @irq_bus_sync_unlock:function to sync and unlock slow bus (i2c) chips * @irq_cpu_online: configure an interrupt source for a secondary CPU * @irq_cpu_offline: un-configure an interrupt source for a secondary CPU + * @irq_suspend: function called from core code on suspend once per chip + * @irq_resume: function called from core code on resume once per chip + * @irq_pm_shutdown: function called from core code on shutdown once per chip * @irq_print_chip: optional to print special chip info in show_interrupts * @flags: chip specific flags * @@ -309,6 +312,10 @@ struct irq_chip { void (*irq_cpu_online)(struct irq_data *data); void (*irq_cpu_offline)(struct irq_data *data); + void (*irq_suspend)(struct irq_data *data); + void (*irq_resume)(struct irq_data *data); + void (*irq_pm_shutdown)(struct irq_data *data); + void (*irq_print_chip)(struct irq_data *data, struct seq_file *p); unsigned long flags; @@ -626,6 +633,7 @@ struct irq_chip_type { * @wake_active: Interrupt is marked as an wakeup from suspend source * @num_ct: Number of available irq_chip_type instances (usually 1) * @private: Private data for non generic chip callbacks + * @list: List head for keeping track of instances * @chip_types: Array of interrupt irq_chip_types * * Note, that irq_chip_generic can have multiple irq_chip_type @@ -646,6 +654,7 @@ 
struct irq_chip_generic { u32 wake_active; unsigned int num_ct; void *private; + struct list_head list; struct irq_chip_type chip_types[0]; }; @@ -680,6 +689,8 @@ void irq_setup_generic_chip(struct irq_chip_generic *gc, u32 msk, enum irq_gc_flags flags, unsigned int clr, unsigned int set); int irq_setup_alt_chip(struct irq_data *d, unsigned int type); +void irq_remove_generic_chip(struct irq_chip_generic *gc, u32 msk, + unsigned int clr, unsigned int set); static inline struct irq_chip_type *irq_data_get_chip_type(struct irq_data *d) { diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c index eb23e5924260..31a9db711906 100644 --- a/kernel/irq/generic-chip.c +++ b/kernel/irq/generic-chip.c @@ -8,9 +8,13 @@ #include #include #include +#include #include "internals.h" +static LIST_HEAD(gc_list); +static DEFINE_RAW_SPINLOCK(gc_lock); + static inline struct irq_chip_regs *cur_regs(struct irq_data *d) { return &container_of(d->chip, struct irq_chip_type, chip)->regs; @@ -219,6 +223,10 @@ void irq_setup_generic_chip(struct irq_chip_generic *gc, u32 msk, struct irq_chip_type *ct = gc->chip_types; unsigned int i; + raw_spin_lock(&gc_lock); + list_add_tail(&gc->list, &gc_list); + raw_spin_unlock(&gc_lock); + /* Init mask cache ? */ if (flags & IRQ_GC_INIT_MASK_CACHE) gc->mask_cache = irq_reg_readl(gc->reg_base + ct->regs.mask); @@ -259,3 +267,88 @@ int irq_setup_alt_chip(struct irq_data *d, unsigned int type) } return -EINVAL; } + +/** + * irq_remove_generic_chip - Remove a chip + * @gc: Generic irq chip holding all data + * @msk: Bitmask holding the irqs to initialize relative to gc->irq_base + * @clr: IRQ_* bits to clear + * @set: IRQ_* bits to set + * + * Remove up to 32 interrupts starting from gc->irq_base. + */ +void irq_remove_generic_chip(struct irq_chip_generic *gc, u32 msk, + unsigned int clr, unsigned int set) +{ + unsigned int i = gc->irq_base; + + raw_spin_lock(&gc_lock); + list_del(&gc->list); + raw_spin_unlock(&gc_lock); + + for (; msk; msk >>= 1, i++) { + if (!msk & 0x01) + continue; + + /* Remove handler first. 
That will mask the irq line */ + irq_set_handler(i, NULL); + irq_set_chip(i, &no_irq_chip); + irq_set_chip_data(i, NULL); + irq_modify_status(i, clr, set); + } +} + +#ifdef CONFIG_PM +static int irq_gc_suspend(void) +{ + struct irq_chip_generic *gc; + + list_for_each_entry(gc, &gc_list, list) { + struct irq_chip_type *ct = gc->chip_types; + + if (ct->chip.irq_suspend) + ct->chip.irq_suspend(irq_get_irq_data(gc->irq_base)); + } + return 0; +} + +static void irq_gc_resume(void) +{ + struct irq_chip_generic *gc; + + list_for_each_entry(gc, &gc_list, list) { + struct irq_chip_type *ct = gc->chip_types; + + if (ct->chip.irq_resume) + ct->chip.irq_resume(irq_get_irq_data(gc->irq_base)); + } +} +#else +#define irq_gc_suspend NULL +#define irq_gc_resume NULL +#endif + +static void irq_gc_shutdown(void) +{ + struct irq_chip_generic *gc; + + list_for_each_entry(gc, &gc_list, list) { + struct irq_chip_type *ct = gc->chip_types; + + if (ct->chip.irq_pm_shutdown) + ct->chip.irq_pm_shutdown(irq_get_irq_data(gc->irq_base)); + } +} + +static struct syscore_ops irq_gc_syscore_ops = { + .suspend = irq_gc_suspend, + .resume = irq_gc_resume, + .shutdown = irq_gc_shutdown, +}; + +static int __init irq_gc_init_ops(void) +{ + register_syscore_ops(&irq_gc_syscore_ops); + return 0; +} +device_initcall(irq_gc_init_ops); -- cgit v1.2.3-71-gd317 From 625f2a378e5a10f45fdc37932fc9f8a21676de9e Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Fri, 22 Apr 2011 11:19:10 -0600 Subject: sched: Get rid of lock_depth Neil Brown pointed out that lock_depth somehow escaped the BKL removal work. Let's get rid of it now. Note that the perf scripting utilities still have a bunch of code for dealing with common_lock_depth in tracepoints; I have left that in place in case anybody wants to use that code with older kernels. 
Suggested-by: Neil Brown Signed-off-by: Jonathan Corbet Cc: Arnd Bergmann Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/20110422111910.456c0e84@bike.lwn.net Signed-off-by: Ingo Molnar --- Documentation/trace/kprobetrace.txt | 1 - include/linux/init_task.h | 1 - include/linux/sched.h | 6 ------ kernel/fork.c | 1 - kernel/mutex.c | 7 ------- kernel/sched.c | 11 +---------- kernel/sched_debug.c | 4 ---- kernel/trace/trace_kprobe.c | 1 - tools/perf/Documentation/perf-script-perl.txt | 1 - tools/perf/Documentation/perf-script-python.txt | 1 - 10 files changed, 1 insertion(+), 33 deletions(-) (limited to 'include/linux') diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt index 6d27ab8d6e9f..c83bd6b4e6e8 100644 --- a/Documentation/trace/kprobetrace.txt +++ b/Documentation/trace/kprobetrace.txt @@ -120,7 +120,6 @@ format: field:unsigned char common_flags; offset:2; size:1; signed:0; field:unsigned char common_preempt_count; offset:3; size:1;signed:0; field:int common_pid; offset:4; size:4; signed:1; - field:int common_lock_depth; offset:8; size:4; signed:1; field:unsigned long __probe_ip; offset:12; size:4; signed:0; field:int __probe_nargs; offset:16; size:4; signed:1; diff --git a/include/linux/init_task.h b/include/linux/init_task.h index caa151fbebb7..689496bb6654 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -134,7 +134,6 @@ extern struct cred init_cred; .stack = &init_thread_info, \ .usage = ATOMIC_INIT(2), \ .flags = PF_KTHREAD, \ - .lock_depth = -1, \ .prio = MAX_PRIO-20, \ .static_prio = MAX_PRIO-20, \ .normal_prio = MAX_PRIO-20, \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 171ba24b08a7..013314a56105 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -731,10 +731,6 @@ struct sched_info { /* timestamps */ unsigned long long last_arrival,/* when we last ran on a cpu */ last_queued; /* when we were last queued to run */ -#ifdef CONFIG_SCHEDSTATS - /* BKL stats */ - unsigned int bkl_count; -#endif }; #endif /* defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) */ @@ -1190,8 +1186,6 @@ struct task_struct { unsigned int flags; /* per process flags, defined below */ unsigned int ptrace; - int lock_depth; /* BKL lock depth */ - #ifdef CONFIG_SMP struct task_struct *wake_entry; int on_cpu; diff --git a/kernel/fork.c b/kernel/fork.c index e7548dee636b..aca62871a4f9 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1103,7 +1103,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, posix_cpu_timers_init(p); - p->lock_depth = -1; /* -1 = no lock */ do_posix_clock_monotonic_gettime(&p->start_time); p->real_start_time = p->start_time; monotonic_to_bootbased(&p->real_start_time); diff --git a/kernel/mutex.c b/kernel/mutex.c index fe4706cb0c5b..2c938e2337cd 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -162,13 +162,6 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, for (;;) { struct task_struct *owner; - /* - * If we own the BKL, then don't spin. The owner of - * the mutex might be waiting on us to release the BKL. - */ - if (unlikely(current->lock_depth >= 0)) - break; - /* * If there's an owner, wait for it to either * release the lock or go to sleep. 
diff --git a/kernel/sched.c b/kernel/sched.c index 8cb0a5769a16..9cde2dd229c9 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4121,12 +4121,6 @@ static inline void schedule_debug(struct task_struct *prev) profile_hit(SCHED_PROFILING, __builtin_return_address(0)); schedstat_inc(this_rq(), sched_count); -#ifdef CONFIG_SCHEDSTATS - if (unlikely(prev->lock_depth >= 0)) { - schedstat_inc(this_rq(), rq_sched_info.bkl_count); - schedstat_inc(prev, sched_info.bkl_count); - } -#endif } static void put_prev_task(struct rq *rq, struct task_struct *prev) @@ -5852,11 +5846,8 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) raw_spin_unlock_irqrestore(&rq->lock, flags); /* Set the preempt count _outside_ the spinlocks! */ -#if defined(CONFIG_PREEMPT) - task_thread_info(idle)->preempt_count = (idle->lock_depth >= 0); -#else task_thread_info(idle)->preempt_count = 0; -#endif + /* * The idle tasks have their own, simple scheduling class: */ diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 3669bec6e130..a6710a112b4f 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -296,9 +296,6 @@ static void print_cpu(struct seq_file *m, int cpu) P(ttwu_count); P(ttwu_local); - SEQ_printf(m, " .%-30s: %d\n", "bkl_count", - rq->rq_sched_info.bkl_count); - #undef P #undef P64 #endif @@ -441,7 +438,6 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) P(se.statistics.wait_count); PN(se.statistics.iowait_sum); P(se.statistics.iowait_count); - P(sched_info.bkl_count); P(se.nr_migrations); P(se.statistics.nr_migrations_cold); P(se.statistics.nr_failed_migrations_affine); diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 35d55a386145..f925c45f0afa 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -53,7 +53,6 @@ const char *reserved_field_names[] = { "common_preempt_count", "common_pid", "common_tgid", - "common_lock_depth", FIELD_STRING_IP, FIELD_STRING_RETIP, FIELD_STRING_FUNC, diff --git a/tools/perf/Documentation/perf-script-perl.txt b/tools/perf/Documentation/perf-script-perl.txt index 5bb41e55a3ac..3152cca15501 100644 --- a/tools/perf/Documentation/perf-script-perl.txt +++ b/tools/perf/Documentation/perf-script-perl.txt @@ -63,7 +63,6 @@ The format file for the sched_wakep event defines the following fields field:unsigned char common_flags; field:unsigned char common_preempt_count; field:int common_pid; - field:int common_lock_depth; field:char comm[TASK_COMM_LEN]; field:pid_t pid; diff --git a/tools/perf/Documentation/perf-script-python.txt b/tools/perf/Documentation/perf-script-python.txt index 36b38277422c..471022069119 100644 --- a/tools/perf/Documentation/perf-script-python.txt +++ b/tools/perf/Documentation/perf-script-python.txt @@ -463,7 +463,6 @@ The format file for the sched_wakep event defines the following fields field:unsigned char common_flags; field:unsigned char common_preempt_count; field:int common_pid; - field:int common_lock_depth; field:char comm[TASK_COMM_LEN]; field:pid_t pid; -- cgit v1.2.3-71-gd317 From dea3667bc3c2a0521e8d8855e407a49d9d70028c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 24 Apr 2011 07:58:46 -0700 Subject: vfs: get rid of insane dentry hashing rules The dentry hashing rules have been really quite complicated for a long while, in odd ways. That made functions like __d_drop() very fragile and non-obvious. In particular, whether a dentry was hashed or not was indicated with an explicit DCACHE_UNHASHED bit. 
That's despite the fact that the hash abstraction that the dentries use actually has an 'is this entry hashed or not' model (which is a simple test of the 'pprev' pointer). The reason that was done is that we used the normal 'is this entry unhashed' model to mark whether the dentry had _ever_ been hashed in the dentry hash tables, and that logic goes back many years (commit b3423415fbc2: "dcache: avoid RCU for never-hashed dentries"). That, in turn, meant that __d_drop had totally different unhashing logic for the dentry hash table case and for the anonymous dcache case, because in order to use the "is this dentry hashed" logic as a flag for whether it had ever been on the RCU hash table, we had to unhash such a dentry differently so that we'd never mistake it for 'unhashed' and free it incorrectly. That's just insane. It made the logic really hard to follow, when there were two different kinds of "unhashed" states, and one of them (the one that used "list_bl_unhashed()") really had nothing at all to do with being unhashed per se, but with a very subtle lifetime rule instead. So turn all of it around, and make it logical. Instead of having a DCACHE_UNHASHED bit in d_flags to indicate whether the dentry is on the hash chains or not, use the hash chain unhashed logic for that. Suddenly "d_unhashed()" just uses "list_bl_unhashed()", and everything makes sense. And for the lifetime rule, just use an explicit DCACHE_RCUACCESS bit. If we ever insert the dentry into the dentry hash table so that it is visible to RCU lookup, we mark it DCACHE_RCUACCESS to show that it now needs the RCU lifetime rules. Now suddenly that test at dentry free time makes sense too. And because unhashing now is sane and doesn't depend on where the dentry got unhashed from (because the dentry hash chain details don't have some subtle side effects), we can re-unify the __d_drop() logic and use common code for the unhashing. Also fix one more open-coded hash chain bit_spin_lock() that I missed in the previous chain locking cleanup commit.
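Condensed into a small sketch (taken from the changes in the patch below, with error paths and the d_release hook omitted), the two notions now read:

static inline int d_unhashed(struct dentry *dentry)
{
	/* hashed or not is purely the hash chain state */
	return hlist_bl_unhashed(&dentry->d_hash);
}

static void d_free(struct dentry *dentry)
{
	/* only dentries that were ever RCU-visible need a grace period */
	if (!(dentry->d_flags & DCACHE_RCUACCESS))
		__d_free(&dentry->d_u.d_rcu);
	else
		call_rcu(&dentry->d_u.d_rcu, __d_free);
}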
Signed-off-by: Linus Torvalds --- fs/dcache.c | 42 ++++++++++++++++-------------------------- include/linux/dcache.h | 4 ++-- 2 files changed, 18 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/fs/dcache.c b/fs/dcache.c index 7108c15685dd..d600a0af3b2e 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -164,8 +164,8 @@ static void d_free(struct dentry *dentry) if (dentry->d_op && dentry->d_op->d_release) dentry->d_op->d_release(dentry); - /* if dentry was never inserted into hash, immediate free is OK */ - if (hlist_bl_unhashed(&dentry->d_hash)) + /* if dentry was never visible to RCU, immediate free is OK */ + if (!(dentry->d_flags & DCACHE_RCUACCESS)) __d_free(&dentry->d_u.d_rcu); else call_rcu(&dentry->d_u.d_rcu, __d_free); @@ -327,28 +327,19 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) */ void __d_drop(struct dentry *dentry) { - if (!(dentry->d_flags & DCACHE_UNHASHED)) { + if (!d_unhashed(dentry)) { struct hlist_bl_head *b; - if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) { + if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) b = &dentry->d_sb->s_anon; - spin_lock_bucket(b); - dentry->d_flags |= DCACHE_UNHASHED; - hlist_bl_del_init(&dentry->d_hash); - spin_unlock_bucket(b); - } else { - struct hlist_bl_head *b; + else b = d_hash(dentry->d_parent, dentry->d_name.hash); - spin_lock_bucket(b); - /* - * We may not actually need to put DCACHE_UNHASHED - * manipulations under the hash lock, but follow - * the principle of least surprise. - */ - dentry->d_flags |= DCACHE_UNHASHED; - hlist_bl_del_rcu(&dentry->d_hash); - spin_unlock_bucket(b); - dentry_rcuwalk_barrier(dentry); - } + + spin_lock_bucket(b); + __hlist_bl_del(&dentry->d_hash); + dentry->d_hash.pprev = NULL; + spin_unlock_bucket(b); + + dentry_rcuwalk_barrier(dentry); } } EXPORT_SYMBOL(__d_drop); @@ -1301,7 +1292,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) dname[name->len] = 0; dentry->d_count = 1; - dentry->d_flags = DCACHE_UNHASHED; + dentry->d_flags = 0; spin_lock_init(&dentry->d_lock); seqcount_init(&dentry->d_seq); dentry->d_inode = NULL; @@ -1603,10 +1594,9 @@ struct dentry *d_obtain_alias(struct inode *inode) tmp->d_inode = inode; tmp->d_flags |= DCACHE_DISCONNECTED; list_add(&tmp->d_alias, &inode->i_dentry); - bit_spin_lock(0, (unsigned long *)&tmp->d_sb->s_anon.first); - tmp->d_flags &= ~DCACHE_UNHASHED; + spin_lock_bucket(&tmp->d_sb->s_anon); hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon); - __bit_spin_unlock(0, (unsigned long *)&tmp->d_sb->s_anon.first); + spin_unlock_bucket(&tmp->d_sb->s_anon); spin_unlock(&tmp->d_lock); spin_unlock(&inode->i_lock); security_d_instantiate(tmp, inode); @@ -2087,7 +2077,7 @@ static void __d_rehash(struct dentry * entry, struct hlist_bl_head *b) { BUG_ON(!d_unhashed(entry)); spin_lock_bucket(b); - entry->d_flags &= ~DCACHE_UNHASHED; + entry->d_flags |= DCACHE_RCUACCESS; hlist_bl_add_head_rcu(&entry->d_hash, b); spin_unlock_bucket(b); } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index f2afed4fa945..19d90a55541d 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -197,7 +197,7 @@ struct dentry_operations { * typically using d_splice_alias. */ #define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. 
*/ -#define DCACHE_UNHASHED 0x0010 +#define DCACHE_RCUACCESS 0x0010 /* Entry has ever been RCU-visible */ #define DCACHE_INOTIFY_PARENT_WATCHED 0x0020 /* Parent inode is watched by inotify */ @@ -384,7 +384,7 @@ extern struct dentry *dget_parent(struct dentry *dentry); static inline int d_unhashed(struct dentry *dentry) { - return (dentry->d_flags & DCACHE_UNHASHED); + return hlist_bl_unhashed(&dentry->d_hash); } static inline int d_unlinked(struct dentry *dentry) -- cgit v1.2.3-71-gd317 From 3f7ac1d6671ebca7a955853f7127c937f7befbd3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 16 Mar 2011 11:14:25 +0100 Subject: libata: Kill unused ATA_DFLAG_{H|D}IPM flags ATA_DFLAG_{H|D}IPM flags are no longer used. Kill them. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- include/linux/libata.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 7f675aa81d87..3b4d223a6aff 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -137,8 +137,6 @@ enum { ATA_DFLAG_ACPI_PENDING = (1 << 5), /* ACPI resume action pending */ ATA_DFLAG_ACPI_FAILED = (1 << 6), /* ACPI on devcfg has failed */ ATA_DFLAG_AN = (1 << 7), /* AN configured */ - ATA_DFLAG_HIPM = (1 << 8), /* device supports HIPM */ - ATA_DFLAG_DIPM = (1 << 9), /* device supports DIPM */ ATA_DFLAG_DMADIR = (1 << 10), /* device requires DMADIR */ ATA_DFLAG_CFG_MASK = (1 << 12) - 1, -- cgit v1.2.3-71-gd317 From ae01b2493c3bf03c504c32ac4ebb01d528508db3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 16 Mar 2011 11:14:55 +0100 Subject: libata: Implement ATA_FLAG_NO_DIPM and apply it to mcp65 The NVIDIA mcp65 family of controllers causes command timeouts when DIPM is used. Implement ATA_FLAG_NO_DIPM and apply it. This problem was reported by Stefan Bader in the following thread. http://thread.gmane.org/gmane.linux.ide/48841 stable: applicable to 2.6.37 and 38.
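For a low-level driver, using the new flag is just a matter of OR-ing it into the port flags. A hypothetical port_info (mirroring the ahci change below; the structure name and the other flag values are illustrative only) would look like:

#include <linux/libata.h>

static const struct ata_port_info example_port_info = {
	.flags		= ATA_FLAG_SATA | ATA_FLAG_NO_DIPM,	/* host not happy with DIPM */
	.pio_mask	= ATA_PIO4,
	.udma_mask	= ATA_UDMA6,
	.port_ops	= &ata_base_port_ops,
};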
Signed-off-by: Tejun Heo Reported-by: Stefan Bader Cc: stable@kernel.org Signed-off-by: Jeff Garzik --- drivers/ata/ahci.c | 2 +- drivers/ata/libata-eh.c | 6 ++++-- include/linux/libata.h | 1 + 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 39d829cd82dd..cbd38e130f05 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -150,7 +150,7 @@ static const struct ata_port_info ahci_port_info[] = { { AHCI_HFLAGS (AHCI_HFLAG_NO_FPDMA_AA | AHCI_HFLAG_NO_PMP | AHCI_HFLAG_YES_NCQ), - .flags = AHCI_FLAG_COMMON, + .flags = AHCI_FLAG_COMMON | ATA_FLAG_NO_DIPM, .pio_mask = ATA_PIO4, .udma_mask = ATA_UDMA6, .port_ops = &ahci_ops, diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 88cd22fa65cd..f26f2fe3480a 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -3316,6 +3316,7 @@ static int ata_eh_set_lpm(struct ata_link *link, enum ata_lpm_policy policy, struct ata_eh_context *ehc = &link->eh_context; struct ata_device *dev, *link_dev = NULL, *lpm_dev = NULL; enum ata_lpm_policy old_policy = link->lpm_policy; + bool no_dipm = ap->flags & ATA_FLAG_NO_DIPM; unsigned int hints = ATA_LPM_EMPTY | ATA_LPM_HIPM; unsigned int err_mask; int rc; @@ -3332,7 +3333,7 @@ static int ata_eh_set_lpm(struct ata_link *link, enum ata_lpm_policy policy, */ ata_for_each_dev(dev, link, ENABLED) { bool hipm = ata_id_has_hipm(dev->id); - bool dipm = ata_id_has_dipm(dev->id); + bool dipm = ata_id_has_dipm(dev->id) && !no_dipm; /* find the first enabled and LPM enabled devices */ if (!link_dev) @@ -3389,7 +3390,8 @@ static int ata_eh_set_lpm(struct ata_link *link, enum ata_lpm_policy policy, /* host config updated, enable DIPM if transitioning to MIN_POWER */ ata_for_each_dev(dev, link, ENABLED) { - if (policy == ATA_LPM_MIN_POWER && ata_id_has_dipm(dev->id)) { + if (policy == ATA_LPM_MIN_POWER && !no_dipm && + ata_id_has_dipm(dev->id)) { err_mask = ata_dev_set_feature(dev, SETFEATURES_SATA_ENABLE, SATA_DIPM); if (err_mask && err_mask != AC_ERR_DEV) { diff --git a/include/linux/libata.h b/include/linux/libata.h index 3b4d223a6aff..04f32a3eb26b 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -196,6 +196,7 @@ enum { * management */ ATA_FLAG_SW_ACTIVITY = (1 << 22), /* driver supports sw activity * led */ + ATA_FLAG_NO_DIPM = (1 << 23), /* host not happy with DIPM */ /* bits 24:31 of ap->flags are reserved for LLD specific flags */ -- cgit v1.2.3-71-gd317 From fd954ae124e8a866e9cc1bc3de9a07be5492f608 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 24 Apr 2011 14:28:18 -0400 Subject: NFSv4.1: Don't loop forever in nfs4_proc_create_session If a server for some reason keeps sending NFS4ERR_DELAY errors, we can end up looping forever inside nfs4_proc_create_session, and so the usual mechanisms for detecting if the nfs_client is dead don't work. Fix this by ensuring that we loop inside the nfs4_state_manager thread instead. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 1 + fs/nfs/nfs4proc.c | 45 +++++++-------------------------------------- fs/nfs/nfs4state.c | 46 +++++++++++++++++++++++++++++++--------------- include/linux/nfs_fs_sb.h | 1 + 4 files changed, 40 insertions(+), 53 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index e1c261ddd65d..c4a69833dd0d 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -47,6 +47,7 @@ enum nfs4_client_state { NFS4CLNT_LAYOUTRECALL, NFS4CLNT_SESSION_RESET, NFS4CLNT_RECALL_SLOT, + NFS4CLNT_LEASE_CONFIRM, }; enum nfs4_session_state { diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 628e35f75307..50c814828fcb 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3754,18 +3754,17 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, status = rpc_call_sync(clp->cl_rpcclient, &msg, 0); if (status != -NFS4ERR_CLID_INUSE) break; - if (signalled()) + if (loop != 0) { + ++clp->cl_id_uniquifier; break; - if (loop++ & 1) - ssleep(clp->cl_lease_time / HZ + 1); - else - if (++clp->cl_id_uniquifier == 0) - break; + } + ++loop; + ssleep(clp->cl_lease_time / HZ + 1); } return status; } -static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp, +int nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct nfs4_setclientid_res *arg, struct rpc_cred *cred) { @@ -3790,26 +3789,6 @@ static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp, return status; } -int nfs4_proc_setclientid_confirm(struct nfs_client *clp, - struct nfs4_setclientid_res *arg, - struct rpc_cred *cred) -{ - long timeout = 0; - int err; - do { - err = _nfs4_proc_setclientid_confirm(clp, arg, cred); - switch (err) { - case 0: - return err; - case -NFS4ERR_RESOURCE: - /* The IBM lawyers misread another document! 
*/ - case -NFS4ERR_DELAY: - err = nfs4_delay(clp->cl_rpcclient, &timeout); - } - } while (err == 0); - return err; -} - struct nfs4_delegreturndata { struct nfs4_delegreturnargs args; struct nfs4_delegreturnres res; @@ -5222,20 +5201,10 @@ int nfs4_proc_create_session(struct nfs_client *clp) int status; unsigned *ptr; struct nfs4_session *session = clp->cl_session; - long timeout = 0; - int err; dprintk("--> %s clp=%p session=%p\n", __func__, clp, session); - do { - status = _nfs4_proc_create_session(clp); - if (status == -NFS4ERR_DELAY) { - err = nfs4_delay(clp->cl_rpcclient, &timeout); - if (err) - status = err; - } - } while (status == -NFS4ERR_DELAY); - + status = _nfs4_proc_create_session(clp); if (status) goto out; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 4dfb34b43ffb..4a810c8b0753 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -64,10 +64,15 @@ static LIST_HEAD(nfs4_clientid_list); int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) { - struct nfs4_setclientid_res clid; + struct nfs4_setclientid_res clid = { + .clientid = clp->cl_clientid, + .confirm = clp->cl_confirm, + }; unsigned short port; int status; + if (test_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state)) + goto do_confirm; port = nfs_callback_tcpport; if (clp->cl_addr.ss_family == AF_INET6) port = nfs_callback_tcpport6; @@ -75,10 +80,14 @@ int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred, &clid); if (status != 0) goto out; + clp->cl_clientid = clid.clientid; + clp->cl_confirm = clid.confirm; + set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); +do_confirm: status = nfs4_proc_setclientid_confirm(clp, &clid, cred); if (status != 0) goto out; - clp->cl_clientid = clid.clientid; + clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); nfs4_schedule_state_renewal(clp); out: return status; @@ -230,13 +239,18 @@ int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) { int status; + if (test_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state)) + goto do_confirm; nfs4_begin_drain_session(clp); status = nfs4_proc_exchange_id(clp, cred); if (status != 0) goto out; + set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); +do_confirm: status = nfs4_proc_create_session(clp); if (status != 0) goto out; + clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); nfs41_setup_state_renewal(clp); nfs_mark_client_ready(clp, NFS_CS_READY); out: @@ -1584,20 +1598,22 @@ static int nfs4_recall_slot(struct nfs_client *clp) { return 0; } */ static void nfs4_set_lease_expired(struct nfs_client *clp, int status) { - if (nfs4_has_session(clp)) { - switch (status) { - case -NFS4ERR_DELAY: - case -NFS4ERR_CLID_INUSE: - case -EAGAIN: - break; + switch (status) { + case -NFS4ERR_CLID_INUSE: + case -NFS4ERR_STALE_CLIENTID: + clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); + break; + case -NFS4ERR_DELAY: + case -EAGAIN: + ssleep(1); + break; - case -EKEYEXPIRED: - nfs4_warn_keyexpired(clp->cl_hostname); - case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery - * in nfs4_exchange_id */ - default: - return; - } + case -EKEYEXPIRED: + nfs4_warn_keyexpired(clp->cl_hostname); + case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery + * in nfs4_exchange_id */ + default: + return; } set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); } diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 216cea5db0aa..87694ca86914 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -47,6 +47,7 @@ struct nfs_client { #ifdef 
CONFIG_NFS_V4 u64 cl_clientid; /* constant */ + nfs4_verifier cl_confirm; /* Clientid verifier */ unsigned long cl_state; spinlock_t cl_lock; -- cgit v1.2.3-71-gd317 From 7494d00c7b826b6ceb79ec33892bd0ef59be5614 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 24 Apr 2011 14:28:45 -0400 Subject: SUNRPC: Allow RPC calls to return ETIMEDOUT instead of EIO On occasion, it is useful for the NFS layer to distinguish between soft timeouts and other EIO errors due to (say) encoding errors, or authentication errors. The following patch ensures that the default behaviour of the RPC layer remains to return EIO on soft timeouts (until we have audited all the callers). Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 3 ++- net/sunrpc/clnt.c | 5 ++++- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 3b94f804b852..f73c482ec9c6 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -128,12 +128,13 @@ struct rpc_task_setup { #define RPC_TASK_SOFT 0x0200 /* Use soft timeouts */ #define RPC_TASK_SOFTCONN 0x0400 /* Fail if can't connect */ #define RPC_TASK_SENT 0x0800 /* message was sent */ +#define RPC_TASK_TIMEOUT 0x1000 /* fail with ETIMEDOUT on timeout */ #define RPC_IS_ASYNC(t) ((t)->tk_flags & RPC_TASK_ASYNC) #define RPC_IS_SWAPPER(t) ((t)->tk_flags & RPC_TASK_SWAPPER) #define RPC_DO_ROOTOVERRIDE(t) ((t)->tk_flags & RPC_TASK_ROOTCREDS) #define RPC_ASSASSINATED(t) ((t)->tk_flags & RPC_TASK_KILLED) -#define RPC_IS_SOFT(t) ((t)->tk_flags & RPC_TASK_SOFT) +#define RPC_IS_SOFT(t) ((t)->tk_flags & (RPC_TASK_SOFT|RPC_TASK_TIMEOUT)) #define RPC_IS_SOFTCONN(t) ((t)->tk_flags & RPC_TASK_SOFTCONN) #define RPC_WAS_SENT(t) ((t)->tk_flags & RPC_TASK_SENT) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index e7a96e478f63..8d83f9d48713 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1508,7 +1508,10 @@ call_timeout(struct rpc_task *task) if (clnt->cl_chatty) printk(KERN_NOTICE "%s: server %s not responding, timed out\n", clnt->cl_protname, clnt->cl_server); - rpc_exit(task, -EIO); + if (task->tk_flags & RPC_TASK_TIMEOUT) + rpc_exit(task, -ETIMEDOUT); + else + rpc_exit(task, -EIO); return; } -- cgit v1.2.3-71-gd317 From bf26c018490c2fce7fe9b629083b96ce0e6ad019 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 7 Apr 2011 16:53:20 +0200 Subject: ptrace: Prepare to fix racy accesses on task breakpoints When a task is traced and is in a stopped state, the tracer may execute a ptrace request to examine the tracee state and get its task struct. Right after, the tracee can be killed and thus its breakpoints released. This can happen concurrently when the tracer is in the middle of reading or modifying these breakpoints, leading to dereferencing a freed pointer. Hence, to prepare the fix, create a generic breakpoint reference holding API. When a reference on the breakpoints of a task is held, the breakpoints won't be released until the last reference is dropped. After that, no more ptrace request on the task's breakpoints can be serviced for the tracer. Reported-by: Oleg Nesterov Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Will Deacon Cc: Prasad Cc: Paul Mundt Cc: v2.6.33.. 
Link: http://lkml.kernel.org/r/1302284067-7860-2-git-send-email-fweisbec@gmail.com --- include/linux/ptrace.h | 13 ++++++++++++- include/linux/sched.h | 3 +++ kernel/exit.c | 2 +- kernel/ptrace.c | 17 +++++++++++++++++ 4 files changed, 33 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index a1147e5dd245..9178d5cc0b01 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -189,6 +189,10 @@ static inline void ptrace_init_task(struct task_struct *child, bool ptrace) child->ptrace = current->ptrace; __ptrace_link(child, current->parent); } + +#ifdef CONFIG_HAVE_HW_BREAKPOINT + atomic_set(&child->ptrace_bp_refcnt, 1); +#endif } /** @@ -350,6 +354,13 @@ extern int task_current_syscall(struct task_struct *target, long *callno, unsigned long args[6], unsigned int maxargs, unsigned long *sp, unsigned long *pc); -#endif +#ifdef CONFIG_HAVE_HW_BREAKPOINT +extern int ptrace_get_breakpoints(struct task_struct *tsk); +extern void ptrace_put_breakpoints(struct task_struct *tsk); +#else +static inline void ptrace_put_breakpoints(struct task_struct *tsk) { } +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ + +#endif /* __KERNEL */ #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 18d63cea2848..781abd137673 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1537,6 +1537,9 @@ struct task_struct { unsigned long memsw_nr_pages; /* uncharged mem+swap usage */ } memcg_batch; #endif +#ifdef CONFIG_HAVE_HW_BREAKPOINT + atomic_t ptrace_bp_refcnt; +#endif }; /* Future-safe accessor for struct task_struct's cpus_allowed. */ diff --git a/kernel/exit.c b/kernel/exit.c index f5d2f63bae0b..8dd874181542 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1016,7 +1016,7 @@ NORET_TYPE void do_exit(long code) /* * FIXME: do that only when needed, using sched_exit tracepoint */ - flush_ptrace_hw_breakpoint(tsk); + ptrace_put_breakpoints(tsk); exit_notify(tsk, group_dead); #ifdef CONFIG_NUMA diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 0fc1eed28d27..dc7ab65f3b36 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -22,6 +22,7 @@ #include #include #include +#include /* @@ -879,3 +880,19 @@ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, return ret; } #endif /* CONFIG_COMPAT */ + +#ifdef CONFIG_HAVE_HW_BREAKPOINT +int ptrace_get_breakpoints(struct task_struct *tsk) +{ + if (atomic_inc_not_zero(&tsk->ptrace_bp_refcnt)) + return 0; + + return -1; +} + +void ptrace_put_breakpoints(struct task_struct *tsk) +{ + if (atomic_dec_and_test(&tsk->ptrace_bp_refcnt)) + flush_ptrace_hw_breakpoint(tsk); +} +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ -- cgit v1.2.3-71-gd317 From f2f5f2a1cedc803a5a517557d436e6cb10c007de Mon Sep 17 00:00:00 2001 From: Vasanthakumar Thiagarajan Date: Tue, 19 Apr 2011 19:29:01 +0530 Subject: ath9k_hw: Get AHB clock information from ath9k_platform_data Add a bool in ath9k_platform_data to pass AHB clock speed information. Driver needs this to configure PLL on some SOCs. Signed-off-by: Vasanthakumar Thiagarajan Signed-off-by: John W. 
Linville --- drivers/net/wireless/ath/ath9k/hw.h | 2 ++ drivers/net/wireless/ath/ath9k/init.c | 1 + include/linux/ath9k_platform.h | 2 ++ 3 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/wireless/ath/ath9k/hw.h b/drivers/net/wireless/ath/ath9k/hw.h index 450b64263bc9..5a4ba09a2f1c 100644 --- a/drivers/net/wireless/ath/ath9k/hw.h +++ b/drivers/net/wireless/ath/ath9k/hw.h @@ -846,6 +846,8 @@ struct ath_hw { /* Enterprise mode cap */ u32 ent_mode; + + bool is_clk_25mhz; }; struct ath_bus_ops { diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c index 1ac8318d82a3..e78b6aefa108 100644 --- a/drivers/net/wireless/ath/ath9k/init.c +++ b/drivers/net/wireless/ath/ath9k/init.c @@ -574,6 +574,7 @@ static int ath9k_init_softc(u16 devid, struct ath_softc *sc, u16 subsysid, sc->sc_ah->gpio_mask = pdata->gpio_mask; sc->sc_ah->gpio_val = pdata->gpio_val; sc->sc_ah->led_pin = pdata->led_pin; + ah->is_clk_25mhz = pdata->is_clk_25mhz; } common = ath9k_hw_common(ah); diff --git a/include/linux/ath9k_platform.h b/include/linux/ath9k_platform.h index 020387a114e3..60a7c49dcb49 100644 --- a/include/linux/ath9k_platform.h +++ b/include/linux/ath9k_platform.h @@ -28,6 +28,8 @@ struct ath9k_platform_data { int led_pin; u32 gpio_mask; u32 gpio_val; + + bool is_clk_25mhz; }; #endif /* _LINUX_ATH9K_PLATFORM_H */ -- cgit v1.2.3-71-gd317 From 9f2e731d1d278d853def1567735d8a823668a3c8 Mon Sep 17 00:00:00 2001 From: RafaÅ‚ MiÅ‚ecki Date: Wed, 20 Apr 2011 11:12:30 +0200 Subject: ssb: cc: add & fix defines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We probably got false positive results for checking PLL being down. Signed-off-by: RafaÅ‚ MiÅ‚ecki Signed-off-by: John W. 
Linville --- include/linux/ssb/ssb_driver_chipcommon.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ssb/ssb_driver_chipcommon.h b/include/linux/ssb/ssb_driver_chipcommon.h index 2cdf249b4e5f..4f2d77a0c021 100644 --- a/include/linux/ssb/ssb_driver_chipcommon.h +++ b/include/linux/ssb/ssb_driver_chipcommon.h @@ -131,6 +131,9 @@ #define SSB_CHIPCO_GPIOIRQ 0x0074 #define SSB_CHIPCO_WATCHDOG 0x0080 #define SSB_CHIPCO_GPIOTIMER 0x0088 /* LED powersave (corerev >= 16) */ +#define SSB_CHIPCO_GPIOTIMER_OFFTIME 0x0000FFFF +#define SSB_CHIPCO_GPIOTIMER_OFFTIME_SHIFT 0 +#define SSB_CHIPCO_GPIOTIMER_ONTIME 0xFFFF0000 #define SSB_CHIPCO_GPIOTIMER_ONTIME_SHIFT 16 #define SSB_CHIPCO_GPIOTOUTM 0x008C /* LED powersave (corerev >= 16) */ #define SSB_CHIPCO_CLOCK_N 0x0090 @@ -189,8 +192,10 @@ #define SSB_CHIPCO_CLKCTLST_HAVEALPREQ 0x00000008 /* ALP available request */ #define SSB_CHIPCO_CLKCTLST_HAVEHTREQ 0x00000010 /* HT available request */ #define SSB_CHIPCO_CLKCTLST_HWCROFF 0x00000020 /* Force HW clock request off */ -#define SSB_CHIPCO_CLKCTLST_HAVEHT 0x00010000 /* HT available */ -#define SSB_CHIPCO_CLKCTLST_HAVEALP 0x00020000 /* APL available */ +#define SSB_CHIPCO_CLKCTLST_HAVEALP 0x00010000 /* ALP available */ +#define SSB_CHIPCO_CLKCTLST_HAVEHT 0x00020000 /* HT available */ +#define SSB_CHIPCO_CLKCTLST_4328A0_HAVEHT 0x00010000 /* 4328a0 has reversed bits */ +#define SSB_CHIPCO_CLKCTLST_4328A0_HAVEALP 0x00020000 /* 4328a0 has reversed bits */ #define SSB_CHIPCO_HW_WORKAROUND 0x01E4 /* Hardware workaround (rev >= 20) */ #define SSB_CHIPCO_UART0_DATA 0x0300 #define SSB_CHIPCO_UART0_IMR 0x0304 -- cgit v1.2.3-71-gd317 From 9835a30e980561082beb02ce724f6e555787bc19 Mon Sep 17 00:00:00 2001 From: RafaÅ‚ MiÅ‚ecki Date: Sun, 24 Apr 2011 11:04:19 +0200 Subject: ssb: cc: clear GPIOPULL registers on init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: RafaÅ‚ MiÅ‚ecki Signed-off-by: John W. 
Linville --- drivers/ssb/driver_chipcommon.c | 6 ++++++ include/linux/ssb/ssb_driver_chipcommon.h | 2 ++ 2 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/drivers/ssb/driver_chipcommon.c b/drivers/ssb/driver_chipcommon.c index 7c031fdc8205..b4b3733aefcf 100644 --- a/drivers/ssb/driver_chipcommon.c +++ b/drivers/ssb/driver_chipcommon.c @@ -260,6 +260,12 @@ void ssb_chipcommon_init(struct ssb_chipcommon *cc) if (cc->dev->id.revision >= 11) cc->status = chipco_read32(cc, SSB_CHIPCO_CHIPSTAT); ssb_dprintk(KERN_INFO PFX "chipcommon status is 0x%x\n", cc->status); + + if (cc->dev->id.revision >= 20) { + chipco_write32(cc, SSB_CHIPCO_GPIOPULLUP, 0); + chipco_write32(cc, SSB_CHIPCO_GPIOPULLDOWN, 0); + } + ssb_pmu_init(cc); chipco_powercontrol_init(cc); ssb_chipco_set_clockmode(cc, SSB_CLKMODE_FAST); diff --git a/include/linux/ssb/ssb_driver_chipcommon.h b/include/linux/ssb/ssb_driver_chipcommon.h index 4f2d77a0c021..a08d693d8324 100644 --- a/include/linux/ssb/ssb_driver_chipcommon.h +++ b/include/linux/ssb/ssb_driver_chipcommon.h @@ -123,6 +123,8 @@ #define SSB_CHIPCO_FLASHDATA 0x0048 #define SSB_CHIPCO_BCAST_ADDR 0x0050 #define SSB_CHIPCO_BCAST_DATA 0x0054 +#define SSB_CHIPCO_GPIOPULLUP 0x0058 /* Rev >= 20 only */ +#define SSB_CHIPCO_GPIOPULLDOWN 0x005C /* Rev >= 20 only */ #define SSB_CHIPCO_GPIOIN 0x0060 #define SSB_CHIPCO_GPIOOUT 0x0064 #define SSB_CHIPCO_GPIOOUTEN 0x0068 -- cgit v1.2.3-71-gd317 From ad58671cf32c74a8d6e8f51e63e9cf4e7a73bf1e Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Tue, 19 Apr 2011 12:43:45 +0100 Subject: Add a strtobool function matching semantics of existing in kernel equivalents This is a rename of the usr_strtobool proposal, which was a renamed, relocated and fixed version of previous kstrtobool RFC Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- include/linux/string.h | 1 + lib/string.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) (limited to 'include/linux') diff --git a/include/linux/string.h b/include/linux/string.h index a716ee2a8adb..a176db2f2c85 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -123,6 +123,7 @@ extern char **argv_split(gfp_t gfp, const char *str, int *argcp); extern void argv_free(char **argv); extern bool sysfs_streq(const char *s1, const char *s2); +extern int strtobool(const char *s, bool *res); #ifdef CONFIG_BINARY_PRINTF int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args); diff --git a/lib/string.c b/lib/string.c index f71bead1be3e..01fad9b203e1 100644 --- a/lib/string.c +++ b/lib/string.c @@ -535,6 +535,35 @@ bool sysfs_streq(const char *s1, const char *s2) } EXPORT_SYMBOL(sysfs_streq); +/** + * strtobool - convert common user inputs into boolean values + * @s: input string + * @res: result + * + * This routine returns 0 iff the first character is one of 'Yy1Nn0'. + * Otherwise it will return -EINVAL. Value pointed to by res is + * updated upon finding a match. 
+ */ +int strtobool(const char *s, bool *res) +{ + switch (s[0]) { + case 'y': + case 'Y': + case '1': + *res = true; + break; + case 'n': + case 'N': + case '0': + *res = false; + break; + default: + return -EINVAL; + } + return 0; +} +EXPORT_SYMBOL(strtobool); + #ifndef __HAVE_ARCH_MEMSET /** * memset - Fill a region of memory with the given value -- cgit v1.2.3-71-gd317 From 3dd2ee4824b668a635d6d2bb6bc73f33708cab9f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 25 Apr 2011 18:10:58 -0700 Subject: bit_spinlock: don't play preemption games inside the busy loop When we are waiting for the bit-lock to be released, and are looping over the 'cpu_relax()' should not be doing anything else - otherwise we miss the point of trying to do the whole 'cpu_relax()'. Do the preemption enable/disable around the loop, rather than inside of it. Noticed when I was looking at the code generation for the dcache __d_drop usage, and the code just looked very odd. Signed-off-by: Linus Torvalds --- include/linux/bit_spinlock.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bit_spinlock.h b/include/linux/bit_spinlock.h index e612575a2596..b4326bfa684f 100644 --- a/include/linux/bit_spinlock.h +++ b/include/linux/bit_spinlock.h @@ -23,11 +23,11 @@ static inline void bit_spin_lock(int bitnum, unsigned long *addr) preempt_disable(); #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) while (unlikely(test_and_set_bit_lock(bitnum, addr))) { - while (test_bit(bitnum, addr)) { - preempt_enable(); + preempt_enable(); + do { cpu_relax(); - preempt_disable(); - } + } while (test_bit(bitnum, addr)); + preempt_disable(); } #endif __acquire(bitlock); -- cgit v1.2.3-71-gd317 From 1879fd6a26571fd4e8e1f4bb3e7537bc936b1fe7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 25 Apr 2011 14:01:36 -0400 Subject: add hlist_bl_lock/unlock helpers Now that the whole dcache_hash_bucket crap is gone, go all the way and also remove the weird locking layering violations for locking the hash buckets. Add hlist_bl_lock/unlock helpers to move the locking into the list abstraction instead of requiring each caller to open code it. After all allowing for the bit locks is the whole point of these helpers over the plain hlist variant. Signed-off-by: Christoph Hellwig Signed-off-by: Linus Torvalds --- fs/dcache.c | 22 ++++++---------------- fs/gfs2/glock.c | 6 ++---- include/linux/list_bl.h | 11 +++++++++++ 3 files changed, 19 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/fs/dcache.c b/fs/dcache.c index d600a0af3b2e..22a0ef41bad1 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -109,16 +109,6 @@ static inline struct hlist_bl_head *d_hash(struct dentry *parent, return dentry_hashtable + (hash & D_HASHMASK); } -static inline void spin_lock_bucket(struct hlist_bl_head *b) -{ - bit_spin_lock(0, (unsigned long *)&b->first); -} - -static inline void spin_unlock_bucket(struct hlist_bl_head *b) -{ - __bit_spin_unlock(0, (unsigned long *)&b->first); -} - /* Statistics gathering. 
*/ struct dentry_stat_t dentry_stat = { .age_limit = 45, @@ -334,10 +324,10 @@ void __d_drop(struct dentry *dentry) else b = d_hash(dentry->d_parent, dentry->d_name.hash); - spin_lock_bucket(b); + hlist_bl_lock(b); __hlist_bl_del(&dentry->d_hash); dentry->d_hash.pprev = NULL; - spin_unlock_bucket(b); + hlist_bl_unlock(b); dentry_rcuwalk_barrier(dentry); } @@ -1594,9 +1584,9 @@ struct dentry *d_obtain_alias(struct inode *inode) tmp->d_inode = inode; tmp->d_flags |= DCACHE_DISCONNECTED; list_add(&tmp->d_alias, &inode->i_dentry); - spin_lock_bucket(&tmp->d_sb->s_anon); + hlist_bl_lock(&tmp->d_sb->s_anon); hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon); - spin_unlock_bucket(&tmp->d_sb->s_anon); + hlist_bl_unlock(&tmp->d_sb->s_anon); spin_unlock(&tmp->d_lock); spin_unlock(&inode->i_lock); security_d_instantiate(tmp, inode); @@ -2076,10 +2066,10 @@ EXPORT_SYMBOL(d_delete); static void __d_rehash(struct dentry * entry, struct hlist_bl_head *b) { BUG_ON(!d_unhashed(entry)); - spin_lock_bucket(b); + hlist_bl_lock(b); entry->d_flags |= DCACHE_RCUACCESS; hlist_bl_add_head_rcu(&entry->d_hash, b); - spin_unlock_bucket(b); + hlist_bl_unlock(b); } static void _d_rehash(struct dentry * entry) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index f07643e21bfa..7a4fb630a320 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -93,14 +93,12 @@ static unsigned int gl_hash(const struct gfs2_sbd *sdp, static inline void spin_lock_bucket(unsigned int hash) { - struct hlist_bl_head *bl = &gl_hash_table[hash]; - bit_spin_lock(0, (unsigned long *)bl); + hlist_bl_lock(&gl_hash_table[hash]); } static inline void spin_unlock_bucket(unsigned int hash) { - struct hlist_bl_head *bl = &gl_hash_table[hash]; - __bit_spin_unlock(0, (unsigned long *)bl); + hlist_bl_unlock(&gl_hash_table[hash]); } static void gfs2_glock_dealloc(struct rcu_head *rcu) diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h index 5bad17d1acde..31f9d75adc5b 100644 --- a/include/linux/list_bl.h +++ b/include/linux/list_bl.h @@ -2,6 +2,7 @@ #define _LINUX_LIST_BL_H #include +#include /* * Special version of lists, where head of the list has a lock in the lowest @@ -114,6 +115,16 @@ static inline void hlist_bl_del_init(struct hlist_bl_node *n) } } +static inline void hlist_bl_lock(struct hlist_bl_head *b) +{ + bit_spin_lock(0, (unsigned long *)b); +} + +static inline void hlist_bl_unlock(struct hlist_bl_head *b) +{ + __bit_spin_unlock(0, (unsigned long *)b); +} + /** * hlist_bl_for_each_entry - iterate over list of given type * @tpos: the type * to use as a loop cursor. -- cgit v1.2.3-71-gd317 From 94403f8863d0d1d2005291b2ef0719c2534aa303 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 24 Apr 2011 08:18:31 +0200 Subject: perf events: Add stalled cycles generic event - PERF_COUNT_HW_STALLED_CYCLES The new PERF_COUNT_HW_STALLED_CYCLES event tries to approximate cycles the CPU does nothing useful, because it is stalled on a cache-miss or some other condition. 
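As a hedged illustration only (not part of this patch): a minimal userspace sketch that counts the new PERF_COUNT_HW_STALLED_CYCLES generic event around a workload via perf_event_open(). The syscall wrapper, the placeholder workload and the printed label are assumptions made for the example; the event id itself is the one this patch adds to include/linux/perf_event.h.

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
                            int cpu, int group_fd, unsigned long flags)
{
        return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
        struct perf_event_attr attr;
        long long count;
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.type = PERF_TYPE_HARDWARE;
        attr.size = sizeof(attr);
        attr.config = PERF_COUNT_HW_STALLED_CYCLES;     /* new generic id */
        attr.disabled = 1;

        fd = perf_event_open(&attr, 0, -1, -1, 0);      /* current task, any CPU */
        if (fd < 0)
                return 1;

        ioctl(fd, PERF_EVENT_IOC_RESET, 0);
        ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
        /* ... the workload being measured would run here ... */
        ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

        if (read(fd, &count, sizeof(count)) == sizeof(count))
                printf("stalled cycles: %lld\n", count);
        close(fd);
        return 0;
}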
Acked-by: Peter Zijlstra Acked-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Link: http://lkml.kernel.org/n/tip-fue11vymwqsoo5to72jxxjyl@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 3 +++ include/linux/perf_event.h | 1 + tools/perf/util/parse-events.c | 1 + tools/perf/util/python.c | 1 + 4 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 9ae4a2aa7398..efa2704c9dfd 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1413,6 +1413,9 @@ static __init int intel_pmu_init(void) x86_pmu.enable_all = intel_pmu_nhm_enable_all; x86_pmu.extra_regs = intel_nehalem_extra_regs; + /* Install the stalled-cycles event: 0xff: All reasons, 0xa2: Resource stalls */ + intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES] = 0xffa2; + if (ebx & 0x40) { /* * Erratum AAJ80 detected, we work it around by using diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index ee9f1e782800..ac636dd20a0c 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -52,6 +52,7 @@ enum perf_hw_id { PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4, PERF_COUNT_HW_BRANCH_MISSES = 5, PERF_COUNT_HW_BUS_CYCLES = 6, + PERF_COUNT_HW_STALLED_CYCLES = 7, PERF_COUNT_HW_MAX, /* non-ABI */ }; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 952b4ae3d954..1869e4c646db 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -38,6 +38,7 @@ static struct event_symbol event_symbols[] = { { CHW(BRANCH_INSTRUCTIONS), "branch-instructions", "branches" }, { CHW(BRANCH_MISSES), "branch-misses", "" }, { CHW(BUS_CYCLES), "bus-cycles", "" }, + { CHW(STALLED_CYCLES), "stalled-cycles", "" }, { CSW(CPU_CLOCK), "cpu-clock", "" }, { CSW(TASK_CLOCK), "task-clock", "" }, diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index f5e38451fdc5..406f613ee619 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -798,6 +798,7 @@ static struct { { "COUNT_HW_BRANCH_INSTRUCTIONS", PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, { "COUNT_HW_BRANCH_MISSES", PERF_COUNT_HW_BRANCH_MISSES }, { "COUNT_HW_BUS_CYCLES", PERF_COUNT_HW_BUS_CYCLES }, + { "COUNT_HW_STALLED_CYCLES", PERF_COUNT_HW_STALLED_CYCLES }, { "COUNT_HW_CACHE_L1D", PERF_COUNT_HW_CACHE_L1D }, { "COUNT_HW_CACHE_L1I", PERF_COUNT_HW_CACHE_L1I }, { "COUNT_HW_CACHE_LL", PERF_COUNT_HW_CACHE_LL }, -- cgit v1.2.3-71-gd317 From 04ad1fb2640a4f23e99ccb705c179d64abac03f2 Mon Sep 17 00:00:00 2001 From: RafaÅ‚ MiÅ‚ecki Date: Sat, 23 Apr 2011 19:30:29 +0200 Subject: ssb: update reject bit for Target State Low MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit My 14e4:4315 is SSB_IDLOW_SSBREV_26: read32 0xfaafcff8 -> 0x600422d5 My 14e4:4328 is SSB_IDLOW_SSBREV_24: read32 0xfaafcff8 -> 0x400422c5 My 14e4:432b is SSB_IDLOW_SSBREV_26 again: read32 0xfaafcff8 -> 0x600422d5 For all of them wl driver is using 0x2 reject bit: write32(0xf98) <- 0x00010002 So it seems SSB 2.3 is the exception using another bit. Signed-off-by: RafaÅ‚ MiÅ‚ecki Signed-off-by: John W. 
Linville --- drivers/ssb/main.c | 15 +++++++-------- include/linux/ssb/ssb_regs.h | 2 +- 2 files changed, 8 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/ssb/main.c b/drivers/ssb/main.c index 74aa2cca7d8c..ad3da93a428c 100644 --- a/drivers/ssb/main.c +++ b/drivers/ssb/main.c @@ -1117,23 +1117,22 @@ static u32 ssb_tmslow_reject_bitmask(struct ssb_device *dev) { u32 rev = ssb_read32(dev, SSB_IDLOW) & SSB_IDLOW_SSBREV; - /* The REJECT bit changed position in TMSLOW between - * Backplane revisions. */ + /* The REJECT bit seems to be different for Backplane rev 2.3 */ switch (rev) { case SSB_IDLOW_SSBREV_22: - return SSB_TMSLOW_REJECT_22; + case SSB_IDLOW_SSBREV_24: + case SSB_IDLOW_SSBREV_26: + return SSB_TMSLOW_REJECT; case SSB_IDLOW_SSBREV_23: return SSB_TMSLOW_REJECT_23; - case SSB_IDLOW_SSBREV_24: /* TODO - find the proper REJECT bits */ - case SSB_IDLOW_SSBREV_25: /* same here */ - case SSB_IDLOW_SSBREV_26: /* same here */ + case SSB_IDLOW_SSBREV_25: /* TODO - find the proper REJECT bit */ case SSB_IDLOW_SSBREV_27: /* same here */ - return SSB_TMSLOW_REJECT_23; /* this is a guess */ + return SSB_TMSLOW_REJECT; /* this is a guess */ default: printk(KERN_INFO "ssb: Backplane Revision 0x%.8X\n", rev); WARN_ON(1); } - return (SSB_TMSLOW_REJECT_22 | SSB_TMSLOW_REJECT_23); + return (SSB_TMSLOW_REJECT | SSB_TMSLOW_REJECT_23); } int ssb_device_is_enabled(struct ssb_device *dev) diff --git a/include/linux/ssb/ssb_regs.h b/include/linux/ssb/ssb_regs.h index 402955ae48ce..efbf459d571c 100644 --- a/include/linux/ssb/ssb_regs.h +++ b/include/linux/ssb/ssb_regs.h @@ -97,7 +97,7 @@ #define SSB_INTVEC_ENET1 0x00000040 /* Enable interrupts for enet 1 */ #define SSB_TMSLOW 0x0F98 /* SB Target State Low */ #define SSB_TMSLOW_RESET 0x00000001 /* Reset */ -#define SSB_TMSLOW_REJECT_22 0x00000002 /* Reject (Backplane rev 2.2) */ +#define SSB_TMSLOW_REJECT 0x00000002 /* Reject (Standard Backplane) */ #define SSB_TMSLOW_REJECT_23 0x00000004 /* Reject (Backplane rev 2.3) */ #define SSB_TMSLOW_CLOCK 0x00010000 /* Clock Enable */ #define SSB_TMSLOW_FGC 0x00020000 /* Force Gated Clocks On */ -- cgit v1.2.3-71-gd317 From 304529b1b6f8612ccbb4582e997051b48b94f4a4 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 1 Apr 2011 14:32:09 -0700 Subject: time: Add timekeeping_inject_sleeptime MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some platforms cannot implement read_persistent_clock, as their RTC devices are only accessible when interrupts are enabled. This keeps them from being used by the timekeeping code on resume to measure the time in suspend. The RTC layer tries to work around this, by calling do_settimeofday on resume after irqs are reenabled to set the time properly. However, this only corrects CLOCK_REALTIME, and does not properly adjust the sleep time value. This causes btime in /proc/stat to be incorrect as well as making the new CLOCK_BOTTTIME inaccurate. This patch resolves the issue by introducing a new timekeeping hook to allow the RTC layer to inject the sleep time on resume. The code also checks to make sure that read_persistent_clock is nonfunctional before setting the sleep time, so that should the RTC's HCTOSYS option be configured in on a system that does support read_persistent_clock we will not increase the total_sleep_time twice. 
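A distilled sketch of the calling contract this patch sets up for the RTC layer (illustrative only; the real implementation is in the rtc_suspend()/rtc_resume() hunks below, and the variable and function names here are made up): sample the RTC and a monotonic clock at suspend, then on resume inject the RTC delta minus the time the kernel was actually running, so nothing is counted twice.

#include <linux/time.h>
#include <linux/rtc.h>

static unsigned long rtc_secs_at_suspend;       /* sampled in the suspend hook */
static struct timespec mono_at_suspend;         /* ktime_get_ts() at suspend */

static void example_inject_on_resume(struct rtc_device *rtc)
{
        struct rtc_time tm;
        struct timespec mono_now, sleep;
        unsigned long rtc_secs_now;

        ktime_get_ts(&mono_now);
        rtc_read_time(rtc, &tm);
        rtc_tm_to_time(&tm, &rtc_secs_now);

        /* wall time elapsed according to the RTC... */
        set_normalized_timespec(&sleep, rtc_secs_now - rtc_secs_at_suspend, 0);
        /* ...minus the time the kernel was running between the two samples */
        sleep = timespec_sub(sleep, timespec_sub(mono_now, mono_at_suspend));

        /* a no-op if read_persistent_clock() already covers the sleep time */
        timekeeping_inject_sleeptime(&sleep);
}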
CC: Arve HjønnevÃ¥g CC: Thomas Gleixner Acked-by: Arnd Bergmann Signed-off-by: John Stultz --- drivers/rtc/class.c | 23 ++++++++----------- include/linux/time.h | 1 + kernel/time/timekeeping.c | 56 ++++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 63 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c index 39013867cbd6..4194e59e14cd 100644 --- a/drivers/rtc/class.c +++ b/drivers/rtc/class.c @@ -41,26 +41,21 @@ static void rtc_device_release(struct device *dev) * system's wall clock; restore it on resume(). */ -static struct timespec delta; static time_t oldtime; +static struct timespec oldts; static int rtc_suspend(struct device *dev, pm_message_t mesg) { struct rtc_device *rtc = to_rtc_device(dev); struct rtc_time tm; - struct timespec ts = current_kernel_time(); if (strcmp(dev_name(&rtc->dev), CONFIG_RTC_HCTOSYS_DEVICE) != 0) return 0; rtc_read_time(rtc, &tm); + ktime_get_ts(&oldts); rtc_tm_to_time(&tm, &oldtime); - /* RTC precision is 1 second; adjust delta for avg 1/2 sec err */ - set_normalized_timespec(&delta, - ts.tv_sec - oldtime, - ts.tv_nsec - (NSEC_PER_SEC >> 1)); - return 0; } @@ -70,10 +65,12 @@ static int rtc_resume(struct device *dev) struct rtc_time tm; time_t newtime; struct timespec time; + struct timespec newts; if (strcmp(dev_name(&rtc->dev), CONFIG_RTC_HCTOSYS_DEVICE) != 0) return 0; + ktime_get_ts(&newts); rtc_read_time(rtc, &tm); if (rtc_valid_tm(&tm) != 0) { pr_debug("%s: bogus resume time\n", dev_name(&rtc->dev)); @@ -85,15 +82,13 @@ static int rtc_resume(struct device *dev) pr_debug("%s: time travel!\n", dev_name(&rtc->dev)); return 0; } + /* calculate the RTC time delta */ + set_normalized_timespec(&time, newtime - oldtime, 0); - /* restore wall clock using delta against this RTC; - * adjust again for avg 1/2 second RTC sampling error - */ - set_normalized_timespec(&time, - newtime + delta.tv_sec, - (NSEC_PER_SEC >> 1) + delta.tv_nsec); - do_settimeofday(&time); + /* subtract kernel time between rtc_suspend to rtc_resume */ + time = timespec_sub(time, timespec_sub(newts, oldts)); + timekeeping_inject_sleeptime(&time); return 0; } diff --git a/include/linux/time.h b/include/linux/time.h index 454a26205787..4ea5a75fcacd 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -126,6 +126,7 @@ struct timespec __current_kernel_time(void); /* does not take xtime_lock */ struct timespec get_monotonic_coarse(void); void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim, struct timespec *wtom, struct timespec *sleep); +void timekeeping_inject_sleeptime(struct timespec *delta); #define CURRENT_TIME (current_kernel_time()) #define CURRENT_TIME_SEC ((struct timespec) { get_seconds(), 0 }) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 8ad5d576755e..8e6a05a5915a 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -595,6 +595,58 @@ void __init timekeeping_init(void) /* time in seconds when suspend began */ static struct timespec timekeeping_suspend_time; +/** + * __timekeeping_inject_sleeptime - Internal function to add sleep interval + * @delta: pointer to a timespec delta value + * + * Takes a timespec offset measuring a suspend interval and properly + * adds the sleep offset to the timekeeping variables. 
+ */ +static void __timekeeping_inject_sleeptime(struct timespec *delta) +{ + xtime = timespec_add(xtime, *delta); + wall_to_monotonic = timespec_sub(wall_to_monotonic, *delta); + total_sleep_time = timespec_add(total_sleep_time, *delta); +} + + +/** + * timekeeping_inject_sleeptime - Adds suspend interval to timeekeeping values + * @delta: pointer to a timespec delta value + * + * This hook is for architectures that cannot support read_persistent_clock + * because their RTC/persistent clock is only accessible when irqs are enabled. + * + * This function should only be called by rtc_resume(), and allows + * a suspend offset to be injected into the timekeeping values. + */ +void timekeeping_inject_sleeptime(struct timespec *delta) +{ + unsigned long flags; + struct timespec ts; + + /* Make sure we don't set the clock twice */ + read_persistent_clock(&ts); + if (!(ts.tv_sec == 0 && ts.tv_nsec == 0)) + return; + + write_seqlock_irqsave(&xtime_lock, flags); + timekeeping_forward_now(); + + __timekeeping_inject_sleeptime(delta); + + timekeeper.ntp_error = 0; + ntp_clear(); + update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock, + timekeeper.mult); + + write_sequnlock_irqrestore(&xtime_lock, flags); + + /* signal hrtimers about time change */ + clock_was_set(); +} + + /** * timekeeping_resume - Resumes the generic timekeeping subsystem. * @@ -615,9 +667,7 @@ static void timekeeping_resume(void) if (timespec_compare(&ts, &timekeeping_suspend_time) > 0) { ts = timespec_sub(ts, timekeeping_suspend_time); - xtime = timespec_add(xtime, ts); - wall_to_monotonic = timespec_sub(wall_to_monotonic, ts); - total_sleep_time = timespec_add(total_sleep_time, ts); + __timekeeping_inject_sleeptime(&ts); } /* re-base the last cycle value */ timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); -- cgit v1.2.3-71-gd317 From 88d19cf37952a7e1e38b2bf87a00f0e857e63180 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Mon, 3 Jan 2011 18:59:43 -0800 Subject: timers: Add rb_init_node() to allow for stack allocated rb nodes In cases where a timerqueue_node or some structure that utilizes a timerqueue_node is allocated on the stack, gcc would give warnings caused by the timerqueue_init()'s calling RB_CLEAR_NODE, which self-references the nodes uninitialized data. The solution is to create an rb_init_node() function that zeros the rb_node structure out and then calls RB_CLEAR_NODE(), and then call the new init function from timerqueue_init(). 
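A hedged illustration of the case being fixed (the surrounding function is invented for the example; timerqueue_init(), timerqueue_add()/timerqueue_del() and the expires field are the existing API): a timerqueue_node living on the stack can now be initialized without RB_CLEAR_NODE() reading the node's uninitialized parent pointer.

#include <linux/timerqueue.h>
#include <linux/ktime.h>

static void example_stack_timer(struct timerqueue_head *head, ktime_t expires)
{
        struct timerqueue_node node;    /* lives on the stack */

        timerqueue_init(&node);         /* now zeroes via rb_init_node() */
        node.expires = expires;
        timerqueue_add(head, &node);

        /* ... wait for / handle expiry ... */

        /* must not leave this frame while the node is still queued */
        timerqueue_del(head, &node);
}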
CC: Thomas Gleixner Acked-by: Arnd Bergmann Signed-off-by: John Stultz --- include/linux/rbtree.h | 8 ++++++++ include/linux/timerqueue.h | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h index 7066acb2c530..033b507b33b1 100644 --- a/include/linux/rbtree.h +++ b/include/linux/rbtree.h @@ -136,6 +136,14 @@ static inline void rb_set_color(struct rb_node *rb, int color) #define RB_EMPTY_NODE(node) (rb_parent(node) == node) #define RB_CLEAR_NODE(node) (rb_set_parent(node, node)) +static inline void rb_init_node(struct rb_node *rb) +{ + rb->rb_parent_color = 0; + rb->rb_right = NULL; + rb->rb_left = NULL; + RB_CLEAR_NODE(rb); +} + extern void rb_insert_color(struct rb_node *, struct rb_root *); extern void rb_erase(struct rb_node *, struct rb_root *); diff --git a/include/linux/timerqueue.h b/include/linux/timerqueue.h index a520fd70a59f..5088727478fd 100644 --- a/include/linux/timerqueue.h +++ b/include/linux/timerqueue.h @@ -39,7 +39,7 @@ struct timerqueue_node *timerqueue_getnext(struct timerqueue_head *head) static inline void timerqueue_init(struct timerqueue_node *node) { - RB_CLEAR_NODE(&node->node); + rb_init_node(&node->node); } static inline void timerqueue_init_head(struct timerqueue_head *head) -- cgit v1.2.3-71-gd317 From ff3ead96d17f47ee70c294a5cc2cce9b61e82f0f Mon Sep 17 00:00:00 2001 From: John Stultz Date: Tue, 11 Jan 2011 09:42:13 -0800 Subject: timers: Introduce in-kernel alarm-timer interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This provides the in kernel interface and infrastructure for alarm-timers. Alarm-timers are a hybrid style timer, similar to hrtimers, but when the system is suspended, the RTC device is set to fire and wake the system for when the soonest alarm-timer expires. The concept for Alarm-timers was inspired by the Android Alarm driver (by Arve HjønnevÃ¥g) found in the Android kernel tree. See: http://android.git.kernel.org/?p=kernel/common.git;a=blob;f=drivers/rtc/alarm.c;h=1250edfbdf3302f5e4ea6194847c6ef4bb7beb1c;hb=android-2.6.36 This in-kernel interface should be fairly compatible with the Android alarm driver in-kernel interface, but has the advantage of utilizing the new RTC timerqueue code instead of doing direct RTC manipulation. 
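A hedged usage sketch of the in-kernel interface added below (the callback, the arm/disarm wrappers and the 10-second period are invented for illustration; alarm_init()/alarm_start()/alarm_cancel() and ALARM_REALTIME are the ones this patch declares in <linux/alarmtimer.h>):

#include <linux/alarmtimer.h>
#include <linux/ktime.h>
#include <linux/kernel.h>

static struct alarm demo_alarm;

static void demo_alarm_fired(struct alarm *alarm)
{
        pr_info("alarmtimer demo: fired\n");    /* runs from the base's irqwork */
}

static void demo_alarm_arm(void)
{
        ktime_t period = ktime_set(10, 0);      /* recur every 10 seconds */
        ktime_t start = ktime_add(ktime_get_real(), period);

        alarm_init(&demo_alarm, ALARM_REALTIME, demo_alarm_fired);
        alarm_start(&demo_alarm, start, period);
}

static void demo_alarm_disarm(void)
{
        alarm_cancel(&demo_alarm);
}

Per the suspend hook in this patch, if the system suspends while such an alarm is pending, the soonest expiry is programmed into the RTC so the box wakes in time to service it.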
CC: Arve HjønnevÃ¥g CC: Thomas Gleixner CC: Alessandro Zummo Acked-by: Arnd Bergmann Signed-off-by: John Stultz --- include/linux/alarmtimer.h | 30 ++++ kernel/time/Makefile | 2 +- kernel/time/alarmtimer.c | 375 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 406 insertions(+), 1 deletion(-) create mode 100644 include/linux/alarmtimer.h create mode 100644 kernel/time/alarmtimer.c (limited to 'include/linux') diff --git a/include/linux/alarmtimer.h b/include/linux/alarmtimer.h new file mode 100644 index 000000000000..6b364b2e2074 --- /dev/null +++ b/include/linux/alarmtimer.h @@ -0,0 +1,30 @@ +#ifndef _LINUX_ALARMTIMER_H +#define _LINUX_ALARMTIMER_H + +#include +#include +#include +#include + +enum alarmtimer_type { + ALARM_REALTIME, + ALARM_BOOTTIME, + + ALARM_NUMTYPE, +}; + +struct alarm { + struct timerqueue_node node; + ktime_t period; + void (*function)(struct alarm *); + enum alarmtimer_type type; + char enabled; + void *data; +}; + +void alarm_init(struct alarm *alarm, enum alarmtimer_type type, + void (*function)(struct alarm *)); +void alarm_start(struct alarm *alarm, ktime_t start, ktime_t period); +void alarm_cancel(struct alarm *alarm); + +#endif diff --git a/kernel/time/Makefile b/kernel/time/Makefile index b0425991e9ac..e2fd74b8e8c2 100644 --- a/kernel/time/Makefile +++ b/kernel/time/Makefile @@ -1,5 +1,5 @@ obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o timecompare.o -obj-y += timeconv.o posix-clock.o +obj-y += timeconv.o posix-clock.o alarmtimer.o obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c new file mode 100644 index 000000000000..48c2ee949e61 --- /dev/null +++ b/kernel/time/alarmtimer.c @@ -0,0 +1,375 @@ +/* + * Alarmtimer interface + * + * This interface provides a timer which is similarto hrtimers, + * but triggers a RTC alarm if the box is suspend. + * + * This interface is influenced by the Android RTC Alarm timer + * interface. + * + * Copyright (C) 2010 IBM Corperation + * + * Author: John Stultz + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +static struct alarm_base { + spinlock_t lock; + struct timerqueue_head timerqueue; + struct hrtimer timer; + ktime_t (*gettime)(void); + clockid_t base_clockid; + struct work_struct irqwork; +} alarm_bases[ALARM_NUMTYPE]; + +static struct rtc_timer rtctimer; +static struct rtc_device *rtcdev; + +static ktime_t freezer_delta; +static DEFINE_SPINLOCK(freezer_delta_lock); + + +/************************************************************************** + * alarmtimer management code + */ + +/* + * alarmtimer_enqueue - Adds an alarm timer to an alarm_base timerqueue + * @base: pointer to the base where the timer is being run + * @alarm: pointer to alarm being enqueued. + * + * Adds alarm to a alarm_base timerqueue and if necessary sets + * an hrtimer to run. + * + * Must hold base->lock when calling. 
+ */ +static void alarmtimer_enqueue(struct alarm_base *base, struct alarm *alarm) +{ + timerqueue_add(&base->timerqueue, &alarm->node); + if (&alarm->node == timerqueue_getnext(&base->timerqueue)) { + hrtimer_try_to_cancel(&base->timer); + hrtimer_start(&base->timer, alarm->node.expires, + HRTIMER_MODE_ABS); + } +} + +/* + * alarmtimer_remove - Removes an alarm timer from an alarm_base timerqueue + * @base: pointer to the base where the timer is running + * @alarm: pointer to alarm being removed + * + * Removes alarm to a alarm_base timerqueue and if necessary sets + * a new timer to run. + * + * Must hold base->lock when calling. + */ +static void alarmtimer_remove(struct alarm_base *base, struct alarm *alarm) +{ + struct timerqueue_node *next = timerqueue_getnext(&base->timerqueue); + + timerqueue_del(&base->timerqueue, &alarm->node); + if (next == &alarm->node) { + hrtimer_try_to_cancel(&base->timer); + next = timerqueue_getnext(&base->timerqueue); + if (!next) + return; + hrtimer_start(&base->timer, next->expires, HRTIMER_MODE_ABS); + } +} + +/* + * alarmtimer_do_work - Handles alarm being fired. + * @work: pointer to workqueue being run + * + * When a timer fires, this runs through the timerqueue to see + * which alarm timers, and run those that expired. If there are + * more alarm timers queued, we set the hrtimer to fire in the + * future. + */ +void alarmtimer_do_work(struct work_struct *work) +{ + struct alarm_base *base = container_of(work, struct alarm_base, + irqwork); + struct timerqueue_node *next; + unsigned long flags; + ktime_t now; + + spin_lock_irqsave(&base->lock, flags); + now = base->gettime(); + while ((next = timerqueue_getnext(&base->timerqueue))) { + struct alarm *alarm; + ktime_t expired = next->expires; + + if (expired.tv64 >= now.tv64) + break; + + alarm = container_of(next, struct alarm, node); + + timerqueue_del(&base->timerqueue, &alarm->node); + alarm->enabled = 0; + /* Re-add periodic timers */ + if (alarm->period.tv64) { + alarm->node.expires = ktime_add(expired, alarm->period); + timerqueue_add(&base->timerqueue, &alarm->node); + alarm->enabled = 1; + } + spin_unlock_irqrestore(&base->lock, flags); + if (alarm->function) + alarm->function(alarm); + spin_lock_irqsave(&base->lock, flags); + } + + if (next) { + hrtimer_start(&base->timer, next->expires, + HRTIMER_MODE_ABS); + } + spin_unlock_irqrestore(&base->lock, flags); +} + + +/* + * alarmtimer_fired - Handles alarm hrtimer being fired. + * @timer: pointer to hrtimer being run + * + * When a timer fires, this schedules the do_work function to + * be run. + */ +static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer) +{ + struct alarm_base *base = container_of(timer, struct alarm_base, timer); + schedule_work(&base->irqwork); + return HRTIMER_NORESTART; +} + + +/* + * alarmtimer_suspend - Suspend time callback + * @dev: unused + * @state: unused + * + * When we are going into suspend, we look through the bases + * to see which is the soonest timer to expire. We then + * set an rtc timer to fire that far into the future, which + * will wake us from suspend. 
+ */ +static int alarmtimer_suspend(struct device *dev) +{ + struct rtc_time tm; + ktime_t min, now; + unsigned long flags; + int i; + + spin_lock_irqsave(&freezer_delta_lock, flags); + min = freezer_delta; + freezer_delta = ktime_set(0, 0); + spin_unlock_irqrestore(&freezer_delta_lock, flags); + + /* If we have no rtcdev, just return */ + if (!rtcdev) + return 0; + + /* Find the soonest timer to expire*/ + for (i = 0; i < ALARM_NUMTYPE; i++) { + struct alarm_base *base = &alarm_bases[i]; + struct timerqueue_node *next; + ktime_t delta; + + spin_lock_irqsave(&base->lock, flags); + next = timerqueue_getnext(&base->timerqueue); + spin_unlock_irqrestore(&base->lock, flags); + if (!next) + continue; + delta = ktime_sub(next->expires, base->gettime()); + if (!min.tv64 || (delta.tv64 < min.tv64)) + min = delta; + } + if (min.tv64 == 0) + return 0; + + /* XXX - Should we enforce a minimum sleep time? */ + WARN_ON(min.tv64 < NSEC_PER_SEC); + + /* Setup an rtc timer to fire that far in the future */ + rtc_timer_cancel(rtcdev, &rtctimer); + rtc_read_time(rtcdev, &tm); + now = rtc_tm_to_ktime(tm); + now = ktime_add(now, min); + + rtc_timer_start(rtcdev, &rtctimer, now, ktime_set(0, 0)); + + return 0; +} + + +/************************************************************************** + * alarm kernel interface code + */ + +/* + * alarm_init - Initialize an alarm structure + * @alarm: ptr to alarm to be initialized + * @type: the type of the alarm + * @function: callback that is run when the alarm fires + * + * In-kernel interface to initializes the alarm structure. + */ +void alarm_init(struct alarm *alarm, enum alarmtimer_type type, + void (*function)(struct alarm *)) +{ + timerqueue_init(&alarm->node); + alarm->period = ktime_set(0, 0); + alarm->function = function; + alarm->type = type; + alarm->enabled = 0; +} + +/* + * alarm_start - Sets an alarm to fire + * @alarm: ptr to alarm to set + * @start: time to run the alarm + * @period: period at which the alarm will recur + * + * In-kernel interface set an alarm timer. + */ +void alarm_start(struct alarm *alarm, ktime_t start, ktime_t period) +{ + struct alarm_base *base = &alarm_bases[alarm->type]; + unsigned long flags; + + spin_lock_irqsave(&base->lock, flags); + if (alarm->enabled) + alarmtimer_remove(base, alarm); + alarm->node.expires = start; + alarm->period = period; + alarmtimer_enqueue(base, alarm); + alarm->enabled = 1; + spin_unlock_irqrestore(&base->lock, flags); +} + +/* + * alarm_cancel - Tries to cancel an alarm timer + * @alarm: ptr to alarm to be canceled + * + * In-kernel interface to cancel an alarm timer. + */ +void alarm_cancel(struct alarm *alarm) +{ + struct alarm_base *base = &alarm_bases[alarm->type]; + unsigned long flags; + + spin_lock_irqsave(&base->lock, flags); + if (alarm->enabled) + alarmtimer_remove(base, alarm); + alarm->enabled = 0; + spin_unlock_irqrestore(&base->lock, flags); +} + + + +/************************************************************************** + * alarmtimer initialization code + */ + +/* Suspend hook structures */ +static const struct dev_pm_ops alarmtimer_pm_ops = { + .suspend = alarmtimer_suspend, +}; + +static struct platform_driver alarmtimer_driver = { + .driver = { + .name = "alarmtimer", + .pm = &alarmtimer_pm_ops, + } +}; + +/** + * alarmtimer_init - Initialize alarm timer code + * + * This function initializes the alarm bases and registers + * the posix clock ids. 
+ */ +static int __init alarmtimer_init(void) +{ + int error = 0; + int i; + + /* Initialize alarm bases */ + alarm_bases[ALARM_REALTIME].base_clockid = CLOCK_REALTIME; + alarm_bases[ALARM_REALTIME].gettime = &ktime_get_real; + alarm_bases[ALARM_BOOTTIME].base_clockid = CLOCK_BOOTTIME; + alarm_bases[ALARM_BOOTTIME].gettime = &ktime_get_boottime; + for (i = 0; i < ALARM_NUMTYPE; i++) { + timerqueue_init_head(&alarm_bases[i].timerqueue); + spin_lock_init(&alarm_bases[i].lock); + hrtimer_init(&alarm_bases[i].timer, + alarm_bases[i].base_clockid, + HRTIMER_MODE_ABS); + alarm_bases[i].timer.function = alarmtimer_fired; + INIT_WORK(&alarm_bases[i].irqwork, alarmtimer_do_work); + } + error = platform_driver_register(&alarmtimer_driver); + platform_device_register_simple("alarmtimer", -1, NULL, 0); + + return error; +} +device_initcall(alarmtimer_init); + +/** + * has_wakealarm - check rtc device has wakealarm ability + * @dev: current device + * @name_ptr: name to be returned + * + * This helper function checks to see if the rtc device can wake + * from suspend. + */ +static int __init has_wakealarm(struct device *dev, void *name_ptr) +{ + struct rtc_device *candidate = to_rtc_device(dev); + + if (!candidate->ops->set_alarm) + return 0; + if (!device_may_wakeup(candidate->dev.parent)) + return 0; + + *(const char **)name_ptr = dev_name(dev); + return 1; +} + +/** + * alarmtimer_init_late - Late initializing of alarmtimer code + * + * This function locates a rtc device to use for wakealarms. + * Run as late_initcall to make sure rtc devices have been + * registered. + */ +static int __init alarmtimer_init_late(void) +{ + char *str; + + /* Find an rtc device and init the rtc_timer */ + class_find_device(rtc_class, NULL, &str, has_wakealarm); + if (str) + rtcdev = rtc_class_open(str); + if (!rtcdev) { + printk(KERN_WARNING "No RTC device found, ALARM timers will" + " not wake from suspend"); + } + rtc_timer_init(&rtctimer, NULL, NULL); + + return 0; +} +late_initcall(alarmtimer_init_late); -- cgit v1.2.3-71-gd317 From 9a7adcf5c6dea63d2e47e6f6d2f7a6c9f48b9337 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Tue, 11 Jan 2011 09:54:33 -0800 Subject: timers: Posix interface for alarm-timers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch exposes alarm-timers to userland via the posix clock and timers interface, using two new clockids: CLOCK_REALTIME_ALARM and CLOCK_BOOTTIME_ALARM. Both clockids behave identically to CLOCK_REALTIME and CLOCK_BOOTTIME, respectively, but timers set against the _ALARM suffixed clockids will wake the system if it is suspended. Some background can be found here: https://lwn.net/Articles/429925/ The concept for Alarm-timers was inspired by the Android Alarm driver (by Arve HjønnevÃ¥g) found in the Android kernel tree. See: http://android.git.kernel.org/?p=kernel/common.git;a=blob;f=drivers/rtc/alarm.c;h=1250edfbdf3302f5e4ea6194847c6ef4bb7beb1c;hb=android-2.6.36 While the in-kernel interface is pretty similar between alarm-timers and Android alarm driver, the user-space interface for the Android alarm driver is via ioctls to a new char device. As mentioned above, I've instead chosen to export this functionality via the posix interface, as it seemed a little simpler and avoids creating duplicate interfaces to things like CLOCK_REALTIME and CLOCK_MONOTONIC under alternate names (ie:ANDROID_ALARM_RTC and ANDROID_ALARM_SYSTEMTIME). The semantics of the Android alarm driver are different from what this posix interface provides. 
For instance, threads other then the thread waiting on the Android alarm driver are able to modify the alarm being waited on. Also this interface does not allow the same wakelock semantics that the Android driver provides (ie: kernel takes a wakelock on RTC alarm-interupt, and holds it through process wakeup, and while the process runs, until the process either closes the char device or calls back in to wait on a new alarm). One potential way to implement similar semantics may be via the timerfd infrastructure, but this needs more research. There may also need to be some sort of sysfs system level policy hooks that allow alarm timers to be disabled to keep them from firing at inappropriate times (ie: laptop in a well insulated bag, mid-flight). CC: Arve HjønnevÃ¥g CC: Thomas Gleixner CC: Alessandro Zummo Acked-by: Arnd Bergmann Signed-off-by: John Stultz --- include/linux/capability.h | 7 +- include/linux/posix-timers.h | 2 + include/linux/time.h | 2 + kernel/time/alarmtimer.c | 330 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 340 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/capability.h b/include/linux/capability.h index 16ee8b49a200..7cb23eae693d 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -355,7 +355,12 @@ struct cpu_vfs_cap_data { #define CAP_SYSLOG 34 -#define CAP_LAST_CAP CAP_SYSLOG +/* Allow triggering something that will wake the system */ + +#define CAP_WAKE_ALARM 35 + + +#define CAP_LAST_CAP CAP_WAKE_ALARM #define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP) diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index d51243ae0726..808227d40a64 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -5,6 +5,7 @@ #include #include #include +#include union cpu_time_count { cputime_t cpu; @@ -80,6 +81,7 @@ struct k_itimer { unsigned long incr; unsigned long expires; } mmtimer; + struct alarm alarmtimer; } it; }; diff --git a/include/linux/time.h b/include/linux/time.h index 4ea5a75fcacd..b3061782dec3 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -295,6 +295,8 @@ struct itimerval { #define CLOCK_REALTIME_COARSE 5 #define CLOCK_MONOTONIC_COARSE 6 #define CLOCK_BOOTTIME 7 +#define CLOCK_REALTIME_ALARM 8 +#define CLOCK_BOOTTIME_ALARM 9 /* * The IDs of various hardware clocks: diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 48c2ee949e61..4058ad79d55f 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -215,6 +215,21 @@ static int alarmtimer_suspend(struct device *dev) } +static void alarmtimer_freezerset(ktime_t absexp, enum alarmtimer_type type) +{ + ktime_t delta; + unsigned long flags; + struct alarm_base *base = &alarm_bases[type]; + + delta = ktime_sub(absexp, base->gettime()); + + spin_lock_irqsave(&freezer_delta_lock, flags); + if (!freezer_delta.tv64 || (delta.tv64 < freezer_delta.tv64)) + freezer_delta = delta; + spin_unlock_irqrestore(&freezer_delta_lock, flags); +} + + /************************************************************************** * alarm kernel interface code */ @@ -279,6 +294,309 @@ void alarm_cancel(struct alarm *alarm) } +/************************************************************************** + * alarm posix interface code + */ + +/* + * clock2alarm - helper that converts from clockid to alarmtypes + * @clockid: clockid. 
+ * + * Helper function that converts from clockids to alarmtypes + */ +static enum alarmtimer_type clock2alarm(clockid_t clockid) +{ + if (clockid == CLOCK_REALTIME_ALARM) + return ALARM_REALTIME; + if (clockid == CLOCK_BOOTTIME_ALARM) + return ALARM_BOOTTIME; + return -1; +} + +/* + * alarm_handle_timer - Callback for posix timers + * @alarm: alarm that fired + * + * Posix timer callback for expired alarm timers. + */ +static void alarm_handle_timer(struct alarm *alarm) +{ + struct k_itimer *ptr = container_of(alarm, struct k_itimer, + it.alarmtimer); + if (posix_timer_event(ptr, 0) != 0) + ptr->it_overrun++; +} + +/* + * alarm_clock_getres - posix getres interface + * @which_clock: clockid + * @tp: timespec to fill + * + * Returns the granularity of underlying alarm base clock + */ +static int alarm_clock_getres(const clockid_t which_clock, struct timespec *tp) +{ + clockid_t baseid = alarm_bases[clock2alarm(which_clock)].base_clockid; + + return hrtimer_get_res(baseid, tp); +} + +/** + * alarm_clock_get - posix clock_get interface + * @which_clock: clockid + * @tp: timespec to fill. + * + * Provides the underlying alarm base time. + */ +static int alarm_clock_get(clockid_t which_clock, struct timespec *tp) +{ + struct alarm_base *base = &alarm_bases[clock2alarm(which_clock)]; + + *tp = ktime_to_timespec(base->gettime()); + return 0; +} + +/** + * alarm_timer_create - posix timer_create interface + * @new_timer: k_itimer pointer to manage + * + * Initializes the k_itimer structure. + */ +static int alarm_timer_create(struct k_itimer *new_timer) +{ + enum alarmtimer_type type; + struct alarm_base *base; + + if (!capable(CAP_WAKE_ALARM)) + return -EPERM; + + type = clock2alarm(new_timer->it_clock); + base = &alarm_bases[type]; + alarm_init(&new_timer->it.alarmtimer, type, alarm_handle_timer); + return 0; +} + +/** + * alarm_timer_get - posix timer_get interface + * @new_timer: k_itimer pointer + * @cur_setting: itimerspec data to fill + * + * Copies the itimerspec data out from the k_itimer + */ +static void alarm_timer_get(struct k_itimer *timr, + struct itimerspec *cur_setting) +{ + cur_setting->it_interval = + ktime_to_timespec(timr->it.alarmtimer.period); + cur_setting->it_value = + ktime_to_timespec(timr->it.alarmtimer.node.expires); + return; +} + +/** + * alarm_timer_del - posix timer_del interface + * @timr: k_itimer pointer to be deleted + * + * Cancels any programmed alarms for the given timer. + */ +static int alarm_timer_del(struct k_itimer *timr) +{ + alarm_cancel(&timr->it.alarmtimer); + return 0; +} + +/** + * alarm_timer_set - posix timer_set interface + * @timr: k_itimer pointer to be deleted + * @flags: timer flags + * @new_setting: itimerspec to be used + * @old_setting: itimerspec being replaced + * + * Sets the timer to new_setting, and starts the timer. 
+ */ +static int alarm_timer_set(struct k_itimer *timr, int flags, + struct itimerspec *new_setting, + struct itimerspec *old_setting) +{ + /* Save old values */ + old_setting->it_interval = + ktime_to_timespec(timr->it.alarmtimer.period); + old_setting->it_value = + ktime_to_timespec(timr->it.alarmtimer.node.expires); + + /* If the timer was already set, cancel it */ + alarm_cancel(&timr->it.alarmtimer); + + /* start the timer */ + alarm_start(&timr->it.alarmtimer, + timespec_to_ktime(new_setting->it_value), + timespec_to_ktime(new_setting->it_interval)); + return 0; +} + +/** + * alarmtimer_nsleep_wakeup - Wakeup function for alarm_timer_nsleep + * @alarm: ptr to alarm that fired + * + * Wakes up the task that set the alarmtimer + */ +static void alarmtimer_nsleep_wakeup(struct alarm *alarm) +{ + struct task_struct *task = (struct task_struct *)alarm->data; + + alarm->data = NULL; + if (task) + wake_up_process(task); +} + +/** + * alarmtimer_do_nsleep - Internal alarmtimer nsleep implementation + * @alarm: ptr to alarmtimer + * @absexp: absolute expiration time + * + * Sets the alarm timer and sleeps until it is fired or interrupted. + */ +static int alarmtimer_do_nsleep(struct alarm *alarm, ktime_t absexp) +{ + alarm->data = (void *)current; + do { + set_current_state(TASK_INTERRUPTIBLE); + alarm_start(alarm, absexp, ktime_set(0, 0)); + if (likely(alarm->data)) + schedule(); + + alarm_cancel(alarm); + } while (alarm->data && !signal_pending(current)); + + __set_current_state(TASK_RUNNING); + + return (alarm->data == NULL); +} + + +/** + * update_rmtp - Update remaining timespec value + * @exp: expiration time + * @type: timer type + * @rmtp: user pointer to remaining timepsec value + * + * Helper function that fills in rmtp value with time between + * now and the exp value + */ +static int update_rmtp(ktime_t exp, enum alarmtimer_type type, + struct timespec __user *rmtp) +{ + struct timespec rmt; + ktime_t rem; + + rem = ktime_sub(exp, alarm_bases[type].gettime()); + + if (rem.tv64 <= 0) + return 0; + rmt = ktime_to_timespec(rem); + + if (copy_to_user(rmtp, &rmt, sizeof(*rmtp))) + return -EFAULT; + + return 1; + +} + +/** + * alarm_timer_nsleep_restart - restartblock alarmtimer nsleep + * @restart: ptr to restart block + * + * Handles restarted clock_nanosleep calls + */ +static long __sched alarm_timer_nsleep_restart(struct restart_block *restart) +{ + enum alarmtimer_type type = restart->nanosleep.index; + ktime_t exp; + struct timespec __user *rmtp; + struct alarm alarm; + int ret = 0; + + exp.tv64 = restart->nanosleep.expires; + alarm_init(&alarm, type, alarmtimer_nsleep_wakeup); + + if (alarmtimer_do_nsleep(&alarm, exp)) + goto out; + + if (freezing(current)) + alarmtimer_freezerset(exp, type); + + rmtp = restart->nanosleep.rmtp; + if (rmtp) { + ret = update_rmtp(exp, type, rmtp); + if (ret <= 0) + goto out; + } + + + /* The other values in restart are already filled in */ + ret = -ERESTART_RESTARTBLOCK; +out: + return ret; +} + +/** + * alarm_timer_nsleep - alarmtimer nanosleep + * @which_clock: clockid + * @flags: determins abstime or relative + * @tsreq: requested sleep time (abs or rel) + * @rmtp: remaining sleep time saved + * + * Handles clock_nanosleep calls against _ALARM clockids + */ +static int alarm_timer_nsleep(const clockid_t which_clock, int flags, + struct timespec *tsreq, struct timespec __user *rmtp) +{ + enum alarmtimer_type type = clock2alarm(which_clock); + struct alarm alarm; + ktime_t exp; + int ret = 0; + struct restart_block *restart; + + if 
(!capable(CAP_WAKE_ALARM)) + return -EPERM; + + alarm_init(&alarm, type, alarmtimer_nsleep_wakeup); + + exp = timespec_to_ktime(*tsreq); + /* Convert (if necessary) to absolute time */ + if (flags != TIMER_ABSTIME) { + ktime_t now = alarm_bases[type].gettime(); + exp = ktime_add(now, exp); + } + + if (alarmtimer_do_nsleep(&alarm, exp)) + goto out; + + if (freezing(current)) + alarmtimer_freezerset(exp, type); + + /* abs timers don't set remaining time or restart */ + if (flags == TIMER_ABSTIME) { + ret = -ERESTARTNOHAND; + goto out; + } + + if (rmtp) { + ret = update_rmtp(exp, type, rmtp); + if (ret <= 0) + goto out; + } + + restart = ¤t_thread_info()->restart_block; + restart->fn = alarm_timer_nsleep_restart; + restart->nanosleep.index = type; + restart->nanosleep.expires = exp.tv64; + restart->nanosleep.rmtp = rmtp; + ret = -ERESTART_RESTARTBLOCK; + +out: + return ret; +} /************************************************************************** * alarmtimer initialization code @@ -306,6 +624,18 @@ static int __init alarmtimer_init(void) { int error = 0; int i; + struct k_clock alarm_clock = { + .clock_getres = alarm_clock_getres, + .clock_get = alarm_clock_get, + .timer_create = alarm_timer_create, + .timer_set = alarm_timer_set, + .timer_del = alarm_timer_del, + .timer_get = alarm_timer_get, + .nsleep = alarm_timer_nsleep, + }; + + posix_timers_register_clock(CLOCK_REALTIME_ALARM, &alarm_clock); + posix_timers_register_clock(CLOCK_BOOTTIME_ALARM, &alarm_clock); /* Initialize alarm bases */ alarm_bases[ALARM_REALTIME].base_clockid = CLOCK_REALTIME; -- cgit v1.2.3-71-gd317 From 28331a46d88459788c8fca72dbb0415cd7f514c9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 27 Apr 2011 13:47:52 -0400 Subject: NFSv4: Ensure we request the ordinary fileid when doing readdirplus When readdir() returns a directory entry for the root of a mounted filesystem, Linux follows the old convention of returning the inode number of the covered directory (despite newer versions of POSIX declaring that this is a bug). To ensure this continues to work, the NFSv4 readdir implementation requests the 'mounted-on-fileid' from the server. However, readdirplus also needs to instantiate an inode for this entry, and for that, we also need to request the real fileid as per this patch. 
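Distilled from the decode changes below (the helper name is invented; the fields and flags follow the patch), the resulting preference when filling in a directory entry's inode number is:

static void example_pick_dirent_ino(struct nfs_entry *entry)
{
        /* keep the historical d_ino for covered mountpoints... */
        if (entry->fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID)
                entry->ino = entry->fattr->mounted_on_fileid;
        /* ...while the real fileid is still requested and decoded, so
         * readdirplus can instantiate an inode for the entry */
        else if (entry->fattr->valid & NFS_ATTR_FATTR_FILEID)
                entry->ino = entry->fattr->fileid;
}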
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 31 ++++++++++++++----------------- include/linux/nfs_xdr.h | 2 ++ 2 files changed, 16 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index ba952bdc4d62..7310d2ec5de8 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1452,26 +1452,25 @@ static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr) { - uint32_t attrs[2] = {0, 0}; + uint32_t attrs[2] = { + FATTR4_WORD0_RDATTR_ERROR, + FATTR4_WORD1_MOUNTED_ON_FILEID, + }; uint32_t dircount = readdir->count >> 1; __be32 *p; if (readdir->plus) { attrs[0] |= FATTR4_WORD0_TYPE|FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE| - FATTR4_WORD0_FSID|FATTR4_WORD0_FILEHANDLE; + FATTR4_WORD0_FSID|FATTR4_WORD0_FILEHANDLE|FATTR4_WORD0_FILEID; attrs[1] |= FATTR4_WORD1_MODE|FATTR4_WORD1_NUMLINKS|FATTR4_WORD1_OWNER| FATTR4_WORD1_OWNER_GROUP|FATTR4_WORD1_RAWDEV| FATTR4_WORD1_SPACE_USED|FATTR4_WORD1_TIME_ACCESS| FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; dircount >>= 1; } - attrs[0] |= FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID; - attrs[1] |= FATTR4_WORD1_MOUNTED_ON_FILEID; - /* Switch to mounted_on_fileid if the server supports it */ - if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID) - attrs[0] &= ~FATTR4_WORD0_FILEID; - else - attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; + /* Use mounted_on_fileid only if the server supports it */ + if (!(readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)) + attrs[0] |= FATTR4_WORD0_FILEID; p = reserve_space(xdr, 12+NFS4_VERIFIER_SIZE+20); *p++ = cpu_to_be32(OP_READDIR); @@ -3140,7 +3139,7 @@ static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitma goto out_overflow; xdr_decode_hyper(p, fileid); bitmap[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; - ret = NFS_ATTR_FATTR_FILEID; + ret = NFS_ATTR_FATTR_MOUNTED_ON_FILEID; } dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid); return ret; @@ -4002,7 +4001,6 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, { int status; umode_t fmode = 0; - uint64_t fileid; uint32_t type; status = decode_attr_type(xdr, bitmap, &type); @@ -4101,13 +4099,10 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, goto xdr_error; fattr->valid |= status; - status = decode_attr_mounted_on_fileid(xdr, bitmap, &fileid); + status = decode_attr_mounted_on_fileid(xdr, bitmap, &fattr->mounted_on_fileid); if (status < 0) goto xdr_error; - if (status != 0 && !(fattr->valid & status)) { - fattr->fileid = fileid; - fattr->valid |= status; - } + fattr->valid |= status; xdr_error: dprintk("%s: xdr returned %d\n", __func__, -status); @@ -6411,7 +6406,9 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh, entry->server, 1) < 0) goto out_overflow; - if (entry->fattr->valid & NFS_ATTR_FATTR_FILEID) + if (entry->fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) + entry->ino = entry->fattr->mounted_on_fileid; + else if (entry->fattr->valid & NFS_ATTR_FATTR_FILEID) entry->ino = entry->fattr->fileid; entry->d_type = DT_UNKNOWN; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 78b101e487ea..890dce242639 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -50,6 +50,7 @@ struct nfs_fattr { } du; struct nfs_fsid fsid; __u64 fileid; + 
__u64 mounted_on_fileid; struct timespec atime; struct timespec mtime; struct timespec ctime; @@ -83,6 +84,7 @@ struct nfs_fattr { #define NFS_ATTR_FATTR_PRECHANGE (1U << 18) #define NFS_ATTR_FATTR_V4_REFERRAL (1U << 19) /* NFSv4 referral */ #define NFS_ATTR_FATTR_MOUNTPOINT (1U << 20) /* Treat as mountpoint */ +#define NFS_ATTR_FATTR_MOUNTED_ON_FILEID (1U << 21) #define NFS_ATTR_FATTR (NFS_ATTR_FATTR_TYPE \ | NFS_ATTR_FATTR_MODE \ -- cgit v1.2.3-71-gd317 From 26fc8775b51484d8c0a671198639c6d5ae60533e Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Fri, 15 Apr 2011 20:08:19 +0200 Subject: mmc: fix a race between card-detect rescan and clock-gate work instances Currently there is a race in the MMC core between a card-detect rescan work and the clock-gating work, scheduled from a command completion. Fix it by removing the dedicated clock-gating mutex and using the MMC standard locking mechanism instead. Signed-off-by: Guennadi Liakhovetski Cc: Simon Horman Cc: Magnus Damm Acked-by: Linus Walleij Cc: Signed-off-by: Chris Ball --- drivers/mmc/core/host.c | 9 ++++----- include/linux/mmc/host.h | 1 - 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index 461e6a17fb90..2b200c1cfbba 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -94,7 +94,7 @@ static void mmc_host_clk_gate_delayed(struct mmc_host *host) spin_unlock_irqrestore(&host->clk_lock, flags); return; } - mutex_lock(&host->clk_gate_mutex); + mmc_claim_host(host); spin_lock_irqsave(&host->clk_lock, flags); if (!host->clk_requests) { spin_unlock_irqrestore(&host->clk_lock, flags); @@ -104,7 +104,7 @@ static void mmc_host_clk_gate_delayed(struct mmc_host *host) pr_debug("%s: gated MCI clock\n", mmc_hostname(host)); } spin_unlock_irqrestore(&host->clk_lock, flags); - mutex_unlock(&host->clk_gate_mutex); + mmc_release_host(host); } /* @@ -130,7 +130,7 @@ void mmc_host_clk_ungate(struct mmc_host *host) { unsigned long flags; - mutex_lock(&host->clk_gate_mutex); + mmc_claim_host(host); spin_lock_irqsave(&host->clk_lock, flags); if (host->clk_gated) { spin_unlock_irqrestore(&host->clk_lock, flags); @@ -140,7 +140,7 @@ void mmc_host_clk_ungate(struct mmc_host *host) } host->clk_requests++; spin_unlock_irqrestore(&host->clk_lock, flags); - mutex_unlock(&host->clk_gate_mutex); + mmc_release_host(host); } /** @@ -215,7 +215,6 @@ static inline void mmc_host_clk_init(struct mmc_host *host) host->clk_gated = false; INIT_WORK(&host->clk_gate_work, mmc_host_clk_gate_work); spin_lock_init(&host->clk_lock); - mutex_init(&host->clk_gate_mutex); } /** diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index bcb793ec7374..eb792cb6d745 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -183,7 +183,6 @@ struct mmc_host { struct work_struct clk_gate_work; /* delayed clock gate */ unsigned int clk_old; /* old clock value cache */ spinlock_t clk_lock; /* lock for clk fields */ - struct mutex clk_gate_mutex; /* mutex for clock gating */ #endif /* host specific block data */ -- cgit v1.2.3-71-gd317 From 0a14842f5a3c0e88a1e59fac5c3025db39721f74 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 20 Apr 2011 09:27:32 +0000 Subject: net: filter: Just In Time compiler for x86-64 In order to speedup packet filtering, here is an implementation of a JIT compiler for x86_64 It is disabled by default, and must be enabled by the admin. 
echo 1 >/proc/sys/net/core/bpf_jit_enable It uses module_alloc() and module_free() to get memory in the 2GB text kernel range since we call helpers functions from the generated code. EAX : BPF A accumulator EBX : BPF X accumulator RDI : pointer to skb (first argument given to JIT function) RBP : frame pointer (even if CONFIG_FRAME_POINTER=n) r9d : skb->len - skb->data_len (headlen) r8 : skb->data To get a trace of generated code, use : echo 2 >/proc/sys/net/core/bpf_jit_enable Example of generated code : # tcpdump -p -n -s 0 -i eth1 host 192.168.20.0/24 flen=18 proglen=147 pass=3 image=ffffffffa00b5000 JIT code: ffffffffa00b5000: 55 48 89 e5 48 83 ec 60 48 89 5d f8 44 8b 4f 60 JIT code: ffffffffa00b5010: 44 2b 4f 64 4c 8b 87 b8 00 00 00 be 0c 00 00 00 JIT code: ffffffffa00b5020: e8 24 7b f7 e0 3d 00 08 00 00 75 28 be 1a 00 00 JIT code: ffffffffa00b5030: 00 e8 fe 7a f7 e0 24 00 3d 00 14 a8 c0 74 49 be JIT code: ffffffffa00b5040: 1e 00 00 00 e8 eb 7a f7 e0 24 00 3d 00 14 a8 c0 JIT code: ffffffffa00b5050: 74 36 eb 3b 3d 06 08 00 00 74 07 3d 35 80 00 00 JIT code: ffffffffa00b5060: 75 2d be 1c 00 00 00 e8 c8 7a f7 e0 24 00 3d 00 JIT code: ffffffffa00b5070: 14 a8 c0 74 13 be 26 00 00 00 e8 b5 7a f7 e0 24 JIT code: ffffffffa00b5080: 00 3d 00 14 a8 c0 75 07 b8 ff ff 00 00 eb 02 31 JIT code: ffffffffa00b5090: c0 c9 c3 BPF program is 144 bytes long, so native program is almost same size ;) (000) ldh [12] (001) jeq #0x800 jt 2 jf 8 (002) ld [26] (003) and #0xffffff00 (004) jeq #0xc0a81400 jt 16 jf 5 (005) ld [30] (006) and #0xffffff00 (007) jeq #0xc0a81400 jt 16 jf 17 (008) jeq #0x806 jt 10 jf 9 (009) jeq #0x8035 jt 10 jf 17 (010) ld [28] (011) and #0xffffff00 (012) jeq #0xc0a81400 jt 16 jf 13 (013) ld [38] (014) and #0xffffff00 (015) jeq #0xc0a81400 jt 16 jf 17 (016) ret #65535 (017) ret #0 Signed-off-by: Eric Dumazet Cc: Arnaldo Carvalho de Melo Cc: Ben Hutchings Cc: Hagen Paul Pfeifer Signed-off-by: David S. Miller --- Documentation/sysctl/net.txt | 11 + MAINTAINERS | 1 + arch/x86/Kbuild | 1 + arch/x86/Kconfig | 1 + arch/x86/net/Makefile | 4 + arch/x86/net/bpf_jit.S | 140 +++++++++ arch/x86/net/bpf_jit_comp.c | 654 +++++++++++++++++++++++++++++++++++++++++++ include/linux/filter.h | 76 +++++ include/linux/netdevice.h | 1 + include/linux/skbuff.h | 2 +- net/Kconfig | 13 + net/core/filter.c | 65 +---- net/core/sysctl_net_core.c | 9 + net/packet/af_packet.c | 2 +- 14 files changed, 918 insertions(+), 62 deletions(-) create mode 100644 arch/x86/net/Makefile create mode 100644 arch/x86/net/bpf_jit.S create mode 100644 arch/x86/net/bpf_jit_comp.c (limited to 'include/linux') diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt index cbd05ffc606b..3201a7097e4d 100644 --- a/Documentation/sysctl/net.txt +++ b/Documentation/sysctl/net.txt @@ -32,6 +32,17 @@ Table : Subdirectories in /proc/sys/net 1. /proc/sys/net/core - Network core options ------------------------------------------------------- +bpf_jit_enable +-------------- + +This enables Berkeley Packet Filter Just in Time compiler. +Currently supported on x86_64 architecture, bpf_jit provides a framework +to speed packet filtering, the one used by tcpdump/libpcap for example. +Values : + 0 - disable the JIT (default value) + 1 - enable the JIT + 2 - enable the JIT and ask the compiler to emit traces on kernel log. 
+ rmem_default ------------ diff --git a/MAINTAINERS b/MAINTAINERS index b5266ad50167..17c0917a26ea 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4372,6 +4372,7 @@ S: Maintained F: net/ipv4/ F: net/ipv6/ F: include/net/ip* +F: arch/x86/net/* NETWORKING [LABELED] (NetLabel, CIPSO, Labeled IPsec, SECMARK) M: Paul Moore diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild index 0e103236b754..0e9dec6cadd1 100644 --- a/arch/x86/Kbuild +++ b/arch/x86/Kbuild @@ -15,3 +15,4 @@ obj-y += vdso/ obj-$(CONFIG_IA32_EMULATION) += ia32/ obj-y += platform/ +obj-y += net/ diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index cc6c53a95bfd..855a1bdc437d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -72,6 +72,7 @@ config X86 select IRQ_FORCED_THREADING select USE_GENERIC_SMP_HELPERS if SMP select ARCH_NO_SYSDEV_OPS + select HAVE_BPF_JIT if X86_64 config INSTRUCTION_DECODER def_bool (KPROBES || PERF_EVENTS) diff --git a/arch/x86/net/Makefile b/arch/x86/net/Makefile new file mode 100644 index 000000000000..90568c33ddb0 --- /dev/null +++ b/arch/x86/net/Makefile @@ -0,0 +1,4 @@ +# +# Arch-specific network modules +# +obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S new file mode 100644 index 000000000000..66870223f8c5 --- /dev/null +++ b/arch/x86/net/bpf_jit.S @@ -0,0 +1,140 @@ +/* bpf_jit.S : BPF JIT helper functions + * + * Copyright (C) 2011 Eric Dumazet (eric.dumazet@gmail.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ +#include +#include + +/* + * Calling convention : + * rdi : skb pointer + * esi : offset of byte(s) to fetch in skb (can be scratched) + * r8 : copy of skb->data + * r9d : hlen = skb->len - skb->data_len + */ +#define SKBDATA %r8 + +sk_load_word_ind: + .globl sk_load_word_ind + + add %ebx,%esi /* offset += X */ +# test %esi,%esi /* if (offset < 0) goto bpf_error; */ + js bpf_error + +sk_load_word: + .globl sk_load_word + + mov %r9d,%eax # hlen + sub %esi,%eax # hlen - offset + cmp $3,%eax + jle bpf_slow_path_word + mov (SKBDATA,%rsi),%eax + bswap %eax /* ntohl() */ + ret + + +sk_load_half_ind: + .globl sk_load_half_ind + + add %ebx,%esi /* offset += X */ + js bpf_error + +sk_load_half: + .globl sk_load_half + + mov %r9d,%eax + sub %esi,%eax # hlen - offset + cmp $1,%eax + jle bpf_slow_path_half + movzwl (SKBDATA,%rsi),%eax + rol $8,%ax # ntohs() + ret + +sk_load_byte_ind: + .globl sk_load_byte_ind + add %ebx,%esi /* offset += X */ + js bpf_error + +sk_load_byte: + .globl sk_load_byte + + cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte */ + jle bpf_slow_path_byte + movzbl (SKBDATA,%rsi),%eax + ret + +/** + * sk_load_byte_msh - BPF_S_LDX_B_MSH helper + * + * Implements BPF_S_LDX_B_MSH : ldxb 4*([offset]&0xf) + * Must preserve A accumulator (%eax) + * Inputs : %esi is the offset value, already known positive + */ +ENTRY(sk_load_byte_msh) + CFI_STARTPROC + cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte_msh */ + jle bpf_slow_path_byte_msh + movzbl (SKBDATA,%rsi),%ebx + and $15,%bl + shl $2,%bl + ret + CFI_ENDPROC +ENDPROC(sk_load_byte_msh) + +bpf_error: +# force a return 0 from jit handler + xor %eax,%eax + mov -8(%rbp),%rbx + leaveq + ret + +/* rsi contains offset and can be scratched */ +#define bpf_slow_path_common(LEN) \ + push %rdi; /* save skb */ \ + push %r9; \ + push SKBDATA; \ +/* rsi already has offset */ \ + mov 
$LEN,%ecx; /* len */ \ + lea -12(%rbp),%rdx; \ + call skb_copy_bits; \ + test %eax,%eax; \ + pop SKBDATA; \ + pop %r9; \ + pop %rdi + + +bpf_slow_path_word: + bpf_slow_path_common(4) + js bpf_error + mov -12(%rbp),%eax + bswap %eax + ret + +bpf_slow_path_half: + bpf_slow_path_common(2) + js bpf_error + mov -12(%rbp),%ax + rol $8,%ax + movzwl %ax,%eax + ret + +bpf_slow_path_byte: + bpf_slow_path_common(1) + js bpf_error + movzbl -12(%rbp),%eax + ret + +bpf_slow_path_byte_msh: + xchg %eax,%ebx /* dont lose A , X is about to be scratched */ + bpf_slow_path_common(1) + js bpf_error + movzbl -12(%rbp),%eax + and $15,%al + shl $2,%al + xchg %eax,%ebx + ret diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c new file mode 100644 index 000000000000..bfab3fa10edc --- /dev/null +++ b/arch/x86/net/bpf_jit_comp.c @@ -0,0 +1,654 @@ +/* bpf_jit_comp.c : BPF JIT compiler + * + * Copyright (C) 2011 Eric Dumazet (eric.dumazet@gmail.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ +#include +#include +#include +#include + +/* + * Conventions : + * EAX : BPF A accumulator + * EBX : BPF X accumulator + * RDI : pointer to skb (first argument given to JIT function) + * RBP : frame pointer (even if CONFIG_FRAME_POINTER=n) + * ECX,EDX,ESI : scratch registers + * r9d : skb->len - skb->data_len (headlen) + * r8 : skb->data + * -8(RBP) : saved RBX value + * -16(RBP)..-80(RBP) : BPF_MEMWORDS values + */ +int bpf_jit_enable __read_mostly; + +/* + * assembly code in arch/x86/net/bpf_jit.S + */ +extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[]; +extern u8 sk_load_word_ind[], sk_load_half_ind[], sk_load_byte_ind[]; + +static inline u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) +{ + if (len == 1) + *ptr = bytes; + else if (len == 2) + *(u16 *)ptr = bytes; + else { + *(u32 *)ptr = bytes; + barrier(); + } + return ptr + len; +} + +#define EMIT(bytes, len) do { prog = emit_code(prog, bytes, len); } while (0) + +#define EMIT1(b1) EMIT(b1, 1) +#define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2) +#define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3) +#define EMIT4(b1, b2, b3, b4) EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4) +#define EMIT1_off32(b1, off) do { EMIT1(b1); EMIT(off, 4);} while (0) + +#define CLEAR_A() EMIT2(0x31, 0xc0) /* xor %eax,%eax */ +#define CLEAR_X() EMIT2(0x31, 0xdb) /* xor %ebx,%ebx */ + +static inline bool is_imm8(int value) +{ + return value <= 127 && value >= -128; +} + +static inline bool is_near(int offset) +{ + return offset <= 127 && offset >= -128; +} + +#define EMIT_JMP(offset) \ +do { \ + if (offset) { \ + if (is_near(offset)) \ + EMIT2(0xeb, offset); /* jmp .+off8 */ \ + else \ + EMIT1_off32(0xe9, offset); /* jmp .+off32 */ \ + } \ +} while (0) + +/* list of x86 cond jumps opcodes (. + s8) + * Add 0x10 (and an extra 0x0f) to generate far jumps (. 
+ s32) + */ +#define X86_JB 0x72 +#define X86_JAE 0x73 +#define X86_JE 0x74 +#define X86_JNE 0x75 +#define X86_JBE 0x76 +#define X86_JA 0x77 + +#define EMIT_COND_JMP(op, offset) \ +do { \ + if (is_near(offset)) \ + EMIT2(op, offset); /* jxx .+off8 */ \ + else { \ + EMIT2(0x0f, op + 0x10); \ + EMIT(offset, 4); /* jxx .+off32 */ \ + } \ +} while (0) + +#define COND_SEL(CODE, TOP, FOP) \ + case CODE: \ + t_op = TOP; \ + f_op = FOP; \ + goto cond_branch + + +#define SEEN_DATAREF 1 /* might call external helpers */ +#define SEEN_XREG 2 /* ebx is used */ +#define SEEN_MEM 4 /* use mem[] for temporary storage */ + +static inline void bpf_flush_icache(void *start, void *end) +{ + mm_segment_t old_fs = get_fs(); + + set_fs(KERNEL_DS); + smp_wmb(); + flush_icache_range((unsigned long)start, (unsigned long)end); + set_fs(old_fs); +} + + +void bpf_jit_compile(struct sk_filter *fp) +{ + u8 temp[64]; + u8 *prog; + unsigned int proglen, oldproglen = 0; + int ilen, i; + int t_offset, f_offset; + u8 t_op, f_op, seen = 0, pass; + u8 *image = NULL; + u8 *func; + int pc_ret0 = -1; /* bpf index of first RET #0 instruction (if any) */ + unsigned int cleanup_addr; /* epilogue code offset */ + unsigned int *addrs; + const struct sock_filter *filter = fp->insns; + int flen = fp->len; + + if (!bpf_jit_enable) + return; + + addrs = kmalloc(flen * sizeof(*addrs), GFP_KERNEL); + if (addrs == NULL) + return; + + /* Before first pass, make a rough estimation of addrs[] + * each bpf instruction is translated to less than 64 bytes + */ + for (proglen = 0, i = 0; i < flen; i++) { + proglen += 64; + addrs[i] = proglen; + } + cleanup_addr = proglen; /* epilogue address */ + + for (pass = 0; pass < 10; pass++) { + /* no prologue/epilogue for trivial filters (RET something) */ + proglen = 0; + prog = temp; + + if (seen) { + EMIT4(0x55, 0x48, 0x89, 0xe5); /* push %rbp; mov %rsp,%rbp */ + EMIT4(0x48, 0x83, 0xec, 96); /* subq $96,%rsp */ + /* note : must save %rbx in case bpf_error is hit */ + if (seen & (SEEN_XREG | SEEN_DATAREF)) + EMIT4(0x48, 0x89, 0x5d, 0xf8); /* mov %rbx, -8(%rbp) */ + if (seen & SEEN_XREG) + CLEAR_X(); /* make sure we dont leek kernel memory */ + + /* + * If this filter needs to access skb data, + * loads r9 and r8 with : + * r9 = skb->len - skb->data_len + * r8 = skb->data + */ + if (seen & SEEN_DATAREF) { + if (offsetof(struct sk_buff, len) <= 127) + /* mov off8(%rdi),%r9d */ + EMIT4(0x44, 0x8b, 0x4f, offsetof(struct sk_buff, len)); + else { + /* mov off32(%rdi),%r9d */ + EMIT3(0x44, 0x8b, 0x8f); + EMIT(offsetof(struct sk_buff, len), 4); + } + if (is_imm8(offsetof(struct sk_buff, data_len))) + /* sub off8(%rdi),%r9d */ + EMIT4(0x44, 0x2b, 0x4f, offsetof(struct sk_buff, data_len)); + else { + EMIT3(0x44, 0x2b, 0x8f); + EMIT(offsetof(struct sk_buff, data_len), 4); + } + + if (is_imm8(offsetof(struct sk_buff, data))) + /* mov off8(%rdi),%r8 */ + EMIT4(0x4c, 0x8b, 0x47, offsetof(struct sk_buff, data)); + else { + /* mov off32(%rdi),%r8 */ + EMIT3(0x4c, 0x8b, 0x87); + EMIT(offsetof(struct sk_buff, data), 4); + } + } + } + + switch (filter[0].code) { + case BPF_S_RET_K: + case BPF_S_LD_W_LEN: + case BPF_S_ANC_PROTOCOL: + case BPF_S_ANC_IFINDEX: + case BPF_S_ANC_MARK: + case BPF_S_ANC_RXHASH: + case BPF_S_ANC_CPU: + case BPF_S_ANC_QUEUE: + case BPF_S_LD_W_ABS: + case BPF_S_LD_H_ABS: + case BPF_S_LD_B_ABS: + /* first instruction sets A register (or is RET 'constant') */ + break; + default: + /* make sure we dont leak kernel information to user */ + CLEAR_A(); /* A = 0 */ + } + + for (i = 0; i < flen; i++) { + 
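+			/* Each iteration translates one classic BPF instruction from
+			 * filter[i] into native code staged in temp[], then records
+			 * the running native offset in addrs[i]; jump offsets taken
+			 * from addrs[] converge over the successive passes. */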
unsigned int K = filter[i].k; + + switch (filter[i].code) { + case BPF_S_ALU_ADD_X: /* A += X; */ + seen |= SEEN_XREG; + EMIT2(0x01, 0xd8); /* add %ebx,%eax */ + break; + case BPF_S_ALU_ADD_K: /* A += K; */ + if (!K) + break; + if (is_imm8(K)) + EMIT3(0x83, 0xc0, K); /* add imm8,%eax */ + else + EMIT1_off32(0x05, K); /* add imm32,%eax */ + break; + case BPF_S_ALU_SUB_X: /* A -= X; */ + seen |= SEEN_XREG; + EMIT2(0x29, 0xd8); /* sub %ebx,%eax */ + break; + case BPF_S_ALU_SUB_K: /* A -= K */ + if (!K) + break; + if (is_imm8(K)) + EMIT3(0x83, 0xe8, K); /* sub imm8,%eax */ + else + EMIT1_off32(0x2d, K); /* sub imm32,%eax */ + break; + case BPF_S_ALU_MUL_X: /* A *= X; */ + seen |= SEEN_XREG; + EMIT3(0x0f, 0xaf, 0xc3); /* imul %ebx,%eax */ + break; + case BPF_S_ALU_MUL_K: /* A *= K */ + if (is_imm8(K)) + EMIT3(0x6b, 0xc0, K); /* imul imm8,%eax,%eax */ + else { + EMIT2(0x69, 0xc0); /* imul imm32,%eax */ + EMIT(K, 4); + } + break; + case BPF_S_ALU_DIV_X: /* A /= X; */ + seen |= SEEN_XREG; + EMIT2(0x85, 0xdb); /* test %ebx,%ebx */ + if (pc_ret0 != -1) + EMIT_COND_JMP(X86_JE, addrs[pc_ret0] - (addrs[i] - 4)); + else { + EMIT_COND_JMP(X86_JNE, 2 + 5); + CLEAR_A(); + EMIT1_off32(0xe9, cleanup_addr - (addrs[i] - 4)); /* jmp .+off32 */ + } + EMIT4(0x31, 0xd2, 0xf7, 0xf3); /* xor %edx,%edx; div %ebx */ + break; + case BPF_S_ALU_DIV_K: /* A = reciprocal_divide(A, K); */ + EMIT3(0x48, 0x69, 0xc0); /* imul imm32,%rax,%rax */ + EMIT(K, 4); + EMIT4(0x48, 0xc1, 0xe8, 0x20); /* shr $0x20,%rax */ + break; + case BPF_S_ALU_AND_X: + seen |= SEEN_XREG; + EMIT2(0x21, 0xd8); /* and %ebx,%eax */ + break; + case BPF_S_ALU_AND_K: + if (K >= 0xFFFFFF00) { + EMIT2(0x24, K & 0xFF); /* and imm8,%al */ + } else if (K >= 0xFFFF0000) { + EMIT2(0x66, 0x25); /* and imm16,%ax */ + EMIT2(K, 2); + } else { + EMIT1_off32(0x25, K); /* and imm32,%eax */ + } + break; + case BPF_S_ALU_OR_X: + seen |= SEEN_XREG; + EMIT2(0x09, 0xd8); /* or %ebx,%eax */ + break; + case BPF_S_ALU_OR_K: + if (is_imm8(K)) + EMIT3(0x83, 0xc8, K); /* or imm8,%eax */ + else + EMIT1_off32(0x0d, K); /* or imm32,%eax */ + break; + case BPF_S_ALU_LSH_X: /* A <<= X; */ + seen |= SEEN_XREG; + EMIT4(0x89, 0xd9, 0xd3, 0xe0); /* mov %ebx,%ecx; shl %cl,%eax */ + break; + case BPF_S_ALU_LSH_K: + if (K == 0) + break; + else if (K == 1) + EMIT2(0xd1, 0xe0); /* shl %eax */ + else + EMIT3(0xc1, 0xe0, K); + break; + case BPF_S_ALU_RSH_X: /* A >>= X; */ + seen |= SEEN_XREG; + EMIT4(0x89, 0xd9, 0xd3, 0xe8); /* mov %ebx,%ecx; shr %cl,%eax */ + break; + case BPF_S_ALU_RSH_K: /* A >>= K; */ + if (K == 0) + break; + else if (K == 1) + EMIT2(0xd1, 0xe8); /* shr %eax */ + else + EMIT3(0xc1, 0xe8, K); + break; + case BPF_S_ALU_NEG: + EMIT2(0xf7, 0xd8); /* neg %eax */ + break; + case BPF_S_RET_K: + if (!K) { + if (pc_ret0 == -1) + pc_ret0 = i; + CLEAR_A(); + } else { + EMIT1_off32(0xb8, K); /* mov $imm32,%eax */ + } + /* fallinto */ + case BPF_S_RET_A: + if (seen) { + if (i != flen - 1) { + EMIT_JMP(cleanup_addr - addrs[i]); + break; + } + if (seen & SEEN_XREG) + EMIT4(0x48, 0x8b, 0x5d, 0xf8); /* mov -8(%rbp),%rbx */ + EMIT1(0xc9); /* leaveq */ + } + EMIT1(0xc3); /* ret */ + break; + case BPF_S_MISC_TAX: /* X = A */ + seen |= SEEN_XREG; + EMIT2(0x89, 0xc3); /* mov %eax,%ebx */ + break; + case BPF_S_MISC_TXA: /* A = X */ + seen |= SEEN_XREG; + EMIT2(0x89, 0xd8); /* mov %ebx,%eax */ + break; + case BPF_S_LD_IMM: /* A = K */ + if (!K) + CLEAR_A(); + else + EMIT1_off32(0xb8, K); /* mov $imm32,%eax */ + break; + case BPF_S_LDX_IMM: /* X = K */ + seen |= SEEN_XREG; + if (!K) + CLEAR_X(); + 
else + EMIT1_off32(0xbb, K); /* mov $imm32,%ebx */ + break; + case BPF_S_LD_MEM: /* A = mem[K] : mov off8(%rbp),%eax */ + seen |= SEEN_MEM; + EMIT3(0x8b, 0x45, 0xf0 - K*4); + break; + case BPF_S_LDX_MEM: /* X = mem[K] : mov off8(%rbp),%ebx */ + seen |= SEEN_XREG | SEEN_MEM; + EMIT3(0x8b, 0x5d, 0xf0 - K*4); + break; + case BPF_S_ST: /* mem[K] = A : mov %eax,off8(%rbp) */ + seen |= SEEN_MEM; + EMIT3(0x89, 0x45, 0xf0 - K*4); + break; + case BPF_S_STX: /* mem[K] = X : mov %ebx,off8(%rbp) */ + seen |= SEEN_XREG | SEEN_MEM; + EMIT3(0x89, 0x5d, 0xf0 - K*4); + break; + case BPF_S_LD_W_LEN: /* A = skb->len; */ + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); + if (is_imm8(offsetof(struct sk_buff, len))) + /* mov off8(%rdi),%eax */ + EMIT3(0x8b, 0x47, offsetof(struct sk_buff, len)); + else { + EMIT2(0x8b, 0x87); + EMIT(offsetof(struct sk_buff, len), 4); + } + break; + case BPF_S_LDX_W_LEN: /* X = skb->len; */ + seen |= SEEN_XREG; + if (is_imm8(offsetof(struct sk_buff, len))) + /* mov off8(%rdi),%ebx */ + EMIT3(0x8b, 0x5f, offsetof(struct sk_buff, len)); + else { + EMIT2(0x8b, 0x9f); + EMIT(offsetof(struct sk_buff, len), 4); + } + break; + case BPF_S_ANC_PROTOCOL: /* A = ntohs(skb->protocol); */ + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2); + if (is_imm8(offsetof(struct sk_buff, protocol))) { + /* movzwl off8(%rdi),%eax */ + EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, protocol)); + } else { + EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */ + EMIT(offsetof(struct sk_buff, protocol), 4); + } + EMIT2(0x86, 0xc4); /* ntohs() : xchg %al,%ah */ + break; + case BPF_S_ANC_IFINDEX: + if (is_imm8(offsetof(struct sk_buff, dev))) { + /* movq off8(%rdi),%rax */ + EMIT4(0x48, 0x8b, 0x47, offsetof(struct sk_buff, dev)); + } else { + EMIT3(0x48, 0x8b, 0x87); /* movq off32(%rdi),%rax */ + EMIT(offsetof(struct sk_buff, dev), 4); + } + EMIT3(0x48, 0x85, 0xc0); /* test %rax,%rax */ + EMIT_COND_JMP(X86_JE, cleanup_addr - (addrs[i] - 6)); + BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); + EMIT2(0x8b, 0x80); /* mov off32(%rax),%eax */ + EMIT(offsetof(struct net_device, ifindex), 4); + break; + case BPF_S_ANC_MARK: + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); + if (is_imm8(offsetof(struct sk_buff, mark))) { + /* mov off8(%rdi),%eax */ + EMIT3(0x8b, 0x47, offsetof(struct sk_buff, mark)); + } else { + EMIT2(0x8b, 0x87); + EMIT(offsetof(struct sk_buff, mark), 4); + } + break; + case BPF_S_ANC_RXHASH: + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, rxhash) != 4); + if (is_imm8(offsetof(struct sk_buff, rxhash))) { + /* mov off8(%rdi),%eax */ + EMIT3(0x8b, 0x47, offsetof(struct sk_buff, rxhash)); + } else { + EMIT2(0x8b, 0x87); + EMIT(offsetof(struct sk_buff, rxhash), 4); + } + break; + case BPF_S_ANC_QUEUE: + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2); + if (is_imm8(offsetof(struct sk_buff, queue_mapping))) { + /* movzwl off8(%rdi),%eax */ + EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, queue_mapping)); + } else { + EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */ + EMIT(offsetof(struct sk_buff, queue_mapping), 4); + } + break; + case BPF_S_ANC_CPU: +#ifdef CONFIG_SMP + EMIT4(0x65, 0x8b, 0x04, 0x25); /* mov %gs:off32,%eax */ + EMIT((u32)(unsigned long)&cpu_number, 4); /* A = smp_processor_id(); */ +#else + CLEAR_A(); +#endif + break; + case BPF_S_LD_W_ABS: + func = sk_load_word; +common_load: seen |= SEEN_DATAREF; + if ((int)K < 0) + goto out; + t_offset = func - (image + addrs[i]); + EMIT1_off32(0xbe, K); /* mov imm32,%esi */ + 
EMIT1_off32(0xe8, t_offset); /* call */ + break; + case BPF_S_LD_H_ABS: + func = sk_load_half; + goto common_load; + case BPF_S_LD_B_ABS: + func = sk_load_byte; + goto common_load; + case BPF_S_LDX_B_MSH: + if ((int)K < 0) { + if (pc_ret0 != -1) { + EMIT_JMP(addrs[pc_ret0] - addrs[i]); + break; + } + CLEAR_A(); + EMIT_JMP(cleanup_addr - addrs[i]); + break; + } + seen |= SEEN_DATAREF | SEEN_XREG; + t_offset = sk_load_byte_msh - (image + addrs[i]); + EMIT1_off32(0xbe, K); /* mov imm32,%esi */ + EMIT1_off32(0xe8, t_offset); /* call sk_load_byte_msh */ + break; + case BPF_S_LD_W_IND: + func = sk_load_word_ind; +common_load_ind: seen |= SEEN_DATAREF | SEEN_XREG; + t_offset = func - (image + addrs[i]); + EMIT1_off32(0xbe, K); /* mov imm32,%esi */ + EMIT1_off32(0xe8, t_offset); /* call sk_load_xxx_ind */ + break; + case BPF_S_LD_H_IND: + func = sk_load_half_ind; + goto common_load_ind; + case BPF_S_LD_B_IND: + func = sk_load_byte_ind; + goto common_load_ind; + case BPF_S_JMP_JA: + t_offset = addrs[i + K] - addrs[i]; + EMIT_JMP(t_offset); + break; + COND_SEL(BPF_S_JMP_JGT_K, X86_JA, X86_JBE); + COND_SEL(BPF_S_JMP_JGE_K, X86_JAE, X86_JB); + COND_SEL(BPF_S_JMP_JEQ_K, X86_JE, X86_JNE); + COND_SEL(BPF_S_JMP_JSET_K,X86_JNE, X86_JE); + COND_SEL(BPF_S_JMP_JGT_X, X86_JA, X86_JBE); + COND_SEL(BPF_S_JMP_JGE_X, X86_JAE, X86_JB); + COND_SEL(BPF_S_JMP_JEQ_X, X86_JE, X86_JNE); + COND_SEL(BPF_S_JMP_JSET_X,X86_JNE, X86_JE); + +cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i]; + t_offset = addrs[i + filter[i].jt] - addrs[i]; + + /* same targets, can avoid doing the test :) */ + if (filter[i].jt == filter[i].jf) { + EMIT_JMP(t_offset); + break; + } + + switch (filter[i].code) { + case BPF_S_JMP_JGT_X: + case BPF_S_JMP_JGE_X: + case BPF_S_JMP_JEQ_X: + seen |= SEEN_XREG; + EMIT2(0x39, 0xd8); /* cmp %ebx,%eax */ + break; + case BPF_S_JMP_JSET_X: + seen |= SEEN_XREG; + EMIT2(0x85, 0xd8); /* test %ebx,%eax */ + break; + case BPF_S_JMP_JEQ_K: + if (K == 0) { + EMIT2(0x85, 0xc0); /* test %eax,%eax */ + break; + } + case BPF_S_JMP_JGT_K: + case BPF_S_JMP_JGE_K: + if (K <= 127) + EMIT3(0x83, 0xf8, K); /* cmp imm8,%eax */ + else + EMIT1_off32(0x3d, K); /* cmp imm32,%eax */ + break; + case BPF_S_JMP_JSET_K: + if (K <= 0xFF) + EMIT2(0xa8, K); /* test imm8,%al */ + else if (!(K & 0xFFFF00FF)) + EMIT3(0xf6, 0xc4, K >> 8); /* test imm8,%ah */ + else if (K <= 0xFFFF) { + EMIT2(0x66, 0xa9); /* test imm16,%ax */ + EMIT(K, 2); + } else { + EMIT1_off32(0xa9, K); /* test imm32,%eax */ + } + break; + } + if (filter[i].jt != 0) { + if (filter[i].jf) + t_offset += is_near(f_offset) ? 
2 : 6; + EMIT_COND_JMP(t_op, t_offset); + if (filter[i].jf) + EMIT_JMP(f_offset); + break; + } + EMIT_COND_JMP(f_op, f_offset); + break; + default: + /* hmm, too complex filter, give up with jit compiler */ + goto out; + } + ilen = prog - temp; + if (image) { + if (unlikely(proglen + ilen > oldproglen)) { + pr_err("bpb_jit_compile fatal error\n"); + kfree(addrs); + module_free(NULL, image); + return; + } + memcpy(image + proglen, temp, ilen); + } + proglen += ilen; + addrs[i] = proglen; + prog = temp; + } + /* last bpf instruction is always a RET : + * use it to give the cleanup instruction(s) addr + */ + cleanup_addr = proglen - 1; /* ret */ + if (seen) + cleanup_addr -= 1; /* leaveq */ + if (seen & SEEN_XREG) + cleanup_addr -= 4; /* mov -8(%rbp),%rbx */ + + if (image) { + WARN_ON(proglen != oldproglen); + break; + } + if (proglen == oldproglen) { + image = module_alloc(max_t(unsigned int, + proglen, + sizeof(struct work_struct))); + if (!image) + goto out; + } + oldproglen = proglen; + } + if (bpf_jit_enable > 1) + pr_err("flen=%d proglen=%u pass=%d image=%p\n", + flen, proglen, pass, image); + + if (image) { + if (bpf_jit_enable > 1) + print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_ADDRESS, + 16, 1, image, proglen, false); + + bpf_flush_icache(image, image + proglen); + + fp->bpf_func = (void *)image; + } +out: + kfree(addrs); + return; +} + +static void jit_free_defer(struct work_struct *arg) +{ + module_free(NULL, arg); +} + +/* run from softirq, we must use a work_struct to call + * module_free() from process context + */ +void bpf_jit_free(struct sk_filter *fp) +{ + if (fp->bpf_func != sk_run_filter) { + struct work_struct *work = (struct work_struct *)fp->bpf_func; + + INIT_WORK(work, jit_free_defer); + schedule_work(work); + } +} diff --git a/include/linux/filter.h b/include/linux/filter.h index 45266b75409a..4609b85e559d 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -135,6 +135,8 @@ struct sk_filter { atomic_t refcnt; unsigned int len; /* Number of filter blocks */ + unsigned int (*bpf_func)(const struct sk_buff *skb, + const struct sock_filter *filter); struct rcu_head rcu; struct sock_filter insns[0]; }; @@ -153,6 +155,80 @@ extern unsigned int sk_run_filter(const struct sk_buff *skb, extern int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); extern int sk_detach_filter(struct sock *sk); extern int sk_chk_filter(struct sock_filter *filter, int flen); + +#ifdef CONFIG_BPF_JIT +extern void bpf_jit_compile(struct sk_filter *fp); +extern void bpf_jit_free(struct sk_filter *fp); +#define SK_RUN_FILTER(FILTER, SKB) (*FILTER->bpf_func)(SKB, FILTER->insns) +#else +static inline void bpf_jit_compile(struct sk_filter *fp) +{ +} +static inline void bpf_jit_free(struct sk_filter *fp) +{ +} +#define SK_RUN_FILTER(FILTER, SKB) sk_run_filter(SKB, FILTER->insns) +#endif + +enum { + BPF_S_RET_K = 1, + BPF_S_RET_A, + BPF_S_ALU_ADD_K, + BPF_S_ALU_ADD_X, + BPF_S_ALU_SUB_K, + BPF_S_ALU_SUB_X, + BPF_S_ALU_MUL_K, + BPF_S_ALU_MUL_X, + BPF_S_ALU_DIV_X, + BPF_S_ALU_AND_K, + BPF_S_ALU_AND_X, + BPF_S_ALU_OR_K, + BPF_S_ALU_OR_X, + BPF_S_ALU_LSH_K, + BPF_S_ALU_LSH_X, + BPF_S_ALU_RSH_K, + BPF_S_ALU_RSH_X, + BPF_S_ALU_NEG, + BPF_S_LD_W_ABS, + BPF_S_LD_H_ABS, + BPF_S_LD_B_ABS, + BPF_S_LD_W_LEN, + BPF_S_LD_W_IND, + BPF_S_LD_H_IND, + BPF_S_LD_B_IND, + BPF_S_LD_IMM, + BPF_S_LDX_W_LEN, + BPF_S_LDX_B_MSH, + BPF_S_LDX_IMM, + BPF_S_MISC_TAX, + BPF_S_MISC_TXA, + BPF_S_ALU_DIV_K, + BPF_S_LD_MEM, + BPF_S_LDX_MEM, + BPF_S_ST, + BPF_S_STX, + BPF_S_JMP_JA, + BPF_S_JMP_JEQ_K, + 
BPF_S_JMP_JEQ_X, + BPF_S_JMP_JGE_K, + BPF_S_JMP_JGE_X, + BPF_S_JMP_JGT_K, + BPF_S_JMP_JGT_X, + BPF_S_JMP_JSET_K, + BPF_S_JMP_JSET_X, + /* Ancillary data */ + BPF_S_ANC_PROTOCOL, + BPF_S_ANC_PKTTYPE, + BPF_S_ANC_IFINDEX, + BPF_S_ANC_NLATTR, + BPF_S_ANC_NLATTR_NEST, + BPF_S_ANC_MARK, + BPF_S_ANC_QUEUE, + BPF_S_ANC_HATYPE, + BPF_S_ANC_RXHASH, + BPF_S_ANC_CPU, +}; + #endif /* __KERNEL__ */ #endif /* __LINUX_FILTER_H__ */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index cb8178ab3c52..364bcf212f71 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2514,6 +2514,7 @@ extern struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, extern int netdev_max_backlog; extern int netdev_tstamp_prequeue; extern int weight_p; +extern int bpf_jit_enable; extern int netdev_set_master(struct net_device *dev, struct net_device *master); extern int netdev_set_bond_master(struct net_device *dev, struct net_device *master); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d0ae90af0b40..79aafbbf430a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -391,8 +391,8 @@ struct sk_buff { __u32 rxhash; + __u16 queue_mapping; kmemcheck_bitfield_begin(flags2); - __u16 queue_mapping:16; #ifdef CONFIG_IPV6_NDISC_NODETYPE __u8 ndisc_nodetype:2; #endif diff --git a/net/Kconfig b/net/Kconfig index 79cabf1ee68b..745fb02d2fda 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -232,6 +232,19 @@ config XPS depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS default y +config HAVE_BPF_JIT + bool + +config BPF_JIT + bool "enable BPF Just In Time compiler" + depends on HAVE_BPF_JIT + ---help--- + Berkeley Packet Filter filtering capabilities are normally handled + by an interpreter. This option allows kernel to generate a native + code when filter is loaded in memory. This should speedup + packet sniffing (libpcap/tcpdump). 
Note : Admin should enable + this feature changing /proc/sys/net/core/bpf_jit_enable + menu "Network testing" config NET_PKTGEN diff --git a/net/core/filter.c b/net/core/filter.c index afb8afb066bb..0eb8c4466eaa 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -39,65 +39,6 @@ #include #include -enum { - BPF_S_RET_K = 1, - BPF_S_RET_A, - BPF_S_ALU_ADD_K, - BPF_S_ALU_ADD_X, - BPF_S_ALU_SUB_K, - BPF_S_ALU_SUB_X, - BPF_S_ALU_MUL_K, - BPF_S_ALU_MUL_X, - BPF_S_ALU_DIV_X, - BPF_S_ALU_AND_K, - BPF_S_ALU_AND_X, - BPF_S_ALU_OR_K, - BPF_S_ALU_OR_X, - BPF_S_ALU_LSH_K, - BPF_S_ALU_LSH_X, - BPF_S_ALU_RSH_K, - BPF_S_ALU_RSH_X, - BPF_S_ALU_NEG, - BPF_S_LD_W_ABS, - BPF_S_LD_H_ABS, - BPF_S_LD_B_ABS, - BPF_S_LD_W_LEN, - BPF_S_LD_W_IND, - BPF_S_LD_H_IND, - BPF_S_LD_B_IND, - BPF_S_LD_IMM, - BPF_S_LDX_W_LEN, - BPF_S_LDX_B_MSH, - BPF_S_LDX_IMM, - BPF_S_MISC_TAX, - BPF_S_MISC_TXA, - BPF_S_ALU_DIV_K, - BPF_S_LD_MEM, - BPF_S_LDX_MEM, - BPF_S_ST, - BPF_S_STX, - BPF_S_JMP_JA, - BPF_S_JMP_JEQ_K, - BPF_S_JMP_JEQ_X, - BPF_S_JMP_JGE_K, - BPF_S_JMP_JGE_X, - BPF_S_JMP_JGT_K, - BPF_S_JMP_JGT_X, - BPF_S_JMP_JSET_K, - BPF_S_JMP_JSET_X, - /* Ancillary data */ - BPF_S_ANC_PROTOCOL, - BPF_S_ANC_PKTTYPE, - BPF_S_ANC_IFINDEX, - BPF_S_ANC_NLATTR, - BPF_S_ANC_NLATTR_NEST, - BPF_S_ANC_MARK, - BPF_S_ANC_QUEUE, - BPF_S_ANC_HATYPE, - BPF_S_ANC_RXHASH, - BPF_S_ANC_CPU, -}; - /* No hurry in this branch */ static void *__load_pointer(const struct sk_buff *skb, int k, unsigned int size) { @@ -145,7 +86,7 @@ int sk_filter(struct sock *sk, struct sk_buff *skb) rcu_read_lock(); filter = rcu_dereference(sk->sk_filter); if (filter) { - unsigned int pkt_len = sk_run_filter(skb, filter->insns); + unsigned int pkt_len = SK_RUN_FILTER(filter, skb); err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM; } @@ -638,6 +579,7 @@ void sk_filter_release_rcu(struct rcu_head *rcu) { struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu); + bpf_jit_free(fp); kfree(fp); } EXPORT_SYMBOL(sk_filter_release_rcu); @@ -672,6 +614,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) atomic_set(&fp->refcnt, 1); fp->len = fprog->len; + fp->bpf_func = sk_run_filter; err = sk_chk_filter(fp->insns, fp->len); if (err) { @@ -679,6 +622,8 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) return err; } + bpf_jit_compile(fp); + old_fp = rcu_dereference_protected(sk->sk_filter, sock_owned_by_user(sk)); rcu_assign_pointer(sk->sk_filter, fp); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 385b6095fdc4..a829e3f60aeb 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -122,6 +122,15 @@ static struct ctl_table net_core_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, +#ifdef CONFIG_BPF_JIT + { + .procname = "bpf_jit_enable", + .data = &bpf_jit_enable, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, +#endif { .procname = "netdev_tstamp_prequeue", .data = &netdev_tstamp_prequeue, diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index b5362e96022b..549527bca87a 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -538,7 +538,7 @@ static inline unsigned int run_filter(const struct sk_buff *skb, rcu_read_lock(); filter = rcu_dereference(sk->sk_filter); if (filter != NULL) - res = sk_run_filter(skb, filter->insns); + res = SK_RUN_FILTER(filter, skb); rcu_read_unlock(); return res; -- cgit v1.2.3-71-gd317 From acad9853b95df6a3887f52e0ec88e4a77119ee28 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 27 Apr 2011 23:08:51 
-0700 Subject: Input: wm831x-ts - allow IRQ flags to be specified This allows maximum flexibility for configuring the direct GPIO based interrupts. Signed-off-by: Mark Brown Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/wm831x-ts.c | 16 +++++++++++++--- include/linux/mfd/wm831x/pdata.h | 2 ++ 2 files changed, 15 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/input/touchscreen/wm831x-ts.c b/drivers/input/touchscreen/wm831x-ts.c index b9373012b3e6..78e8705df20d 100644 --- a/drivers/input/touchscreen/wm831x-ts.c +++ b/drivers/input/touchscreen/wm831x-ts.c @@ -241,7 +241,7 @@ static __devinit int wm831x_ts_probe(struct platform_device *pdev) struct wm831x_pdata *core_pdata = dev_get_platdata(pdev->dev.parent); struct wm831x_touch_pdata *pdata = NULL; struct input_dev *input_dev; - int error; + int error, irqf; if (core_pdata) pdata = core_pdata->touch; @@ -314,9 +314,14 @@ static __devinit int wm831x_ts_probe(struct platform_device *pdev) wm831x_set_bits(wm831x, WM831X_TOUCH_CONTROL_1, WM831X_TCH_RATE_MASK, 6); + if (pdata && pdata->data_irqf) + irqf = pdata->data_irqf; + else + irqf = IRQF_TRIGGER_HIGH; + error = request_threaded_irq(wm831x_ts->data_irq, NULL, wm831x_ts_data_irq, - IRQF_ONESHOT, + irqf | IRQF_ONESHOT, "Touchscreen data", wm831x_ts); if (error) { dev_err(&pdev->dev, "Failed to request data IRQ %d: %d\n", @@ -325,9 +330,14 @@ static __devinit int wm831x_ts_probe(struct platform_device *pdev) } disable_irq(wm831x_ts->data_irq); + if (pdata && pdata->pd_irqf) + irqf = pdata->pd_irqf; + else + irqf = IRQF_TRIGGER_HIGH; + error = request_threaded_irq(wm831x_ts->pd_irq, NULL, wm831x_ts_pen_down_irq, - IRQF_ONESHOT, + irqf | IRQF_ONESHOT, "Touchscreen pen down", wm831x_ts); if (error) { dev_err(&pdev->dev, "Failed to request pen down IRQ %d: %d\n", diff --git a/include/linux/mfd/wm831x/pdata.h b/include/linux/mfd/wm831x/pdata.h index 173086d42af4..6b0eb130efb8 100644 --- a/include/linux/mfd/wm831x/pdata.h +++ b/include/linux/mfd/wm831x/pdata.h @@ -81,7 +81,9 @@ struct wm831x_touch_pdata { int rpu; /** Pen down sensitivity resistor divider */ int pressure; /** Report pressure (boolean) */ unsigned int data_irq; /** Touch data ready IRQ */ + int data_irqf; /** IRQ flags for data ready IRQ */ unsigned int pd_irq; /** Touch pendown detect IRQ */ + int pd_irqf; /** IRQ flags for pen down IRQ */ }; enum wm831x_watchdog_action { -- cgit v1.2.3-71-gd317 From e6fa16ab9c1e9b344428e6fea4d29e3cc4b28fb0 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 27 Apr 2011 20:59:41 +0200 Subject: signal: sigprocmask() should do retarget_shared_pending() In short, almost every changing of current->blocked is wrong, or at least can lead to the unexpected results. For example. Two threads T1 and T2, T1 sleeps in sigtimedwait/pause/etc. kill(tgid, SIG) can pick T2 for TIF_SIGPENDING. If T2 calls sigprocmask() and blocks SIG before it notices the pending signal, nobody else can handle this pending shared signal. I am not sure this is bug, but at least this looks strange imho. T1 should not sleep forever, there is a signal which should wake it up. This patch moves the code which actually changes ->blocked into the new helper, set_current_blocked() and changes this code to call retarget_shared_pending() as exit_signals() does. We should only care about the signals we just blocked, we use "newset & ~current->blocked" as a mask. We do not check !sigisemptyset(newblocked), retarget_shared_pending() is cheap unless mask & shared_pending. 
Note: for this particular case we could simply change sigprocmask() to return -EINTR if signal_pending(), but then we should change other callers and, more importantly, if we need this fix then set_current_blocked() will have more callers and some of them can't restart. See the next patch as a random example. Signed-off-by: Oleg Nesterov Reviewed-by: Matt Fleming Acked-by: Tejun Heo --- include/linux/signal.h | 1 + kernel/signal.c | 29 ++++++++++++++++++++++++----- 2 files changed, 25 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/signal.h b/include/linux/signal.h index fcd2b14b1932..ba009c167275 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -243,6 +243,7 @@ extern long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, siginfo_t *info); extern long do_sigpending(void __user *, unsigned long); extern int sigprocmask(int, sigset_t *, sigset_t *); +extern void set_current_blocked(const sigset_t *); extern int show_unhandled_signals; struct pt_regs; diff --git a/kernel/signal.c b/kernel/signal.c index e8308e3238c1..8aa3a2e226af 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2299,6 +2299,29 @@ long do_no_restart_syscall(struct restart_block *param) return -EINTR; } +/** + * set_current_blocked - change current->blocked mask + * @newset: new mask + * + * It is wrong to change ->blocked directly, this helper should be used + * to ensure the process can't miss a shared signal we are going to block. + */ +void set_current_blocked(const sigset_t *newset) +{ + struct task_struct *tsk = current; + + spin_lock_irq(&tsk->sighand->siglock); + if (signal_pending(tsk) && !thread_group_empty(tsk)) { + sigset_t newblocked; + /* A set of now blocked but previously unblocked signals. */ + signandsets(&newblocked, newset, ¤t->blocked); + retarget_shared_pending(tsk, &newblocked); + } + tsk->blocked = *newset; + recalc_sigpending(); + spin_unlock_irq(&tsk->sighand->siglock); +} + /* * This is also useful for kernel threads that want to temporarily * (or permanently) block certain signals. @@ -2330,11 +2353,7 @@ int sigprocmask(int how, sigset_t *set, sigset_t *oldset) return -EINVAL; } - spin_lock_irq(&tsk->sighand->siglock); - tsk->blocked = newset; - recalc_sigpending(); - spin_unlock_irq(&tsk->sighand->siglock); - + set_current_blocked(&newset); return 0; } -- cgit v1.2.3-71-gd317 From 943df1485a8ff0e600729e082e568ece04d4de9e Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 27 Apr 2011 21:44:14 +0200 Subject: signal: introduce do_sigtimedwait() to factor out compat/native code Factor out the common code in sys_rt_sigtimedwait/compat_sys_rt_sigtimedwait to the new helper, do_sigtimedwait(). Add the comment to document the extra tick we add to timespec_to_jiffies(ts), thanks to Linus who explained this to me. Perhaps it would be better to move compat_sys_rt_sigtimedwait() into signal.c under CONFIG_COMPAT, then we can make do_sigtimedwait() static. 
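As a worked illustration of that extra tick (illustrative numbers, not part of the patch): with HZ=100 a caller asking for 1ms gets timespec_to_jiffies() == 1, but the next tick may fire almost immediately after we start sleeping, so one more jiffy is added whenever a non-zero timeout was requested:

	timeout = timespec_to_jiffies(ts);
	/* may round to 1 jiffy; the next tick can be arbitrarily close */
	if (ts->tv_sec || ts->tv_nsec)
		timeout++;	/* guarantees at least the time asked for */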
Signed-off-by: Oleg Nesterov Acked-by: Tejun Heo Reviewed-by: Matt Fleming --- include/linux/signal.h | 2 + kernel/compat.c | 41 ++++-------------- kernel/signal.c | 110 +++++++++++++++++++++++++++++-------------------- 3 files changed, 74 insertions(+), 79 deletions(-) (limited to 'include/linux') diff --git a/include/linux/signal.h b/include/linux/signal.h index ba009c167275..782546d661ba 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -242,6 +242,8 @@ extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *); extern long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, siginfo_t *info); extern long do_sigpending(void __user *, unsigned long); +extern int do_sigtimedwait(const sigset_t *, siginfo_t *, + const struct timespec *); extern int sigprocmask(int, sigset_t *, sigset_t *); extern void set_current_blocked(const sigset_t *); extern int show_unhandled_signals; diff --git a/kernel/compat.c b/kernel/compat.c index 06cbb0619531..9214dcd087b7 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -890,10 +890,9 @@ compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese, { compat_sigset_t s32; sigset_t s; - int sig; struct timespec t; siginfo_t info; - long ret, timeout; + long ret; if (sigsetsize != sizeof(sigset_t)) return -EINVAL; @@ -901,45 +900,19 @@ compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese, if (copy_from_user(&s32, uthese, sizeof(compat_sigset_t))) return -EFAULT; sigset_from_compat(&s, &s32); - sigdelsetmask(&s,sigmask(SIGKILL)|sigmask(SIGSTOP)); - signotset(&s); - timeout = MAX_SCHEDULE_TIMEOUT; if (uts) { - if (get_compat_timespec (&t, uts)) + if (get_compat_timespec(&t, uts)) return -EFAULT; - if (!timespec_valid(&t)) - return -EINVAL; - timeout = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec); } - spin_lock_irq(¤t->sighand->siglock); - sig = dequeue_signal(current, &s, &info); - if (!sig && timeout) { - current->real_blocked = current->blocked; - sigandsets(¤t->blocked, ¤t->blocked, &s); - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - - timeout = schedule_timeout_interruptible(timeout); - - spin_lock_irq(¤t->sighand->siglock); - sig = dequeue_signal(current, &s, &info); - current->blocked = current->real_blocked; - siginitset(¤t->real_blocked, 0); - recalc_sigpending(); - } - spin_unlock_irq(¤t->sighand->siglock); + ret = do_sigtimedwait(&s, &info, uts ? &t : NULL); - if (sig) { - ret = sig; - if (uinfo) { - if (copy_siginfo_to_user32(uinfo, &info)) - ret = -EFAULT; - } - } else { - ret = timeout?-EINTR:-EAGAIN; + if (ret > 0 && uinfo) { + if (copy_siginfo_to_user32(uinfo, &info)) + ret = -EFAULT; } + return ret; } diff --git a/kernel/signal.c b/kernel/signal.c index c734619554f6..1ab89f677424 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2503,6 +2503,66 @@ int copy_siginfo_to_user(siginfo_t __user *to, siginfo_t *from) #endif +/** + * do_sigtimedwait - wait for queued signals specified in @which + * @which: queued signals to wait for + * @info: if non-null, the signal's siginfo is returned here + * @ts: upper bound on process time suspension + */ +int do_sigtimedwait(const sigset_t *which, siginfo_t *info, + const struct timespec *ts) +{ + struct task_struct *tsk = current; + long timeout = MAX_SCHEDULE_TIMEOUT; + sigset_t mask = *which; + int sig; + + if (ts) { + if (!timespec_valid(ts)) + return -EINVAL; + timeout = timespec_to_jiffies(ts); + /* + * We can be close to the next tick, add another one + * to ensure we will wait at least the time asked for. 
+ */ + if (ts->tv_sec || ts->tv_nsec) + timeout++; + } + + /* + * Invert the set of allowed signals to get those we want to block. + */ + sigdelsetmask(&mask, sigmask(SIGKILL) | sigmask(SIGSTOP)); + signotset(&mask); + + spin_lock_irq(&tsk->sighand->siglock); + sig = dequeue_signal(tsk, &mask, info); + if (!sig && timeout) { + /* + * None ready, temporarily unblock those we're interested + * while we are sleeping in so that we'll be awakened when + * they arrive. + */ + tsk->real_blocked = tsk->blocked; + sigandsets(&tsk->blocked, &tsk->blocked, &mask); + recalc_sigpending(); + spin_unlock_irq(&tsk->sighand->siglock); + + timeout = schedule_timeout_interruptible(timeout); + + spin_lock_irq(&tsk->sighand->siglock); + sig = dequeue_signal(tsk, &mask, info); + tsk->blocked = tsk->real_blocked; + siginitset(&tsk->real_blocked, 0); + recalc_sigpending(); + } + spin_unlock_irq(&tsk->sighand->siglock); + + if (sig) + return sig; + return timeout ? -EINTR : -EAGAIN; +} + /** * sys_rt_sigtimedwait - synchronously wait for queued signals specified * in @uthese @@ -2515,11 +2575,10 @@ SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese, siginfo_t __user *, uinfo, const struct timespec __user *, uts, size_t, sigsetsize) { - int ret, sig; sigset_t these; struct timespec ts; siginfo_t info; - long timeout; + int ret; /* XXX: Don't preclude handling different sized sigset_t's. */ if (sigsetsize != sizeof(sigset_t)) @@ -2528,55 +2587,16 @@ SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese, if (copy_from_user(&these, uthese, sizeof(these))) return -EFAULT; - /* - * Invert the set of allowed signals to get those we - * want to block. - */ - sigdelsetmask(&these, sigmask(SIGKILL)|sigmask(SIGSTOP)); - signotset(&these); - - timeout = MAX_SCHEDULE_TIMEOUT; if (uts) { if (copy_from_user(&ts, uts, sizeof(ts))) return -EFAULT; - if (!timespec_valid(&ts)) - return -EINVAL; - timeout = timespec_to_jiffies(&ts) + (ts.tv_sec || ts.tv_nsec); } - spin_lock_irq(¤t->sighand->siglock); - sig = dequeue_signal(current, &these, &info); - if (!sig && timeout) { - /* - * None ready -- temporarily unblock those we're - * interested while we are sleeping in so that we'll - * be awakened when they arrive. - */ - current->real_blocked = current->blocked; - sigandsets(¤t->blocked, ¤t->blocked, &these); - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - - timeout = schedule_timeout_interruptible(timeout); - - spin_lock_irq(¤t->sighand->siglock); - sig = dequeue_signal(current, &these, &info); - current->blocked = current->real_blocked; - siginitset(¤t->real_blocked, 0); - recalc_sigpending(); - } - spin_unlock_irq(¤t->sighand->siglock); + ret = do_sigtimedwait(&these, &info, uts ? &ts : NULL); - if (sig) { - ret = sig; - if (uinfo) { - if (copy_siginfo_to_user(uinfo, &info)) - ret = -EFAULT; - } - } else { - ret = -EAGAIN; - if (timeout) - ret = -EINTR; + if (ret > 0 && uinfo) { + if (copy_siginfo_to_user(uinfo, &info)) + ret = -EFAULT; } return ret; -- cgit v1.2.3-71-gd317 From 702a5073fdb71eb29cd4912575289fb5044c1894 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 27 Apr 2011 22:01:27 +0200 Subject: signal: rename signandsets() to sigandnsets() As Tejun and Linus pointed out, "nand" is the wrong name for "x & ~y", it should be "andn". Rename signandsets() as suggested. 
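For reference, a minimal sketch of the two helpers' semantics after the rename (illustrative only):

	sigset_t dst, a, b;

	sigandsets(&dst, &a, &b);	/* dst = a & b          */
	sigandnsets(&dst, &a, &b);	/* dst = a & ~b ("andn") */
	/* a true "nand" would be ~(a & b), hence the old name was wrong */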
Suggested-by: Tejun Heo Signed-off-by: Oleg Nesterov Acked-by: Tejun Heo --- include/linux/signal.h | 6 +++--- kernel/signal.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/signal.h b/include/linux/signal.h index 782546d661ba..7e2526374fd7 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -123,13 +123,13 @@ _SIG_SET_BINOP(sigorsets, _sig_or) #define _sig_and(x,y) ((x) & (y)) _SIG_SET_BINOP(sigandsets, _sig_and) -#define _sig_nand(x,y) ((x) & ~(y)) -_SIG_SET_BINOP(signandsets, _sig_nand) +#define _sig_andn(x,y) ((x) & ~(y)) +_SIG_SET_BINOP(sigandnsets, _sig_andn) #undef _SIG_SET_BINOP #undef _sig_or #undef _sig_and -#undef _sig_nand +#undef _sig_andn #define _SIG_SET_OP(name, op) \ static inline void name(sigset_t *set) \ diff --git a/kernel/signal.c b/kernel/signal.c index 4d97e11d7672..e7ee4e642c5a 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -669,7 +669,7 @@ static int rm_from_queue_full(sigset_t *mask, struct sigpending *s) if (sigisemptyset(&m)) return 0; - signandsets(&s->signal, &s->signal, mask); + sigandnsets(&s->signal, &s->signal, mask); list_for_each_entry_safe(q, n, &s->list, list) { if (sigismember(mask, q->info.si_signo)) { list_del_init(&q->list); @@ -2304,7 +2304,7 @@ static void __set_task_blocked(struct task_struct *tsk, const sigset_t *newset) if (signal_pending(tsk) && !thread_group_empty(tsk)) { sigset_t newblocked; /* A set of now blocked but previously unblocked signals. */ - signandsets(&newblocked, newset, ¤t->blocked); + sigandnsets(&newblocked, newset, ¤t->blocked); retarget_shared_pending(tsk, &newblocked); } tsk->blocked = *newset; @@ -2349,7 +2349,7 @@ int sigprocmask(int how, sigset_t *set, sigset_t *oldset) sigorsets(&newset, &tsk->blocked, set); break; case SIG_UNBLOCK: - signandsets(&newset, &tsk->blocked, set); + sigandnsets(&newset, &tsk->blocked, set); break; case SIG_SETMASK: newset = *set; -- cgit v1.2.3-71-gd317 From 78f11a255749d09025f54d4e2df4fbcb031530e2 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Wed, 27 Apr 2011 15:26:45 -0700 Subject: mm: thp: fix /dev/zero MAP_PRIVATE and vm_flags cleanups The huge_memory.c THP page fault was allowed to run if vm_ops was null (which would succeed for /dev/zero MAP_PRIVATE, as the f_op->mmap wouldn't setup a special vma->vm_ops and it would fallback to regular anonymous memory) but other THP logics weren't fully activated for vmas with vm_file not NULL (/dev/zero has a not NULL vma->vm_file). So this removes the vm_file checks so that /dev/zero also can safely use THP (the other albeit safer approach to fix this bug would have been to prevent the THP initial page fault to run if vm_file was set). After removing the vm_file checks, this also makes huge_memory.c stricter in khugepaged for the DEBUG_VM=y case. It doesn't replace the vm_file check with a is_pfn_mapping check (but it keeps checking for VM_PFNMAP under VM_BUG_ON) because for a is_cow_mapping() mapping VM_PFNMAP should only be allowed to exist before the first page fault, and in turn when vma->anon_vma is null (so preventing khugepaged registration). So I tend to think the previous comment saying if vm_file was set, VM_PFNMAP might have been set and we could still be registered in khugepaged (despite anon_vma was not NULL to be registered in khugepaged) was too paranoid. The is_linear_pfn_mapping check is also I think superfluous (as described by comment) but under DEBUG_VM it is safe to stay. 
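The case this fixes can be seen with a mapping such as the following sketch (illustrative userspace code): the resulting vma has vm_file set but no vm_ops, so it behaves as anonymous memory and is now also eligible for transparent hugepages:

	#include <fcntl.h>
	#include <sys/mman.h>
	#include <unistd.h>

	static void *map_zero_private(size_t len)
	{
		int fd = open("/dev/zero", O_RDWR);
		void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
			       MAP_PRIVATE, fd, 0);
		close(fd);	/* the mapping outlives the descriptor */
		return p;
	}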
Addresses https://bugzilla.kernel.org/show_bug.cgi?id=33682 Signed-off-by: Andrea Arcangeli Reported-by: Caspar Zhang Acked-by: Mel Gorman Acked-by: Rik van Riel Cc: [2.6.38.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 2 +- include/linux/mm.h | 3 ++- mm/huge_memory.c | 43 ++++++++++++++++++++++++------------------- 3 files changed, 27 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index df29c8fde36b..8847c8c29791 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -117,7 +117,7 @@ static inline void vma_adjust_trans_huge(struct vm_area_struct *vma, unsigned long end, long adjust_next) { - if (!vma->anon_vma || vma->vm_ops || vma->vm_file) + if (!vma->anon_vma || vma->vm_ops) return; __vma_adjust_trans_huge(vma, start, end, adjust_next); } diff --git a/include/linux/mm.h b/include/linux/mm.h index 692dbae6ffa7..2348db26bc3d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -137,7 +137,8 @@ extern unsigned int kobjsize(const void *objp); #define VM_RandomReadHint(v) ((v)->vm_flags & VM_RAND_READ) /* - * special vmas that are non-mergable, non-mlock()able + * Special vmas that are non-mergable, non-mlock()able. + * Note: mm/huge_memory.c VM_NO_THP depends on this definition. */ #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 470dcda10add..83326ad66d9b 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1408,6 +1408,9 @@ out: return ret; } +#define VM_NO_THP (VM_SPECIAL|VM_INSERTPAGE|VM_MIXEDMAP|VM_SAO| \ + VM_HUGETLB|VM_SHARED|VM_MAYSHARE) + int hugepage_madvise(struct vm_area_struct *vma, unsigned long *vm_flags, int advice) { @@ -1416,11 +1419,7 @@ int hugepage_madvise(struct vm_area_struct *vma, /* * Be somewhat over-protective like KSM for now! */ - if (*vm_flags & (VM_HUGEPAGE | - VM_SHARED | VM_MAYSHARE | - VM_PFNMAP | VM_IO | VM_DONTEXPAND | - VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE | - VM_MIXEDMAP | VM_SAO)) + if (*vm_flags & (VM_HUGEPAGE | VM_NO_THP)) return -EINVAL; *vm_flags &= ~VM_NOHUGEPAGE; *vm_flags |= VM_HUGEPAGE; @@ -1436,11 +1435,7 @@ int hugepage_madvise(struct vm_area_struct *vma, /* * Be somewhat over-protective like KSM for now! */ - if (*vm_flags & (VM_NOHUGEPAGE | - VM_SHARED | VM_MAYSHARE | - VM_PFNMAP | VM_IO | VM_DONTEXPAND | - VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE | - VM_MIXEDMAP | VM_SAO)) + if (*vm_flags & (VM_NOHUGEPAGE | VM_NO_THP)) return -EINVAL; *vm_flags &= ~VM_HUGEPAGE; *vm_flags |= VM_NOHUGEPAGE; @@ -1574,10 +1569,14 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma) * page fault if needed. */ return 0; - if (vma->vm_file || vma->vm_ops) + if (vma->vm_ops) /* khugepaged not yet working on file or special mappings */ return 0; - VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma)); + /* + * If is_pfn_mapping() is true is_learn_pfn_mapping() must be + * true too, verify it here. 
+ */ + VM_BUG_ON(is_linear_pfn_mapping(vma) || vma->vm_flags & VM_NO_THP); hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK; hend = vma->vm_end & HPAGE_PMD_MASK; if (hstart < hend) @@ -1828,12 +1827,15 @@ static void collapse_huge_page(struct mm_struct *mm, (vma->vm_flags & VM_NOHUGEPAGE)) goto out; - /* VM_PFNMAP vmas may have vm_ops null but vm_file set */ - if (!vma->anon_vma || vma->vm_ops || vma->vm_file) + if (!vma->anon_vma || vma->vm_ops) goto out; if (is_vma_temporary_stack(vma)) goto out; - VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma)); + /* + * If is_pfn_mapping() is true is_learn_pfn_mapping() must be + * true too, verify it here. + */ + VM_BUG_ON(is_linear_pfn_mapping(vma) || vma->vm_flags & VM_NO_THP); pgd = pgd_offset(mm, address); if (!pgd_present(*pgd)) @@ -2066,13 +2068,16 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, progress++; continue; } - /* VM_PFNMAP vmas may have vm_ops null but vm_file set */ - if (!vma->anon_vma || vma->vm_ops || vma->vm_file) + if (!vma->anon_vma || vma->vm_ops) goto skip; if (is_vma_temporary_stack(vma)) goto skip; - - VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma)); + /* + * If is_pfn_mapping() is true is_learn_pfn_mapping() + * must be true too, verify it here. + */ + VM_BUG_ON(is_linear_pfn_mapping(vma) || + vma->vm_flags & VM_NO_THP); hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK; hend = vma->vm_end & HPAGE_PMD_MASK; -- cgit v1.2.3-71-gd317 From 68972efa657040f891c7eda07c7da8c8dd576788 Mon Sep 17 00:00:00 2001 From: Paul Stewart Date: Thu, 28 Apr 2011 05:43:37 +0000 Subject: usbnet: Resubmit interrupt URB if device is open Resubmit interrupt URB if device is open. Use a flag set in usbnet_open() to determine this state. Also kill and free interrupt URB in usbnet_disconnect(). [Rebased off git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git] Signed-off-by: Paul Stewart Signed-off-by: David S. 
Miller --- drivers/net/usb/usbnet.c | 8 ++++++++ include/linux/usb/usbnet.h | 1 + 2 files changed, 9 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 069c1cf0fdf7..009bba3d753e 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -736,6 +736,7 @@ int usbnet_open (struct net_device *net) } } + set_bit(EVENT_DEV_OPEN, &dev->flags); netif_start_queue (net); netif_info(dev, ifup, dev->net, "open: enable queueing (rx %d, tx %d) mtu %d %s framing\n", @@ -1259,6 +1260,9 @@ void usbnet_disconnect (struct usb_interface *intf) if (dev->driver_info->unbind) dev->driver_info->unbind (dev, intf); + usb_kill_urb(dev->interrupt); + usb_free_urb(dev->interrupt); + free_netdev(net); usb_put_dev (xdev); } @@ -1498,6 +1502,10 @@ int usbnet_resume (struct usb_interface *intf) int retval; if (!--dev->suspend_count) { + /* resume interrupt URBs */ + if (dev->interrupt && test_bit(EVENT_DEV_OPEN, &dev->flags)) + usb_submit_urb(dev->interrupt, GFP_NOIO); + spin_lock_irq(&dev->txq.lock); while ((res = usb_get_from_anchor(&dev->deferred))) { diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 0e1855079fbb..605b0aa8d852 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -68,6 +68,7 @@ struct usbnet { # define EVENT_RX_PAUSED 5 # define EVENT_DEV_WAKING 6 # define EVENT_DEV_ASLEEP 7 +# define EVENT_DEV_OPEN 8 }; static inline struct usb_driver *driver_of(struct usb_interface *intf) -- cgit v1.2.3-71-gd317 From 5d30b10bd68df007e7ae21e77d1e0ce184b53040 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Apr 2011 15:55:52 -0400 Subject: flex_array: flex_array_prealloc takes a number of elements, not an end Change flex_array_prealloc to take the number of elements for which space should be allocated instead of the last (inclusive) element. Users and documentation are updated accordingly. flex_arrays got introduced before they had users. When folks started using it, they ended up needing a different API than was coded up originally. This swaps over to the API that folks apparently need. Based-on-patch-by: Steffen Klassert Signed-off-by: Eric Paris Tested-by: Chris Richards Acked-by: Dave Hansen Cc: stable@kernel.org [2.6.38+] --- Documentation/flexible-arrays.txt | 4 ++-- include/linux/flex_array.h | 2 +- lib/flex_array.c | 13 ++++++++----- security/selinux/ss/policydb.c | 6 +++--- 4 files changed, 14 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/Documentation/flexible-arrays.txt b/Documentation/flexible-arrays.txt index cb8a3a00cc92..df904aec9904 100644 --- a/Documentation/flexible-arrays.txt +++ b/Documentation/flexible-arrays.txt @@ -66,10 +66,10 @@ trick is to ensure that any needed memory allocations are done before entering atomic context, using: int flex_array_prealloc(struct flex_array *array, unsigned int start, - unsigned int end, gfp_t flags); + unsigned int nr_elements, gfp_t flags); This function will ensure that memory for the elements indexed in the range -defined by start and end has been allocated. Thereafter, a +defined by start and nr_elements has been allocated. Thereafter, a flex_array_put() call on an element in that range is guaranteed not to block. 
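A sketch of the corresponding call-site conversion (hypothetical caller; the real updates are in the selinux hunks below) - a caller that used to pass the last (inclusive) index now passes the element count for the same range:

	/* before: last element index, inclusive */
	ret = flex_array_prealloc(fa, 0, nr - 1, GFP_KERNEL);

	/* after: number of elements */
	ret = flex_array_prealloc(fa, 0, nr, GFP_KERNEL);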
diff --git a/include/linux/flex_array.h b/include/linux/flex_array.h index 70e4efabe0fb..ebeb2f3ad068 100644 --- a/include/linux/flex_array.h +++ b/include/linux/flex_array.h @@ -61,7 +61,7 @@ struct flex_array { struct flex_array *flex_array_alloc(int element_size, unsigned int total, gfp_t flags); int flex_array_prealloc(struct flex_array *fa, unsigned int start, - unsigned int end, gfp_t flags); + unsigned int nr_elements, gfp_t flags); void flex_array_free(struct flex_array *fa); void flex_array_free_parts(struct flex_array *fa); int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src, diff --git a/lib/flex_array.c b/lib/flex_array.c index c0ea40ba2082..0c33b24498ba 100644 --- a/lib/flex_array.c +++ b/lib/flex_array.c @@ -232,10 +232,10 @@ EXPORT_SYMBOL(flex_array_clear); /** * flex_array_prealloc - guarantee that array space exists - * @fa: the flex array for which to preallocate parts - * @start: index of first array element for which space is allocated - * @end: index of last (inclusive) element for which space is allocated - * @flags: page allocation flags + * @fa: the flex array for which to preallocate parts + * @start: index of first array element for which space is allocated + * @nr_elements: number of elements for which space is allocated + * @flags: page allocation flags * * This will guarantee that no future calls to flex_array_put() * will allocate memory. It can be used if you are expecting to @@ -245,13 +245,16 @@ EXPORT_SYMBOL(flex_array_clear); * Locking must be provided by the caller. */ int flex_array_prealloc(struct flex_array *fa, unsigned int start, - unsigned int end, gfp_t flags) + unsigned int nr_elements, gfp_t flags) { int start_part; int end_part; int part_nr; + unsigned int end; struct flex_array_part *part; + end = start + nr_elements - 1; + if (start >= fa->total_nr_elements || end >= fa->total_nr_elements) return -ENOSPC; if (elements_fit_in_base(fa)) diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index e7b850ad57ee..e6e7ce0d3d55 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -502,7 +502,7 @@ static int policydb_index(struct policydb *p) goto out; rc = flex_array_prealloc(p->type_val_to_struct_array, 0, - p->p_types.nprim - 1, GFP_KERNEL | __GFP_ZERO); + p->p_types.nprim, GFP_KERNEL | __GFP_ZERO); if (rc) goto out; @@ -519,7 +519,7 @@ static int policydb_index(struct policydb *p) goto out; rc = flex_array_prealloc(p->sym_val_to_name[i], - 0, p->symtab[i].nprim - 1, + 0, p->symtab[i].nprim, GFP_KERNEL | __GFP_ZERO); if (rc) goto out; @@ -2375,7 +2375,7 @@ int policydb_read(struct policydb *p, void *fp) goto bad; /* preallocate so we don't have to worry about the put ever failing */ - rc = flex_array_prealloc(p->type_attr_map_array, 0, p->p_types.nprim - 1, + rc = flex_array_prealloc(p->type_attr_map_array, 0, p->p_types.nprim, GFP_KERNEL | __GFP_ZERO); if (rc) goto bad; -- cgit v1.2.3-71-gd317 From 1742f183fc218798dab6fcf0ded25b6608fc0a48 Mon Sep 17 00:00:00 2001 From: MichaÅ‚ MirosÅ‚aw Date: Fri, 22 Apr 2011 06:31:16 +0000 Subject: net: fix netdev_increment_features() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Simplify and fix netdev_increment_features() to conform to what is stated in netdevice.h comments about NETIF_F_ONE_FOR_ALL. Include FCoE segmentation and VLAN-challedged flags in computation. Signed-off-by: MichaÅ‚ MirosÅ‚aw Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 7 ++++++- net/core/dev.c | 35 +++++++++++------------------------ 2 files changed, 17 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 364bcf212f71..b7d0304762aa 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1106,7 +1106,12 @@ struct net_device { */ #define NETIF_F_ONE_FOR_ALL (NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ROBUST | \ NETIF_F_SG | NETIF_F_HIGHDMA | \ - NETIF_F_FRAGLIST) + NETIF_F_FRAGLIST | NETIF_F_VLAN_CHALLENGED) + /* + * If one device doesn't support one of these features, then disable it + * for all in netdev_increment_features. + */ +#define NETIF_F_ALL_FOR_ALL (NETIF_F_NOCACHE_COPY | NETIF_F_FSO) /* changeable features with no special hardware requirements */ #define NETIF_F_SOFT_FEATURES (NETIF_F_GSO | NETIF_F_GRO) diff --git a/net/core/dev.c b/net/core/dev.c index 3bbb4c2ce92e..7db99b52679f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6164,33 +6164,20 @@ static int dev_cpu_callback(struct notifier_block *nfb, */ u32 netdev_increment_features(u32 all, u32 one, u32 mask) { - /* If device needs checksumming, downgrade to it. */ - if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM)) - all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM); - else if (mask & NETIF_F_ALL_CSUM) { - /* If one device supports v4/v6 checksumming, set for all. */ - if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) && - !(all & NETIF_F_GEN_CSUM)) { - all &= ~NETIF_F_ALL_CSUM; - all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM); - } - - /* If one device supports hw checksumming, set for all. */ - if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) { - all &= ~NETIF_F_ALL_CSUM; - all |= NETIF_F_HW_CSUM; - } - } + if (mask & NETIF_F_GEN_CSUM) + mask |= NETIF_F_ALL_CSUM; + mask |= NETIF_F_VLAN_CHALLENGED; - /* If device can't no cache copy, don't do for all */ - if (!(one & NETIF_F_NOCACHE_COPY)) - all &= ~NETIF_F_NOCACHE_COPY; + all |= one & (NETIF_F_ONE_FOR_ALL|NETIF_F_ALL_CSUM) & mask; + all &= one | ~NETIF_F_ALL_FOR_ALL; - one |= NETIF_F_ALL_CSUM; + /* If device needs checksumming, downgrade to it. */ + if (all & (NETIF_F_ALL_CSUM & ~NETIF_F_NO_CSUM)) + all &= ~NETIF_F_NO_CSUM; - one |= all & NETIF_F_ONE_FOR_ALL; - all &= one | NETIF_F_LLTX | NETIF_F_GSO | NETIF_F_UFO; - all |= one & mask & NETIF_F_ONE_FOR_ALL; + /* If one device supports hw checksumming, set for all. */ + if (all & NETIF_F_GEN_CSUM) + all &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM); return all; } -- cgit v1.2.3-71-gd317 From fa2bd7ff9247f4218dfc907db14d000cd7edd862 Mon Sep 17 00:00:00 2001 From: MichaÅ‚ MirosÅ‚aw Date: Fri, 22 Apr 2011 06:31:16 +0000 Subject: net: allow user to change NETIF_F_HIGHDMA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NETIF_F_HIGHDMA is like any other TX offloads, so allow user to toggle it. This is needed later for bridge and bonding convertsion to hw_features. Signed-off-by: MichaÅ‚ MirosÅ‚aw Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b7d0304762aa..e03af35843bc 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1080,7 +1080,7 @@ struct net_device { /* Features valid for ethtool to change */ /* = all defined minus driver/device-class-related */ -#define NETIF_F_NEVER_CHANGE (NETIF_F_HIGHDMA | NETIF_F_VLAN_CHALLENGED | \ +#define NETIF_F_NEVER_CHANGE (NETIF_F_VLAN_CHALLENGED | \ NETIF_F_LLTX | NETIF_F_NETNS_LOCAL) #define NETIF_F_ETHTOOL_BITS (0x7f3fffff & ~NETIF_F_NEVER_CHANGE) @@ -1098,6 +1098,7 @@ struct net_device { #define NETIF_F_ALL_TX_OFFLOADS (NETIF_F_ALL_CSUM | NETIF_F_SG | \ NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | \ + NETIF_F_HIGHDMA | \ NETIF_F_SCTP_CSUM | NETIF_F_FCOE_CRC) /* -- cgit v1.2.3-71-gd317 From 180bf812ceaf01eb8ac69b86f3be0bd57f697668 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Thu, 28 Apr 2011 12:58:11 -0700 Subject: timers: Improve alarmtimer comments and minor fixes This patch addresses a number of minor comment improvements and other minor issues from Thomas' review of the alarmtimers code. CC: Thomas Gleixner Signed-off-by: John Stultz --- include/linux/alarmtimer.h | 12 ++++++++- kernel/time/alarmtimer.c | 67 +++++++++++++++++++--------------------------- 2 files changed, 38 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/include/linux/alarmtimer.h b/include/linux/alarmtimer.h index 6b364b2e2074..c5d6095b46f8 100644 --- a/include/linux/alarmtimer.h +++ b/include/linux/alarmtimer.h @@ -13,12 +13,22 @@ enum alarmtimer_type { ALARM_NUMTYPE, }; +/** + * struct alarm - Alarm timer structure + * @node: timerqueue node for adding to the event list this value + * also includes the expiration time. + * @period: Period for recuring alarms + * @function: Function pointer to be executed when the timer fires. + * @type: Alarm type (BOOTTIME/REALTIME) + * @enabled: Flag that represents if the alarm is set to fire or not + * @data: Internal data value. 
+ */ struct alarm { struct timerqueue_node node; ktime_t period; void (*function)(struct alarm *); enum alarmtimer_type type; - char enabled; + bool enabled; void *data; }; diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 4058ad79d55f..bed98004ae1a 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -26,7 +26,15 @@ #include #include - +/** + * struct alarm_base - Alarm timer bases + * @lock: Lock for syncrhonized access to the base + * @timerqueue: Timerqueue head managing the list of events + * @timer: hrtimer used to schedule events while running + * @gettime: Function to read the time correlating to the base + * @base_clockid: clockid for the base + * @irqwork Delayed work structure for expiring timers + */ static struct alarm_base { spinlock_t lock; struct timerqueue_head timerqueue; @@ -36,18 +44,16 @@ static struct alarm_base { struct work_struct irqwork; } alarm_bases[ALARM_NUMTYPE]; +/* rtc timer and device for setting alarm wakeups at suspend */ static struct rtc_timer rtctimer; static struct rtc_device *rtcdev; +/* freezer delta & lock used to handle clock_nanosleep triggered wakeups */ static ktime_t freezer_delta; static DEFINE_SPINLOCK(freezer_delta_lock); -/************************************************************************** - * alarmtimer management code - */ - -/* +/** * alarmtimer_enqueue - Adds an alarm timer to an alarm_base timerqueue * @base: pointer to the base where the timer is being run * @alarm: pointer to alarm being enqueued. @@ -67,7 +73,7 @@ static void alarmtimer_enqueue(struct alarm_base *base, struct alarm *alarm) } } -/* +/** * alarmtimer_remove - Removes an alarm timer from an alarm_base timerqueue * @base: pointer to the base where the timer is running * @alarm: pointer to alarm being removed @@ -91,16 +97,16 @@ static void alarmtimer_remove(struct alarm_base *base, struct alarm *alarm) } } -/* +/** * alarmtimer_do_work - Handles alarm being fired. * @work: pointer to workqueue being run * - * When a timer fires, this runs through the timerqueue to see - * which alarm timers, and run those that expired. If there are - * more alarm timers queued, we set the hrtimer to fire in the - * future. + * When a alarm timer fires, this runs through the timerqueue to + * see which alarms expired, and runs those. If there are more alarm + * timers queued for the future, we set the hrtimer to fire when + * when the next future alarm timer expires. */ -void alarmtimer_do_work(struct work_struct *work) +static void alarmtimer_do_work(struct work_struct *work) { struct alarm_base *base = container_of(work, struct alarm_base, irqwork); @@ -141,7 +147,7 @@ void alarmtimer_do_work(struct work_struct *work) } -/* +/** * alarmtimer_fired - Handles alarm hrtimer being fired. * @timer: pointer to hrtimer being run * @@ -156,7 +162,7 @@ static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer) } -/* +/** * alarmtimer_suspend - Suspend time callback * @dev: unused * @state: unused @@ -230,17 +236,11 @@ static void alarmtimer_freezerset(ktime_t absexp, enum alarmtimer_type type) } -/************************************************************************** - * alarm kernel interface code - */ - -/* +/** * alarm_init - Initialize an alarm structure * @alarm: ptr to alarm to be initialized * @type: the type of the alarm * @function: callback that is run when the alarm fires - * - * In-kernel interface to initializes the alarm structure. 
*/ void alarm_init(struct alarm *alarm, enum alarmtimer_type type, void (*function)(struct alarm *)) @@ -252,13 +252,11 @@ void alarm_init(struct alarm *alarm, enum alarmtimer_type type, alarm->enabled = 0; } -/* +/** * alarm_start - Sets an alarm to fire * @alarm: ptr to alarm to set * @start: time to run the alarm * @period: period at which the alarm will recur - * - * In-kernel interface set an alarm timer. */ void alarm_start(struct alarm *alarm, ktime_t start, ktime_t period) { @@ -275,11 +273,9 @@ void alarm_start(struct alarm *alarm, ktime_t start, ktime_t period) spin_unlock_irqrestore(&base->lock, flags); } -/* +/** * alarm_cancel - Tries to cancel an alarm timer * @alarm: ptr to alarm to be canceled - * - * In-kernel interface to cancel an alarm timer. */ void alarm_cancel(struct alarm *alarm) { @@ -294,15 +290,9 @@ void alarm_cancel(struct alarm *alarm) } -/************************************************************************** - * alarm posix interface code - */ - -/* +/** * clock2alarm - helper that converts from clockid to alarmtypes * @clockid: clockid. - * - * Helper function that converts from clockids to alarmtypes */ static enum alarmtimer_type clock2alarm(clockid_t clockid) { @@ -313,7 +303,7 @@ static enum alarmtimer_type clock2alarm(clockid_t clockid) return -1; } -/* +/** * alarm_handle_timer - Callback for posix timers * @alarm: alarm that fired * @@ -327,7 +317,7 @@ static void alarm_handle_timer(struct alarm *alarm) ptr->it_overrun++; } -/* +/** * alarm_clock_getres - posix getres interface * @which_clock: clockid * @tp: timespec to fill @@ -598,9 +588,6 @@ out: return ret; } -/************************************************************************** - * alarmtimer initialization code - */ /* Suspend hook structures */ static const struct dev_pm_ops alarmtimer_pm_ops = { -- cgit v1.2.3-71-gd317 From 69c9dd1ecf446ad8a830e4afc539a2a1adc85b78 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 29 Apr 2011 00:36:05 +0200 Subject: PM: Export platform bus type's default PM callbacks Export the default PM callbacks defined for the platform bus type so that they can be used by power domains for suspending and resuming platform devices in the future. Signed-off-by: Rafael J. 
Wysocki --- drivers/base/platform.c | 72 +++++++++++------------------------------ include/linux/platform_device.h | 60 ++++++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+), 54 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 9e0e4fc24c46..313556f28c9e 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -667,7 +667,7 @@ static int platform_legacy_resume(struct device *dev) return ret; } -static int platform_pm_prepare(struct device *dev) +int platform_pm_prepare(struct device *dev) { struct device_driver *drv = dev->driver; int ret = 0; @@ -678,7 +678,7 @@ static int platform_pm_prepare(struct device *dev) return ret; } -static void platform_pm_complete(struct device *dev) +void platform_pm_complete(struct device *dev) { struct device_driver *drv = dev->driver; @@ -686,16 +686,11 @@ static void platform_pm_complete(struct device *dev) drv->pm->complete(dev); } -#else /* !CONFIG_PM_SLEEP */ - -#define platform_pm_prepare NULL -#define platform_pm_complete NULL - -#endif /* !CONFIG_PM_SLEEP */ +#endif /* CONFIG_PM_SLEEP */ #ifdef CONFIG_SUSPEND -int __weak platform_pm_suspend(struct device *dev) +int platform_pm_suspend(struct device *dev) { struct device_driver *drv = dev->driver; int ret = 0; @@ -713,7 +708,7 @@ int __weak platform_pm_suspend(struct device *dev) return ret; } -int __weak platform_pm_suspend_noirq(struct device *dev) +int platform_pm_suspend_noirq(struct device *dev) { struct device_driver *drv = dev->driver; int ret = 0; @@ -729,7 +724,7 @@ int __weak platform_pm_suspend_noirq(struct device *dev) return ret; } -int __weak platform_pm_resume(struct device *dev) +int platform_pm_resume(struct device *dev) { struct device_driver *drv = dev->driver; int ret = 0; @@ -747,7 +742,7 @@ int __weak platform_pm_resume(struct device *dev) return ret; } -int __weak platform_pm_resume_noirq(struct device *dev) +int platform_pm_resume_noirq(struct device *dev) { struct device_driver *drv = dev->driver; int ret = 0; @@ -763,18 +758,11 @@ int __weak platform_pm_resume_noirq(struct device *dev) return ret; } -#else /* !CONFIG_SUSPEND */ - -#define platform_pm_suspend NULL -#define platform_pm_resume NULL -#define platform_pm_suspend_noirq NULL -#define platform_pm_resume_noirq NULL - -#endif /* !CONFIG_SUSPEND */ +#endif /* CONFIG_SUSPEND */ #ifdef CONFIG_HIBERNATE_CALLBACKS -static int platform_pm_freeze(struct device *dev) +int platform_pm_freeze(struct device *dev) { struct device_driver *drv = dev->driver; int ret = 0; @@ -792,7 +780,7 @@ static int platform_pm_freeze(struct device *dev) return ret; } -static int platform_pm_freeze_noirq(struct device *dev) +int platform_pm_freeze_noirq(struct device *dev) { struct device_driver *drv = dev->driver; int ret = 0; @@ -808,7 +796,7 @@ static int platform_pm_freeze_noirq(struct device *dev) return ret; } -static int platform_pm_thaw(struct device *dev) +int platform_pm_thaw(struct device *dev) { struct device_driver *drv = dev->driver; int ret = 0; @@ -826,7 +814,7 @@ static int platform_pm_thaw(struct device *dev) return ret; } -static int platform_pm_thaw_noirq(struct device *dev) +int platform_pm_thaw_noirq(struct device *dev) { struct device_driver *drv = dev->driver; int ret = 0; @@ -842,7 +830,7 @@ static int platform_pm_thaw_noirq(struct device *dev) return ret; } -static int platform_pm_poweroff(struct device *dev) +int platform_pm_poweroff(struct device *dev) { struct device_driver *drv = dev->driver; int ret = 0; @@ -860,7 +848,7 @@ 
static int platform_pm_poweroff(struct device *dev) return ret; } -static int platform_pm_poweroff_noirq(struct device *dev) +int platform_pm_poweroff_noirq(struct device *dev) { struct device_driver *drv = dev->driver; int ret = 0; @@ -876,7 +864,7 @@ static int platform_pm_poweroff_noirq(struct device *dev) return ret; } -static int platform_pm_restore(struct device *dev) +int platform_pm_restore(struct device *dev) { struct device_driver *drv = dev->driver; int ret = 0; @@ -894,7 +882,7 @@ static int platform_pm_restore(struct device *dev) return ret; } -static int platform_pm_restore_noirq(struct device *dev) +int platform_pm_restore_noirq(struct device *dev) { struct device_driver *drv = dev->driver; int ret = 0; @@ -910,18 +898,7 @@ static int platform_pm_restore_noirq(struct device *dev) return ret; } -#else /* !CONFIG_HIBERNATE_CALLBACKS */ - -#define platform_pm_freeze NULL -#define platform_pm_thaw NULL -#define platform_pm_poweroff NULL -#define platform_pm_restore NULL -#define platform_pm_freeze_noirq NULL -#define platform_pm_thaw_noirq NULL -#define platform_pm_poweroff_noirq NULL -#define platform_pm_restore_noirq NULL - -#endif /* !CONFIG_HIBERNATE_CALLBACKS */ +#endif /* CONFIG_HIBERNATE_CALLBACKS */ #ifdef CONFIG_PM_RUNTIME @@ -949,23 +926,10 @@ int __weak platform_pm_runtime_idle(struct device *dev) #endif /* !CONFIG_PM_RUNTIME */ static const struct dev_pm_ops platform_dev_pm_ops = { - .prepare = platform_pm_prepare, - .complete = platform_pm_complete, - .suspend = platform_pm_suspend, - .resume = platform_pm_resume, - .freeze = platform_pm_freeze, - .thaw = platform_pm_thaw, - .poweroff = platform_pm_poweroff, - .restore = platform_pm_restore, - .suspend_noirq = platform_pm_suspend_noirq, - .resume_noirq = platform_pm_resume_noirq, - .freeze_noirq = platform_pm_freeze_noirq, - .thaw_noirq = platform_pm_thaw_noirq, - .poweroff_noirq = platform_pm_poweroff_noirq, - .restore_noirq = platform_pm_restore_noirq, .runtime_suspend = platform_pm_runtime_suspend, .runtime_resume = platform_pm_runtime_resume, .runtime_idle = platform_pm_runtime_idle, + USE_PLATFORM_PM_SLEEP_OPS }; struct bus_type platform_bus_type = { diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 744942c95fec..e0093e061b08 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -205,4 +205,64 @@ static inline char *early_platform_driver_setup_func(void) \ } #endif /* MODULE */ +#ifdef CONFIG_PM_SLEEP +extern int platform_pm_prepare(struct device *dev); +extern void platform_pm_complete(struct device *dev); +#else +#define platform_pm_prepare NULL +#define platform_pm_complete NULL +#endif + +#ifdef CONFIG_SUSPEND +extern int platform_pm_suspend(struct device *dev); +extern int platform_pm_suspend_noirq(struct device *dev); +extern int platform_pm_resume(struct device *dev); +extern int platform_pm_resume_noirq(struct device *dev); +#else +#define platform_pm_suspend NULL +#define platform_pm_resume NULL +#define platform_pm_suspend_noirq NULL +#define platform_pm_resume_noirq NULL +#endif + +#ifdef CONFIG_HIBERNATE_CALLBACKS +extern int platform_pm_freeze(struct device *dev); +extern int platform_pm_freeze_noirq(struct device *dev); +extern int platform_pm_thaw(struct device *dev); +extern int platform_pm_thaw_noirq(struct device *dev); +extern int platform_pm_poweroff(struct device *dev); +extern int platform_pm_poweroff_noirq(struct device *dev); +extern int platform_pm_restore(struct device *dev); +extern int platform_pm_restore_noirq(struct 
device *dev); +#else +#define platform_pm_freeze NULL +#define platform_pm_thaw NULL +#define platform_pm_poweroff NULL +#define platform_pm_restore NULL +#define platform_pm_freeze_noirq NULL +#define platform_pm_thaw_noirq NULL +#define platform_pm_poweroff_noirq NULL +#define platform_pm_restore_noirq NULL +#endif + +#ifdef CONFIG_PM_SLEEP +#define USE_PLATFORM_PM_SLEEP_OPS \ + .prepare = platform_pm_prepare, \ + .complete = platform_pm_complete, \ + .suspend = platform_pm_suspend, \ + .resume = platform_pm_resume, \ + .freeze = platform_pm_freeze, \ + .thaw = platform_pm_thaw, \ + .poweroff = platform_pm_poweroff, \ + .restore = platform_pm_restore, \ + .suspend_noirq = platform_pm_suspend_noirq, \ + .resume_noirq = platform_pm_resume_noirq, \ + .freeze_noirq = platform_pm_freeze_noirq, \ + .thaw_noirq = platform_pm_thaw_noirq, \ + .poweroff_noirq = platform_pm_poweroff_noirq, \ + .restore_noirq = platform_pm_restore_noirq, +#else +#define USE_PLATFORM_PM_SLEEP_OPS +#endif + #endif /* _PLATFORM_DEVICE_H_ */ -- cgit v1.2.3-71-gd317 From 1d2b71f61b6a10216274e27b717becf9ae101fc7 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 29 Apr 2011 00:36:53 +0200 Subject: PM / Runtime: Add subsystem data field to struct dev_pm_info Some subsystems need to attach PM-related data to struct device and they need to use devres for this purpose. For their convenience and to make code more straightforward, add a new field called subsys_data to struct dev_pm_info and let subsystems use it for attaching PM-related information to devices. Convert the ARM shmobile platform to using the new field. Signed-off-by: Rafael J. Wysocki --- arch/arm/mach-shmobile/pm_runtime.c | 34 +++++++++++++++++----------------- include/linux/pm.h | 1 + 2 files changed, 18 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-shmobile/pm_runtime.c b/arch/arm/mach-shmobile/pm_runtime.c index 12bb504c7f49..30bbe9a99ae1 100644 --- a/arch/arm/mach-shmobile/pm_runtime.c +++ b/arch/arm/mach-shmobile/pm_runtime.c @@ -18,6 +18,7 @@ #include #include #include +#include #ifdef CONFIG_PM_RUNTIME #define BIT_ONCE 0 @@ -29,22 +30,9 @@ struct pm_runtime_data { struct clk *clk; }; -static void __devres_release(struct device *dev, void *res) -{ - struct pm_runtime_data *prd = res; - - dev_dbg(dev, "__devres_release()\n"); - - if (test_bit(BIT_CLK_ENABLED, &prd->flags)) - clk_disable(prd->clk); - - if (test_bit(BIT_ACTIVE, &prd->flags)) - clk_put(prd->clk); -} - static struct pm_runtime_data *__to_prd(struct device *dev) { - return devres_find(dev, __devres_release, NULL, NULL); + return dev ? 
dev->power.subsys_data : NULL; } static void platform_pm_runtime_init(struct device *dev, @@ -121,14 +109,26 @@ static int platform_bus_notify(struct notifier_block *nb, dev_dbg(dev, "platform_bus_notify() %ld !\n", action); - if (action == BUS_NOTIFY_BIND_DRIVER) { - prd = devres_alloc(__devres_release, sizeof(*prd), GFP_KERNEL); + switch (action) { + case BUS_NOTIFY_BIND_DRIVER: + prd = kzalloc(sizeof(*prd), GFP_KERNEL); if (prd) { - devres_add(dev, prd); + dev->power.subsys_data = prd; dev->pwr_domain = &default_power_domain; } else { dev_err(dev, "unable to alloc memory for runtime pm\n"); } + break; + case BUS_NOTIFY_UNBOUND_DRIVER: + prd = __to_prd(dev); + if (prd) { + if (test_bit(BIT_CLK_ENABLED, &prd->flags)) + clk_disable(prd->clk); + + if (test_bit(BIT_ACTIVE, &prd->flags)) + clk_put(prd->clk); + } + break; } return 0; diff --git a/include/linux/pm.h b/include/linux/pm.h index 512e09177e57..f4167d0faa67 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -460,6 +460,7 @@ struct dev_pm_info { unsigned long active_jiffies; unsigned long suspended_jiffies; unsigned long accounting_timestamp; + void *subsys_data; /* Owned by the subsystem. */ #endif }; -- cgit v1.2.3-71-gd317 From 8f62242246351b5a4bc0c1f00c0c7003edea128a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 29 Apr 2011 13:19:47 +0200 Subject: perf events: Add generic front-end and back-end stalled cycle event definitions Add two generic hardware events: front-end and back-end stalled cycles. These events measure conditions when the CPU is executing code but its capabilities are not fully utilized. Understanding such situations and analyzing them is an important sub-task of code optimization workflows. Both events limit performance: most front end stalls tend to be caused by branch misprediction or instruction fetch cachemisses, backend stalls can be caused by various resource shortages or inefficient instruction scheduling. Front-end stalls are the more important ones: code cannot run fast if the instruction stream is not being kept up. An over-utilized back-end can cause front-end stalls and thus has to be kept an eye on as well. The exact composition is very program logic and instruction mix dependent. We use the terms 'stall', 'front-end' and 'back-end' loosely and try to use the best available events from specific CPUs that approximate these concepts. 
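As a rough illustration of how the front-end/back-end split surfaces in the user ABI, a hypothetical userspace sketch that counts the two new generic events over a made-up busy loop could look like the following; it assumes headers from a kernel carrying this patch, open_counter() is an invented helper, and error handling is trimmed.

        #include <linux/perf_event.h>
        #include <stdint.h>
        #include <stdio.h>
        #include <string.h>
        #include <sys/syscall.h>
        #include <unistd.h>

        static int open_counter(uint64_t config)
        {
                struct perf_event_attr attr;

                memset(&attr, 0, sizeof(attr));
                attr.size = sizeof(attr);
                attr.type = PERF_TYPE_HARDWARE;
                attr.config = config;   /* one of the generic hardware event ids */

                /* pid = 0 (self), cpu = -1 (any), group_fd = -1, flags = 0 */
                return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
        }

        int main(void)
        {
                int fe = open_counter(PERF_COUNT_HW_STALLED_CYCLES_FRONTEND);
                int be = open_counter(PERF_COUNT_HW_STALLED_CYCLES_BACKEND);
                uint64_t fe_count = 0, be_count = 0;
                volatile unsigned long i, sink = 0;

                for (i = 0; i < 100000000UL; i++)       /* arbitrary work to measure */
                        sink += i;

                if (fe >= 0 && read(fe, &fe_count, sizeof(fe_count)) == sizeof(fe_count))
                        printf("front-end stalled cycles: %llu\n",
                               (unsigned long long)fe_count);
                if (be >= 0 && read(be, &be_count, sizeof(be_count)) == sizeof(be_count))
                        printf("back-end stalled cycles:  %llu\n",
                               (unsigned long long)be_count);
                return 0;
        }
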
Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Link: http://lkml.kernel.org/n/tip-7y40wib8n000io7hjpn1dsrm@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 2 +- include/linux/perf_event.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 1ea94224f62e..393085b87a2c 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1414,7 +1414,7 @@ static __init int intel_pmu_init(void) x86_pmu.extra_regs = intel_nehalem_extra_regs; /* Install the stalled-cycles event: UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ - intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES] = 0x1803fb1; + intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1; if (ebx & 0x40) { /* diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index ac636dd20a0c..4e2d7ae71499 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -52,7 +52,8 @@ enum perf_hw_id { PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4, PERF_COUNT_HW_BRANCH_MISSES = 5, PERF_COUNT_HW_BUS_CYCLES = 6, - PERF_COUNT_HW_STALLED_CYCLES = 7, + PERF_COUNT_HW_STALLED_CYCLES_FRONTEND = 7, + PERF_COUNT_HW_STALLED_CYCLES_BACKEND = 8, PERF_COUNT_HW_MAX, /* non-ABI */ }; -- cgit v1.2.3-71-gd317 From 36504605432996590f889e33d47e2d9c581f7569 Mon Sep 17 00:00:00 2001 From: David Decotigny Date: Wed, 27 Apr 2011 18:32:37 +0000 Subject: ethtool: cosmetics: enforce const-ness in ethtool_cmd_speed The 'ep' argument of ethtool_cmd_speed is not altered: advertise it in protoype. +Indentation fix. Also add comments to advise using the ethtool_cmd_speed API to get/set the link speed. Signed-off-by: David Decotigny Signed-off-by: David S. Miller --- include/linux/ethtool.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 9de31274341d..7e6e0a89ca26 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -24,7 +24,10 @@ struct ethtool_cmd { __u32 cmd; __u32 supported; /* Features this interface supports */ __u32 advertising; /* Features this interface advertises */ - __u16 speed; /* The forced speed, 10Mb, 100Mb, gigabit */ + __u16 speed; /* The forced speed (lower bits) in + * Mbps. Please use + * ethtool_cmd_speed()/_set() to + * access it */ __u8 duplex; /* Duplex, half or full */ __u8 port; /* Which connector port */ __u8 phy_address; @@ -33,7 +36,10 @@ struct ethtool_cmd { __u8 mdio_support; __u32 maxtxpkt; /* Tx pkts before generating tx int */ __u32 maxrxpkt; /* Rx pkts before generating rx int */ - __u16 speed_hi; + __u16 speed_hi; /* The forced speed (upper + * bits) in Mbps. 
Please use + * ethtool_cmd_speed()/_set() to + * access it */ __u8 eth_tp_mdix; __u8 reserved2; __u32 lp_advertising; /* Features the link partner advertises */ @@ -41,14 +47,14 @@ struct ethtool_cmd { }; static inline void ethtool_cmd_speed_set(struct ethtool_cmd *ep, - __u32 speed) + __u32 speed) { ep->speed = (__u16)speed; ep->speed_hi = (__u16)(speed >> 16); } -static inline __u32 ethtool_cmd_speed(struct ethtool_cmd *ep) +static inline __u32 ethtool_cmd_speed(const struct ethtool_cmd *ep) { return (ep->speed_hi << 16) | ep->speed; } -- cgit v1.2.3-71-gd317 From 8ae6daca85c8bbd6a32c382db5e2a2a989f8bed2 Mon Sep 17 00:00:00 2001 From: David Decotigny Date: Wed, 27 Apr 2011 18:32:38 +0000 Subject: ethtool: Call ethtool's get/set_settings callbacks with cleaned data This makes sure that when a driver calls the ethtool's get/set_settings() callback of another driver, the data passed to it is clean. This guarantees that speed_hi will be zeroed correctly if the called callback doesn't explicitely set it: we are sure we don't get a corrupted speed from the underlying driver. We also take care of setting the cmd field appropriately (ETHTOOL_GSET/SSET). This applies to dev_ethtool_get_settings(), which now makes sure it sets up that ethtool command parameter correctly before passing it to drivers. This also means that whoever calls dev_ethtool_get_settings() does not have to clean the ethtool command parameter. This function also becomes an exported symbol instead of an inline. All drivers visible to make allyesconfig under x86_64 have been updated. Signed-off-by: David Decotigny Signed-off-by: David S. Miller --- arch/mips/txx9/generic/setup_tx4939.c | 21 ++++++++------------- drivers/net/e100.c | 2 +- drivers/net/mdio.c | 3 +++ drivers/net/mii.c | 3 +++ drivers/net/pch_gbe/pch_gbe_main.c | 6 +++--- drivers/net/pch_gbe/pch_gbe_phy.c | 2 +- drivers/net/pcnet32.c | 16 ++++++++-------- drivers/net/sfc/mdio_10g.c | 4 ++-- drivers/net/stmmac/stmmac_ethtool.c | 5 ++--- drivers/net/usb/asix.c | 28 +++++++++++++++------------- drivers/net/usb/dm9601.c | 6 +++--- drivers/net/usb/smsc75xx.c | 7 ++++--- drivers/net/usb/smsc95xx.c | 7 ++++--- drivers/scsi/bnx2fc/bnx2fc_fcoe.c | 11 +++++++---- drivers/scsi/fcoe/fcoe.c | 11 +++++++---- include/linux/ethtool.h | 4 +++- include/linux/netdevice.h | 9 ++------- include/rdma/ib_addr.h | 13 +++++++------ net/core/dev.c | 24 ++++++++++++++++++++++++ net/core/net-sysfs.c | 24 ++++++++++-------------- 20 files changed, 117 insertions(+), 89 deletions(-) (limited to 'include/linux') diff --git a/arch/mips/txx9/generic/setup_tx4939.c b/arch/mips/txx9/generic/setup_tx4939.c index 3dc19f482959..e9f95dcde379 100644 --- a/arch/mips/txx9/generic/setup_tx4939.c +++ b/arch/mips/txx9/generic/setup_tx4939.c @@ -318,19 +318,15 @@ void __init tx4939_sio_init(unsigned int sclk, unsigned int cts_mask) } #if defined(CONFIG_TC35815) || defined(CONFIG_TC35815_MODULE) -static int tx4939_get_eth_speed(struct net_device *dev) +static u32 tx4939_get_eth_speed(struct net_device *dev) { - struct ethtool_cmd cmd = { ETHTOOL_GSET }; - int speed = 100; /* default 100Mbps */ - int err; - if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings) - return speed; - err = dev->ethtool_ops->get_settings(dev, &cmd); - if (err < 0) - return speed; - speed = cmd.speed == SPEED_100 ? 
100 : 10; - return speed; + struct ethtool_cmd cmd; + if (dev_ethtool_get_settings(dev, &cmd)) + return 100; /* default 100Mbps */ + + return ethtool_cmd_speed(&cmd); } + static int tx4939_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) @@ -343,8 +339,7 @@ static int tx4939_netdev_event(struct notifier_block *this, else if (dev->irq == TXX9_IRQ_BASE + TX4939_IR_ETH(1)) bit = TX4939_PCFG_SPEED1; if (bit) { - int speed = tx4939_get_eth_speed(dev); - if (speed == 100) + if (tx4939_get_eth_speed(dev) == 100) txx9_set64(&tx4939_ccfgptr->pcfg, bit); else txx9_clear64(&tx4939_ccfgptr->pcfg, bit); diff --git a/drivers/net/e100.c b/drivers/net/e100.c index b0aa9e68990a..66ba596a4d37 100644 --- a/drivers/net/e100.c +++ b/drivers/net/e100.c @@ -1668,7 +1668,7 @@ static void e100_adjust_adaptive_ifs(struct nic *nic, int speed, int duplex) static void e100_watchdog(unsigned long data) { struct nic *nic = (struct nic *)data; - struct ethtool_cmd cmd; + struct ethtool_cmd cmd = { .cmd = ETHTOOL_GSET }; netif_printk(nic, timer, KERN_DEBUG, nic->netdev, "right now = %ld\n", jiffies); diff --git a/drivers/net/mdio.c b/drivers/net/mdio.c index e85bf04cf813..f2d10abd0403 100644 --- a/drivers/net/mdio.c +++ b/drivers/net/mdio.c @@ -176,6 +176,9 @@ static u32 mdio45_get_an(const struct mdio_if_info *mdio, u16 addr) * @npage_adv: Modes currently advertised on next pages * @npage_lpa: Modes advertised by link partner on next pages * + * The @ecmd parameter is expected to have been cleared before calling + * mdio45_ethtool_gset_npage(). + * * Since the CSRs for auto-negotiation using next pages are not fully * standardised, this function does not attempt to decode them. The * caller must pass them in. diff --git a/drivers/net/mii.c b/drivers/net/mii.c index 0a6c6a2e7550..05acca78f63a 100644 --- a/drivers/net/mii.c +++ b/drivers/net/mii.c @@ -58,6 +58,9 @@ static u32 mii_get_an(struct mii_if_info *mii, u16 addr) * @mii: MII interface * @ecmd: requested ethtool_cmd * + * The @ecmd parameter is expected to have been cleared before calling + * mii_ethtool_gset(). + * * Returns 0 for success, negative on error. */ int mii_ethtool_gset(struct mii_if_info *mii, struct ethtool_cmd *ecmd) diff --git a/drivers/net/pch_gbe/pch_gbe_main.c b/drivers/net/pch_gbe/pch_gbe_main.c index 4cc9872f5ec4..f3e4b0adae93 100644 --- a/drivers/net/pch_gbe/pch_gbe_main.c +++ b/drivers/net/pch_gbe/pch_gbe_main.c @@ -888,12 +888,12 @@ static void pch_gbe_watchdog(unsigned long data) struct pch_gbe_adapter *adapter = (struct pch_gbe_adapter *)data; struct net_device *netdev = adapter->netdev; struct pch_gbe_hw *hw = &adapter->hw; - struct ethtool_cmd cmd; pr_debug("right now = %ld\n", jiffies); pch_gbe_update_stats(adapter); if ((mii_link_ok(&adapter->mii)) && (!netif_carrier_ok(netdev))) { + struct ethtool_cmd cmd = { .cmd = ETHTOOL_GSET }; netdev->tx_queue_len = adapter->tx_queue_len; /* mii library handles link maintenance tasks */ if (mii_ethtool_gset(&adapter->mii, &cmd)) { @@ -903,7 +903,7 @@ static void pch_gbe_watchdog(unsigned long data) PCH_GBE_WATCHDOG_PERIOD)); return; } - hw->mac.link_speed = cmd.speed; + hw->mac.link_speed = ethtool_cmd_speed(&cmd); hw->mac.link_duplex = cmd.duplex; /* Set the RGMII control. */ pch_gbe_set_rgmii_ctrl(adapter, hw->mac.link_speed, @@ -913,7 +913,7 @@ static void pch_gbe_watchdog(unsigned long data) hw->mac.link_duplex); netdev_dbg(netdev, "Link is Up %d Mbps %s-Duplex\n", - cmd.speed, + hw->mac.link_speed, cmd.duplex == DUPLEX_FULL ? 
"Full" : "Half"); netif_carrier_on(netdev); netif_wake_queue(netdev); diff --git a/drivers/net/pch_gbe/pch_gbe_phy.c b/drivers/net/pch_gbe/pch_gbe_phy.c index 923a687acd30..9a8207f686fd 100644 --- a/drivers/net/pch_gbe/pch_gbe_phy.c +++ b/drivers/net/pch_gbe/pch_gbe_phy.c @@ -247,7 +247,7 @@ inline void pch_gbe_phy_set_rgmii(struct pch_gbe_hw *hw) void pch_gbe_phy_init_setting(struct pch_gbe_hw *hw) { struct pch_gbe_adapter *adapter; - struct ethtool_cmd cmd; + struct ethtool_cmd cmd = { .cmd = ETHTOOL_GSET }; int ret; u16 mii_reg; diff --git a/drivers/net/pcnet32.c b/drivers/net/pcnet32.c index 0a1efbae1bc0..b48aba9e4227 100644 --- a/drivers/net/pcnet32.c +++ b/drivers/net/pcnet32.c @@ -2099,7 +2099,7 @@ static int pcnet32_open(struct net_device *dev) int first_phy = -1; u16 bmcr; u32 bcr9; - struct ethtool_cmd ecmd; + struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET }; /* * There is really no good other way to handle multiple PHYs @@ -2115,9 +2115,9 @@ static int pcnet32_open(struct net_device *dev) ecmd.port = PORT_MII; ecmd.transceiver = XCVR_INTERNAL; ecmd.autoneg = AUTONEG_DISABLE; - ecmd.speed = - lp-> - options & PCNET32_PORT_100 ? SPEED_100 : SPEED_10; + ethtool_cmd_speed_set(&ecmd, + (lp->options & PCNET32_PORT_100) ? + SPEED_100 : SPEED_10); bcr9 = lp->a.read_bcr(ioaddr, 9); if (lp->options & PCNET32_PORT_FD) { @@ -2763,11 +2763,11 @@ static void pcnet32_check_media(struct net_device *dev, int verbose) netif_carrier_on(dev); if (lp->mii) { if (netif_msg_link(lp)) { - struct ethtool_cmd ecmd; + struct ethtool_cmd ecmd = { + .cmd = ETHTOOL_GSET }; mii_ethtool_gset(&lp->mii_if, &ecmd); - netdev_info(dev, "link up, %sMbps, %s-duplex\n", - (ecmd.speed == SPEED_100) - ? "100" : "10", + netdev_info(dev, "link up, %uMbps, %s-duplex\n", + ethtool_cmd_speed(&ecmd), (ecmd.duplex == DUPLEX_FULL) ? 
"full" : "half"); } diff --git a/drivers/net/sfc/mdio_10g.c b/drivers/net/sfc/mdio_10g.c index 19e68c26d103..71159145b4bf 100644 --- a/drivers/net/sfc/mdio_10g.c +++ b/drivers/net/sfc/mdio_10g.c @@ -232,12 +232,12 @@ void efx_mdio_set_mmds_lpower(struct efx_nic *efx, */ int efx_mdio_set_settings(struct efx_nic *efx, struct ethtool_cmd *ecmd) { - struct ethtool_cmd prev; + struct ethtool_cmd prev = { .cmd = ETHTOOL_GSET }; efx->phy_op->get_settings(efx, &prev); if (ecmd->advertising == prev.advertising && - ecmd->speed == prev.speed && + ethtool_cmd_speed(ecmd) == ethtool_cmd_speed(&prev) && ecmd->duplex == prev.duplex && ecmd->port == prev.port && ecmd->autoneg == prev.autoneg) diff --git a/drivers/net/stmmac/stmmac_ethtool.c b/drivers/net/stmmac/stmmac_ethtool.c index 0e61ac8707cb..6f5aaeb986ff 100644 --- a/drivers/net/stmmac/stmmac_ethtool.c +++ b/drivers/net/stmmac/stmmac_ethtool.c @@ -237,13 +237,12 @@ stmmac_set_pauseparam(struct net_device *netdev, if (phy->autoneg) { if (netif_running(netdev)) { - struct ethtool_cmd cmd; + struct ethtool_cmd cmd = { .cmd = ETHTOOL_SSET }; /* auto-negotiation automatically restarted */ - cmd.cmd = ETHTOOL_NWAY_RST; cmd.supported = phy->supported; cmd.advertising = phy->advertising; cmd.autoneg = phy->autoneg; - cmd.speed = phy->speed; + ethtool_cmd_speed_set(&cmd, phy->speed); cmd.duplex = phy->duplex; cmd.phy_address = phy->addr; ret = phy_ethtool_sset(phy, &cmd); diff --git a/drivers/net/usb/asix.c b/drivers/net/usb/asix.c index 6140b56cce53..6998aa6b7bb7 100644 --- a/drivers/net/usb/asix.c +++ b/drivers/net/usb/asix.c @@ -847,7 +847,7 @@ static void ax88172_set_multicast(struct net_device *net) static int ax88172_link_reset(struct usbnet *dev) { u8 mode; - struct ethtool_cmd ecmd; + struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET }; mii_check_media(&dev->mii, 1, 1); mii_ethtool_gset(&dev->mii, &ecmd); @@ -856,8 +856,8 @@ static int ax88172_link_reset(struct usbnet *dev) if (ecmd.duplex != DUPLEX_FULL) mode |= ~AX88172_MEDIUM_FD; - netdev_dbg(dev->net, "ax88172_link_reset() speed: %d duplex: %d setting mode to 0x%04x\n", - ecmd.speed, ecmd.duplex, mode); + netdev_dbg(dev->net, "ax88172_link_reset() speed: %u duplex: %d setting mode to 0x%04x\n", + ethtool_cmd_speed(&ecmd), ecmd.duplex, mode); asix_write_medium_mode(dev, mode); @@ -947,20 +947,20 @@ static const struct ethtool_ops ax88772_ethtool_ops = { static int ax88772_link_reset(struct usbnet *dev) { u16 mode; - struct ethtool_cmd ecmd; + struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET }; mii_check_media(&dev->mii, 1, 1); mii_ethtool_gset(&dev->mii, &ecmd); mode = AX88772_MEDIUM_DEFAULT; - if (ecmd.speed != SPEED_100) + if (ethtool_cmd_speed(&ecmd) != SPEED_100) mode &= ~AX_MEDIUM_PS; if (ecmd.duplex != DUPLEX_FULL) mode &= ~AX_MEDIUM_FD; - netdev_dbg(dev->net, "ax88772_link_reset() speed: %d duplex: %d setting mode to 0x%04x\n", - ecmd.speed, ecmd.duplex, mode); + netdev_dbg(dev->net, "ax88772_link_reset() speed: %u duplex: %d setting mode to 0x%04x\n", + ethtool_cmd_speed(&ecmd), ecmd.duplex, mode); asix_write_medium_mode(dev, mode); @@ -1173,18 +1173,20 @@ static int marvell_led_status(struct usbnet *dev, u16 speed) static int ax88178_link_reset(struct usbnet *dev) { u16 mode; - struct ethtool_cmd ecmd; + struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET }; struct asix_data *data = (struct asix_data *)&dev->data; + u32 speed; netdev_dbg(dev->net, "ax88178_link_reset()\n"); mii_check_media(&dev->mii, 1, 1); mii_ethtool_gset(&dev->mii, &ecmd); mode = AX88178_MEDIUM_DEFAULT; + speed = 
ethtool_cmd_speed(&ecmd); - if (ecmd.speed == SPEED_1000) + if (speed == SPEED_1000) mode |= AX_MEDIUM_GM; - else if (ecmd.speed == SPEED_100) + else if (speed == SPEED_100) mode |= AX_MEDIUM_PS; else mode &= ~(AX_MEDIUM_PS | AX_MEDIUM_GM); @@ -1196,13 +1198,13 @@ static int ax88178_link_reset(struct usbnet *dev) else mode &= ~AX_MEDIUM_FD; - netdev_dbg(dev->net, "ax88178_link_reset() speed: %d duplex: %d setting mode to 0x%04x\n", - ecmd.speed, ecmd.duplex, mode); + netdev_dbg(dev->net, "ax88178_link_reset() speed: %u duplex: %d setting mode to 0x%04x\n", + speed, ecmd.duplex, mode); asix_write_medium_mode(dev, mode); if (data->phymode == PHY_MODE_MARVELL && data->ledmode) - marvell_led_status(dev, ecmd.speed); + marvell_led_status(dev, speed); return 0; } diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c index 5002f5be47be..1d93133e9b74 100644 --- a/drivers/net/usb/dm9601.c +++ b/drivers/net/usb/dm9601.c @@ -599,13 +599,13 @@ static void dm9601_status(struct usbnet *dev, struct urb *urb) static int dm9601_link_reset(struct usbnet *dev) { - struct ethtool_cmd ecmd; + struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET }; mii_check_media(&dev->mii, 1, 1); mii_ethtool_gset(&dev->mii, &ecmd); - netdev_dbg(dev->net, "link_reset() speed: %d duplex: %d\n", - ecmd.speed, ecmd.duplex); + netdev_dbg(dev->net, "link_reset() speed: %u duplex: %d\n", + ethtool_cmd_speed(&ecmd), ecmd.duplex); return 0; } diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c index 860a20c938b4..15b3d6888ae9 100644 --- a/drivers/net/usb/smsc75xx.c +++ b/drivers/net/usb/smsc75xx.c @@ -503,7 +503,7 @@ static int smsc75xx_update_flowcontrol(struct usbnet *dev, u8 duplex, static int smsc75xx_link_reset(struct usbnet *dev) { struct mii_if_info *mii = &dev->mii; - struct ethtool_cmd ecmd; + struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET }; u16 lcladv, rmtadv; int ret; @@ -519,8 +519,9 @@ static int smsc75xx_link_reset(struct usbnet *dev) lcladv = smsc75xx_mdio_read(dev->net, mii->phy_id, MII_ADVERTISE); rmtadv = smsc75xx_mdio_read(dev->net, mii->phy_id, MII_LPA); - netif_dbg(dev, link, dev->net, "speed: %d duplex: %d lcladv: %04x" - " rmtadv: %04x", ecmd.speed, ecmd.duplex, lcladv, rmtadv); + netif_dbg(dev, link, dev->net, "speed: %u duplex: %d lcladv: %04x" + " rmtadv: %04x", ethtool_cmd_speed(&ecmd), + ecmd.duplex, lcladv, rmtadv); return smsc75xx_update_flowcontrol(dev, ecmd.duplex, lcladv, rmtadv); } diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index 24f4b3739dd2..b374a9997908 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -457,7 +457,7 @@ static int smsc95xx_link_reset(struct usbnet *dev) { struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]); struct mii_if_info *mii = &dev->mii; - struct ethtool_cmd ecmd; + struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET }; unsigned long flags; u16 lcladv, rmtadv; u32 intdata; @@ -472,8 +472,9 @@ static int smsc95xx_link_reset(struct usbnet *dev) lcladv = smsc95xx_mdio_read(dev->net, mii->phy_id, MII_ADVERTISE); rmtadv = smsc95xx_mdio_read(dev->net, mii->phy_id, MII_LPA); - netif_dbg(dev, link, dev->net, "speed: %d duplex: %d lcladv: %04x rmtadv: %04x\n", - ecmd.speed, ecmd.duplex, lcladv, rmtadv); + netif_dbg(dev, link, dev->net, + "speed: %u duplex: %d lcladv: %04x rmtadv: %04x\n", + ethtool_cmd_speed(&ecmd), ecmd.duplex, lcladv, rmtadv); spin_lock_irqsave(&pdata->mac_cr_lock, flags); if (ecmd.duplex != DUPLEX_FULL) { diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c 
b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c index e2e647509a73..cd050196a163 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c +++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c @@ -664,7 +664,7 @@ static void bnx2fc_link_speed_update(struct fc_lport *lport) struct fcoe_port *port = lport_priv(lport); struct bnx2fc_hba *hba = port->priv; struct net_device *netdev = hba->netdev; - struct ethtool_cmd ecmd = { ETHTOOL_GSET }; + struct ethtool_cmd ecmd; if (!dev_ethtool_get_settings(netdev, &ecmd)) { lport->link_supported_speeds &= @@ -675,12 +675,15 @@ static void bnx2fc_link_speed_update(struct fc_lport *lport) if (ecmd.supported & SUPPORTED_10000baseT_Full) lport->link_supported_speeds |= FC_PORTSPEED_10GBIT; - if (ecmd.speed == SPEED_1000) + switch (ethtool_cmd_speed(&ecmd)) { + case SPEED_1000: lport->link_speed = FC_PORTSPEED_1GBIT; - if (ecmd.speed == SPEED_10000) + break; + case SPEED_10000: lport->link_speed = FC_PORTSPEED_10GBIT; + break; + } } - return; } static int bnx2fc_link_ok(struct fc_lport *lport) { diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c index bde6ee5333eb..04f346b562da 100644 --- a/drivers/scsi/fcoe/fcoe.c +++ b/drivers/scsi/fcoe/fcoe.c @@ -2026,7 +2026,7 @@ out_nodev: int fcoe_link_speed_update(struct fc_lport *lport) { struct net_device *netdev = fcoe_netdev(lport); - struct ethtool_cmd ecmd = { ETHTOOL_GSET }; + struct ethtool_cmd ecmd; if (!dev_ethtool_get_settings(netdev, &ecmd)) { lport->link_supported_speeds &= @@ -2037,11 +2037,14 @@ int fcoe_link_speed_update(struct fc_lport *lport) if (ecmd.supported & SUPPORTED_10000baseT_Full) lport->link_supported_speeds |= FC_PORTSPEED_10GBIT; - if (ecmd.speed == SPEED_1000) + switch (ethtool_cmd_speed(&ecmd)) { + case SPEED_1000: lport->link_speed = FC_PORTSPEED_1GBIT; - if (ecmd.speed == SPEED_10000) + break; + case SPEED_10000: lport->link_speed = FC_PORTSPEED_10GBIT; - + break; + } return 0; } return -1; diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 7e6e0a89ca26..4194a2067a14 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -744,7 +744,9 @@ bool ethtool_invalid_flags(struct net_device *dev, u32 data, u32 supported); /** * struct ethtool_ops - optional netdev operations * @get_settings: Get various device settings including Ethernet link - * settings. Returns a negative error code or zero. + * settings. The @cmd parameter is expected to have been cleared + * before get_settings is called. Returns a negative error code or + * zero. * @set_settings: Set various device settings including Ethernet link * settings. Returns a negative error code or zero. * @get_drvinfo: Report driver/device information. 
Should only set the diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e03af35843bc..d5de66af46f9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2597,13 +2597,8 @@ static inline int netif_is_bond_slave(struct net_device *dev) extern struct pernet_operations __net_initdata loopback_net_ops; -static inline int dev_ethtool_get_settings(struct net_device *dev, - struct ethtool_cmd *cmd) -{ - if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings) - return -EOPNOTSUPP; - return dev->ethtool_ops->get_settings(dev, cmd); -} +int dev_ethtool_get_settings(struct net_device *dev, + struct ethtool_cmd *cmd); static inline u32 dev_ethtool_get_rx_csum(struct net_device *dev) { diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index b5fc9f39122b..ae8c68f30f1b 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -217,18 +217,19 @@ static inline enum ib_mtu iboe_get_mtu(int mtu) static inline int iboe_get_rate(struct net_device *dev) { struct ethtool_cmd cmd; + u32 speed; - if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings || - dev->ethtool_ops->get_settings(dev, &cmd)) + if (dev_ethtool_get_settings(dev, &cmd)) return IB_RATE_PORT_CURRENT; - if (cmd.speed >= 40000) + speed = ethtool_cmd_speed(&cmd); + if (speed >= 40000) return IB_RATE_40_GBPS; - else if (cmd.speed >= 30000) + else if (speed >= 30000) return IB_RATE_30_GBPS; - else if (cmd.speed >= 20000) + else if (speed >= 20000) return IB_RATE_20_GBPS; - else if (cmd.speed >= 10000) + else if (speed >= 10000) return IB_RATE_10_GBPS; else return IB_RATE_PORT_CURRENT; diff --git a/net/core/dev.c b/net/core/dev.c index 7db99b52679f..e95dc30110eb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4495,6 +4495,30 @@ void dev_set_rx_mode(struct net_device *dev) netif_addr_unlock_bh(dev); } +/** + * dev_ethtool_get_settings - call device's ethtool_ops::get_settings() + * @dev: device + * @cmd: memory area for ethtool_ops::get_settings() result + * + * The cmd arg is initialized properly (cleared and + * ethtool_cmd::cmd field set to ETHTOOL_GSET). + * + * Return device's ethtool_ops::get_settings() result value or + * -EOPNOTSUPP when device doesn't expose + * ethtool_ops::get_settings() operation. 
+ */ +int dev_ethtool_get_settings(struct net_device *dev, + struct ethtool_cmd *cmd) +{ + if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings) + return -EOPNOTSUPP; + + memset(cmd, 0, sizeof(struct ethtool_cmd)); + cmd->cmd = ETHTOOL_GSET; + return dev->ethtool_ops->get_settings(dev, cmd); +} +EXPORT_SYMBOL(dev_ethtool_get_settings); + /** * dev_get_flags - get flags reported to userspace * @dev: device diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 5ceb257e860c..381813eae46c 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -28,6 +28,7 @@ static const char fmt_hex[] = "%#x\n"; static const char fmt_long_hex[] = "%#lx\n"; static const char fmt_dec[] = "%d\n"; +static const char fmt_udec[] = "%u\n"; static const char fmt_ulong[] = "%lu\n"; static const char fmt_u64[] = "%llu\n"; @@ -145,13 +146,10 @@ static ssize_t show_speed(struct device *dev, if (!rtnl_trylock()) return restart_syscall(); - if (netif_running(netdev) && - netdev->ethtool_ops && - netdev->ethtool_ops->get_settings) { - struct ethtool_cmd cmd = { ETHTOOL_GSET }; - - if (!netdev->ethtool_ops->get_settings(netdev, &cmd)) - ret = sprintf(buf, fmt_dec, ethtool_cmd_speed(&cmd)); + if (netif_running(netdev)) { + struct ethtool_cmd cmd; + if (!dev_ethtool_get_settings(netdev, &cmd)) + ret = sprintf(buf, fmt_udec, ethtool_cmd_speed(&cmd)); } rtnl_unlock(); return ret; @@ -166,13 +164,11 @@ static ssize_t show_duplex(struct device *dev, if (!rtnl_trylock()) return restart_syscall(); - if (netif_running(netdev) && - netdev->ethtool_ops && - netdev->ethtool_ops->get_settings) { - struct ethtool_cmd cmd = { ETHTOOL_GSET }; - - if (!netdev->ethtool_ops->get_settings(netdev, &cmd)) - ret = sprintf(buf, "%s\n", cmd.duplex ? "full" : "half"); + if (netif_running(netdev)) { + struct ethtool_cmd cmd; + if (!dev_ethtool_get_settings(netdev, &cmd)) + ret = sprintf(buf, "%s\n", + cmd.duplex ? "full" : "half"); } rtnl_unlock(); return ret; -- cgit v1.2.3-71-gd317 From 85eb8c8d0b0900c073b0e6f89979ac9c439ade1a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 30 Apr 2011 00:25:44 +0200 Subject: PM / Runtime: Generic clock manipulation rountines for runtime PM (v6) Many different platforms and subsystems may want to disable device clocks during suspend and enable them during resume which is going to be done in a very similar way in all those cases. For this reason, provide generic routines for the manipulation of device clocks during suspend and resume. Convert the ARM shmobile platform to using the new routines. Signed-off-by: Rafael J. Wysocki --- arch/arm/mach-shmobile/pm_runtime.c | 140 +----------- drivers/base/power/Makefile | 1 + drivers/base/power/clock_ops.c | 430 ++++++++++++++++++++++++++++++++++++ include/linux/pm_runtime.h | 42 ++++ kernel/power/Kconfig | 4 + 5 files changed, 486 insertions(+), 131 deletions(-) create mode 100644 drivers/base/power/clock_ops.c (limited to 'include/linux') diff --git a/arch/arm/mach-shmobile/pm_runtime.c b/arch/arm/mach-shmobile/pm_runtime.c index 30bbe9a99ae1..2d1b67a59e4a 100644 --- a/arch/arm/mach-shmobile/pm_runtime.c +++ b/arch/arm/mach-shmobile/pm_runtime.c @@ -21,70 +21,6 @@ #include #ifdef CONFIG_PM_RUNTIME -#define BIT_ONCE 0 -#define BIT_ACTIVE 1 -#define BIT_CLK_ENABLED 2 - -struct pm_runtime_data { - unsigned long flags; - struct clk *clk; -}; - -static struct pm_runtime_data *__to_prd(struct device *dev) -{ - return dev ? 
dev->power.subsys_data : NULL; -} - -static void platform_pm_runtime_init(struct device *dev, - struct pm_runtime_data *prd) -{ - if (prd && !test_and_set_bit(BIT_ONCE, &prd->flags)) { - prd->clk = clk_get(dev, NULL); - if (!IS_ERR(prd->clk)) { - set_bit(BIT_ACTIVE, &prd->flags); - dev_info(dev, "clocks managed by runtime pm\n"); - } - } -} - -static void platform_pm_runtime_bug(struct device *dev, - struct pm_runtime_data *prd) -{ - if (prd && !test_and_set_bit(BIT_ONCE, &prd->flags)) - dev_err(dev, "runtime pm suspend before resume\n"); -} - -static int default_platform_runtime_suspend(struct device *dev) -{ - struct pm_runtime_data *prd = __to_prd(dev); - - dev_dbg(dev, "%s()\n", __func__); - - platform_pm_runtime_bug(dev, prd); - - if (prd && test_bit(BIT_ACTIVE, &prd->flags)) { - clk_disable(prd->clk); - clear_bit(BIT_CLK_ENABLED, &prd->flags); - } - - return 0; -} - -static int default_platform_runtime_resume(struct device *dev) -{ - struct pm_runtime_data *prd = __to_prd(dev); - - dev_dbg(dev, "%s()\n", __func__); - - platform_pm_runtime_init(dev, prd); - - if (prd && test_bit(BIT_ACTIVE, &prd->flags)) { - clk_enable(prd->clk); - set_bit(BIT_CLK_ENABLED, &prd->flags); - } - - return 0; -} static int default_platform_runtime_idle(struct device *dev) { @@ -94,87 +30,29 @@ static int default_platform_runtime_idle(struct device *dev) static struct dev_power_domain default_power_domain = { .ops = { - .runtime_suspend = default_platform_runtime_suspend, - .runtime_resume = default_platform_runtime_resume, + .runtime_suspend = pm_runtime_clk_suspend, + .runtime_resume = pm_runtime_clk_resume, .runtime_idle = default_platform_runtime_idle, USE_PLATFORM_PM_SLEEP_OPS }, }; -static int platform_bus_notify(struct notifier_block *nb, - unsigned long action, void *data) -{ - struct device *dev = data; - struct pm_runtime_data *prd; - - dev_dbg(dev, "platform_bus_notify() %ld !\n", action); - - switch (action) { - case BUS_NOTIFY_BIND_DRIVER: - prd = kzalloc(sizeof(*prd), GFP_KERNEL); - if (prd) { - dev->power.subsys_data = prd; - dev->pwr_domain = &default_power_domain; - } else { - dev_err(dev, "unable to alloc memory for runtime pm\n"); - } - break; - case BUS_NOTIFY_UNBOUND_DRIVER: - prd = __to_prd(dev); - if (prd) { - if (test_bit(BIT_CLK_ENABLED, &prd->flags)) - clk_disable(prd->clk); +#define DEFAULT_PWR_DOMAIN_PTR (&default_power_domain) - if (test_bit(BIT_ACTIVE, &prd->flags)) - clk_put(prd->clk); - } - break; - } +#else - return 0; -} - -#else /* CONFIG_PM_RUNTIME */ - -static int platform_bus_notify(struct notifier_block *nb, - unsigned long action, void *data) -{ - struct device *dev = data; - struct clk *clk; - - dev_dbg(dev, "platform_bus_notify() %ld !\n", action); - - switch (action) { - case BUS_NOTIFY_BIND_DRIVER: - clk = clk_get(dev, NULL); - if (!IS_ERR(clk)) { - clk_enable(clk); - clk_put(clk); - dev_info(dev, "runtime pm disabled, clock forced on\n"); - } - break; - case BUS_NOTIFY_UNBOUND_DRIVER: - clk = clk_get(dev, NULL); - if (!IS_ERR(clk)) { - clk_disable(clk); - clk_put(clk); - dev_info(dev, "runtime pm disabled, clock forced off\n"); - } - break; - } - - return 0; -} +#define DEFAULT_PWR_DOMAIN_PTR NULL #endif /* CONFIG_PM_RUNTIME */ -static struct notifier_block platform_bus_notifier = { - .notifier_call = platform_bus_notify +static struct pm_clk_notifier_block platform_bus_notifier = { + .pwr_domain = DEFAULT_PWR_DOMAIN_PTR, + .con_ids = { NULL, }, }; static int __init sh_pm_runtime_init(void) { - bus_register_notifier(&platform_bus_type, &platform_bus_notifier); + 
pm_runtime_clk_add_notifier(&platform_bus_type, &platform_bus_notifier); return 0; } core_initcall(sh_pm_runtime_init); diff --git a/drivers/base/power/Makefile b/drivers/base/power/Makefile index 118c1b92a511..06a7073f9027 100644 --- a/drivers/base/power/Makefile +++ b/drivers/base/power/Makefile @@ -3,6 +3,7 @@ obj-$(CONFIG_PM_SLEEP) += main.o wakeup.o obj-$(CONFIG_PM_RUNTIME) += runtime.o obj-$(CONFIG_PM_TRACE_RTC) += trace.o obj-$(CONFIG_PM_OPP) += opp.o +obj-$(CONFIG_HAVE_CLK) += clock_ops.o ccflags-$(CONFIG_DEBUG_DRIVER) := -DDEBUG ccflags-$(CONFIG_PM_VERBOSE) += -DDEBUG diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c new file mode 100644 index 000000000000..d74abf334391 --- /dev/null +++ b/drivers/base/power/clock_ops.c @@ -0,0 +1,430 @@ +/* + * drivers/base/power/clock_ops.c - Generic clock manipulation PM callbacks + * + * Copyright (c) 2011 Rafael J. Wysocki , Renesas Electronics Corp. + * + * This file is released under the GPLv2. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_PM_RUNTIME + +struct pm_runtime_clk_data { + struct list_head clock_list; + struct mutex lock; +}; + +enum pce_status { + PCE_STATUS_NONE = 0, + PCE_STATUS_ACQUIRED, + PCE_STATUS_ENABLED, + PCE_STATUS_ERROR, +}; + +struct pm_clock_entry { + struct list_head node; + char *con_id; + struct clk *clk; + enum pce_status status; +}; + +static struct pm_runtime_clk_data *__to_prd(struct device *dev) +{ + return dev ? dev->power.subsys_data : NULL; +} + +/** + * pm_runtime_clk_add - Start using a device clock for runtime PM. + * @dev: Device whose clock is going to be used for runtime PM. + * @con_id: Connection ID of the clock. + * + * Add the clock represented by @con_id to the list of clocks used for + * the runtime PM of @dev. + */ +int pm_runtime_clk_add(struct device *dev, const char *con_id) +{ + struct pm_runtime_clk_data *prd = __to_prd(dev); + struct pm_clock_entry *ce; + + if (!prd) + return -EINVAL; + + ce = kzalloc(sizeof(*ce), GFP_KERNEL); + if (!ce) { + dev_err(dev, "Not enough memory for clock entry.\n"); + return -ENOMEM; + } + + if (con_id) { + ce->con_id = kstrdup(con_id, GFP_KERNEL); + if (!ce->con_id) { + dev_err(dev, + "Not enough memory for clock connection ID.\n"); + kfree(ce); + return -ENOMEM; + } + } + + mutex_lock(&prd->lock); + list_add_tail(&ce->node, &prd->clock_list); + mutex_unlock(&prd->lock); + return 0; +} + +/** + * __pm_runtime_clk_remove - Destroy runtime PM clock entry. + * @ce: Runtime PM clock entry to destroy. + * + * This routine must be called under the mutex protecting the runtime PM list + * of clocks corresponding the the @ce's device. + */ +static void __pm_runtime_clk_remove(struct pm_clock_entry *ce) +{ + if (!ce) + return; + + list_del(&ce->node); + + if (ce->status < PCE_STATUS_ERROR) { + if (ce->status == PCE_STATUS_ENABLED) + clk_disable(ce->clk); + + if (ce->status >= PCE_STATUS_ACQUIRED) + clk_put(ce->clk); + } + + if (ce->con_id) + kfree(ce->con_id); + + kfree(ce); +} + +/** + * pm_runtime_clk_remove - Stop using a device clock for runtime PM. + * @dev: Device whose clock should not be used for runtime PM any more. + * @con_id: Connection ID of the clock. + * + * Remove the clock represented by @con_id from the list of clocks used for + * the runtime PM of @dev. 
+ */ +void pm_runtime_clk_remove(struct device *dev, const char *con_id) +{ + struct pm_runtime_clk_data *prd = __to_prd(dev); + struct pm_clock_entry *ce; + + if (!prd) + return; + + mutex_lock(&prd->lock); + + list_for_each_entry(ce, &prd->clock_list, node) { + if (!con_id && !ce->con_id) { + __pm_runtime_clk_remove(ce); + break; + } else if (!con_id || !ce->con_id) { + continue; + } else if (!strcmp(con_id, ce->con_id)) { + __pm_runtime_clk_remove(ce); + break; + } + } + + mutex_unlock(&prd->lock); +} + +/** + * pm_runtime_clk_init - Initialize a device's list of runtime PM clocks. + * @dev: Device to initialize the list of runtime PM clocks for. + * + * Allocate a struct pm_runtime_clk_data object, initialize its lock member and + * make the @dev's power.subsys_data field point to it. + */ +int pm_runtime_clk_init(struct device *dev) +{ + struct pm_runtime_clk_data *prd; + + prd = kzalloc(sizeof(*prd), GFP_KERNEL); + if (!prd) { + dev_err(dev, "Not enough memory fo runtime PM data.\n"); + return -ENOMEM; + } + + INIT_LIST_HEAD(&prd->clock_list); + mutex_init(&prd->lock); + dev->power.subsys_data = prd; + return 0; +} + +/** + * pm_runtime_clk_destroy - Destroy a device's list of runtime PM clocks. + * @dev: Device to destroy the list of runtime PM clocks for. + * + * Clear the @dev's power.subsys_data field, remove the list of clock entries + * from the struct pm_runtime_clk_data object pointed to by it before and free + * that object. + */ +void pm_runtime_clk_destroy(struct device *dev) +{ + struct pm_runtime_clk_data *prd = __to_prd(dev); + struct pm_clock_entry *ce, *c; + + if (!prd) + return; + + dev->power.subsys_data = NULL; + + mutex_lock(&prd->lock); + + list_for_each_entry_safe_reverse(ce, c, &prd->clock_list, node) + __pm_runtime_clk_remove(ce); + + mutex_unlock(&prd->lock); + + kfree(prd); +} + +/** + * pm_runtime_clk_acquire - Acquire a device clock. + * @dev: Device whose clock is to be acquired. + * @con_id: Connection ID of the clock. + */ +static void pm_runtime_clk_acquire(struct device *dev, + struct pm_clock_entry *ce) +{ + ce->clk = clk_get(dev, ce->con_id); + if (IS_ERR(ce->clk)) { + ce->status = PCE_STATUS_ERROR; + } else { + ce->status = PCE_STATUS_ACQUIRED; + dev_dbg(dev, "Clock %s managed by runtime PM.\n", ce->con_id); + } +} + +/** + * pm_runtime_clk_suspend - Disable clocks in a device's runtime PM clock list. + * @dev: Device to disable the clocks for. + */ +int pm_runtime_clk_suspend(struct device *dev) +{ + struct pm_runtime_clk_data *prd = __to_prd(dev); + struct pm_clock_entry *ce; + + dev_dbg(dev, "%s()\n", __func__); + + if (!prd) + return 0; + + mutex_lock(&prd->lock); + + list_for_each_entry_reverse(ce, &prd->clock_list, node) { + if (ce->status == PCE_STATUS_NONE) + pm_runtime_clk_acquire(dev, ce); + + if (ce->status < PCE_STATUS_ERROR) { + clk_disable(ce->clk); + ce->status = PCE_STATUS_ACQUIRED; + } + } + + mutex_unlock(&prd->lock); + + return 0; +} + +/** + * pm_runtime_clk_resume - Enable clocks in a device's runtime PM clock list. + * @dev: Device to enable the clocks for. 
+ */ +int pm_runtime_clk_resume(struct device *dev) +{ + struct pm_runtime_clk_data *prd = __to_prd(dev); + struct pm_clock_entry *ce; + + dev_dbg(dev, "%s()\n", __func__); + + if (!prd) + return 0; + + mutex_lock(&prd->lock); + + list_for_each_entry(ce, &prd->clock_list, node) { + if (ce->status == PCE_STATUS_NONE) + pm_runtime_clk_acquire(dev, ce); + + if (ce->status < PCE_STATUS_ERROR) { + clk_enable(ce->clk); + ce->status = PCE_STATUS_ENABLED; + } + } + + mutex_unlock(&prd->lock); + + return 0; +} + +/** + * pm_runtime_clk_notify - Notify routine for device addition and removal. + * @nb: Notifier block object this function is a member of. + * @action: Operation being carried out by the caller. + * @data: Device the routine is being run for. + * + * For this function to work, @nb must be a member of an object of type + * struct pm_clk_notifier_block containing all of the requisite data. + * Specifically, the pwr_domain member of that object is copied to the device's + * pwr_domain field and its con_ids member is used to populate the device's list + * of runtime PM clocks, depending on @action. + * + * If the device's pwr_domain field is already populated with a value different + * from the one stored in the struct pm_clk_notifier_block object, the function + * does nothing. + */ +static int pm_runtime_clk_notify(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct pm_clk_notifier_block *clknb; + struct device *dev = data; + char *con_id; + int error; + + dev_dbg(dev, "%s() %ld\n", __func__, action); + + clknb = container_of(nb, struct pm_clk_notifier_block, nb); + + switch (action) { + case BUS_NOTIFY_ADD_DEVICE: + if (dev->pwr_domain) + break; + + error = pm_runtime_clk_init(dev); + if (error) + break; + + dev->pwr_domain = clknb->pwr_domain; + if (clknb->con_ids[0]) { + for (con_id = clknb->con_ids[0]; *con_id; con_id++) + pm_runtime_clk_add(dev, con_id); + } else { + pm_runtime_clk_add(dev, NULL); + } + + break; + case BUS_NOTIFY_DEL_DEVICE: + if (dev->pwr_domain != clknb->pwr_domain) + break; + + dev->pwr_domain = NULL; + pm_runtime_clk_destroy(dev); + break; + } + + return 0; +} + +#else /* !CONFIG_PM_RUNTIME */ + +/** + * enable_clock - Enable a device clock. + * @dev: Device whose clock is to be enabled. + * @con_id: Connection ID of the clock. + */ +static void enable_clock(struct device *dev, const char *con_id) +{ + struct clk *clk; + + clk = clk_get(dev, con_id); + if (!IS_ERR(clk)) { + clk_enable(clk); + clk_put(clk); + dev_info(dev, "Runtime PM disabled, clock forced on.\n"); + } +} + +/** + * disable_clock - Disable a device clock. + * @dev: Device whose clock is to be disabled. + * @con_id: Connection ID of the clock. + */ +static void disable_clock(struct device *dev, const char *con_id) +{ + struct clk *clk; + + clk = clk_get(dev, con_id); + if (!IS_ERR(clk)) { + clk_disable(clk); + clk_put(clk); + dev_info(dev, "Runtime PM disabled, clock forced off.\n"); + } +} + +/** + * pm_runtime_clk_notify - Notify routine for device addition and removal. + * @nb: Notifier block object this function is a member of. + * @action: Operation being carried out by the caller. + * @data: Device the routine is being run for. + * + * For this function to work, @nb must be a member of an object of type + * struct pm_clk_notifier_block containing all of the requisite data. + * Specifically, the con_ids member of that object is used to enable or disable + * the device's clocks, depending on @action. 
+ */ +static int pm_runtime_clk_notify(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct pm_clk_notifier_block *clknb; + struct device *dev = data; + + dev_dbg(dev, "%s() %ld\n", __func__, action); + + clknb = container_of(nb, struct pm_clk_notifier_block, nb); + + switch (action) { + case BUS_NOTIFY_ADD_DEVICE: + if (clknb->con_ids[0]) { + for (con_id = clknb->con_ids[0]; *con_id; con_id++) + enable_clock(dev, con_id); + } else { + enable_clock(dev, NULL); + } + break; + case BUS_NOTIFY_DEL_DEVICE: + if (clknb->con_ids[0]) { + for (con_id = clknb->con_ids[0]; *con_id; con_id++) + disable_clock(dev, con_id); + } else { + disable_clock(dev, NULL); + } + break; + } + + return 0; +} + +#endif /* !CONFIG_PM_RUNTIME */ + +/** + * pm_runtime_clk_add_notifier - Add bus type notifier for runtime PM clocks. + * @bus: Bus type to add the notifier to. + * @clknb: Notifier to be added to the given bus type. + * + * The nb member of @clknb is not expected to be initialized and its + * notifier_call member will be replaced with pm_runtime_clk_notify(). However, + * the remaining members of @clknb should be populated prior to calling this + * routine. + */ +void pm_runtime_clk_add_notifier(struct bus_type *bus, + struct pm_clk_notifier_block *clknb) +{ + if (!bus || !clknb) + return; + + clknb->nb.notifier_call = pm_runtime_clk_notify; + bus_register_notifier(bus, &clknb->nb); +} diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 8de9aa6e7def..878cf84baeb1 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -245,4 +245,46 @@ static inline void pm_runtime_dont_use_autosuspend(struct device *dev) __pm_runtime_use_autosuspend(dev, false); } +struct pm_clk_notifier_block { + struct notifier_block nb; + struct dev_power_domain *pwr_domain; + char *con_ids[]; +}; + +#ifdef CONFIG_PM_RUNTIME_CLK +extern int pm_runtime_clk_init(struct device *dev); +extern void pm_runtime_clk_destroy(struct device *dev); +extern int pm_runtime_clk_add(struct device *dev, const char *con_id); +extern void pm_runtime_clk_remove(struct device *dev, const char *con_id); +extern int pm_runtime_clk_suspend(struct device *dev); +extern int pm_runtime_clk_resume(struct device *dev); +#else +static inline int pm_runtime_clk_init(struct device *dev) +{ + return -EINVAL; +} +static inline void pm_runtime_clk_destroy(struct device *dev) +{ +} +static inline int pm_runtime_clk_add(struct device *dev, const char *con_id) +{ + return -EINVAL; +} +static inline void pm_runtime_clk_remove(struct device *dev, const char *con_id) +{ +} +#define pm_runtime_clock_suspend NULL +#define pm_runtime_clock_resume NULL +#endif + +#ifdef CONFIG_HAVE_CLK +extern void pm_runtime_clk_add_notifier(struct bus_type *bus, + struct pm_clk_notifier_block *clknb); +#else +static inline void pm_runtime_clk_add_notifier(struct bus_type *bus, + struct pm_clk_notifier_block *clknb) +{ +} +#endif + #endif diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 6de9a8fc3417..d74ad4a90695 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -229,3 +229,7 @@ config PM_OPP representing individual voltage domains and provides SOC implementations a ready to use framework to manage OPPs. 
For more information, read + +config PM_RUNTIME_CLK + def_bool y + depends on PM_RUNTIME && HAVE_CLK -- cgit v1.2.3-71-gd317 From 6498d9db6d2dad4cf5deb2dd09e0816904f41ca5 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 28 Apr 2011 10:45:24 -0400 Subject: USB: documentation update for the pre_reset method This patch (as1459) updates the documentation for the pre_reset method in struct usb_driver. When a driver is notified of an impending reset, it must cancel all outstanding I/O and not start any new I/O until it has been notified that the reset is complete. As far as I know, most existing drivers that implement pre_reset do this now. The major exceptions appear to be the SpeedTouch and CDC-WDM drivers. Signed-off-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- Documentation/usb/callbacks.txt | 8 +++++--- include/linux/usb.h | 6 ++++-- 2 files changed, 9 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/Documentation/usb/callbacks.txt b/Documentation/usb/callbacks.txt index bfb36b34b79e..9e85846bdb98 100644 --- a/Documentation/usb/callbacks.txt +++ b/Documentation/usb/callbacks.txt @@ -95,9 +95,11 @@ pre_reset int (*pre_reset)(struct usb_interface *intf); -Another driver or user space is triggering a reset on the device which -contains the interface passed as an argument. Cease IO and save any -device state you need to restore. +A driver or user space is triggering a reset on the device which +contains the interface passed as an argument. Cease IO, wait for all +outstanding URBs to complete, and save any device state you need to +restore. No more URBs may be submitted until the post_reset method +is called. If you need to allocate memory here, use GFP_NOIO or GFP_ATOMIC, if you are in atomic context. diff --git a/include/linux/usb.h b/include/linux/usb.h index 65f78ca5d88e..73c7df489607 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -806,8 +806,10 @@ struct usbdrv_wrap { * @resume: Called when the device is being resumed by the system. * @reset_resume: Called when the suspended device has been reset instead * of being resumed. - * @pre_reset: Called by usb_reset_device() when the device - * is about to be reset. + * @pre_reset: Called by usb_reset_device() when the device is about to be + * reset. This routine must not return until the driver has no active + * URBs for the device, and no more URBs may be submitted until the + * post_reset method is called. * @post_reset: Called by usb_reset_device() after the device * has been reset * @id_table: USB drivers use ID table to support hotplugging. -- cgit v1.2.3-71-gd317 From af32fe511374f17feb137d7fbfe2f4c73a8f531c Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 21 Apr 2011 14:10:16 +0900 Subject: usb: renesas_usbhs: remove callback when module removed. The callback function which is called from platform must be removed if module removed. 
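To make the failure mode concrete: once the module is unloaded, the function that notify_hotplug pointed at is gone, so the remove path below clears the pointer and the platform-side caller must test it before every invocation. A minimal sketch of that guarded call, assuming a board-side helper (the helper name is illustrative; renesas_usbhs_get_info(), struct renesas_usbhs_driver_callback and notify_hotplug are taken from the header changed below):

#include <linux/platform_device.h>
#include <linux/usb/renesas_usbhs.h>

/* Illustrative board-side helper, not the macro from renesas_usbhs.h:
 * only invoke the hotplug callback while the driver is still bound. */
static void board_usbhs_notify_hotplug(struct platform_device *pdev)
{
	struct renesas_usbhs_driver_callback *dc =
		&(renesas_usbhs_get_info(pdev)->driver_callback);

	if (dc && dc->notify_hotplug)	/* cleared in usbhs_remove() */
		dc->notify_hotplug(pdev);
}
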
Signed-off-by: Kuninori Morimoto Signed-off-by: Greg Kroah-Hartman --- drivers/usb/renesas_usbhs/common.c | 4 ++++ include/linux/usb/renesas_usbhs.h | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/usb/renesas_usbhs/common.c b/drivers/usb/renesas_usbhs/common.c index d9ad60d1c156..fda586d1f7d8 100644 --- a/drivers/usb/renesas_usbhs/common.c +++ b/drivers/usb/renesas_usbhs/common.c @@ -352,9 +352,13 @@ probe_end_kfree: static int __devexit usbhs_remove(struct platform_device *pdev) { struct usbhs_priv *priv = usbhsc_pdev_to_priv(pdev); + struct renesas_usbhs_platform_info *info = pdev->dev.platform_data; + struct renesas_usbhs_driver_callback *dfunc = &info->driver_callback; dev_dbg(&pdev->dev, "usb remove\n"); + dfunc->notify_hotplug = NULL; + pm_runtime_disable(&pdev->dev); usbhsc_bus_ctrl(priv, 0); diff --git a/include/linux/usb/renesas_usbhs.h b/include/linux/usb/renesas_usbhs.h index 565bca3aa440..66bbdd12d153 100644 --- a/include/linux/usb/renesas_usbhs.h +++ b/include/linux/usb/renesas_usbhs.h @@ -143,7 +143,7 @@ struct renesas_usbhs_platform_info { ({ \ struct renesas_usbhs_driver_callback *dc; \ dc = &(renesas_usbhs_get_info(pdev)->driver_callback); \ - if (dc) \ + if (dc && dc->notify_hotplug) \ dc->notify_hotplug(pdev); \ }) #endif /* RENESAS_USB_H */ -- cgit v1.2.3-71-gd317 From bc57381e634782009b1cb2e86b18013699ada576 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 28 Apr 2011 16:41:14 +0900 Subject: usb: renesas_usbhs: use delayed_work instead of work_struct This delay is used to overjump debounce. And, this patch also move usbhsc_drvcllbck_notify_hotplug to global, because it will be called from other files. Signed-off-by: Kuninori Morimoto Signed-off-by: Greg Kroah-Hartman --- drivers/usb/renesas_usbhs/common.c | 9 +++++---- drivers/usb/renesas_usbhs/common.h | 3 ++- include/linux/usb/renesas_usbhs.h | 7 +++++++ 3 files changed, 14 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/renesas_usbhs/common.c b/drivers/usb/renesas_usbhs/common.c index db13cef9effe..9a75a45687bb 100644 --- a/drivers/usb/renesas_usbhs/common.c +++ b/drivers/usb/renesas_usbhs/common.c @@ -178,7 +178,7 @@ static void usbhsc_notify_hotplug(struct work_struct *work) { struct usbhs_priv *priv = container_of(work, struct usbhs_priv, - notify_hotplug_work); + notify_hotplug_work.work); struct platform_device *pdev = usbhs_priv_to_pdev(priv); struct usbhs_mod *mod = usbhs_mod_get_current(priv); int id; @@ -224,16 +224,17 @@ static void usbhsc_notify_hotplug(struct work_struct *work) } } -static int usbhsc_drvcllbck_notify_hotplug(struct platform_device *pdev) +int usbhsc_drvcllbck_notify_hotplug(struct platform_device *pdev) { struct usbhs_priv *priv = usbhs_pdev_to_priv(pdev); + int delay = usbhs_get_dparam(priv, detection_delay); /* * This functions will be called in interrupt. 
* To make sure safety context, * use workqueue for usbhs_notify_hotplug */ - schedule_work(&priv->notify_hotplug_work); + schedule_delayed_work(&priv->notify_hotplug_work, delay); return 0; } @@ -300,7 +301,7 @@ static int __devinit usbhs_probe(struct platform_device *pdev) */ priv->irq = irq; priv->pdev = pdev; - INIT_WORK(&priv->notify_hotplug_work, usbhsc_notify_hotplug); + INIT_DELAYED_WORK(&priv->notify_hotplug_work, usbhsc_notify_hotplug); spin_lock_init(usbhs_priv_to_lock(priv)); /* call pipe and module init */ diff --git a/drivers/usb/renesas_usbhs/common.h b/drivers/usb/renesas_usbhs/common.h index f3b907d26082..0157eb805cf6 100644 --- a/drivers/usb/renesas_usbhs/common.h +++ b/drivers/usb/renesas_usbhs/common.h @@ -177,7 +177,7 @@ struct usbhs_priv { struct renesas_usbhs_platform_callback *pfunc; struct renesas_usbhs_driver_param *dparam; - struct work_struct notify_hotplug_work; + struct delayed_work notify_hotplug_work; struct platform_device *pdev; spinlock_t lock; @@ -200,6 +200,7 @@ u16 usbhs_read(struct usbhs_priv *priv, u32 reg); void usbhs_write(struct usbhs_priv *priv, u32 reg, u16 data); void usbhs_bset(struct usbhs_priv *priv, u32 reg, u16 mask, u16 data); +int usbhsc_drvcllbck_notify_hotplug(struct platform_device *pdev); /* * sysconfig */ diff --git a/include/linux/usb/renesas_usbhs.h b/include/linux/usb/renesas_usbhs.h index 66bbdd12d153..3a7f1d982dd6 100644 --- a/include/linux/usb/renesas_usbhs.h +++ b/include/linux/usb/renesas_usbhs.h @@ -103,6 +103,13 @@ struct renesas_usbhs_driver_param { * for BUSWAIT :: BWAIT * */ int buswait_bwait; + + /* + * option: + * + * delay time from notify_hotplug callback + */ + int detection_delay; }; /* -- cgit v1.2.3-71-gd317 From 45a4a2372b364107cabea79f255b333236626416 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 21 Apr 2011 23:16:46 -0400 Subject: ftrace: Remove FTRACE_FL_FAILED flag Since we disable all function tracer processing if we detect that a modification of a instruction had failed, we do not need to track that the record has failed. No more ftrace processing is allowed, and the FTRACE_FL_FAILED flag is pointless. Removing this flag simplifies some of the code, but some ftrace_disabled checks needed to be added or move around a little. Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 9 +++--- kernel/trace/ftrace.c | 76 +++++++++++++++++++++++++++++++------------------- 2 files changed, 51 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index ca29e03c1fac..2a195ffd4269 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -147,11 +147,10 @@ extern int ftrace_text_reserved(void *start, void *end); enum { FTRACE_FL_FREE = (1 << 0), - FTRACE_FL_FAILED = (1 << 1), - FTRACE_FL_FILTER = (1 << 2), - FTRACE_FL_ENABLED = (1 << 3), - FTRACE_FL_NOTRACE = (1 << 4), - FTRACE_FL_CONVERTED = (1 << 5), + FTRACE_FL_FILTER = (1 << 1), + FTRACE_FL_ENABLED = (1 << 2), + FTRACE_FL_NOTRACE = (1 << 3), + FTRACE_FL_CONVERTED = (1 << 4), }; struct dyn_ftrace { diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 97b30f818642..eb19fae2c54a 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1083,19 +1083,20 @@ static void ftrace_replace_code(int enable) struct ftrace_page *pg; int failed; + if (unlikely(ftrace_disabled)) + return; + do_for_each_ftrace_rec(pg, rec) { /* * Skip over free records, records that have * failed and not converted. 
*/ if (rec->flags & FTRACE_FL_FREE || - rec->flags & FTRACE_FL_FAILED || !(rec->flags & FTRACE_FL_CONVERTED)) continue; failed = __ftrace_replace_code(rec, enable); if (failed) { - rec->flags |= FTRACE_FL_FAILED; ftrace_bug(failed, rec->ip); /* Stop processing */ return; @@ -1111,10 +1112,12 @@ ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec) ip = rec->ip; + if (unlikely(ftrace_disabled)) + return 0; + ret = ftrace_make_nop(mod, rec, MCOUNT_ADDR); if (ret) { ftrace_bug(ret, ip); - rec->flags |= FTRACE_FL_FAILED; return 0; } return 1; @@ -1466,6 +1469,9 @@ t_next(struct seq_file *m, void *v, loff_t *pos) struct ftrace_iterator *iter = m->private; struct dyn_ftrace *rec = NULL; + if (unlikely(ftrace_disabled)) + return NULL; + if (iter->flags & FTRACE_ITER_HASH) return t_hash_next(m, pos); @@ -1518,6 +1524,10 @@ static void *t_start(struct seq_file *m, loff_t *pos) loff_t l; mutex_lock(&ftrace_lock); + + if (unlikely(ftrace_disabled)) + return NULL; + /* * If an lseek was done, then reset and start from beginning. */ @@ -1636,8 +1646,6 @@ static void ftrace_filter_reset(int enable) if (enable) ftrace_filtered = 0; do_for_each_ftrace_rec(pg, rec) { - if (rec->flags & FTRACE_FL_FAILED) - continue; rec->flags &= ~type; } while_for_each_ftrace_rec(); mutex_unlock(&ftrace_lock); @@ -1767,9 +1775,6 @@ static int ftrace_match_records(char *buff, int len, int enable) mutex_lock(&ftrace_lock); do_for_each_ftrace_rec(pg, rec) { - if (rec->flags & FTRACE_FL_FAILED) - continue; - if (ftrace_match_record(rec, search, search_len, type)) { if (not) rec->flags &= ~flag; @@ -1837,10 +1842,11 @@ static int ftrace_match_module_records(char *buff, char *mod, int enable) } mutex_lock(&ftrace_lock); - do_for_each_ftrace_rec(pg, rec) { - if (rec->flags & FTRACE_FL_FAILED) - continue; + if (unlikely(ftrace_disabled)) + goto out_unlock; + + do_for_each_ftrace_rec(pg, rec) { if (ftrace_match_module_record(rec, mod, search, search_len, type)) { @@ -1854,6 +1860,7 @@ static int ftrace_match_module_records(char *buff, char *mod, int enable) ftrace_filtered = 1; } while_for_each_ftrace_rec(); + out_unlock: mutex_unlock(&ftrace_lock); return found; @@ -2008,10 +2015,11 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, return -EINVAL; mutex_lock(&ftrace_lock); - do_for_each_ftrace_rec(pg, rec) { - if (rec->flags & FTRACE_FL_FAILED) - continue; + if (unlikely(ftrace_disabled)) + goto out_unlock; + + do_for_each_ftrace_rec(pg, rec) { if (!ftrace_match_record(rec, search, len, type)) continue; @@ -2218,6 +2226,10 @@ ftrace_regex_write(struct file *file, const char __user *ubuf, mutex_lock(&ftrace_regex_lock); + ret = -ENODEV; + if (unlikely(ftrace_disabled)) + goto out_unlock; + if (file->f_mode & FMODE_READ) { struct seq_file *m = file->private_data; iter = m->private; @@ -2545,9 +2557,6 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer) bool exists; int i; - if (ftrace_disabled) - return -ENODEV; - /* decode regex */ type = filter_parse_regex(buffer, strlen(buffer), &search, ¬); if (!not && *idx >= FTRACE_GRAPH_MAX_FUNCS) @@ -2556,9 +2565,15 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer) search_len = strlen(search); mutex_lock(&ftrace_lock); + + if (unlikely(ftrace_disabled)) { + mutex_unlock(&ftrace_lock); + return -ENODEV; + } + do_for_each_ftrace_rec(pg, rec) { - if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE)) + if (rec->flags & FTRACE_FL_FREE) continue; if (ftrace_match_record(rec, search, search_len, type)) { @@ -2700,10 +2715,11 @@ void 
ftrace_release_mod(struct module *mod) struct dyn_ftrace *rec; struct ftrace_page *pg; + mutex_lock(&ftrace_lock); + if (ftrace_disabled) - return; + goto out_unlock; - mutex_lock(&ftrace_lock); do_for_each_ftrace_rec(pg, rec) { if (within_module_core(rec->ip, mod)) { /* @@ -2714,6 +2730,7 @@ void ftrace_release_mod(struct module *mod) ftrace_free_rec(rec); } } while_for_each_ftrace_rec(); + out_unlock: mutex_unlock(&ftrace_lock); } @@ -3108,16 +3125,17 @@ void ftrace_kill(void) */ int register_ftrace_function(struct ftrace_ops *ops) { - int ret; - - if (unlikely(ftrace_disabled)) - return -1; + int ret = -1; mutex_lock(&ftrace_lock); + if (unlikely(ftrace_disabled)) + goto out_unlock; + ret = __register_ftrace_function(ops); ftrace_startup(0); + out_unlock: mutex_unlock(&ftrace_lock); return ret; } @@ -3145,14 +3163,14 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - int ret; - - if (unlikely(ftrace_disabled)) - return -ENODEV; + int ret = -ENODEV; mutex_lock(&ftrace_lock); - ret = proc_dointvec(table, write, buffer, lenp, ppos); + if (unlikely(ftrace_disabled)) + goto out; + + ret = proc_dointvec(table, write, buffer, lenp, ppos); if (ret || !write || (last_ftrace_enabled == !!ftrace_enabled)) goto out; -- cgit v1.2.3-71-gd317 From d2c8c3eafbf715306ec891e7ca52d3d999acbe31 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 25 Apr 2011 14:32:42 -0400 Subject: ftrace: Remove FTRACE_FL_CONVERTED flag Since we disable all function tracer processing if we detect that a modification of a instruction had failed, we do not need to track that the record has failed. No more ftrace processing is allowed, and the FTRACE_FL_CONVERTED flag is pointless. The FTRACE_FL_CONVERTED flag was used to denote records that were successfully converted from mcount calls into nops. But if a single record fails, all of ftrace is disabled. Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 1 - kernel/trace/ftrace.c | 12 ++++-------- 2 files changed, 4 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 2a195ffd4269..32047449b309 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -150,7 +150,6 @@ enum { FTRACE_FL_FILTER = (1 << 1), FTRACE_FL_ENABLED = (1 << 2), FTRACE_FL_NOTRACE = (1 << 3), - FTRACE_FL_CONVERTED = (1 << 4), }; struct dyn_ftrace { diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index eb19fae2c54a..9abaaf46f212 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1087,12 +1087,8 @@ static void ftrace_replace_code(int enable) return; do_for_each_ftrace_rec(pg, rec) { - /* - * Skip over free records, records that have - * failed and not converted. - */ - if (rec->flags & FTRACE_FL_FREE || - !(rec->flags & FTRACE_FL_CONVERTED)) + /* Skip over free records */ + if (rec->flags & FTRACE_FL_FREE) continue; failed = __ftrace_replace_code(rec, enable); @@ -1280,10 +1276,10 @@ static int ftrace_update_code(struct module *mod) */ if (!ftrace_code_disable(mod, p)) { ftrace_free_rec(p); - continue; + /* Game over */ + break; } - p->flags |= FTRACE_FL_CONVERTED; ftrace_update_cnt++; /* -- cgit v1.2.3-71-gd317 From a6e5e2be44616c8400f9ec2f635b10f8e579217c Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sun, 1 May 2011 18:18:49 +0200 Subject: i2c-i801: Move device ID definitions to driver Move the SMBus device ID definitions of recent devices from pci_ids.h to the i2c-i801.c driver file. 
They don't have to be shared, as they are clearly identified and only used in this driver. In the future, such IDs will go to i2c-i801 directly. This will make adding support for new devices much faster and easier, as it will avoid cross- subsystem patch sets and merge conflicts. Signed-off-by: Jean Delvare Cc: Seth Heasley Acked-by: Jesse Barnes --- drivers/i2c/busses/i2c-i801.c | 5 +++++ include/linux/pci_ids.h | 4 ---- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 72c0415f6f94..455e909bc768 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -134,10 +134,15 @@ SMBHSTSTS_BUS_ERR | SMBHSTSTS_DEV_ERR | \ SMBHSTSTS_INTR) +/* Older devices have their ID defined in */ +#define PCI_DEVICE_ID_INTEL_COUGARPOINT_SMBUS 0x1c22 +#define PCI_DEVICE_ID_INTEL_PATSBURG_SMBUS 0x1d22 /* Patsburg also has three 'Integrated Device Function' SMBus controllers */ #define PCI_DEVICE_ID_INTEL_PATSBURG_SMBUS_IDF0 0x1d70 #define PCI_DEVICE_ID_INTEL_PATSBURG_SMBUS_IDF1 0x1d71 #define PCI_DEVICE_ID_INTEL_PATSBURG_SMBUS_IDF2 0x1d72 +#define PCI_DEVICE_ID_INTEL_DH89XXCC_SMBUS 0x2330 +#define PCI_DEVICE_ID_INTEL_5_3400_SERIES_SMBUS 0x3b30 struct i801_priv { struct i2c_adapter adapter; diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 4e2c9150a785..8abe8d78c4bf 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2477,15 +2477,12 @@ #define PCI_DEVICE_ID_INTEL_82840_HB 0x1a21 #define PCI_DEVICE_ID_INTEL_82845_HB 0x1a30 #define PCI_DEVICE_ID_INTEL_IOAT 0x1a38 -#define PCI_DEVICE_ID_INTEL_COUGARPOINT_SMBUS 0x1c22 #define PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MIN 0x1c41 #define PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MAX 0x1c5f -#define PCI_DEVICE_ID_INTEL_PATSBURG_SMBUS 0x1d22 #define PCI_DEVICE_ID_INTEL_PATSBURG_LPC_0 0x1d40 #define PCI_DEVICE_ID_INTEL_PATSBURG_LPC_1 0x1d41 #define PCI_DEVICE_ID_INTEL_DH89XXCC_LPC_MIN 0x2310 #define PCI_DEVICE_ID_INTEL_DH89XXCC_LPC_MAX 0x231f -#define PCI_DEVICE_ID_INTEL_DH89XXCC_SMBUS 0x2330 #define PCI_DEVICE_ID_INTEL_82801AA_0 0x2410 #define PCI_DEVICE_ID_INTEL_82801AA_1 0x2411 #define PCI_DEVICE_ID_INTEL_82801AA_3 0x2413 @@ -2696,7 +2693,6 @@ #define PCI_DEVICE_ID_INTEL_ICH10_5 0x3a60 #define PCI_DEVICE_ID_INTEL_5_3400_SERIES_LPC_MIN 0x3b00 #define PCI_DEVICE_ID_INTEL_5_3400_SERIES_LPC_MAX 0x3b1f -#define PCI_DEVICE_ID_INTEL_5_3400_SERIES_SMBUS 0x3b30 #define PCI_DEVICE_ID_INTEL_IOAT_SNB 0x402f #define PCI_DEVICE_ID_INTEL_5100_16 0x65f0 #define PCI_DEVICE_ID_INTEL_5100_21 0x65f5 -- cgit v1.2.3-71-gd317 From b12a03ce4880bd13786a98db6de494a3e0123129 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 2 May 2011 16:48:57 +0200 Subject: hrtimers: Prepare for cancel on clock was set timers Make clock_was_set() unconditional and rename hres_timers_resume to hrtimers_resume. This is a preparatory patch for hrtimers which are cancelled when clock realtime was set. 
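For reference, a hedged sketch of the call path this prepares for (not part of the patch; the function name is invented and the body is elided): once clock_was_set() is a real function in all configurations, any path that steps CLOCK_REALTIME can notify the hrtimer core, which re-reads the realtime/monotonic offsets on each CPU via retrigger_next_event().

#include <linux/hrtimer.h>
#include <linux/time.h>

/* Hypothetical settimeofday-style path; only clock_was_set() is real. */
static int set_wall_clock_sketch(const struct timespec *ts)
{
	/* ... update xtime and wall_to_monotonic under xtime_lock ... */

	clock_was_set();	/* no longer a NOP when !CONFIG_HIGH_RES_TIMERS */
	return 0;
}
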
Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 16 ++---- kernel/hrtimer.c | 125 ++++++++++++++++++++++------------------------ kernel/time/timekeeping.c | 2 +- 3 files changed, 65 insertions(+), 78 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 62f500c724f9..4135c88fe4fa 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -148,9 +148,7 @@ struct hrtimer_clock_base { ktime_t resolution; ktime_t (*get_time)(void); ktime_t softirq_time; -#ifdef CONFIG_HIGH_RES_TIMERS ktime_t offset; -#endif }; enum hrtimer_base_type { @@ -256,8 +254,6 @@ static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer) #ifdef CONFIG_HIGH_RES_TIMERS struct clock_event_device; -extern void clock_was_set(void); -extern void hres_timers_resume(void); extern void hrtimer_interrupt(struct clock_event_device *dev); /* @@ -291,16 +287,8 @@ extern void hrtimer_peek_ahead_timers(void); # define MONOTONIC_RES_NSEC LOW_RES_NSEC # define KTIME_MONOTONIC_RES KTIME_LOW_RES -/* - * clock_was_set() is a NOP for non- high-resolution systems. The - * time-sorted order guarantees that a timer does not expire early and - * is expired in the next softirq when the clock was advanced. - */ -static inline void clock_was_set(void) { } static inline void hrtimer_peek_ahead_timers(void) { } -static inline void hres_timers_resume(void) { } - /* * In non high resolution mode the time reference is taken from * the base softirq time variable. @@ -316,11 +304,13 @@ static inline int hrtimer_is_hres_active(struct hrtimer *timer) } #endif +extern void clock_was_set(void); +extern void hrtimers_resume(void); + extern ktime_t ktime_get(void); extern ktime_t ktime_get_real(void); extern ktime_t ktime_get_boottime(void); - DECLARE_PER_CPU(struct tick_device, tick_cpu_device); diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index dbbbf7d43080..c145ed643bca 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -621,66 +621,6 @@ static int hrtimer_reprogram(struct hrtimer *timer, return res; } - -/* - * Retrigger next event is called after clock was set - * - * Called with interrupts disabled via on_each_cpu() - */ -static void retrigger_next_event(void *arg) -{ - struct hrtimer_cpu_base *base; - struct timespec realtime_offset, wtm, sleep; - - if (!hrtimer_hres_active()) - return; - - get_xtime_and_monotonic_and_sleep_offset(&realtime_offset, &wtm, - &sleep); - set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec); - - base = &__get_cpu_var(hrtimer_bases); - - /* Adjust CLOCK_REALTIME offset */ - raw_spin_lock(&base->lock); - base->clock_base[HRTIMER_BASE_REALTIME].offset = - timespec_to_ktime(realtime_offset); - base->clock_base[HRTIMER_BASE_BOOTTIME].offset = - timespec_to_ktime(sleep); - - hrtimer_force_reprogram(base, 0); - raw_spin_unlock(&base->lock); -} - -/* - * Clock realtime was set - * - * Change the offset of the realtime clock vs. the monotonic - * clock. - * - * We might have to reprogram the high resolution timer interrupt. On - * SMP we call the architecture specific code to retrigger _all_ high - * resolution timer interrupts. On UP we just disable interrupts and - * call the high resolution interrupt code. 
- */ -void clock_was_set(void) -{ - /* Retrigger the CPU local events everywhere */ - on_each_cpu(retrigger_next_event, NULL, 1); -} - -/* - * During resume we might have to reprogram the high resolution timer - * interrupt (on the local CPU): - */ -void hres_timers_resume(void) -{ - WARN_ONCE(!irqs_disabled(), - KERN_INFO "hres_timers_resume() called with IRQs enabled!"); - - retrigger_next_event(NULL); -} - /* * Initialize the high resolution related parts of cpu_base */ @@ -714,12 +654,14 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, return 0; } +static void retrigger_next_event(void *arg); + /* * Switch to high resolution mode */ static int hrtimer_switch_to_hres(void) { - int cpu = smp_processor_id(); + int i, cpu = smp_processor_id(); struct hrtimer_cpu_base *base = &per_cpu(hrtimer_bases, cpu); unsigned long flags; @@ -735,9 +677,8 @@ static int hrtimer_switch_to_hres(void) return 0; } base->hres_active = 1; - base->clock_base[HRTIMER_BASE_REALTIME].resolution = KTIME_HIGH_RES; - base->clock_base[HRTIMER_BASE_MONOTONIC].resolution = KTIME_HIGH_RES; - base->clock_base[HRTIMER_BASE_BOOTTIME].resolution = KTIME_HIGH_RES; + for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) + base->clock_base[i].resolution = KTIME_HIGH_RES; tick_setup_sched_timer(); @@ -764,6 +705,62 @@ static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { } #endif /* CONFIG_HIGH_RES_TIMERS */ +/* + * Retrigger next event is called after clock was set + * + * Called with interrupts disabled via on_each_cpu() + */ +static void retrigger_next_event(void *arg) +{ + struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases); + struct timespec realtime_offset, xtim, wtm, sleep; + + if (!hrtimer_hres_active()) + return; + + get_xtime_and_monotonic_and_sleep_offset(&xtim, &wtm, &sleep); + set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec); + + /* Adjust CLOCK_REALTIME offset */ + raw_spin_lock(&base->lock); + base->clock_base[HRTIMER_BASE_REALTIME].offset = + timespec_to_ktime(realtime_offset); + base->clock_base[HRTIMER_BASE_BOOTTIME].offset = + timespec_to_ktime(sleep); + + hrtimer_force_reprogram(base, 0); + raw_spin_unlock(&base->lock); +} + +/* + * Clock realtime was set + * + * Change the offset of the realtime clock vs. the monotonic + * clock. + * + * We might have to reprogram the high resolution timer interrupt. On + * SMP we call the architecture specific code to retrigger _all_ high + * resolution timer interrupts. On UP we just disable interrupts and + * call the high resolution interrupt code. 
+ */ +void clock_was_set(void) +{ + /* Retrigger the CPU local events everywhere */ + on_each_cpu(retrigger_next_event, NULL, 1); +} + +/* + * During resume we might have to reprogram the high resolution timer + * interrupt (on the local CPU): + */ +void hrtimers_resume(void) +{ + WARN_ONCE(!irqs_disabled(), + KERN_INFO "hrtimers_resume() called with IRQs enabled!"); + + retrigger_next_event(NULL); +} + static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer) { #ifdef CONFIG_TIMER_STATS diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 8e6a05a5915a..a61b8fa2d39a 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -680,7 +680,7 @@ static void timekeeping_resume(void) clockevents_notify(CLOCK_EVT_NOTIFY_RESUME, NULL); /* Resume hrtimers */ - hres_timers_resume(); + hrtimers_resume(); } static int timekeeping_suspend(void) -- cgit v1.2.3-71-gd317 From 99ee5315dac6211e972fa3f23bcc9a0343ff58c4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 27 Apr 2011 14:16:42 +0200 Subject: timerfd: Allow timers to be cancelled when clock was set Some applications must be aware of clock realtime being set backward. A simple example is a clock applet which arms a timer for the next minute display. If clock realtime is set backward then the applet displays a stale time for the amount of time which the clock was set backwards. Due to that applications poll the time because we don't have an interface. Extend the timerfd interface by adding a flag which puts the timer onto a different internal realtime clock. All timers on this clock are expired whenever the clock was set. The timerfd core records the monotonic offset when the timer is created. When the timer is armed, then the current offset is compared to the previous recorded offset. When it has changed, then timerfd_settime returns -ECANCELED. When a timer is read the offset is compared and if it changed -ECANCELED returned to user space. Periodic timers are not rearmed in the cancelation case. Signed-off-by: Thomas Gleixner Acked-by: John Stultz Cc: Chris Friesen Tested-by: Kay Sievers Cc: "Kirill A. Shutemov" Cc: Peter Zijlstra Cc: Davide Libenzi Reviewed-by: Alexander Shishkin Link: http://lkml.kernel.org/r/%3Calpine.LFD.2.02.1104271359580.3323%40ionos%3E Signed-off-by: Thomas Gleixner --- fs/timerfd.c | 57 ++++++++++++++++++++++++++++++++++++++++++----- include/linux/hrtimer.h | 2 ++ include/linux/time.h | 6 +++++ include/linux/timerfd.h | 3 ++- kernel/hrtimer.c | 36 +++++++++++++++++++++++++++++- kernel/time/timekeeping.c | 15 +++++++++++++ 6 files changed, 111 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/fs/timerfd.c b/fs/timerfd.c index 8c4fc1425b3e..7e14c9e7c4ee 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -26,10 +26,12 @@ struct timerfd_ctx { struct hrtimer tmr; ktime_t tintv; + ktime_t moffs; wait_queue_head_t wqh; u64 ticks; int expired; int clockid; + bool might_cancel; }; /* @@ -59,24 +61,52 @@ static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) return remaining.tv64 < 0 ? 
ktime_set(0, 0): remaining; } -static void timerfd_setup(struct timerfd_ctx *ctx, int flags, - const struct itimerspec *ktmr) +static bool timerfd_canceled(struct timerfd_ctx *ctx) +{ + ktime_t moffs; + + if (!ctx->might_cancel) + return false; + + moffs = ktime_get_monotonic_offset(); + + if (moffs.tv64 == ctx->moffs.tv64) + return false; + + ctx->moffs = moffs; + return true; +} + +static int timerfd_setup(struct timerfd_ctx *ctx, int flags, + const struct itimerspec *ktmr) { enum hrtimer_mode htmode; ktime_t texp; + int clockid = ctx->clockid; htmode = (flags & TFD_TIMER_ABSTIME) ? HRTIMER_MODE_ABS: HRTIMER_MODE_REL; + ctx->might_cancel = false; + if (htmode == HRTIMER_MODE_ABS && ctx->clockid == CLOCK_REALTIME && + (flags & TFD_TIMER_CANCELON_SET)) { + clockid = CLOCK_REALTIME_COS; + ctx->might_cancel = true; + } + texp = timespec_to_ktime(ktmr->it_value); ctx->expired = 0; ctx->ticks = 0; ctx->tintv = timespec_to_ktime(ktmr->it_interval); - hrtimer_init(&ctx->tmr, ctx->clockid, htmode); + hrtimer_init(&ctx->tmr, clockid, htmode); hrtimer_set_expires(&ctx->tmr, texp); ctx->tmr.function = timerfd_tmrproc; - if (texp.tv64 != 0) + if (texp.tv64 != 0) { hrtimer_start(&ctx->tmr, texp, htmode); + if (timerfd_canceled(ctx)) + return -ECANCELED; + } + return 0; } static int timerfd_release(struct inode *inode, struct file *file) @@ -118,8 +148,21 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count, res = -EAGAIN; else res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks); + if (ctx->ticks) { ticks = ctx->ticks; + + /* + * If clock has changed, we do not care about the + * ticks and we do not rearm the timer. Userspace must + * reevaluate anyway. + */ + if (timerfd_canceled(ctx)) { + ticks = 0; + ctx->expired = 0; + res = -ECANCELED; + } + if (ctx->expired && ctx->tintv.tv64) { /* * If tintv.tv64 != 0, this is a periodic timer that @@ -183,6 +226,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) init_waitqueue_head(&ctx->wqh); ctx->clockid = clockid; hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS); + ctx->moffs = ktime_get_monotonic_offset(); ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx, O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS)); @@ -199,6 +243,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, struct file *file; struct timerfd_ctx *ctx; struct itimerspec ktmr, kotmr; + int ret; if (copy_from_user(&ktmr, utmr, sizeof(ktmr))) return -EFAULT; @@ -240,14 +285,14 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, /* * Re-program the timer to the new value ... 
*/ - timerfd_setup(ctx, flags, &ktmr); + ret = timerfd_setup(ctx, flags, &ktmr); spin_unlock_irq(&ctx->wqh.lock); fput(file); if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr))) return -EFAULT; - return 0; + return ret; } SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 4135c88fe4fa..eda4ccde0730 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -155,6 +155,7 @@ enum hrtimer_base_type { HRTIMER_BASE_REALTIME, HRTIMER_BASE_MONOTONIC, HRTIMER_BASE_BOOTTIME, + HRTIMER_BASE_REALTIME_COS, HRTIMER_MAX_CLOCK_BASES, }; @@ -310,6 +311,7 @@ extern void hrtimers_resume(void); extern ktime_t ktime_get(void); extern ktime_t ktime_get_real(void); extern ktime_t ktime_get_boottime(void); +extern ktime_t ktime_get_monotonic_offset(void); DECLARE_PER_CPU(struct tick_device, tick_cpu_device); diff --git a/include/linux/time.h b/include/linux/time.h index b3061782dec3..a9242773eb24 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -302,6 +302,12 @@ struct itimerval { * The IDs of various hardware clocks: */ #define CLOCK_SGI_CYCLE 10 + +#ifdef __KERNEL__ +/* This clock is not exposed to user space */ +#define CLOCK_REALTIME_COS 15 +#endif + #define MAX_CLOCKS 16 #define CLOCKS_MASK (CLOCK_REALTIME | CLOCK_MONOTONIC) #define CLOCKS_MONO CLOCK_MONOTONIC diff --git a/include/linux/timerfd.h b/include/linux/timerfd.h index 2d0792983f8c..e9571fc8f1a0 100644 --- a/include/linux/timerfd.h +++ b/include/linux/timerfd.h @@ -19,6 +19,7 @@ * shared O_* flags. */ #define TFD_TIMER_ABSTIME (1 << 0) +#define TFD_TIMER_CANCELON_SET (1 << 1) #define TFD_CLOEXEC O_CLOEXEC #define TFD_NONBLOCK O_NONBLOCK @@ -26,6 +27,6 @@ /* Flags for timerfd_create. */ #define TFD_CREATE_FLAGS TFD_SHARED_FCNTL_FLAGS /* Flags for timerfd_settime. 
*/ -#define TFD_SETTIME_FLAGS TFD_TIMER_ABSTIME +#define TFD_SETTIME_FLAGS (TFD_TIMER_ABSTIME | TFD_TIMER_CANCELON_SET) #endif /* _LINUX_TIMERFD_H */ diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index c145ed643bca..eabcbd781433 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -78,6 +78,11 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = .get_time = &ktime_get_boottime, .resolution = KTIME_LOW_RES, }, + { + .index = CLOCK_REALTIME_COS, + .get_time = &ktime_get_real, + .resolution = KTIME_LOW_RES, + }, } }; @@ -85,6 +90,7 @@ static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = { [CLOCK_REALTIME] = HRTIMER_BASE_REALTIME, [CLOCK_MONOTONIC] = HRTIMER_BASE_MONOTONIC, [CLOCK_BOOTTIME] = HRTIMER_BASE_BOOTTIME, + [CLOCK_REALTIME_COS] = HRTIMER_BASE_REALTIME_COS, }; static inline int hrtimer_clockid_to_base(clockid_t clock_id) @@ -110,6 +116,7 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base) base->clock_base[HRTIMER_BASE_REALTIME].softirq_time = xtim; base->clock_base[HRTIMER_BASE_MONOTONIC].softirq_time = mono; base->clock_base[HRTIMER_BASE_BOOTTIME].softirq_time = boot; + base->clock_base[HRTIMER_BASE_REALTIME_COS].softirq_time = xtim; } /* @@ -479,6 +486,8 @@ static inline void debug_deactivate(struct hrtimer *timer) trace_hrtimer_cancel(timer); } +static void hrtimer_expire_cancelable(struct hrtimer_cpu_base *cpu_base); + /* High resolution timer related functions */ #ifdef CONFIG_HIGH_RES_TIMERS @@ -715,9 +724,14 @@ static void retrigger_next_event(void *arg) struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases); struct timespec realtime_offset, xtim, wtm, sleep; - if (!hrtimer_hres_active()) + if (!hrtimer_hres_active()) { + raw_spin_lock(&base->lock); + hrtimer_expire_cancelable(base); + raw_spin_unlock(&base->lock); return; + } + /* Optimized out for !HIGH_RES */ get_xtime_and_monotonic_and_sleep_offset(&xtim, &wtm, &sleep); set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec); @@ -727,6 +741,10 @@ static void retrigger_next_event(void *arg) timespec_to_ktime(realtime_offset); base->clock_base[HRTIMER_BASE_BOOTTIME].offset = timespec_to_ktime(sleep); + base->clock_base[HRTIMER_BASE_REALTIME_COS].offset = + timespec_to_ktime(realtime_offset); + + hrtimer_expire_cancelable(base); hrtimer_force_reprogram(base, 0); raw_spin_unlock(&base->lock); @@ -1222,6 +1240,22 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now) timer->state &= ~HRTIMER_STATE_CALLBACK; } +static void hrtimer_expire_cancelable(struct hrtimer_cpu_base *cpu_base) +{ + struct timerqueue_node *node; + struct hrtimer_clock_base *base; + ktime_t now = ktime_get_real(); + + base = &cpu_base->clock_base[HRTIMER_BASE_REALTIME_COS]; + + while ((node = timerqueue_getnext(&base->active))) { + struct hrtimer *timer; + + timer = container_of(node, struct hrtimer, node); + __run_hrtimer(timer, &now); + } +} + #ifdef CONFIG_HIGH_RES_TIMERS /* diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index a61b8fa2d39a..342408cf68dd 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1098,6 +1098,21 @@ void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim, } while (read_seqretry(&xtime_lock, seq)); } +/** + * ktime_get_monotonic_offset() - get wall_to_monotonic in ktime_t format + */ +ktime_t ktime_get_monotonic_offset(void) +{ + unsigned long seq; + struct timespec wtom; + + do { + seq = read_seqbegin(&xtime_lock); + wtom = wall_to_monotonic; + } while (read_seqretry(&xtime_lock, seq)); + return 
timespec_to_ktime(wtom); +} + /** * xtime_update() - advances the timekeeping infrastructure * @ticks: number of ticks, that have elapsed since the last call. -- cgit v1.2.3-71-gd317 From 3dacdf11f1f82b98d301d5e1d42cdaea9a39968a Mon Sep 17 00:00:00 2001 From: Anatolij Gustschin Date: Fri, 15 Apr 2011 16:18:38 +0200 Subject: usb: factor out state_string() on otg drivers Provide common otg_state_string() and use it in drivers. Signed-off-by: Anatolij Gustschin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/otg/isp1301_omap.c | 26 +++----------------------- drivers/usb/otg/langwell_otg.c | 40 +++------------------------------------- drivers/usb/otg/otg.c | 35 +++++++++++++++++++++++++++++++++++ include/linux/usb/otg.h | 1 + 4 files changed, 42 insertions(+), 60 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/otg/isp1301_omap.c b/drivers/usb/otg/isp1301_omap.c index e25700f44b6f..8c282258e1bd 100644 --- a/drivers/usb/otg/isp1301_omap.c +++ b/drivers/usb/otg/isp1301_omap.c @@ -234,29 +234,9 @@ isp1301_clear_bits(struct isp1301 *isp, u8 reg, u8 bits) /*-------------------------------------------------------------------------*/ -static const char *state_string(enum usb_otg_state state) -{ - switch (state) { - case OTG_STATE_A_IDLE: return "a_idle"; - case OTG_STATE_A_WAIT_VRISE: return "a_wait_vrise"; - case OTG_STATE_A_WAIT_BCON: return "a_wait_bcon"; - case OTG_STATE_A_HOST: return "a_host"; - case OTG_STATE_A_SUSPEND: return "a_suspend"; - case OTG_STATE_A_PERIPHERAL: return "a_peripheral"; - case OTG_STATE_A_WAIT_VFALL: return "a_wait_vfall"; - case OTG_STATE_A_VBUS_ERR: return "a_vbus_err"; - case OTG_STATE_B_IDLE: return "b_idle"; - case OTG_STATE_B_SRP_INIT: return "b_srp_init"; - case OTG_STATE_B_PERIPHERAL: return "b_peripheral"; - case OTG_STATE_B_WAIT_ACON: return "b_wait_acon"; - case OTG_STATE_B_HOST: return "b_host"; - default: return "UNDEFINED"; - } -} - static inline const char *state_name(struct isp1301 *isp) { - return state_string(isp->otg.state); + return otg_state_string(isp->otg.state); } /*-------------------------------------------------------------------------*/ @@ -501,7 +481,7 @@ static void check_state(struct isp1301 *isp, const char *tag) if (isp->otg.state == state && !extra) return; pr_debug("otg: %s FSM %s/%02x, %s, %06x\n", tag, - state_string(state), fsm, state_name(isp), + otg_state_string(state), fsm, state_name(isp), omap_readl(OTG_CTRL)); } @@ -1095,7 +1075,7 @@ static void isp_update_otg(struct isp1301 *isp, u8 stat) if (state != isp->otg.state) pr_debug(" isp, %s -> %s\n", - state_string(state), state_name(isp)); + otg_state_string(state), state_name(isp)); #ifdef CONFIG_USB_OTG /* update the OTG controller state to match the isp1301; may diff --git a/drivers/usb/otg/langwell_otg.c b/drivers/usb/otg/langwell_otg.c index e973ff19c55a..f08f784086f7 100644 --- a/drivers/usb/otg/langwell_otg.c +++ b/drivers/usb/otg/langwell_otg.c @@ -82,40 +82,6 @@ static struct pci_driver otg_pci_driver = { .resume = langwell_otg_resume, }; -static const char *state_string(enum usb_otg_state state) -{ - switch (state) { - case OTG_STATE_A_IDLE: - return "a_idle"; - case OTG_STATE_A_WAIT_VRISE: - return "a_wait_vrise"; - case OTG_STATE_A_WAIT_BCON: - return "a_wait_bcon"; - case OTG_STATE_A_HOST: - return "a_host"; - case OTG_STATE_A_SUSPEND: - return "a_suspend"; - case OTG_STATE_A_PERIPHERAL: - return "a_peripheral"; - case OTG_STATE_A_WAIT_VFALL: - return "a_wait_vfall"; - case OTG_STATE_A_VBUS_ERR: - return "a_vbus_err"; - case OTG_STATE_B_IDLE: - 
return "b_idle"; - case OTG_STATE_B_SRP_INIT: - return "b_srp_init"; - case OTG_STATE_B_PERIPHERAL: - return "b_peripheral"; - case OTG_STATE_B_WAIT_ACON: - return "b_wait_acon"; - case OTG_STATE_B_HOST: - return "b_host"; - default: - return "UNDEFINED"; - } -} - /* HSM timers */ static inline struct langwell_otg_timer *otg_timer_initializer (void (*function)(unsigned long), unsigned long expires, unsigned long data) @@ -968,7 +934,7 @@ static void langwell_otg_work(struct work_struct *work) pdev = to_pci_dev(lnw->dev); dev_dbg(lnw->dev, "%s: old state = %s\n", __func__, - state_string(iotg->otg.state)); + otg_state_string(iotg->otg.state)); switch (iotg->otg.state) { case OTG_STATE_UNDEFINED: @@ -1703,7 +1669,7 @@ static void langwell_otg_work(struct work_struct *work) } dev_dbg(lnw->dev, "%s: new state = %s\n", __func__, - state_string(iotg->otg.state)); + otg_state_string(iotg->otg.state)); } static ssize_t @@ -1789,7 +1755,7 @@ show_hsm(struct device *_dev, struct device_attribute *attr, char *buf) "b_bus_req = \t%d\n" "b_bus_suspend_tmout = \t%d\n" "b_bus_suspend_vld = \t%d\n", - state_string(iotg->otg.state), + otg_state_string(iotg->otg.state), iotg->hsm.a_bus_resume, iotg->hsm.a_bus_suspend, iotg->hsm.a_conn, diff --git a/drivers/usb/otg/otg.c b/drivers/usb/otg/otg.c index 0a43a7db750f..fb7adeff9ffa 100644 --- a/drivers/usb/otg/otg.c +++ b/drivers/usb/otg/otg.c @@ -64,3 +64,38 @@ int otg_set_transceiver(struct otg_transceiver *x) return 0; } EXPORT_SYMBOL(otg_set_transceiver); + +const char *otg_state_string(enum usb_otg_state state) +{ + switch (state) { + case OTG_STATE_A_IDLE: + return "a_idle"; + case OTG_STATE_A_WAIT_VRISE: + return "a_wait_vrise"; + case OTG_STATE_A_WAIT_BCON: + return "a_wait_bcon"; + case OTG_STATE_A_HOST: + return "a_host"; + case OTG_STATE_A_SUSPEND: + return "a_suspend"; + case OTG_STATE_A_PERIPHERAL: + return "a_peripheral"; + case OTG_STATE_A_WAIT_VFALL: + return "a_wait_vfall"; + case OTG_STATE_A_VBUS_ERR: + return "a_vbus_err"; + case OTG_STATE_B_IDLE: + return "b_idle"; + case OTG_STATE_B_SRP_INIT: + return "b_srp_init"; + case OTG_STATE_B_PERIPHERAL: + return "b_peripheral"; + case OTG_STATE_B_WAIT_ACON: + return "b_wait_acon"; + case OTG_STATE_B_HOST: + return "b_host"; + default: + return "UNDEFINED"; + } +} +EXPORT_SYMBOL(otg_state_string); diff --git a/include/linux/usb/otg.h b/include/linux/usb/otg.h index 6e40718f5abe..bc84858b3a4d 100644 --- a/include/linux/usb/otg.h +++ b/include/linux/usb/otg.h @@ -246,5 +246,6 @@ otg_unregister_notifier(struct otg_transceiver *otg, struct notifier_block *nb) /* for OTG controller drivers (and maybe other stuff) */ extern int usb_bus_start_enum(struct usb_bus *bus, unsigned port_num); +extern const char *otg_state_string(enum usb_otg_state state); #endif /* __LINUX_USB_OTG_H */ -- cgit v1.2.3-71-gd317 From 64b3c304bed25388fed48dbdc098dfcad7063d9c Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 11 Apr 2011 20:19:12 +0200 Subject: usb/ch9: use proper endianess for wBytesPerInterval while going through Tatyana's changes for the gadget framework I noticed that this type is not defined as __le16. 
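As a concrete illustration of why the annotation matters, a minimal sketch (the helper name is invented; the descriptor type, field and le16_to_cpu() usage match the diff below): descriptor fields arrive in little-endian wire order, so a __le16 member must be converted before use, otherwise big-endian hosts read a byte-swapped value.

#include <linux/types.h>
#include <linux/usb/ch9.h>
#include <asm/byteorder.h>

/* Hypothetical helper: convert the wire-order field once, then use the
 * CPU-order result for comparisons and arithmetic. */
static u32 ss_ep_bytes_per_interval(const struct usb_ss_ep_comp_descriptor *comp)
{
	return le16_to_cpu(comp->wBytesPerInterval);
}
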
Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Sarah Sharp --- drivers/usb/core/config.c | 2 +- drivers/usb/host/xhci-mem.c | 2 +- include/linux/usb/ch9.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 83126b03e7cf..c962608b4b9a 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -129,7 +129,7 @@ static void usb_parse_ss_endpoint_companion(struct device *ddev, int cfgno, max_tx = ep->desc.wMaxPacketSize * (desc->bMaxBurst + 1); else max_tx = 999999; - if (desc->wBytesPerInterval > max_tx) { + if (le16_to_cpu(desc->wBytesPerInterval) > max_tx) { dev_warn(ddev, "%s endpoint with wBytesPerInterval of %d in " "config %d interface %d altsetting %d ep %d: " "setting to %d\n", diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 500ec7a9eb8a..a4fc4d929385 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1130,7 +1130,7 @@ static u32 xhci_get_max_esit_payload(struct xhci_hcd *xhci, return 0; if (udev->speed == USB_SPEED_SUPER) - return ep->ss_ep_comp.wBytesPerInterval; + return le16_to_cpu(ep->ss_ep_comp.wBytesPerInterval); max_packet = GET_MAX_PACKET(le16_to_cpu(ep->desc.wMaxPacketSize)); max_burst = (le16_to_cpu(ep->desc.wMaxPacketSize) & 0x1800) >> 11; diff --git a/include/linux/usb/ch9.h b/include/linux/usb/ch9.h index b72f305ce6bd..0fd3fbdd8283 100644 --- a/include/linux/usb/ch9.h +++ b/include/linux/usb/ch9.h @@ -579,7 +579,7 @@ struct usb_ss_ep_comp_descriptor { __u8 bMaxBurst; __u8 bmAttributes; - __u16 wBytesPerInterval; + __le16 wBytesPerInterval; } __attribute__ ((packed)); #define USB_DT_SS_EP_COMP_SIZE 6 -- cgit v1.2.3-71-gd317 From 13b7ee2a953f07d994b6bc3439cdd4a718de6f80 Mon Sep 17 00:00:00 2001 From: Anatolij Gustschin Date: Mon, 18 Apr 2011 22:01:55 +0200 Subject: USB: ehci-fsl: add MPC5121E specific suspend and resume Signed-off-by: Anatolij Gustschin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-fsl.c | 153 ++++++++++++++++++++++++++++++++++++++++++++ drivers/usb/host/ehci-fsl.h | 4 ++ include/linux/fsl_devices.h | 15 +++++ 3 files changed, 172 insertions(+) (limited to 'include/linux') diff --git a/drivers/usb/host/ehci-fsl.c b/drivers/usb/host/ehci-fsl.c index 5c761df7fa83..caf3d4ac42bd 100644 --- a/drivers/usb/host/ehci-fsl.c +++ b/drivers/usb/host/ehci-fsl.c @@ -328,6 +328,149 @@ struct ehci_fsl { #ifdef CONFIG_PM +#ifdef CONFIG_PPC_MPC512x +static int ehci_fsl_mpc512x_drv_suspend(struct device *dev) +{ + struct usb_hcd *hcd = dev_get_drvdata(dev); + struct ehci_hcd *ehci = hcd_to_ehci(hcd); + struct fsl_usb2_platform_data *pdata = dev->platform_data; + u32 tmp; + +#ifdef DEBUG + u32 mode = ehci_readl(ehci, hcd->regs + FSL_SOC_USB_USBMODE); + mode &= USBMODE_CM_MASK; + tmp = ehci_readl(ehci, hcd->regs + 0x140); /* usbcmd */ + + dev_dbg(dev, "suspend=%d already_suspended=%d " + "mode=%d usbcmd %08x\n", pdata->suspended, + pdata->already_suspended, mode, tmp); +#endif + + /* + * If the controller is already suspended, then this must be a + * PM suspend. Remember this fact, so that we will leave the + * controller suspended at PM resume time. 
+ */ + if (pdata->suspended) { + dev_dbg(dev, "already suspended, leaving early\n"); + pdata->already_suspended = 1; + return 0; + } + + dev_dbg(dev, "suspending...\n"); + + hcd->state = HC_STATE_SUSPENDED; + dev->power.power_state = PMSG_SUSPEND; + + /* ignore non-host interrupts */ + clear_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags); + + /* stop the controller */ + tmp = ehci_readl(ehci, &ehci->regs->command); + tmp &= ~CMD_RUN; + ehci_writel(ehci, tmp, &ehci->regs->command); + + /* save EHCI registers */ + pdata->pm_command = ehci_readl(ehci, &ehci->regs->command); + pdata->pm_command &= ~CMD_RUN; + pdata->pm_status = ehci_readl(ehci, &ehci->regs->status); + pdata->pm_intr_enable = ehci_readl(ehci, &ehci->regs->intr_enable); + pdata->pm_frame_index = ehci_readl(ehci, &ehci->regs->frame_index); + pdata->pm_segment = ehci_readl(ehci, &ehci->regs->segment); + pdata->pm_frame_list = ehci_readl(ehci, &ehci->regs->frame_list); + pdata->pm_async_next = ehci_readl(ehci, &ehci->regs->async_next); + pdata->pm_configured_flag = + ehci_readl(ehci, &ehci->regs->configured_flag); + pdata->pm_portsc = ehci_readl(ehci, &ehci->regs->port_status[0]); + pdata->pm_usbgenctrl = ehci_readl(ehci, + hcd->regs + FSL_SOC_USB_USBGENCTRL); + + /* clear the W1C bits */ + pdata->pm_portsc &= cpu_to_hc32(ehci, ~PORT_RWC_BITS); + + pdata->suspended = 1; + + /* clear PP to cut power to the port */ + tmp = ehci_readl(ehci, &ehci->regs->port_status[0]); + tmp &= ~PORT_POWER; + ehci_writel(ehci, tmp, &ehci->regs->port_status[0]); + + return 0; +} + +static int ehci_fsl_mpc512x_drv_resume(struct device *dev) +{ + struct usb_hcd *hcd = dev_get_drvdata(dev); + struct ehci_hcd *ehci = hcd_to_ehci(hcd); + struct fsl_usb2_platform_data *pdata = dev->platform_data; + u32 tmp; + + dev_dbg(dev, "suspend=%d already_suspended=%d\n", + pdata->suspended, pdata->already_suspended); + + /* + * If the controller was already suspended at suspend time, + * then don't resume it now. + */ + if (pdata->already_suspended) { + dev_dbg(dev, "already suspended, leaving early\n"); + pdata->already_suspended = 0; + return 0; + } + + if (!pdata->suspended) { + dev_dbg(dev, "not suspended, leaving early\n"); + return 0; + } + + pdata->suspended = 0; + + dev_dbg(dev, "resuming...\n"); + + /* set host mode */ + tmp = USBMODE_CM_HOST | (pdata->es ? 
USBMODE_ES : 0); + ehci_writel(ehci, tmp, hcd->regs + FSL_SOC_USB_USBMODE); + + ehci_writel(ehci, pdata->pm_usbgenctrl, + hcd->regs + FSL_SOC_USB_USBGENCTRL); + ehci_writel(ehci, ISIPHYCTRL_PXE | ISIPHYCTRL_PHYE, + hcd->regs + FSL_SOC_USB_ISIPHYCTRL); + + /* restore EHCI registers */ + ehci_writel(ehci, pdata->pm_command, &ehci->regs->command); + ehci_writel(ehci, pdata->pm_intr_enable, &ehci->regs->intr_enable); + ehci_writel(ehci, pdata->pm_frame_index, &ehci->regs->frame_index); + ehci_writel(ehci, pdata->pm_segment, &ehci->regs->segment); + ehci_writel(ehci, pdata->pm_frame_list, &ehci->regs->frame_list); + ehci_writel(ehci, pdata->pm_async_next, &ehci->regs->async_next); + ehci_writel(ehci, pdata->pm_configured_flag, + &ehci->regs->configured_flag); + ehci_writel(ehci, pdata->pm_portsc, &ehci->regs->port_status[0]); + + set_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags); + hcd->state = HC_STATE_RUNNING; + dev->power.power_state = PMSG_ON; + + tmp = ehci_readl(ehci, &ehci->regs->command); + tmp |= CMD_RUN; + ehci_writel(ehci, tmp, &ehci->regs->command); + + usb_hcd_resume_root_hub(hcd); + + return 0; +} +#else +static inline int ehci_fsl_mpc512x_drv_suspend(struct device *dev) +{ + return 0; +} + +static inline int ehci_fsl_mpc512x_drv_resume(struct device *dev) +{ + return 0; +} +#endif /* CONFIG_PPC_MPC512x */ + static struct ehci_fsl *hcd_to_ehci_fsl(struct usb_hcd *hcd) { struct ehci_hcd *ehci = hcd_to_ehci(hcd); @@ -341,6 +484,11 @@ static int ehci_fsl_drv_suspend(struct device *dev) struct ehci_fsl *ehci_fsl = hcd_to_ehci_fsl(hcd); void __iomem *non_ehci = hcd->regs; + if (of_device_is_compatible(dev->parent->of_node, + "fsl,mpc5121-usb2-dr")) { + return ehci_fsl_mpc512x_drv_suspend(dev); + } + ehci_prepare_ports_for_controller_suspend(hcd_to_ehci(hcd), device_may_wakeup(dev)); if (!fsl_deep_sleep()) @@ -357,6 +505,11 @@ static int ehci_fsl_drv_resume(struct device *dev) struct ehci_hcd *ehci = hcd_to_ehci(hcd); void __iomem *non_ehci = hcd->regs; + if (of_device_is_compatible(dev->parent->of_node, + "fsl,mpc5121-usb2-dr")) { + return ehci_fsl_mpc512x_drv_resume(dev); + } + ehci_prepare_ports_for_controller_resume(ehci); if (!fsl_deep_sleep()) return 0; diff --git a/drivers/usb/host/ehci-fsl.h b/drivers/usb/host/ehci-fsl.h index 3fabed33d940..491806221165 100644 --- a/drivers/usb/host/ehci-fsl.h +++ b/drivers/usb/host/ehci-fsl.h @@ -27,6 +27,10 @@ #define PORT_PTS_SERIAL (3<<30) #define PORT_PTS_PTW (1<<28) #define FSL_SOC_USB_PORTSC2 0x188 +#define FSL_SOC_USB_USBMODE 0x1a8 +#define USBMODE_CM_MASK (3 << 0) /* controller mode mask */ +#define USBMODE_CM_HOST (3 << 0) /* controller mode: host */ +#define USBMODE_ES (1 << 2) /* (Big) Endian Select */ #define FSL_SOC_USB_USBGENCTRL 0x200 #define USBGENCTRL_PPP (1 << 3) diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h index 4eb56ed75fbc..3773c5dab8f5 100644 --- a/include/linux/fsl_devices.h +++ b/include/linux/fsl_devices.h @@ -79,6 +79,21 @@ struct fsl_usb2_platform_data { unsigned have_sysif_regs:1; unsigned invert_drvvbus:1; unsigned invert_pwr_fault:1; + + unsigned suspended:1; + unsigned already_suspended:1; + + /* register save area for suspend/resume */ + u32 pm_command; + u32 pm_status; + u32 pm_intr_enable; + u32 pm_frame_index; + u32 pm_segment; + u32 pm_frame_list; + u32 pm_async_next; + u32 pm_configured_flag; + u32 pm_portsc; + u32 pm_usbgenctrl; }; /* Flags in fsl_usb2_mph_platform_data */ -- cgit v1.2.3-71-gd317 From 83722bc9430424de1614ff31696f73a40b3d81a9 Mon Sep 17 00:00:00 2001 From: Anatolij 
Gustschin Date: Mon, 18 Apr 2011 22:02:00 +0200 Subject: USB: extend ehci-fsl and fsl_udc_core driver for OTG operation Signed-off-by: Anatolij Gustschin Cc: Li Yang Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/fsl_udc_core.c | 147 ++++++++++++++++++++++++++++++++++---- drivers/usb/gadget/fsl_usb2_udc.h | 2 + drivers/usb/host/ehci-fsl.c | 66 +++++++++++++++++ drivers/usb/host/ehci-hub.c | 8 +++ drivers/usb/host/ehci.h | 4 ++ include/linux/fsl_devices.h | 1 + 6 files changed, 213 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/gadget/fsl_udc_core.c b/drivers/usb/gadget/fsl_udc_core.c index 28b3a9f25f3b..999eafe89653 100644 --- a/drivers/usb/gadget/fsl_udc_core.c +++ b/drivers/usb/gadget/fsl_udc_core.c @@ -353,6 +353,19 @@ static void dr_controller_stop(struct fsl_udc *udc) { unsigned int tmp; + pr_debug("%s\n", __func__); + + /* if we're in OTG mode, and the Host is currently using the port, + * stop now and don't rip the controller out from under the + * ehci driver + */ + if (udc->gadget.is_otg) { + if (!(fsl_readl(&dr_regs->otgsc) & OTGSC_STS_USB_ID)) { + pr_debug("udc: Leaving early\n"); + return; + } + } + /* disable all INTR */ fsl_writel(0, &dr_regs->usbintr); @@ -1668,6 +1681,9 @@ static void port_change_irq(struct fsl_udc *udc) { u32 speed; + if (udc->bus_reset) + udc->bus_reset = 0; + /* Bus resetting is finished */ if (!(fsl_readl(&dr_regs->portsc1) & PORTSCX_PORT_RESET)) { /* Get the speed */ @@ -1775,6 +1791,8 @@ static void reset_irq(struct fsl_udc *udc) if (fsl_readl(&dr_regs->portsc1) & PORTSCX_PORT_RESET) { VDBG("Bus reset"); + /* Bus is reseting */ + udc->bus_reset = 1; /* Reset all the queues, include XD, dTD, EP queue * head and TR Queue */ reset_queues(udc); @@ -1852,6 +1870,7 @@ static irqreturn_t fsl_udc_irq(int irq, void *_udc) /* Reset Received */ if (irq_src & USB_STS_RESET) { + VDBG("reset int"); reset_irq(udc); status = IRQ_HANDLED; } @@ -1909,11 +1928,30 @@ int usb_gadget_probe_driver(struct usb_gadget_driver *driver, goto out; } - /* Enable DR IRQ reg and Set usbcmd reg Run bit */ - dr_controller_run(udc_controller); - udc_controller->usb_state = USB_STATE_ATTACHED; - udc_controller->ep0_state = WAIT_FOR_SETUP; - udc_controller->ep0_dir = 0; + if (udc_controller->transceiver) { + /* Suspend the controller until OTG enable it */ + udc_controller->stopped = 1; + printk(KERN_INFO "Suspend udc for OTG auto detect\n"); + + /* connect to bus through transceiver */ + if (udc_controller->transceiver) { + retval = otg_set_peripheral(udc_controller->transceiver, + &udc_controller->gadget); + if (retval < 0) { + ERR("can't bind to transceiver\n"); + driver->unbind(&udc_controller->gadget); + udc_controller->gadget.dev.driver = 0; + udc_controller->driver = 0; + return retval; + } + } + } else { + /* Enable DR IRQ reg and set USBCMD reg Run bit */ + dr_controller_run(udc_controller); + udc_controller->usb_state = USB_STATE_ATTACHED; + udc_controller->ep0_state = WAIT_FOR_SETUP; + udc_controller->ep0_dir = 0; + } printk(KERN_INFO "%s: bind to driver %s\n", udc_controller->gadget.name, driver->driver.name); @@ -2374,17 +2412,30 @@ static int __init fsl_udc_probe(struct platform_device *pdev) spin_lock_init(&udc_controller->lock); udc_controller->stopped = 1; +#ifdef CONFIG_USB_OTG + if (pdata->operating_mode == FSL_USB2_DR_OTG) { + udc_controller->transceiver = otg_get_transceiver(); + if (!udc_controller->transceiver) { + ERR("Can't find OTG driver!\n"); + ret = -ENODEV; + goto err_kfree; + } + } +#endif + res = 
platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) { ret = -ENXIO; goto err_kfree; } - if (!request_mem_region(res->start, res->end - res->start + 1, - driver_name)) { - ERR("request mem region for %s failed\n", pdev->name); - ret = -EBUSY; - goto err_kfree; + if (pdata->operating_mode == FSL_USB2_DR_DEVICE) { + if (!request_mem_region(res->start, res->end - res->start + 1, + driver_name)) { + ERR("request mem region for %s failed\n", pdev->name); + ret = -EBUSY; + goto err_kfree; + } } dr_regs = ioremap(res->start, resource_size(res)); @@ -2455,9 +2506,11 @@ static int __init fsl_udc_probe(struct platform_device *pdev) goto err_free_irq; } - /* initialize usb hw reg except for regs for EP, - * leave usbintr reg untouched */ - dr_controller_setup(udc_controller); + if (!udc_controller->transceiver) { + /* initialize usb hw reg except for regs for EP, + * leave usbintr reg untouched */ + dr_controller_setup(udc_controller); + } fsl_udc_clk_finalize(pdev); @@ -2477,6 +2530,9 @@ static int __init fsl_udc_probe(struct platform_device *pdev) if (ret < 0) goto err_free_irq; + if (udc_controller->transceiver) + udc_controller->gadget.is_otg = 1; + /* setup QH and epctrl for ep0 */ ep0_setup(udc_controller); @@ -2521,7 +2577,8 @@ err_iounmap: err_iounmap_noclk: iounmap(dr_regs); err_release_mem_region: - release_mem_region(res->start, res->end - res->start + 1); + if (pdata->operating_mode == FSL_USB2_DR_DEVICE) + release_mem_region(res->start, res->end - res->start + 1); err_kfree: kfree(udc_controller); udc_controller = NULL; @@ -2555,7 +2612,8 @@ static int __exit fsl_udc_remove(struct platform_device *pdev) dma_pool_destroy(udc_controller->td_pool); free_irq(udc_controller->irq, udc_controller); iounmap(dr_regs); - release_mem_region(res->start, res->end - res->start + 1); + if (pdata->operating_mode == FSL_USB2_DR_DEVICE) + release_mem_region(res->start, res->end - res->start + 1); device_unregister(&udc_controller->gadget.dev); /* free udc --wait for the release() finished */ @@ -2598,6 +2656,62 @@ static int fsl_udc_resume(struct platform_device *pdev) return 0; } +static int fsl_udc_otg_suspend(struct device *dev, pm_message_t state) +{ + struct fsl_udc *udc = udc_controller; + u32 mode, usbcmd; + + mode = fsl_readl(&dr_regs->usbmode) & USB_MODE_CTRL_MODE_MASK; + + pr_debug("%s(): mode 0x%x stopped %d\n", __func__, mode, udc->stopped); + + /* + * If the controller is already stopped, then this must be a + * PM suspend. Remember this fact, so that we will leave the + * controller stopped at PM resume time. + */ + if (udc->stopped) { + pr_debug("gadget already stopped, leaving early\n"); + udc->already_stopped = 1; + return 0; + } + + if (mode != USB_MODE_CTRL_MODE_DEVICE) { + pr_debug("gadget not in device mode, leaving early\n"); + return 0; + } + + /* stop the controller */ + usbcmd = fsl_readl(&dr_regs->usbcmd) & ~USB_CMD_RUN_STOP; + fsl_writel(usbcmd, &dr_regs->usbcmd); + + udc->stopped = 1; + + pr_info("USB Gadget suspended\n"); + + return 0; +} + +static int fsl_udc_otg_resume(struct device *dev) +{ + pr_debug("%s(): stopped %d already_stopped %d\n", __func__, + udc_controller->stopped, udc_controller->already_stopped); + + /* + * If the controller was stopped at suspend time, then + * don't resume it now. 
+ */ + if (udc_controller->already_stopped) { + udc_controller->already_stopped = 0; + pr_debug("gadget was already stopped, leaving early\n"); + return 0; + } + + pr_info("USB Gadget resume\n"); + + return fsl_udc_resume(NULL); +} + /*------------------------------------------------------------------------- Register entry point for the peripheral controller driver --------------------------------------------------------------------------*/ @@ -2610,6 +2724,9 @@ static struct platform_driver udc_driver = { .driver = { .name = (char *)driver_name, .owner = THIS_MODULE, + /* udc suspend/resume called from OTG driver */ + .suspend = fsl_udc_otg_suspend, + .resume = fsl_udc_otg_resume, }, }; diff --git a/drivers/usb/gadget/fsl_usb2_udc.h b/drivers/usb/gadget/fsl_usb2_udc.h index 5647cc21b84c..1d51be83fda8 100644 --- a/drivers/usb/gadget/fsl_usb2_udc.h +++ b/drivers/usb/gadget/fsl_usb2_udc.h @@ -476,6 +476,7 @@ struct fsl_udc { unsigned vbus_active:1; unsigned stopped:1; unsigned remote_wakeup:1; + unsigned already_stopped:1; unsigned big_endian_desc:1; struct ep_queue_head *ep_qh; /* Endpoints Queue-Head */ @@ -487,6 +488,7 @@ struct fsl_udc { dma_addr_t ep_qh_dma; /* dma address of QH */ u32 max_pipes; /* Device max pipes */ + u32 bus_reset; /* Device is bus resetting */ u32 resume_state; /* USB state to resume */ u32 usb_state; /* USB current state */ u32 ep0_state; /* Endpoint zero state */ diff --git a/drivers/usb/host/ehci-fsl.c b/drivers/usb/host/ehci-fsl.c index caf3d4ac42bd..623732a312dd 100644 --- a/drivers/usb/host/ehci-fsl.c +++ b/drivers/usb/host/ehci-fsl.c @@ -117,6 +117,9 @@ static int usb_hcd_fsl_probe(const struct hc_driver *driver, pdata->regs = hcd->regs; + if (pdata->power_budget) + hcd->power_budget = pdata->power_budget; + /* * do platform specific init: check the clock, grab/config pins, etc. 
*/ @@ -134,6 +137,30 @@ static int usb_hcd_fsl_probe(const struct hc_driver *driver, retval = usb_add_hcd(hcd, irq, IRQF_DISABLED | IRQF_SHARED); if (retval != 0) goto err4; + +#ifdef CONFIG_USB_OTG + if (pdata->operating_mode == FSL_USB2_DR_OTG) { + struct ehci_hcd *ehci = hcd_to_ehci(hcd); + + ehci->transceiver = otg_get_transceiver(); + dev_dbg(&pdev->dev, "hcd=0x%p ehci=0x%p, transceiver=0x%p\n", + hcd, ehci, ehci->transceiver); + + if (ehci->transceiver) { + retval = otg_set_host(ehci->transceiver, + &ehci_to_hcd(ehci)->self); + if (retval) { + if (ehci->transceiver) + put_device(ehci->transceiver->dev); + goto err4; + } + } else { + dev_err(&pdev->dev, "can't find transceiver\n"); + retval = -ENODEV; + goto err4; + } + } +#endif return retval; err4: @@ -164,6 +191,12 @@ static void usb_hcd_fsl_remove(struct usb_hcd *hcd, struct platform_device *pdev) { struct fsl_usb2_platform_data *pdata = pdev->dev.platform_data; + struct ehci_hcd *ehci = hcd_to_ehci(hcd); + + if (ehci->transceiver) { + otg_set_host(ehci->transceiver, NULL); + put_device(ehci->transceiver->dev); + } usb_remove_hcd(hcd); @@ -544,6 +577,38 @@ static struct dev_pm_ops ehci_fsl_pm_ops = { #define EHCI_FSL_PM_OPS NULL #endif /* CONFIG_PM */ +#ifdef CONFIG_USB_OTG +static int ehci_start_port_reset(struct usb_hcd *hcd, unsigned port) +{ + struct ehci_hcd *ehci = hcd_to_ehci(hcd); + u32 status; + + if (!port) + return -EINVAL; + + port--; + + /* start port reset before HNP protocol time out */ + status = readl(&ehci->regs->port_status[port]); + if (!(status & PORT_CONNECT)) + return -ENODEV; + + /* khubd will finish the reset later */ + if (ehci_is_TDI(ehci)) { + writel(PORT_RESET | + (status & ~(PORT_CSC | PORT_PEC | PORT_OCC)), + &ehci->regs->port_status[port]); + } else { + writel(PORT_RESET, &ehci->regs->port_status[port]); + } + + return 0; +} +#else +#define ehci_start_port_reset NULL +#endif /* CONFIG_USB_OTG */ + + static const struct hc_driver ehci_fsl_hc_driver = { .description = hcd_name, .product_desc = "Freescale On-Chip EHCI Host Controller", @@ -583,6 +648,7 @@ static const struct hc_driver ehci_fsl_hc_driver = { .hub_control = ehci_hub_control, .bus_suspend = ehci_bus_suspend, .bus_resume = ehci_bus_resume, + .start_port_reset = ehci_start_port_reset, .relinquish_port = ehci_relinquish_port, .port_handed_over = ehci_port_handed_over, diff --git a/drivers/usb/host/ehci-hub.c b/drivers/usb/host/ehci-hub.c index 1a21799195af..ea6184bf48d0 100644 --- a/drivers/usb/host/ehci-hub.c +++ b/drivers/usb/host/ehci-hub.c @@ -27,6 +27,7 @@ */ /*-------------------------------------------------------------------------*/ +#include #define PORT_WAKE_BITS (PORT_WKOC_E|PORT_WKDISC_E|PORT_WKCONN_E) @@ -801,6 +802,13 @@ static int ehci_hub_control ( goto error; if (ehci->no_selective_suspend) break; +#ifdef CONFIG_USB_OTG + if ((hcd->self.otg_port == (wIndex + 1)) + && hcd->self.b_hnp_enable) { + otg_start_hnp(ehci->transceiver); + break; + } +#endif if (!(temp & PORT_SUSPEND)) break; if ((temp & PORT_PE) == 0) diff --git a/drivers/usb/host/ehci.h b/drivers/usb/host/ehci.h index 168f1a88c4d0..e9ba8e252489 100644 --- a/drivers/usb/host/ehci.h +++ b/drivers/usb/host/ehci.h @@ -161,6 +161,10 @@ struct ehci_hcd { /* one per controller */ #ifdef DEBUG struct dentry *debug_dir; #endif + /* + * OTG controllers and transceivers need software interaction + */ + struct otg_transceiver *transceiver; }; /* convert between an HCD pointer and the corresponding EHCI_HCD */ diff --git a/include/linux/fsl_devices.h 
b/include/linux/fsl_devices.h index 3773c5dab8f5..fffdf00f87b9 100644 --- a/include/linux/fsl_devices.h +++ b/include/linux/fsl_devices.h @@ -72,6 +72,7 @@ struct fsl_usb2_platform_data { void (*exit)(struct platform_device *); void __iomem *regs; /* ioremap'd register base */ struct clk *clk; + unsigned power_budget; /* hcd->power_budget */ unsigned big_endian_mmio:1; unsigned big_endian_desc:1; unsigned es:1; /* need USBMODE:ES */ -- cgit v1.2.3-71-gd317 From 139540170d9d9b7ead3caaf540f161756b356d56 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Wed, 27 Apr 2011 21:07:28 +0530 Subject: USB: ehci: remove structure packing from ehci_def As pointed out by Arnd Bergmann, in include/linux/usb/ehci_def.h, struct ehci_caps is defined with __attribute__((packed)) for no good reason, and this triggers undefined behaviour when using ARM's readl() on pointers to elements of this structure: http://lkml.kernel.org/r/201102021700.20683.arnd@arndb.de The same problem exists with the other two structures in ehci_def.h too, so remove the __attribute__((packed)) from all of them. Cc: Arnd Bergmann Signed-off-by: Rabin Vincent Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/ehci_def.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/ehci_def.h b/include/linux/usb/ehci_def.h index e49dfd45baa4..78799432008e 100644 --- a/include/linux/usb/ehci_def.h +++ b/include/linux/usb/ehci_def.h @@ -52,7 +52,7 @@ struct ehci_caps { #define HCC_PGM_FRAMELISTLEN(p) ((p)&(1 << 1)) /* true: periodic_size changes*/ #define HCC_64BIT_ADDR(p) ((p)&(1)) /* true: can use 64-bit addr */ u8 portroute[8]; /* nibbles for routing - offset 0xC */ -} __attribute__ ((packed)); +}; /* Section 2.3 Host Controller Operational Registers */ @@ -150,7 +150,7 @@ struct ehci_regs { #define PORT_CSC (1<<1) /* connect status change */ #define PORT_CONNECT (1<<0) /* device connected */ #define PORT_RWC_BITS (PORT_CSC | PORT_PEC | PORT_OCC) -} __attribute__ ((packed)); +}; #define USBMODE 0x68 /* USB Device mode */ #define USBMODE_SDIS (1<<3) /* Stream disable */ @@ -194,7 +194,7 @@ struct ehci_dbg_port { u32 data47; u32 address; #define DBGP_EPADDR(dev, ep) (((dev)<<8)|(ep)) -} __attribute__ ((packed)); +}; #ifdef CONFIG_EARLY_PRINTK_DBGP #include -- cgit v1.2.3-71-gd317 From 61ec9016988f5c030e96e3c8a42ee9e11b8517aa Mon Sep 17 00:00:00 2001 From: John Linn Date: Sat, 30 Apr 2011 00:07:43 -0400 Subject: tty/serial: add support for Xilinx PS UART The Xilinx PS Uart is used on the new ARM based SoC. This UART is not compatible with others such that a seperate driver is required. Signed-off-by: John Linn Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/Kconfig | 13 + drivers/tty/serial/Makefile | 1 + drivers/tty/serial/xilinx_uartps.c | 1113 ++++++++++++++++++++++++++++++++++++ include/linux/serial_core.h | 3 + 4 files changed, 1130 insertions(+) create mode 100644 drivers/tty/serial/xilinx_uartps.c (limited to 'include/linux') diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig index 80484af781e1..84876ec2ed97 100644 --- a/drivers/tty/serial/Kconfig +++ b/drivers/tty/serial/Kconfig @@ -1612,4 +1612,17 @@ config SERIAL_MXS_AUART_CONSOLE help Enable a MXS AUART port to be the system console. +config SERIAL_XILINX_PS_UART + tristate "Xilinx PS UART support" + select SERIAL_CORE + help + This driver supports the Xilinx PS UART port. 
+ +config SERIAL_XILINX_PS_UART_CONSOLE + bool "Xilinx PS UART console support" + depends on SERIAL_XILINX_PS_UART=y + select SERIAL_CORE_CONSOLE + help + Enable a Xilinx PS UART port to be the system console. + endmenu diff --git a/drivers/tty/serial/Makefile b/drivers/tty/serial/Makefile index fee0690ef8e3..aafddf15992f 100644 --- a/drivers/tty/serial/Makefile +++ b/drivers/tty/serial/Makefile @@ -94,3 +94,4 @@ obj-$(CONFIG_SERIAL_IFX6X60) += ifx6x60.o obj-$(CONFIG_SERIAL_PCH_UART) += pch_uart.o obj-$(CONFIG_SERIAL_MSM_SMD) += msm_smd_tty.o obj-$(CONFIG_SERIAL_MXS_AUART) += mxs-auart.o +obj-$(CONFIG_SERIAL_XILINX_PS_UART) += xilinx_uartps.o diff --git a/drivers/tty/serial/xilinx_uartps.c b/drivers/tty/serial/xilinx_uartps.c new file mode 100644 index 000000000000..19cc1e8149dd --- /dev/null +++ b/drivers/tty/serial/xilinx_uartps.c @@ -0,0 +1,1113 @@ +/* + * Xilinx PS UART driver + * + * 2011 (c) Xilinx Inc. + * + * This program is free software; you can redistribute it + * and/or modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; + * either version 2 of the License, or (at your option) any + * later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#define XUARTPS_TTY_NAME "ttyPS" +#define XUARTPS_NAME "xuartps" +#define XUARTPS_MAJOR 0 /* use dynamic node allocation */ +#define XUARTPS_MINOR 0 /* works best with devtmpfs */ +#define XUARTPS_NR_PORTS 2 +#define XUARTPS_FIFO_SIZE 16 /* FIFO size */ +#define XUARTPS_REGISTER_SPACE 0xFFF + +#define xuartps_readl(offset) ioread32(port->membase + offset) +#define xuartps_writel(val, offset) iowrite32(val, port->membase + offset) + +/********************************Register Map********************************/ +/** UART + * + * Register offsets for the UART. + * + */ +#define XUARTPS_CR_OFFSET 0x00 /* Control Register [8:0] */ +#define XUARTPS_MR_OFFSET 0x04 /* Mode Register [10:0] */ +#define XUARTPS_IER_OFFSET 0x08 /* Interrupt Enable [10:0] */ +#define XUARTPS_IDR_OFFSET 0x0C /* Interrupt Disable [10:0] */ +#define XUARTPS_IMR_OFFSET 0x10 /* Interrupt Mask [10:0] */ +#define XUARTPS_ISR_OFFSET 0x14 /* Interrupt Status [10:0]*/ +#define XUARTPS_BAUDGEN_OFFSET 0x18 /* Baud Rate Generator [15:0] */ +#define XUARTPS_RXTOUT_OFFSET 0x1C /* RX Timeout [7:0] */ +#define XUARTPS_RXWM_OFFSET 0x20 /* RX FIFO Trigger Level [5:0] */ +#define XUARTPS_MODEMCR_OFFSET 0x24 /* Modem Control [5:0] */ +#define XUARTPS_MODEMSR_OFFSET 0x28 /* Modem Status [8:0] */ +#define XUARTPS_SR_OFFSET 0x2C /* Channel Status [11:0] */ +#define XUARTPS_FIFO_OFFSET 0x30 /* FIFO [15:0] or [7:0] */ +#define XUARTPS_BAUDDIV_OFFSET 0x34 /* Baud Rate Divider [7:0] */ +#define XUARTPS_FLOWDEL_OFFSET 0x38 /* Flow Delay [15:0] */ +#define XUARTPS_IRRX_PWIDTH_OFFSET 0x3C /* IR Minimum Received Pulse + Width [15:0] */ +#define XUARTPS_IRTX_PWIDTH_OFFSET 0x40 /* IR Transmitted pulse + Width [7:0] */ +#define XUARTPS_TXWM_OFFSET 0x44 /* TX FIFO Trigger Level [5:0] */ + +/** Control Register + * + * The Control register (CR) controls the major functions of the device. + * + * Control Register Bit Definitions + */ +#define XUARTPS_CR_STOPBRK 0x00000100 /* Stop TX break */ +#define XUARTPS_CR_STARTBRK 0x00000080 /* Set TX break */ +#define XUARTPS_CR_TX_DIS 0x00000020 /* TX disabled. */ +#define XUARTPS_CR_TX_EN 0x00000010 /* TX enabled */ +#define XUARTPS_CR_RX_DIS 0x00000008 /* RX disabled. 
*/ +#define XUARTPS_CR_RX_EN 0x00000004 /* RX enabled */ +#define XUARTPS_CR_TXRST 0x00000002 /* TX logic reset */ +#define XUARTPS_CR_RXRST 0x00000001 /* RX logic reset */ +#define XUARTPS_CR_RST_TO 0x00000040 /* Restart Timeout Counter */ + +/** Mode Register + * + * The mode register (MR) defines the mode of transfer as well as the data + * format. If this register is modified during transmission or reception, + * data validity cannot be guaranteed. + * + * Mode Register Bit Definitions + * + */ +#define XUARTPS_MR_CLKSEL 0x00000001 /* Pre-scalar selection */ +#define XUARTPS_MR_CHMODE_L_LOOP 0x00000200 /* Local loop back mode */ +#define XUARTPS_MR_CHMODE_NORM 0x00000000 /* Normal mode */ + +#define XUARTPS_MR_STOPMODE_2_BIT 0x00000080 /* 2 stop bits */ +#define XUARTPS_MR_STOPMODE_1_BIT 0x00000000 /* 1 stop bit */ + +#define XUARTPS_MR_PARITY_NONE 0x00000020 /* No parity mode */ +#define XUARTPS_MR_PARITY_MARK 0x00000018 /* Mark parity mode */ +#define XUARTPS_MR_PARITY_SPACE 0x00000010 /* Space parity mode */ +#define XUARTPS_MR_PARITY_ODD 0x00000008 /* Odd parity mode */ +#define XUARTPS_MR_PARITY_EVEN 0x00000000 /* Even parity mode */ + +#define XUARTPS_MR_CHARLEN_6_BIT 0x00000006 /* 6 bits data */ +#define XUARTPS_MR_CHARLEN_7_BIT 0x00000004 /* 7 bits data */ +#define XUARTPS_MR_CHARLEN_8_BIT 0x00000000 /* 8 bits data */ + +/** Interrupt Registers + * + * Interrupt control logic uses the interrupt enable register (IER) and the + * interrupt disable register (IDR) to set the value of the bits in the + * interrupt mask register (IMR). The IMR determines whether to pass an + * interrupt to the interrupt status register (ISR). + * Writing a 1 to IER Enables an interrupt, writing a 1 to IDR disables an + * interrupt. IMR and ISR are read only, and IER and IDR are write only. + * Reading either IER or IDR returns 0x00. + * + * All four registers have the same bit definitions. + */ +#define XUARTPS_IXR_TOUT 0x00000100 /* RX Timeout error interrupt */ +#define XUARTPS_IXR_PARITY 0x00000080 /* Parity error interrupt */ +#define XUARTPS_IXR_FRAMING 0x00000040 /* Framing error interrupt */ +#define XUARTPS_IXR_OVERRUN 0x00000020 /* Overrun error interrupt */ +#define XUARTPS_IXR_TXFULL 0x00000010 /* TX FIFO Full interrupt */ +#define XUARTPS_IXR_TXEMPTY 0x00000008 /* TX FIFO empty interrupt */ +#define XUARTPS_ISR_RXEMPTY 0x00000002 /* RX FIFO empty interrupt */ +#define XUARTPS_IXR_RXTRIG 0x00000001 /* RX FIFO trigger interrupt */ +#define XUARTPS_IXR_RXFULL 0x00000004 /* RX FIFO full interrupt. */ +#define XUARTPS_IXR_RXEMPTY 0x00000002 /* RX FIFO empty interrupt. */ +#define XUARTPS_IXR_MASK 0x00001FFF /* Valid bit mask */ + +/** Channel Status Register + * + * The channel status register (CSR) is provided to enable the control logic + * to monitor the status of bits in the channel interrupt status register, + * even if these are masked out by the interrupt mask register. 
+ */ +#define XUARTPS_SR_RXEMPTY 0x00000002 /* RX FIFO empty */ +#define XUARTPS_SR_TXEMPTY 0x00000008 /* TX FIFO empty */ +#define XUARTPS_SR_TXFULL 0x00000010 /* TX FIFO full */ +#define XUARTPS_SR_RXTRIG 0x00000001 /* Rx Trigger */ + +/** + * xuartps_isr - Interrupt handler + * @irq: Irq number + * @dev_id: Id of the port + * + * Returns IRQHANDLED + **/ +static irqreturn_t xuartps_isr(int irq, void *dev_id) +{ + struct uart_port *port = (struct uart_port *)dev_id; + struct tty_struct *tty; + unsigned long flags; + unsigned int isrstatus, numbytes; + unsigned int data; + char status = TTY_NORMAL; + + /* Get the tty which could be NULL so don't assume it's valid */ + tty = tty_port_tty_get(&port->state->port); + + spin_lock_irqsave(&port->lock, flags); + + /* Read the interrupt status register to determine which + * interrupt(s) is/are active. + */ + isrstatus = xuartps_readl(XUARTPS_ISR_OFFSET); + + /* drop byte with parity error if IGNPAR specified */ + if (isrstatus & port->ignore_status_mask & XUARTPS_IXR_PARITY) + isrstatus &= ~(XUARTPS_IXR_RXTRIG | XUARTPS_IXR_TOUT); + + isrstatus &= port->read_status_mask; + isrstatus &= ~port->ignore_status_mask; + + if ((isrstatus & XUARTPS_IXR_TOUT) || + (isrstatus & XUARTPS_IXR_RXTRIG)) { + /* Receive Timeout Interrupt */ + while ((xuartps_readl(XUARTPS_SR_OFFSET) & + XUARTPS_SR_RXEMPTY) != XUARTPS_SR_RXEMPTY) { + data = xuartps_readl(XUARTPS_FIFO_OFFSET); + port->icount.rx++; + + if (isrstatus & XUARTPS_IXR_PARITY) { + port->icount.parity++; + status = TTY_PARITY; + } else if (isrstatus & XUARTPS_IXR_FRAMING) { + port->icount.frame++; + status = TTY_FRAME; + } else if (isrstatus & XUARTPS_IXR_OVERRUN) + port->icount.overrun++; + + if (tty) + uart_insert_char(port, isrstatus, + XUARTPS_IXR_OVERRUN, data, + status); + } + spin_unlock(&port->lock); + if (tty) + tty_flip_buffer_push(tty); + spin_lock(&port->lock); + } + + /* Dispatch an appropriate handler */ + if ((isrstatus & XUARTPS_IXR_TXEMPTY) == XUARTPS_IXR_TXEMPTY) { + if (uart_circ_empty(&port->state->xmit)) { + xuartps_writel(XUARTPS_IXR_TXEMPTY, + XUARTPS_IDR_OFFSET); + } else { + numbytes = port->fifosize; + /* Break if no more data available in the UART buffer */ + while (numbytes--) { + if (uart_circ_empty(&port->state->xmit)) + break; + /* Get the data from the UART circular buffer + * and write it to the xuartps's TX_FIFO + * register. + */ + xuartps_writel( + port->state->xmit.buf[port->state->xmit. + tail], XUARTPS_FIFO_OFFSET); + + port->icount.tx++; + + /* Adjust the tail of the UART buffer and wrap + * the buffer if it reaches limit. 
+ */ + port->state->xmit.tail = + (port->state->xmit.tail + 1) & \ + (UART_XMIT_SIZE - 1); + } + + if (uart_circ_chars_pending( + &port->state->xmit) < WAKEUP_CHARS) + uart_write_wakeup(port); + } + } + + xuartps_writel(isrstatus, XUARTPS_ISR_OFFSET); + + /* be sure to release the lock and tty before leaving */ + spin_unlock_irqrestore(&port->lock, flags); + tty_kref_put(tty); + + return IRQ_HANDLED; +} + +/** + * xuartps_set_baud_rate - Calculate and set the baud rate + * @port: Handle to the uart port structure + * @baud: Baud rate to set + * + * Returns baud rate, requested baud when possible, or actual baud when there + * was too much error + **/ +static unsigned int xuartps_set_baud_rate(struct uart_port *port, + unsigned int baud) +{ + unsigned int sel_clk; + unsigned int calc_baud = 0; + unsigned int brgr_val, brdiv_val; + unsigned int bauderror; + + /* Formula to obtain baud rate is + * baud_tx/rx rate = sel_clk/CD * (BDIV + 1) + * input_clk = (Uart User Defined Clock or Apb Clock) + * depends on UCLKEN in MR Reg + * sel_clk = input_clk or input_clk/8; + * depends on CLKS in MR reg + * CD and BDIV depends on values in + * baud rate generate register + * baud rate clock divisor register + */ + sel_clk = port->uartclk; + if (xuartps_readl(XUARTPS_MR_OFFSET) & XUARTPS_MR_CLKSEL) + sel_clk = sel_clk / 8; + + /* Find the best values for baud generation */ + for (brdiv_val = 4; brdiv_val < 255; brdiv_val++) { + + brgr_val = sel_clk / (baud * (brdiv_val + 1)); + if (brgr_val < 2 || brgr_val > 65535) + continue; + + calc_baud = sel_clk / (brgr_val * (brdiv_val + 1)); + + if (baud > calc_baud) + bauderror = baud - calc_baud; + else + bauderror = calc_baud - baud; + + /* use the values when percent error is acceptable */ + if (((bauderror * 100) / baud) < 3) { + calc_baud = baud; + break; + } + } + + /* Set the values for the new baud rate */ + xuartps_writel(brgr_val, XUARTPS_BAUDGEN_OFFSET); + xuartps_writel(brdiv_val, XUARTPS_BAUDDIV_OFFSET); + + return calc_baud; +} + +/*----------------------Uart Operations---------------------------*/ + +/** + * xuartps_start_tx - Start transmitting bytes + * @port: Handle to the uart port structure + * + **/ +static void xuartps_start_tx(struct uart_port *port) +{ + unsigned int status, numbytes = port->fifosize; + + if (uart_circ_empty(&port->state->xmit) || uart_tx_stopped(port)) + return; + + status = xuartps_readl(XUARTPS_CR_OFFSET); + /* Set the TX enable bit and clear the TX disable bit to enable the + * transmitter. + */ + xuartps_writel((status & ~XUARTPS_CR_TX_DIS) | XUARTPS_CR_TX_EN, + XUARTPS_CR_OFFSET); + + while (numbytes-- && ((xuartps_readl(XUARTPS_SR_OFFSET) + & XUARTPS_SR_TXFULL)) != XUARTPS_SR_TXFULL) { + + /* Break if no more data available in the UART buffer */ + if (uart_circ_empty(&port->state->xmit)) + break; + + /* Get the data from the UART circular buffer and + * write it to the xuartps's TX_FIFO register. + */ + xuartps_writel( + port->state->xmit.buf[port->state->xmit.tail], + XUARTPS_FIFO_OFFSET); + port->icount.tx++; + + /* Adjust the tail of the UART buffer and wrap + * the buffer if it reaches limit. 
+ */ + port->state->xmit.tail = (port->state->xmit.tail + 1) & + (UART_XMIT_SIZE - 1); + } + + /* Enable the TX Empty interrupt */ + xuartps_writel(XUARTPS_IXR_TXEMPTY, XUARTPS_IER_OFFSET); + + if (uart_circ_chars_pending(&port->state->xmit) < WAKEUP_CHARS) + uart_write_wakeup(port); +} + +/** + * xuartps_stop_tx - Stop TX + * @port: Handle to the uart port structure + * + **/ +static void xuartps_stop_tx(struct uart_port *port) +{ + unsigned int regval; + + regval = xuartps_readl(XUARTPS_CR_OFFSET); + regval |= XUARTPS_CR_TX_DIS; + /* Disable the transmitter */ + xuartps_writel(regval, XUARTPS_CR_OFFSET); +} + +/** + * xuartps_stop_rx - Stop RX + * @port: Handle to the uart port structure + * + **/ +static void xuartps_stop_rx(struct uart_port *port) +{ + unsigned int regval; + + regval = xuartps_readl(XUARTPS_CR_OFFSET); + regval |= XUARTPS_CR_RX_DIS; + /* Disable the receiver */ + xuartps_writel(regval, XUARTPS_CR_OFFSET); +} + +/** + * xuartps_tx_empty - Check whether TX is empty + * @port: Handle to the uart port structure + * + * Returns TIOCSER_TEMT on success, 0 otherwise + **/ +static unsigned int xuartps_tx_empty(struct uart_port *port) +{ + unsigned int status; + + status = xuartps_readl(XUARTPS_ISR_OFFSET) & XUARTPS_IXR_TXEMPTY; + return status ? TIOCSER_TEMT : 0; +} + +/** + * xuartps_break_ctl - Based on the input ctl we have to start or stop + * transmitting char breaks + * @port: Handle to the uart port structure + * @ctl: Value based on which start or stop decision is taken + * + **/ +static void xuartps_break_ctl(struct uart_port *port, int ctl) +{ + unsigned int status; + unsigned long flags; + + spin_lock_irqsave(&port->lock, flags); + + status = xuartps_readl(XUARTPS_CR_OFFSET); + + if (ctl == -1) + xuartps_writel(XUARTPS_CR_STARTBRK | status, + XUARTPS_CR_OFFSET); + else { + if ((status & XUARTPS_CR_STOPBRK) == 0) + xuartps_writel(XUARTPS_CR_STOPBRK | status, + XUARTPS_CR_OFFSET); + } + spin_unlock_irqrestore(&port->lock, flags); +} + +/** + * xuartps_set_termios - termios operations, handling data length, parity, + * stop bits, flow control, baud rate + * @port: Handle to the uart port structure + * @termios: Handle to the input termios structure + * @old: Values of the previously saved termios structure + * + **/ +static void xuartps_set_termios(struct uart_port *port, + struct ktermios *termios, struct ktermios *old) +{ + unsigned int cval = 0; + unsigned int baud; + unsigned long flags; + unsigned int ctrl_reg, mode_reg; + + spin_lock_irqsave(&port->lock, flags); + + /* Empty the receive FIFO 1st before making changes */ + while ((xuartps_readl(XUARTPS_SR_OFFSET) & + XUARTPS_SR_RXEMPTY) != XUARTPS_SR_RXEMPTY) { + xuartps_readl(XUARTPS_FIFO_OFFSET); + } + + /* Disable the TX and RX to set baud rate */ + xuartps_writel(xuartps_readl(XUARTPS_CR_OFFSET) | + (XUARTPS_CR_TX_DIS | XUARTPS_CR_RX_DIS), + XUARTPS_CR_OFFSET); + + /* Min baud rate = 6bps and Max Baud Rate is 10Mbps for 100Mhz clk */ + baud = uart_get_baud_rate(port, termios, old, 0, 10000000); + baud = xuartps_set_baud_rate(port, baud); + if (tty_termios_baud_rate(termios)) + tty_termios_encode_baud_rate(termios, baud, baud); + + /* + * Update the per-port timeout. 
+ */ + uart_update_timeout(port, termios->c_cflag, baud); + + /* Set TX/RX Reset */ + xuartps_writel(xuartps_readl(XUARTPS_CR_OFFSET) | + (XUARTPS_CR_TXRST | XUARTPS_CR_RXRST), + XUARTPS_CR_OFFSET); + + ctrl_reg = xuartps_readl(XUARTPS_CR_OFFSET); + + /* Clear the RX disable and TX disable bits and then set the TX enable + * bit and RX enable bit to enable the transmitter and receiver. + */ + xuartps_writel( + (ctrl_reg & ~(XUARTPS_CR_TX_DIS | XUARTPS_CR_RX_DIS)) + | (XUARTPS_CR_TX_EN | XUARTPS_CR_RX_EN), + XUARTPS_CR_OFFSET); + + xuartps_writel(10, XUARTPS_RXTOUT_OFFSET); + + port->read_status_mask = XUARTPS_IXR_TXEMPTY | XUARTPS_IXR_RXTRIG | + XUARTPS_IXR_OVERRUN | XUARTPS_IXR_TOUT; + port->ignore_status_mask = 0; + + if (termios->c_iflag & INPCK) + port->read_status_mask |= XUARTPS_IXR_PARITY | + XUARTPS_IXR_FRAMING; + + if (termios->c_iflag & IGNPAR) + port->ignore_status_mask |= XUARTPS_IXR_PARITY | + XUARTPS_IXR_FRAMING | XUARTPS_IXR_OVERRUN; + + /* ignore all characters if CREAD is not set */ + if ((termios->c_cflag & CREAD) == 0) + port->ignore_status_mask |= XUARTPS_IXR_RXTRIG | + XUARTPS_IXR_TOUT | XUARTPS_IXR_PARITY | + XUARTPS_IXR_FRAMING | XUARTPS_IXR_OVERRUN; + + mode_reg = xuartps_readl(XUARTPS_MR_OFFSET); + + /* Handling Data Size */ + switch (termios->c_cflag & CSIZE) { + case CS6: + cval |= XUARTPS_MR_CHARLEN_6_BIT; + break; + case CS7: + cval |= XUARTPS_MR_CHARLEN_7_BIT; + break; + default: + case CS8: + cval |= XUARTPS_MR_CHARLEN_8_BIT; + termios->c_cflag &= ~CSIZE; + termios->c_cflag |= CS8; + break; + } + + /* Handling Parity and Stop Bits length */ + if (termios->c_cflag & CSTOPB) + cval |= XUARTPS_MR_STOPMODE_2_BIT; /* 2 STOP bits */ + else + cval |= XUARTPS_MR_STOPMODE_1_BIT; /* 1 STOP bit */ + + if (termios->c_cflag & PARENB) { + /* Mark or Space parity */ + if (termios->c_cflag & CMSPAR) { + if (termios->c_cflag & PARODD) + cval |= XUARTPS_MR_PARITY_MARK; + else + cval |= XUARTPS_MR_PARITY_SPACE; + } else if (termios->c_cflag & PARODD) + cval |= XUARTPS_MR_PARITY_ODD; + else + cval |= XUARTPS_MR_PARITY_EVEN; + } else + cval |= XUARTPS_MR_PARITY_NONE; + xuartps_writel(cval , XUARTPS_MR_OFFSET); + + spin_unlock_irqrestore(&port->lock, flags); +} + +/** + * xuartps_startup - Called when an application opens a xuartps port + * @port: Handle to the uart port structure + * + * Returns 0 on success, negative error otherwise + **/ +static int xuartps_startup(struct uart_port *port) +{ + unsigned int retval = 0, status = 0; + + retval = request_irq(port->irq, xuartps_isr, 0, XUARTPS_NAME, + (void *)port); + if (retval) + return retval; + + /* Disable the TX and RX */ + xuartps_writel(XUARTPS_CR_TX_DIS | XUARTPS_CR_RX_DIS, + XUARTPS_CR_OFFSET); + + /* Set the Control Register with TX/RX Enable, TX/RX Reset, + * no break chars. + */ + xuartps_writel(XUARTPS_CR_TXRST | XUARTPS_CR_RXRST, + XUARTPS_CR_OFFSET); + + status = xuartps_readl(XUARTPS_CR_OFFSET); + + /* Clear the RX disable and TX disable bits and then set the TX enable + * bit and RX enable bit to enable the transmitter and receiver. + */ + xuartps_writel((status & ~(XUARTPS_CR_TX_DIS | XUARTPS_CR_RX_DIS)) + | (XUARTPS_CR_TX_EN | XUARTPS_CR_RX_EN | + XUARTPS_CR_STOPBRK), XUARTPS_CR_OFFSET); + + /* Set the Mode Register with normal mode,8 data bits,1 stop bit, + * no parity. 
+ */ + xuartps_writel(XUARTPS_MR_CHMODE_NORM | XUARTPS_MR_STOPMODE_1_BIT + | XUARTPS_MR_PARITY_NONE | XUARTPS_MR_CHARLEN_8_BIT, + XUARTPS_MR_OFFSET); + + /* Set the RX FIFO Trigger level to 14 assuming FIFO size as 16 */ + xuartps_writel(14, XUARTPS_RXWM_OFFSET); + + /* Receive Timeout register is enabled with value of 10 */ + xuartps_writel(10, XUARTPS_RXTOUT_OFFSET); + + + /* Set the Interrupt Registers with desired interrupts */ + xuartps_writel(XUARTPS_IXR_TXEMPTY | XUARTPS_IXR_PARITY | + XUARTPS_IXR_FRAMING | XUARTPS_IXR_OVERRUN | + XUARTPS_IXR_RXTRIG | XUARTPS_IXR_TOUT, XUARTPS_IER_OFFSET); + xuartps_writel(~(XUARTPS_IXR_TXEMPTY | XUARTPS_IXR_PARITY | + XUARTPS_IXR_FRAMING | XUARTPS_IXR_OVERRUN | + XUARTPS_IXR_RXTRIG | XUARTPS_IXR_TOUT), XUARTPS_IDR_OFFSET); + + return retval; +} + +/** + * xuartps_shutdown - Called when an application closes a xuartps port + * @port: Handle to the uart port structure + * + **/ +static void xuartps_shutdown(struct uart_port *port) +{ + int status; + + /* Disable interrupts */ + status = xuartps_readl(XUARTPS_IMR_OFFSET); + xuartps_writel(status, XUARTPS_IDR_OFFSET); + + /* Disable the TX and RX */ + xuartps_writel(XUARTPS_CR_TX_DIS | XUARTPS_CR_RX_DIS, + XUARTPS_CR_OFFSET); + free_irq(port->irq, port); +} + +/** + * xuartps_type - Set UART type to xuartps port + * @port: Handle to the uart port structure + * + * Returns string on success, NULL otherwise + **/ +static const char *xuartps_type(struct uart_port *port) +{ + return port->type == PORT_XUARTPS ? XUARTPS_NAME : NULL; +} + +/** + * xuartps_verify_port - Verify the port params + * @port: Handle to the uart port structure + * @ser: Handle to the structure whose members are compared + * + * Returns 0 if success otherwise -EINVAL + **/ +static int xuartps_verify_port(struct uart_port *port, + struct serial_struct *ser) +{ + if (ser->type != PORT_UNKNOWN && ser->type != PORT_XUARTPS) + return -EINVAL; + if (port->irq != ser->irq) + return -EINVAL; + if (ser->io_type != UPIO_MEM) + return -EINVAL; + if (port->iobase != ser->port) + return -EINVAL; + if (ser->hub6 != 0) + return -EINVAL; + return 0; +} + +/** + * xuartps_request_port - Claim the memory region attached to xuartps port, + * called when the driver adds a xuartps port via + * uart_add_one_port() + * @port: Handle to the uart port structure + * + * Returns 0, -ENOMEM if request fails + **/ +static int xuartps_request_port(struct uart_port *port) +{ + if (!request_mem_region(port->mapbase, XUARTPS_REGISTER_SPACE, + XUARTPS_NAME)) { + return -ENOMEM; + } + + port->membase = ioremap(port->mapbase, XUARTPS_REGISTER_SPACE); + if (!port->membase) { + dev_err(port->dev, "Unable to map registers\n"); + release_mem_region(port->mapbase, XUARTPS_REGISTER_SPACE); + return -ENOMEM; + } + return 0; +} + +/** + * xuartps_release_port - Release the memory region attached to a xuartps + * port, called when the driver removes a xuartps + * port via uart_remove_one_port(). 
+ * @port: Handle to the uart port structure + * + **/ +static void xuartps_release_port(struct uart_port *port) +{ + release_mem_region(port->mapbase, XUARTPS_REGISTER_SPACE); + iounmap(port->membase); + port->membase = NULL; +} + +/** + * xuartps_config_port - Configure xuartps, called when the driver adds a + * xuartps port + * @port: Handle to the uart port structure + * @flags: If any + * + **/ +static void xuartps_config_port(struct uart_port *port, int flags) +{ + if (flags & UART_CONFIG_TYPE && xuartps_request_port(port) == 0) + port->type = PORT_XUARTPS; +} + +/** + * xuartps_get_mctrl - Get the modem control state + * + * @port: Handle to the uart port structure + * + * Returns the modem control state + * + **/ +static unsigned int xuartps_get_mctrl(struct uart_port *port) +{ + return TIOCM_CTS | TIOCM_DSR | TIOCM_CAR; +} + +static void xuartps_set_mctrl(struct uart_port *port, unsigned int mctrl) +{ + /* N/A */ +} + +static void xuartps_enable_ms(struct uart_port *port) +{ + /* N/A */ +} + +/** The UART operations structure + */ +static struct uart_ops xuartps_ops = { + .set_mctrl = xuartps_set_mctrl, + .get_mctrl = xuartps_get_mctrl, + .enable_ms = xuartps_enable_ms, + + .start_tx = xuartps_start_tx, /* Start transmitting */ + .stop_tx = xuartps_stop_tx, /* Stop transmission */ + .stop_rx = xuartps_stop_rx, /* Stop reception */ + .tx_empty = xuartps_tx_empty, /* Transmitter busy? */ + .break_ctl = xuartps_break_ctl, /* Start/stop + * transmitting break + */ + .set_termios = xuartps_set_termios, /* Set termios */ + .startup = xuartps_startup, /* App opens xuartps */ + .shutdown = xuartps_shutdown, /* App closes xuartps */ + .type = xuartps_type, /* Set UART type */ + .verify_port = xuartps_verify_port, /* Verification of port + * params + */ + .request_port = xuartps_request_port, /* Claim resources + * associated with a + * xuartps port + */ + .release_port = xuartps_release_port, /* Release resources + * associated with a + * xuartps port + */ + .config_port = xuartps_config_port, /* Configure when driver + * adds a xuartps port + */ +}; + +static struct uart_port xuartps_port[2]; + +/** + * xuartps_get_port - Configure the port from the platform device resource + * info + * + * Returns a pointer to a uart_port or NULL for failure + **/ +static struct uart_port *xuartps_get_port(void) +{ + struct uart_port *port; + int id; + + /* Find the next unused port */ + for (id = 0; id < XUARTPS_NR_PORTS; id++) + if (xuartps_port[id].mapbase == 0) + break; + + if (id >= XUARTPS_NR_PORTS) + return NULL; + + port = &xuartps_port[id]; + + /* At this point, we've got an empty uart_port struct, initialize it */ + spin_lock_init(&port->lock); + port->membase = NULL; + port->iobase = 1; /* mark port in use */ + port->irq = 0; + port->type = PORT_UNKNOWN; + port->iotype = UPIO_MEM32; + port->flags = UPF_BOOT_AUTOCONF; + port->ops = &xuartps_ops; + port->fifosize = XUARTPS_FIFO_SIZE; + port->line = id; + port->dev = NULL; + return port; +} + +/*-----------------------Console driver operations--------------------------*/ + +#ifdef CONFIG_SERIAL_XILINX_PS_UART_CONSOLE +/** + * xuartps_console_wait_tx - Wait for the TX to be full + * @port: Handle to the uart port structure + * + **/ +static void xuartps_console_wait_tx(struct uart_port *port) +{ + while ((xuartps_readl(XUARTPS_SR_OFFSET) & XUARTPS_SR_TXEMPTY) + != XUARTPS_SR_TXEMPTY) + barrier(); +} + +/** + * xuartps_console_putchar - write the character to the FIFO buffer + * @port: Handle to the uart port structure + * @ch: Character to be 
written + * + **/ +static void xuartps_console_putchar(struct uart_port *port, int ch) +{ + xuartps_console_wait_tx(port); + xuartps_writel(ch, XUARTPS_FIFO_OFFSET); +} + +/** + * xuartps_console_write - perform write operation + * @port: Handle to the uart port structure + * @s: Pointer to character array + * @count: No of characters + **/ +static void xuartps_console_write(struct console *co, const char *s, + unsigned int count) +{ + struct uart_port *port = &xuartps_port[co->index]; + unsigned long flags; + unsigned int imr; + int locked = 1; + + if (oops_in_progress) + locked = spin_trylock_irqsave(&port->lock, flags); + else + spin_lock_irqsave(&port->lock, flags); + + /* save and disable interrupt */ + imr = xuartps_readl(XUARTPS_IMR_OFFSET); + xuartps_writel(imr, XUARTPS_IDR_OFFSET); + + uart_console_write(port, s, count, xuartps_console_putchar); + xuartps_console_wait_tx(port); + + /* restore interrupt state, it seems like there may be a h/w bug + * in that the interrupt enable register should not need to be + * written based on the data sheet + */ + xuartps_writel(~imr, XUARTPS_IDR_OFFSET); + xuartps_writel(imr, XUARTPS_IER_OFFSET); + + if (locked) + spin_unlock_irqrestore(&port->lock, flags); +} + +/** + * xuartps_console_setup - Initialize the uart to default config + * @co: Console handle + * @options: Initial settings of uart + * + * Returns 0, -ENODEV if no device + **/ +static int __init xuartps_console_setup(struct console *co, char *options) +{ + struct uart_port *port = &xuartps_port[co->index]; + int baud = 9600; + int bits = 8; + int parity = 'n'; + int flow = 'n'; + + if (co->index < 0 || co->index >= XUARTPS_NR_PORTS) + return -EINVAL; + + if (!port->mapbase) { + pr_debug("console on ttyPS%i not present\n", co->index); + return -ENODEV; + } + + if (options) + uart_parse_options(options, &baud, &parity, &bits, &flow); + + return uart_set_options(port, co, baud, parity, bits, flow); +} + +static struct uart_driver xuartps_uart_driver; + +static struct console xuartps_console = { + .name = XUARTPS_TTY_NAME, + .write = xuartps_console_write, + .device = uart_console_device, + .setup = xuartps_console_setup, + .flags = CON_PRINTBUFFER, + .index = -1, /* Specified on the cmdline (e.g. 
console=ttyPS ) */ + .data = &xuartps_uart_driver, +}; + +/** + * xuartps_console_init - Initialization call + * + * Returns 0 on success, negative error otherwise + **/ +static int __init xuartps_console_init(void) +{ + register_console(&xuartps_console); + return 0; +} + +console_initcall(xuartps_console_init); + +#endif /* CONFIG_SERIAL_XILINX_PS_UART_CONSOLE */ + +/** Structure Definitions + */ +static struct uart_driver xuartps_uart_driver = { + .owner = THIS_MODULE, /* Owner */ + .driver_name = XUARTPS_NAME, /* Driver name */ + .dev_name = XUARTPS_TTY_NAME, /* Node name */ + .major = XUARTPS_MAJOR, /* Major number */ + .minor = XUARTPS_MINOR, /* Minor number */ + .nr = XUARTPS_NR_PORTS, /* Number of UART ports */ +#ifdef CONFIG_SERIAL_XILINX_PS_UART_CONSOLE + .cons = &xuartps_console, /* Console */ +#endif +}; + +/* --------------------------------------------------------------------- + * Platform bus binding + */ +/** + * xuartps_probe - Platform driver probe + * @pdev: Pointer to the platform device structure + * + * Returns 0 on success, negative error otherwise + **/ +static int __devinit xuartps_probe(struct platform_device *pdev) +{ + int rc; + struct uart_port *port; + struct resource *res, *res2; + int clk = 0; + +#ifdef CONFIG_OF + const unsigned int *prop; + + prop = of_get_property(pdev->dev.of_node, "clock", NULL); + if (prop) + clk = be32_to_cpup(prop); +#else + clk = *((unsigned int *)(pdev->dev.platform_data)); +#endif + if (!clk) { + dev_err(&pdev->dev, "no clock specified\n"); + return -ENODEV; + } + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -ENODEV; + + res2 = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + if (!res2) + return -ENODEV; + + /* Initialize the port structure */ + port = xuartps_get_port(); + + if (!port) { + dev_err(&pdev->dev, "Cannot get uart_port structure\n"); + return -ENODEV; + } else { + /* Register the port. + * This function also registers this device with the tty layer + * and triggers invocation of the config_port() entry point. + */ + port->mapbase = res->start; + port->irq = res2->start; + port->dev = &pdev->dev; + port->uartclk = clk; + dev_set_drvdata(&pdev->dev, port); + rc = uart_add_one_port(&xuartps_uart_driver, port); + if (rc) { + dev_err(&pdev->dev, + "uart_add_one_port() failed; err=%i\n", rc); + dev_set_drvdata(&pdev->dev, NULL); + return rc; + } + return 0; + } +} + +/** + * xuartps_remove - called when the platform driver is unregistered + * @pdev: Pointer to the platform device structure + * + * Returns 0 on success, negative error otherwise + **/ +static int __devexit xuartps_remove(struct platform_device *pdev) +{ + struct uart_port *port = dev_get_drvdata(&pdev->dev); + int rc = 0; + + /* Remove the xuartps port from the serial core */ + if (port) { + rc = uart_remove_one_port(&xuartps_uart_driver, port); + dev_set_drvdata(&pdev->dev, NULL); + port->mapbase = 0; + } + return rc; +} + +/** + * xuartps_suspend - suspend event + * @pdev: Pointer to the platform device structure + * @state: State of the device + * + * Returns 0 + **/ +static int xuartps_suspend(struct platform_device *pdev, pm_message_t state) +{ + /* Call the API provided in serial_core.c file which handles + * the suspend. 
+ */ + uart_suspend_port(&xuartps_uart_driver, &xuartps_port[pdev->id]); + return 0; +} + +/** + * xuartps_resume - Resume after a previous suspend + * @pdev: Pointer to the platform device structure + * + * Returns 0 + **/ +static int xuartps_resume(struct platform_device *pdev) +{ + uart_resume_port(&xuartps_uart_driver, &xuartps_port[pdev->id]); + return 0; +} + +/* Match table for of_platform binding */ + +#ifdef CONFIG_OF +static struct of_device_id xuartps_of_match[] __devinitdata = { + { .compatible = "xlnx,xuartps", }, + {} +}; +MODULE_DEVICE_TABLE(of, xuartps_of_match); +#else +#define xuartps_of_match NULL +#endif + +static struct platform_driver xuartps_platform_driver = { + .probe = xuartps_probe, /* Probe method */ + .remove = __exit_p(xuartps_remove), /* Detach method */ + .suspend = xuartps_suspend, /* Suspend */ + .resume = xuartps_resume, /* Resume after a suspend */ + .driver = { + .owner = THIS_MODULE, + .name = XUARTPS_NAME, /* Driver name */ + .of_match_table = xuartps_of_match, + }, +}; + +/* --------------------------------------------------------------------- + * Module Init and Exit + */ +/** + * xuartps_init - Initial driver registration call + * + * Returns whether the registration was successful or not + **/ +static int __init xuartps_init(void) +{ + int retval = 0; + + /* Register the xuartps driver with the serial core */ + retval = uart_register_driver(&xuartps_uart_driver); + if (retval) + return retval; + + /* Register the platform driver */ + retval = platform_driver_register(&xuartps_platform_driver); + if (retval) + uart_unregister_driver(&xuartps_uart_driver); + + return retval; +} + +/** + * xuartps_exit - Driver unregistration call + **/ +static void __exit xuartps_exit(void) +{ + /* The order of unregistration is important. Unregister the + * UART driver before the platform driver crashes the system. + */ + + /* Unregister the platform driver */ + platform_driver_unregister(&xuartps_platform_driver); + + /* Unregister the xuartps driver */ + uart_unregister_driver(&xuartps_uart_driver); +} + +module_init(xuartps_init); +module_exit(xuartps_exit); + +MODULE_DESCRIPTION("Driver for PS UART"); +MODULE_AUTHOR("Xilinx Inc."); +MODULE_LICENSE("GPL"); diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 758c5b0c6fd3..95d479ba514e 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -202,6 +202,9 @@ /* VIA VT8500 SoC */ #define PORT_VT8500 97 +/* Xilinx PSS UART */ +#define PORT_XUARTPS 98 + #ifdef __KERNEL__ #include -- cgit v1.2.3-71-gd317 From c430131a02d677aa708f56342c1565edfdacb3c0 Mon Sep 17 00:00:00 2001 From: Jan Andersson Date: Tue, 3 May 2011 20:11:57 +0200 Subject: USB: EHCI: Support controllers with big endian capability regs The two first HC capability registers (CAPLENGTH and HCIVERSION) are defined as one 8-bit and one 16-bit register. Most HC implementations have selected to treat these registers as part of a 32-bit register, giving the same layout for both big and small endian systems. This patch adds a new quirk, big_endian_capbase, to support controllers with big endian register interfaces that treat HCIVERSION and CAPLENGTH as individual registers. 
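As a rough sketch of the idea (not the exact macros in ehci.h/ehci_def.h, and with made-up names), the quirk changes where in the 32-bit capability word CAPLENGTH and HCIVERSION are extracted from:

/*
 * Illustrative macros only. When the controller implements the
 * capability registers big-endian, a 32-bit read places CAPLENGTH in
 * bits 31:24 and HCIVERSION in bits 15:0, so the extraction shift must
 * depend on the big_endian_capbase quirk.
 */
#define EHCI_CAPLENGTH(big_endian_capbase, capbase) \
	(0x00ff & ((capbase) >> ((big_endian_capbase) ? 24 : 0)))

#define EHCI_HCIVERSION(big_endian_capbase, capbase) \
	(0xffff & ((capbase) >> ((big_endian_capbase) ? 0 : 16)))
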
Signed-off-by: Jan Andersson Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/early/ehci-dbgp.c | 5 ++++- drivers/usb/host/ehci-ath79.c | 8 ++++---- drivers/usb/host/ehci-atmel.c | 2 +- drivers/usb/host/ehci-au1xxx.c | 3 ++- drivers/usb/host/ehci-cns3xxx.c | 2 +- drivers/usb/host/ehci-dbg.c | 2 +- drivers/usb/host/ehci-fsl.c | 2 +- drivers/usb/host/ehci-hcd.c | 2 +- drivers/usb/host/ehci-ixp4xx.c | 2 +- drivers/usb/host/ehci-msm.c | 2 +- drivers/usb/host/ehci-mxc.c | 2 +- drivers/usb/host/ehci-octeon.c | 2 +- drivers/usb/host/ehci-omap.c | 2 +- drivers/usb/host/ehci-orion.c | 2 +- drivers/usb/host/ehci-pci.c | 2 +- drivers/usb/host/ehci-pmcmsp.c | 2 +- drivers/usb/host/ehci-ppc-of.c | 2 +- drivers/usb/host/ehci-ps3.c | 2 +- drivers/usb/host/ehci-s5p.c | 3 ++- drivers/usb/host/ehci-sh.c | 2 +- drivers/usb/host/ehci-spear.c | 2 +- drivers/usb/host/ehci-tegra.c | 2 +- drivers/usb/host/ehci-vt8500.c | 3 ++- drivers/usb/host/ehci-w90x900.c | 2 +- drivers/usb/host/ehci-xilinx-of.c | 2 +- drivers/usb/host/ehci.h | 7 +++++++ include/linux/usb/ehci_def.h | 9 +++++++-- 27 files changed, 48 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/early/ehci-dbgp.c b/drivers/usb/early/ehci-dbgp.c index a6a350f5827b..1fc8f1249806 100644 --- a/drivers/usb/early/ehci-dbgp.c +++ b/drivers/usb/early/ehci-dbgp.c @@ -102,6 +102,9 @@ static struct kgdb_io kgdbdbgp_io_ops; #define dbgp_kgdb_mode (0) #endif +/* Local version of HC_LENGTH macro as ehci struct is not available here */ +#define EARLY_HC_LENGTH(p) (0x00ff & (p)) /* bits 7 : 0 */ + /* * USB Packet IDs (PIDs) */ @@ -892,7 +895,7 @@ int __init early_dbgp_init(char *s) dbgp_printk("ehci_bar: %p\n", ehci_bar); ehci_caps = ehci_bar; - ehci_regs = ehci_bar + HC_LENGTH(readl(&ehci_caps->hc_capbase)); + ehci_regs = ehci_bar + EARLY_HC_LENGTH(readl(&ehci_caps->hc_capbase)); ehci_debug = ehci_bar + offset; ehci_dev.bus = bus; ehci_dev.slot = slot; diff --git a/drivers/usb/host/ehci-ath79.c b/drivers/usb/host/ehci-ath79.c index 7ea23b50f5d8..98cc8a13169c 100644 --- a/drivers/usb/host/ehci-ath79.c +++ b/drivers/usb/host/ehci-ath79.c @@ -44,6 +44,7 @@ static int ehci_ath79_init(struct usb_hcd *hcd) struct ehci_hcd *ehci = hcd_to_ehci(hcd); struct platform_device *pdev = to_platform_device(hcd->self.controller); const struct platform_device_id *id; + int hclength; int ret; id = platform_get_device_id(pdev); @@ -52,21 +53,20 @@ static int ehci_ath79_init(struct usb_hcd *hcd) return -EINVAL; } + hclength = HC_LENGTH(ehci, ehci_readl(ehci, &ehci->caps->hc_capbase)); switch (id->driver_data) { case EHCI_ATH79_IP_V1: ehci->has_synopsys_hc_bug = 1; ehci->caps = hcd->regs; - ehci->regs = hcd->regs + - HC_LENGTH(ehci_readl(ehci, &ehci->caps->hc_capbase)); + ehci->regs = hcd->regs + hclength; break; case EHCI_ATH79_IP_V2: hcd->has_tt = 1; ehci->caps = hcd->regs + 0x100; - ehci->regs = hcd->regs + 0x100 + - HC_LENGTH(ehci_readl(ehci, &ehci->caps->hc_capbase)); + ehci->regs = hcd->regs + 0x100 + hclength; break; default: diff --git a/drivers/usb/host/ehci-atmel.c b/drivers/usb/host/ehci-atmel.c index b2ed55cb811d..a5a3ef1f0096 100644 --- a/drivers/usb/host/ehci-atmel.c +++ b/drivers/usb/host/ehci-atmel.c @@ -56,7 +56,7 @@ static int ehci_atmel_setup(struct usb_hcd *hcd) /* registers start at offset 0x0 */ ehci->caps = hcd->regs; ehci->regs = hcd->regs + - HC_LENGTH(ehci_readl(ehci, &ehci->caps->hc_capbase)); + HC_LENGTH(ehci, ehci_readl(ehci, &ehci->caps->hc_capbase)); dbg_hcs_params(ehci, "reset"); dbg_hcc_params(ehci, 
"reset"); diff --git a/drivers/usb/host/ehci-au1xxx.c b/drivers/usb/host/ehci-au1xxx.c index a869e3c103d3..40b002869ac2 100644 --- a/drivers/usb/host/ehci-au1xxx.c +++ b/drivers/usb/host/ehci-au1xxx.c @@ -175,7 +175,8 @@ static int ehci_hcd_au1xxx_drv_probe(struct platform_device *pdev) ehci = hcd_to_ehci(hcd); ehci->caps = hcd->regs; - ehci->regs = hcd->regs + HC_LENGTH(readl(&ehci->caps->hc_capbase)); + ehci->regs = hcd->regs + + HC_LENGTH(ehci, readl(&ehci->caps->hc_capbase)); /* cache this readonly data; minimize chip reads */ ehci->hcs_params = readl(&ehci->caps->hcs_params); diff --git a/drivers/usb/host/ehci-cns3xxx.c b/drivers/usb/host/ehci-cns3xxx.c index 708a05b5d258..d41745c6f0c4 100644 --- a/drivers/usb/host/ehci-cns3xxx.c +++ b/drivers/usb/host/ehci-cns3xxx.c @@ -34,7 +34,7 @@ static int cns3xxx_ehci_init(struct usb_hcd *hcd) ehci->caps = hcd->regs; ehci->regs = hcd->regs - + HC_LENGTH(ehci_readl(ehci, &ehci->caps->hc_capbase)); + + HC_LENGTH(ehci, ehci_readl(ehci, &ehci->caps->hc_capbase)); ehci->hcs_params = ehci_readl(ehci, &ehci->caps->hcs_params); hcd->has_tt = 0; diff --git a/drivers/usb/host/ehci-dbg.c b/drivers/usb/host/ehci-dbg.c index 693c29b30521..40a844c1dbb4 100644 --- a/drivers/usb/host/ehci-dbg.c +++ b/drivers/usb/host/ehci-dbg.c @@ -726,7 +726,7 @@ static ssize_t fill_registers_buffer(struct debug_buffer *buf) } /* Capability Registers */ - i = HC_VERSION(ehci_readl(ehci, &ehci->caps->hc_capbase)); + i = HC_VERSION(ehci, ehci_readl(ehci, &ehci->caps->hc_capbase)); temp = scnprintf (next, size, "bus %s, device %s\n" "%s\n" diff --git a/drivers/usb/host/ehci-fsl.c b/drivers/usb/host/ehci-fsl.c index 623732a312dd..f380bf97e5af 100644 --- a/drivers/usb/host/ehci-fsl.c +++ b/drivers/usb/host/ehci-fsl.c @@ -324,7 +324,7 @@ static int ehci_fsl_setup(struct usb_hcd *hcd) /* EHCI registers start at offset 0x100 */ ehci->caps = hcd->regs + 0x100; ehci->regs = hcd->regs + 0x100 + - HC_LENGTH(ehci_readl(ehci, &ehci->caps->hc_capbase)); + HC_LENGTH(ehci, ehci_readl(ehci, &ehci->caps->hc_capbase)); dbg_hcs_params(ehci, "reset"); dbg_hcc_params(ehci, "reset"); diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c index 83b7d5f02a15..8164ffafd10a 100644 --- a/drivers/usb/host/ehci-hcd.c +++ b/drivers/usb/host/ehci-hcd.c @@ -739,7 +739,7 @@ static int ehci_run (struct usb_hcd *hcd) up_write(&ehci_cf_port_reset_rwsem); ehci->last_periodic_enable = ktime_get_real(); - temp = HC_VERSION(ehci_readl(ehci, &ehci->caps->hc_capbase)); + temp = HC_VERSION(ehci, ehci_readl(ehci, &ehci->caps->hc_capbase)); ehci_info (ehci, "USB %x.%x started, EHCI %x.%02x%s\n", ((ehci->sbrn & 0xf0)>>4), (ehci->sbrn & 0x0f), diff --git a/drivers/usb/host/ehci-ixp4xx.c b/drivers/usb/host/ehci-ixp4xx.c index 89b7c70c6ed6..50e600d26e28 100644 --- a/drivers/usb/host/ehci-ixp4xx.c +++ b/drivers/usb/host/ehci-ixp4xx.c @@ -23,7 +23,7 @@ static int ixp4xx_ehci_init(struct usb_hcd *hcd) ehci->caps = hcd->regs + 0x100; ehci->regs = hcd->regs + 0x100 - + HC_LENGTH(ehci_readl(ehci, &ehci->caps->hc_capbase)); + + HC_LENGTH(ehci, ehci_readl(ehci, &ehci->caps->hc_capbase)); ehci->hcs_params = ehci_readl(ehci, &ehci->caps->hcs_params); hcd->has_tt = 1; diff --git a/drivers/usb/host/ehci-msm.c b/drivers/usb/host/ehci-msm.c index 9ce1b0bc186d..b5a0bf649c95 100644 --- a/drivers/usb/host/ehci-msm.c +++ b/drivers/usb/host/ehci-msm.c @@ -41,7 +41,7 @@ static int ehci_msm_reset(struct usb_hcd *hcd) ehci->caps = USB_CAPLENGTH; ehci->regs = USB_CAPLENGTH + - HC_LENGTH(ehci_readl(ehci, &ehci->caps->hc_capbase)); + 
HC_LENGTH(ehci, ehci_readl(ehci, &ehci->caps->hc_capbase)); dbg_hcs_params(ehci, "reset"); dbg_hcc_params(ehci, "reset"); diff --git a/drivers/usb/host/ehci-mxc.c b/drivers/usb/host/ehci-mxc.c index 25c8c10bb689..0c058be35a38 100644 --- a/drivers/usb/host/ehci-mxc.c +++ b/drivers/usb/host/ehci-mxc.c @@ -208,7 +208,7 @@ static int ehci_mxc_drv_probe(struct platform_device *pdev) /* EHCI registers start at offset 0x100 */ ehci->caps = hcd->regs + 0x100; ehci->regs = hcd->regs + 0x100 + - HC_LENGTH(ehci_readl(ehci, &ehci->caps->hc_capbase)); + HC_LENGTH(ehci, ehci_readl(ehci, &ehci->caps->hc_capbase)); /* set up the PORTSCx register */ ehci_writel(ehci, pdata->portsc, &ehci->regs->port_status[0]); diff --git a/drivers/usb/host/ehci-octeon.c b/drivers/usb/host/ehci-octeon.c index a31a031178a8..ff55757ba7d8 100644 --- a/drivers/usb/host/ehci-octeon.c +++ b/drivers/usb/host/ehci-octeon.c @@ -151,7 +151,7 @@ static int ehci_octeon_drv_probe(struct platform_device *pdev) ehci->caps = hcd->regs; ehci->regs = hcd->regs + - HC_LENGTH(ehci_readl(ehci, &ehci->caps->hc_capbase)); + HC_LENGTH(ehci, ehci_readl(ehci, &ehci->caps->hc_capbase)); /* cache this readonly data; minimize chip reads */ ehci->hcs_params = ehci_readl(ehci, &ehci->caps->hcs_params); diff --git a/drivers/usb/host/ehci-omap.c b/drivers/usb/host/ehci-omap.c index 7e41a95c5ceb..3c482dc99ece 100644 --- a/drivers/usb/host/ehci-omap.c +++ b/drivers/usb/host/ehci-omap.c @@ -188,7 +188,7 @@ static int ehci_hcd_omap_probe(struct platform_device *pdev) /* we know this is the memory we want, no need to ioremap again */ omap_ehci->caps = hcd->regs; omap_ehci->regs = hcd->regs - + HC_LENGTH(readl(&omap_ehci->caps->hc_capbase)); + + HC_LENGTH(ehci, readl(&omap_ehci->caps->hc_capbase)); dbg_hcs_params(omap_ehci, "reset"); dbg_hcc_params(omap_ehci, "reset"); diff --git a/drivers/usb/host/ehci-orion.c b/drivers/usb/host/ehci-orion.c index 281e094e1c18..395bdb0248d5 100644 --- a/drivers/usb/host/ehci-orion.c +++ b/drivers/usb/host/ehci-orion.c @@ -251,7 +251,7 @@ static int __devinit ehci_orion_drv_probe(struct platform_device *pdev) ehci = hcd_to_ehci(hcd); ehci->caps = hcd->regs + 0x100; ehci->regs = hcd->regs + 0x100 + - HC_LENGTH(ehci_readl(ehci, &ehci->caps->hc_capbase)); + HC_LENGTH(ehci, ehci_readl(ehci, &ehci->caps->hc_capbase)); ehci->hcs_params = ehci_readl(ehci, &ehci->caps->hcs_params); hcd->has_tt = 1; ehci->sbrn = 0x20; diff --git a/drivers/usb/host/ehci-pci.c b/drivers/usb/host/ehci-pci.c index d5eaea7caf89..660b80a75cac 100644 --- a/drivers/usb/host/ehci-pci.c +++ b/drivers/usb/host/ehci-pci.c @@ -70,7 +70,7 @@ static int ehci_pci_setup(struct usb_hcd *hcd) ehci->caps = hcd->regs; ehci->regs = hcd->regs + - HC_LENGTH(ehci_readl(ehci, &ehci->caps->hc_capbase)); + HC_LENGTH(ehci, ehci_readl(ehci, &ehci->caps->hc_capbase)); dbg_hcs_params(ehci, "reset"); dbg_hcc_params(ehci, "reset"); diff --git a/drivers/usb/host/ehci-pmcmsp.c b/drivers/usb/host/ehci-pmcmsp.c index a2168642175b..cd69099cda19 100644 --- a/drivers/usb/host/ehci-pmcmsp.c +++ b/drivers/usb/host/ehci-pmcmsp.c @@ -83,7 +83,7 @@ static int ehci_msp_setup(struct usb_hcd *hcd) ehci->caps = hcd->regs; ehci->regs = hcd->regs + - HC_LENGTH(ehci_readl(ehci, &ehci->caps->hc_capbase)); + HC_LENGTH(ehci, ehci_readl(ehci, &ehci->caps->hc_capbase)); dbg_hcs_params(ehci, "reset"); dbg_hcc_params(ehci, "reset"); diff --git a/drivers/usb/host/ehci-ppc-of.c b/drivers/usb/host/ehci-ppc-of.c index 1f09f253697e..8552db6c29c9 100644 --- a/drivers/usb/host/ehci-ppc-of.c +++ 
b/drivers/usb/host/ehci-ppc-of.c @@ -179,7 +179,7 @@ static int __devinit ehci_hcd_ppc_of_probe(struct platform_device *op) ehci->caps = hcd->regs; ehci->regs = hcd->regs + - HC_LENGTH(ehci_readl(ehci, &ehci->caps->hc_capbase)); + HC_LENGTH(ehci, ehci_readl(ehci, &ehci->caps->hc_capbase)); /* cache this readonly data; minimize chip reads */ ehci->hcs_params = ehci_readl(ehci, &ehci->caps->hcs_params); diff --git a/drivers/usb/host/ehci-ps3.c b/drivers/usb/host/ehci-ps3.c index 1dee33b9139e..64626a777d61 100644 --- a/drivers/usb/host/ehci-ps3.c +++ b/drivers/usb/host/ehci-ps3.c @@ -29,7 +29,7 @@ static int ps3_ehci_hc_reset(struct usb_hcd *hcd) ehci->big_endian_mmio = 1; ehci->caps = hcd->regs; - ehci->regs = hcd->regs + HC_LENGTH(ehci_readl(ehci, + ehci->regs = hcd->regs + HC_LENGTH(ehci, ehci_readl(ehci, &ehci->caps->hc_capbase)); dbg_hcs_params(ehci, "reset"); diff --git a/drivers/usb/host/ehci-s5p.c b/drivers/usb/host/ehci-s5p.c index 0c18f280bf4c..321a03301ad2 100644 --- a/drivers/usb/host/ehci-s5p.c +++ b/drivers/usb/host/ehci-s5p.c @@ -126,7 +126,8 @@ static int s5p_ehci_probe(struct platform_device *pdev) ehci = hcd_to_ehci(hcd); ehci->caps = hcd->regs; - ehci->regs = hcd->regs + HC_LENGTH(readl(&ehci->caps->hc_capbase)); + ehci->regs = hcd->regs + + HC_LENGTH(ehci, readl(&ehci->caps->hc_capbase)); dbg_hcs_params(ehci, "reset"); dbg_hcc_params(ehci, "reset"); diff --git a/drivers/usb/host/ehci-sh.c b/drivers/usb/host/ehci-sh.c index 595f70f42b52..86a95bb80a61 100644 --- a/drivers/usb/host/ehci-sh.c +++ b/drivers/usb/host/ehci-sh.c @@ -23,7 +23,7 @@ static int ehci_sh_reset(struct usb_hcd *hcd) int ret; ehci->caps = hcd->regs; - ehci->regs = hcd->regs + HC_LENGTH(ehci_readl(ehci, + ehci->regs = hcd->regs + HC_LENGTH(ehci, ehci_readl(ehci, &ehci->caps->hc_capbase)); dbg_hcs_params(ehci, "reset"); diff --git a/drivers/usb/host/ehci-spear.c b/drivers/usb/host/ehci-spear.c index 75c00873443d..dbf1e4ef3c17 100644 --- a/drivers/usb/host/ehci-spear.c +++ b/drivers/usb/host/ehci-spear.c @@ -38,7 +38,7 @@ static int ehci_spear_setup(struct usb_hcd *hcd) /* registers start at offset 0x0 */ ehci->caps = hcd->regs; - ehci->regs = hcd->regs + HC_LENGTH(ehci_readl(ehci, + ehci->regs = hcd->regs + HC_LENGTH(ehci, ehci_readl(ehci, &ehci->caps->hc_capbase)); /* cache this readonly data; minimize chip reads */ ehci->hcs_params = ehci_readl(ehci, &ehci->caps->hcs_params); diff --git a/drivers/usb/host/ehci-tegra.c b/drivers/usb/host/ehci-tegra.c index 7359bcbe4176..02b2bfd49a10 100644 --- a/drivers/usb/host/ehci-tegra.c +++ b/drivers/usb/host/ehci-tegra.c @@ -400,7 +400,7 @@ static int tegra_ehci_setup(struct usb_hcd *hcd) /* EHCI registers start at offset 0x100 */ ehci->caps = hcd->regs + 0x100; ehci->regs = hcd->regs + 0x100 + - HC_LENGTH(readl(&ehci->caps->hc_capbase)); + HC_LENGTH(ehci, readl(&ehci->caps->hc_capbase)); dbg_hcs_params(ehci, "reset"); dbg_hcc_params(ehci, "reset"); diff --git a/drivers/usb/host/ehci-vt8500.c b/drivers/usb/host/ehci-vt8500.c index 20168062035a..47d749631bc7 100644 --- a/drivers/usb/host/ehci-vt8500.c +++ b/drivers/usb/host/ehci-vt8500.c @@ -121,7 +121,8 @@ static int vt8500_ehci_drv_probe(struct platform_device *pdev) ehci = hcd_to_ehci(hcd); ehci->caps = hcd->regs; - ehci->regs = hcd->regs + HC_LENGTH(readl(&ehci->caps->hc_capbase)); + ehci->regs = hcd->regs + + HC_LENGTH(ehci, readl(&ehci->caps->hc_capbase)); dbg_hcs_params(ehci, "reset"); dbg_hcc_params(ehci, "reset"); diff --git a/drivers/usb/host/ehci-w90x900.c b/drivers/usb/host/ehci-w90x900.c index 
6bc35809a5c6..52a027aaa370 100644 --- a/drivers/usb/host/ehci-w90x900.c +++ b/drivers/usb/host/ehci-w90x900.c @@ -57,7 +57,7 @@ static int __devinit usb_w90x900_probe(const struct hc_driver *driver, ehci = hcd_to_ehci(hcd); ehci->caps = hcd->regs; ehci->regs = hcd->regs + - HC_LENGTH(ehci_readl(ehci, &ehci->caps->hc_capbase)); + HC_LENGTH(ehci, ehci_readl(ehci, &ehci->caps->hc_capbase)); /* enable PHY 0,1,the regs only apply to w90p910 * 0xA4,0xA8 were offsets of PHY0 and PHY1 controller of diff --git a/drivers/usb/host/ehci-xilinx-of.c b/drivers/usb/host/ehci-xilinx-of.c index effc58d7af8b..a64d6d66d760 100644 --- a/drivers/usb/host/ehci-xilinx-of.c +++ b/drivers/usb/host/ehci-xilinx-of.c @@ -220,7 +220,7 @@ static int __devinit ehci_hcd_xilinx_of_probe(struct platform_device *op) */ ehci->caps = hcd->regs + 0x100; ehci->regs = hcd->regs + 0x100 + - HC_LENGTH(ehci_readl(ehci, &ehci->caps->hc_capbase)); + HC_LENGTH(ehci, ehci_readl(ehci, &ehci->caps->hc_capbase)); /* cache this readonly data; minimize chip reads */ ehci->hcs_params = ehci_readl(ehci, &ehci->caps->hcs_params); diff --git a/drivers/usb/host/ehci.h b/drivers/usb/host/ehci.h index e9ba8e252489..d0792f591590 100644 --- a/drivers/usb/host/ehci.h +++ b/drivers/usb/host/ehci.h @@ -128,6 +128,7 @@ struct ehci_hcd { /* one per controller */ unsigned has_fsl_port_bug:1; /* FreeScale */ unsigned big_endian_mmio:1; unsigned big_endian_desc:1; + unsigned big_endian_capbase:1; unsigned has_amcc_usb23:1; unsigned need_io_watchdog:1; unsigned broken_periodic:1; @@ -605,12 +606,18 @@ ehci_port_speed(struct ehci_hcd *ehci, unsigned int portsc) * This attempts to support either format at compile time without a * runtime penalty, or both formats with the additional overhead * of checking a flag bit. + * + * ehci_big_endian_capbase is a special quirk for controllers that + * implement the HC capability registers as separate registers and not + * as fields of a 32-bit register. */ #ifdef CONFIG_USB_EHCI_BIG_ENDIAN_MMIO #define ehci_big_endian_mmio(e) ((e)->big_endian_mmio) +#define ehci_big_endian_capbase(e) ((e)->big_endian_capbase) #else #define ehci_big_endian_mmio(e) 0 +#define ehci_big_endian_capbase(e) 0 #endif /* diff --git a/include/linux/usb/ehci_def.h b/include/linux/usb/ehci_def.h index 78799432008e..7cc95ee3606b 100644 --- a/include/linux/usb/ehci_def.h +++ b/include/linux/usb/ehci_def.h @@ -25,10 +25,15 @@ struct ehci_caps { /* these fields are specified as 8 and 16 bit registers, * but some hosts can't perform 8 or 16 bit PCI accesses. + * some hosts treat caplength and hciversion as parts of a 32-bit + * register, others treat them as two separate registers, this + * affects the memory map for big endian controllers. */ u32 hc_capbase; -#define HC_LENGTH(p) (((p)>>00)&0x00ff) /* bits 7:0 */ -#define HC_VERSION(p) (((p)>>16)&0xffff) /* bits 31:16 */ +#define HC_LENGTH(ehci, p) (0x00ff&((p) >> /* bits 7:0 / offset 00h */ \ + (ehci_big_endian_capbase(ehci) ? 24 : 0))) +#define HC_VERSION(ehci, p) (0xffff&((p) >> /* bits 31:16 / offset 02h */ \ + (ehci_big_endian_capbase(ehci) ? 0 : 16))) u32 hcs_params; /* HCSPARAMS - offset 0x4 */ #define HCS_DEBUG_PORT(p) (((p)>>20)&0xf) /* bits 23:20, debug port? 
*/ #define HCS_INDICATOR(p) ((p)&(1 << 16)) /* true: has port indicators */ -- cgit v1.2.3-71-gd317 From 57a503c61db077b923e23f36050c02166a4a1db2 Mon Sep 17 00:00:00 2001 From: Viresh KUMAR Date: Mon, 2 May 2011 18:36:45 +0000 Subject: net/stmmac: Move "#include " to linux/stmmac.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stmmac.h uses struct platform_device and doesn't include . Whereas drivers/net/stmmac/stmmac.h includes it, but doesn't directly use it. And so we get following compilation warning while using this file: warning: ‘struct platform_device’ declared inside parameter list This patch includes in linux/stmmac.h and removes it from drivers/net/stmmac/stmmac.h Signed-off-by: Viresh Kumar Acked-by: Giuseppe Cavallaro Signed-off-by: David S. Miller --- drivers/net/stmmac/stmmac.h | 1 - include/linux/stmmac.h | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/stmmac/stmmac.h b/drivers/net/stmmac/stmmac.h index 5f06c4706abe..2b076b313622 100644 --- a/drivers/net/stmmac/stmmac.h +++ b/drivers/net/stmmac/stmmac.h @@ -21,7 +21,6 @@ *******************************************************************************/ #define DRV_MODULE_VERSION "Nov_2010" -#include #include #include "common.h" diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index f29197a4b227..9529e49b0385 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -26,6 +26,8 @@ #ifndef __STMMAC_PLATFORM_DATA #define __STMMAC_PLATFORM_DATA +#include + /* platform data for platform device structure's platform_data field */ /* Private data for the STM on-board ethernet driver */ -- cgit v1.2.3-71-gd317 From e7e7ee2eab2080248084d71fe0a115ab745eb2aa Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 4 May 2011 08:42:29 +0200 Subject: perf events: Clean up definitions and initializers, update copyrights Fix a few inconsistent style bits that were added over the past few months. Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-yv4hwf9yhnzoada8pcpb3a97@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 96 +++++++++++++++++++++------------------------- kernel/events/core.c | 40 +++++++++---------- 2 files changed, 64 insertions(+), 72 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 9eec53d97370..207c16976a17 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -2,8 +2,8 @@ * Performance events: * * Copyright (C) 2008-2009, Thomas Gleixner - * Copyright (C) 2008-2009, Red Hat, Inc., Ingo Molnar - * Copyright (C) 2008-2009, Red Hat, Inc., Peter Zijlstra + * Copyright (C) 2008-2011, Red Hat, Inc., Ingo Molnar + * Copyright (C) 2008-2011, Red Hat, Inc., Peter Zijlstra * * Data type definitions, declarations, prototypes. 
* @@ -468,9 +468,9 @@ enum perf_callchain_context { PERF_CONTEXT_MAX = (__u64)-4095, }; -#define PERF_FLAG_FD_NO_GROUP (1U << 0) -#define PERF_FLAG_FD_OUTPUT (1U << 1) -#define PERF_FLAG_PID_CGROUP (1U << 2) /* pid=cgroup id, per-cpu mode only */ +#define PERF_FLAG_FD_NO_GROUP (1U << 0) +#define PERF_FLAG_FD_OUTPUT (1U << 1) +#define PERF_FLAG_PID_CGROUP (1U << 2) /* pid=cgroup id, per-cpu mode only */ #ifdef __KERNEL__ /* @@ -484,9 +484,9 @@ enum perf_callchain_context { #endif struct perf_guest_info_callbacks { - int (*is_in_guest) (void); - int (*is_user_mode) (void); - unsigned long (*get_guest_ip) (void); + int (*is_in_guest)(void); + int (*is_user_mode)(void); + unsigned long (*get_guest_ip)(void); }; #ifdef CONFIG_HAVE_HW_BREAKPOINT @@ -652,19 +652,19 @@ struct pmu { * Start the transaction, after this ->add() doesn't need to * do schedulability tests. */ - void (*start_txn) (struct pmu *pmu); /* optional */ + void (*start_txn) (struct pmu *pmu); /* optional */ /* * If ->start_txn() disabled the ->add() schedulability test * then ->commit_txn() is required to perform one. On success * the transaction is closed. On error the transaction is kept * open until ->cancel_txn() is called. */ - int (*commit_txn) (struct pmu *pmu); /* optional */ + int (*commit_txn) (struct pmu *pmu); /* optional */ /* * Will cancel the transaction, assumes ->del() is called * for each successful ->add() during the transaction. */ - void (*cancel_txn) (struct pmu *pmu); /* optional */ + void (*cancel_txn) (struct pmu *pmu); /* optional */ }; /** @@ -712,15 +712,15 @@ typedef void (*perf_overflow_handler_t)(struct perf_event *, int, struct pt_regs *regs); enum perf_group_flag { - PERF_GROUP_SOFTWARE = 0x1, + PERF_GROUP_SOFTWARE = 0x1, }; -#define SWEVENT_HLIST_BITS 8 -#define SWEVENT_HLIST_SIZE (1 << SWEVENT_HLIST_BITS) +#define SWEVENT_HLIST_BITS 8 +#define SWEVENT_HLIST_SIZE (1 << SWEVENT_HLIST_BITS) struct swevent_hlist { - struct hlist_head heads[SWEVENT_HLIST_SIZE]; - struct rcu_head rcu_head; + struct hlist_head heads[SWEVENT_HLIST_SIZE]; + struct rcu_head rcu_head; }; #define PERF_ATTACH_CONTEXT 0x01 @@ -733,13 +733,13 @@ struct swevent_hlist { * This is a per-cpu dynamically allocated data structure. */ struct perf_cgroup_info { - u64 time; - u64 timestamp; + u64 time; + u64 timestamp; }; struct perf_cgroup { - struct cgroup_subsys_state css; - struct perf_cgroup_info *info; /* timing info, one per cpu */ + struct cgroup_subsys_state css; + struct perf_cgroup_info *info; /* timing info, one per cpu */ }; #endif @@ -923,7 +923,7 @@ struct perf_event_context { /* * Number of contexts where an event can trigger: - * task, softirq, hardirq, nmi. + * task, softirq, hardirq, nmi. 
*/ #define PERF_NR_CONTEXTS 4 @@ -1001,8 +1001,7 @@ struct perf_sample_data { struct perf_raw_record *raw; }; -static inline -void perf_sample_data_init(struct perf_sample_data *data, u64 addr) +static inline void perf_sample_data_init(struct perf_sample_data *data, u64 addr) { data->addr = addr; data->raw = NULL; @@ -1039,8 +1038,7 @@ extern struct jump_label_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64); #ifndef perf_arch_fetch_caller_regs -static inline void -perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { } +static inline void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { } #endif /* @@ -1080,8 +1078,7 @@ static inline void perf_event_task_sched_in(struct task_struct *task) __perf_event_task_sched_in(task); } -static inline -void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next) +static inline void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next) { perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0); @@ -1099,14 +1096,10 @@ extern void perf_event_fork(struct task_struct *tsk); /* Callchains */ DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry); -extern void perf_callchain_user(struct perf_callchain_entry *entry, - struct pt_regs *regs); -extern void perf_callchain_kernel(struct perf_callchain_entry *entry, - struct pt_regs *regs); - +extern void perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs); +extern void perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs); -static inline void -perf_callchain_store(struct perf_callchain_entry *entry, u64 ip) +static inline void perf_callchain_store(struct perf_callchain_entry *entry, u64 ip) { if (entry->nr < PERF_MAX_STACK_DEPTH) entry->ip[entry->nr++] = ip; @@ -1142,9 +1135,9 @@ extern void perf_tp_event(u64 addr, u64 count, void *record, extern void perf_bp_event(struct perf_event *event, void *data); #ifndef perf_misc_flags -#define perf_misc_flags(regs) (user_mode(regs) ? PERF_RECORD_MISC_USER : \ - PERF_RECORD_MISC_KERNEL) -#define perf_instruction_pointer(regs) instruction_pointer(regs) +# define perf_misc_flags(regs) \ + (user_mode(regs) ? PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL) +# define perf_instruction_pointer(regs) instruction_pointer(regs) #endif extern int perf_output_begin(struct perf_output_handle *handle, @@ -1179,9 +1172,9 @@ static inline void perf_bp_event(struct perf_event *event, void *data) { } static inline int perf_register_guest_info_callbacks -(struct perf_guest_info_callbacks *callbacks) { return 0; } +(struct perf_guest_info_callbacks *callbacks) { return 0; } static inline int perf_unregister_guest_info_callbacks -(struct perf_guest_info_callbacks *callbacks) { return 0; } +(struct perf_guest_info_callbacks *callbacks) { return 0; } static inline void perf_event_mmap(struct vm_area_struct *vma) { } static inline void perf_event_comm(struct task_struct *tsk) { } @@ -1194,23 +1187,22 @@ static inline void perf_event_disable(struct perf_event *event) { } static inline void perf_event_task_tick(void) { } #endif -#define perf_output_put(handle, x) \ - perf_output_copy((handle), &(x), sizeof(x)) +#define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x)) /* * This has to have a higher priority than migration_notifier in sched.c. 
*/ -#define perf_cpu_notifier(fn) \ -do { \ - static struct notifier_block fn##_nb __cpuinitdata = \ - { .notifier_call = fn, .priority = CPU_PRI_PERF }; \ - fn(&fn##_nb, (unsigned long)CPU_UP_PREPARE, \ - (void *)(unsigned long)smp_processor_id()); \ - fn(&fn##_nb, (unsigned long)CPU_STARTING, \ - (void *)(unsigned long)smp_processor_id()); \ - fn(&fn##_nb, (unsigned long)CPU_ONLINE, \ - (void *)(unsigned long)smp_processor_id()); \ - register_cpu_notifier(&fn##_nb); \ +#define perf_cpu_notifier(fn) \ +do { \ + static struct notifier_block fn##_nb __cpuinitdata = \ + { .notifier_call = fn, .priority = CPU_PRI_PERF }; \ + fn(&fn##_nb, (unsigned long)CPU_UP_PREPARE, \ + (void *)(unsigned long)smp_processor_id()); \ + fn(&fn##_nb, (unsigned long)CPU_STARTING, \ + (void *)(unsigned long)smp_processor_id()); \ + fn(&fn##_nb, (unsigned long)CPU_ONLINE, \ + (void *)(unsigned long)smp_processor_id()); \ + register_cpu_notifier(&fn##_nb); \ } while (0) #endif /* __KERNEL__ */ diff --git a/kernel/events/core.c b/kernel/events/core.c index 440bc485bbff..0fc34a370ba4 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2,8 +2,8 @@ * Performance events core code: * * Copyright (C) 2008 Thomas Gleixner - * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar - * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra + * Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar + * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra * Copyright © 2009 Paul Mackerras, IBM Corp. * * For licensing details see kernel-base/COPYING @@ -39,10 +39,10 @@ #include struct remote_function_call { - struct task_struct *p; - int (*func)(void *info); - void *info; - int ret; + struct task_struct *p; + int (*func)(void *info); + void *info; + int ret; }; static void remote_function(void *data) @@ -76,10 +76,10 @@ static int task_function_call(struct task_struct *p, int (*func) (void *info), void *info) { struct remote_function_call data = { - .p = p, - .func = func, - .info = info, - .ret = -ESRCH, /* No such (running) process */ + .p = p, + .func = func, + .info = info, + .ret = -ESRCH, /* No such (running) process */ }; if (task_curr(p)) @@ -100,10 +100,10 @@ task_function_call(struct task_struct *p, int (*func) (void *info), void *info) static int cpu_function_call(int cpu, int (*func) (void *info), void *info) { struct remote_function_call data = { - .p = NULL, - .func = func, - .info = info, - .ret = -ENXIO, /* No such CPU */ + .p = NULL, + .func = func, + .info = info, + .ret = -ENXIO, /* No such CPU */ }; smp_call_function_single(cpu, remote_function, &data, 1); @@ -7445,11 +7445,11 @@ static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp, } struct cgroup_subsys perf_subsys = { - .name = "perf_event", - .subsys_id = perf_subsys_id, - .create = perf_cgroup_create, - .destroy = perf_cgroup_destroy, - .exit = perf_cgroup_exit, - .attach = perf_cgroup_attach, + .name = "perf_event", + .subsys_id = perf_subsys_id, + .create = perf_cgroup_create, + .destroy = perf_cgroup_destroy, + .exit = perf_cgroup_exit, + .attach = perf_cgroup_attach, }; #endif /* CONFIG_CGROUP_PERF */ -- cgit v1.2.3-71-gd317 From 2d06d8c49afdcc9bb35a85039fa50f0fe35bd40e Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 27 Mar 2011 15:04:46 +0200 Subject: [CPUFREQ] use dynamic debug instead of custom infrastructure With dynamic debug having gained the capability to report debug messages also during the boot process, it offers a far superior interface for debug messages than the custom cpufreq infrastructure. 
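Illustrative aside (not part of this patch): a driver that relied on the custom debug macro for a name prefix can get the same prefix from a pr_fmt() definition once it calls pr_debug() directly, roughly like this.

/* Hypothetical example for a converted driver: keep the driver-name
 * prefix the old wrapper used to add. pr_fmt() must be defined before
 * any header that expands pr_debug()/printk().
 */
#define pr_fmt(fmt) "acpi-cpufreq: " fmt

#include <linux/kernel.h>

static void example(void)
{
	/* old: dprintk("acpi_cpufreq_init\n");  new: */
	pr_debug("acpi_cpufreq_init\n");	/* logged as "acpi-cpufreq: acpi_cpufreq_init" */
}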
As a first step, remove the old cpufreq_debug_printk() function and replace it with a call to the generic pr_debug() function. How can dynamic debug be used on cpufreq? You need a kernel which has CONFIG_DYNAMIC_DEBUG enabled. To enabled debugging during runtime, mount debugfs and $ echo -n 'module cpufreq +p' > /sys/kernel/debug/dynamic_debug/control for debugging the complete "cpufreq" module. To achieve the same goal during boot, append ddebug_query="module cpufreq +p" as a boot parameter to the kernel of your choice. For more detailled instructions, please see Documentation/dynamic-debug-howto.txt Signed-off-by: Dominik Brodowski Signed-off-by: Dave Jones --- arch/arm/mach-davinci/cpufreq.c | 4 +- arch/blackfin/mach-common/dpmc.c | 3 - arch/ia64/kernel/cpufreq/acpi-cpufreq.c | 44 +++--- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 45 +++--- arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c | 6 +- arch/x86/kernel/cpu/cpufreq/gx-suspmod.c | 21 ++- arch/x86/kernel/cpu/cpufreq/longhaul.c | 11 +- arch/x86/kernel/cpu/cpufreq/longrun.c | 17 +-- arch/x86/kernel/cpu/cpufreq/p4-clockmod.c | 10 +- arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c | 47 +++--- arch/x86/kernel/cpu/cpufreq/powernow-k7.c | 33 ++--- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 100 ++++++------- arch/x86/kernel/cpu/cpufreq/powernow-k8.h | 2 - arch/x86/kernel/cpu/cpufreq/sc520_freq.c | 6 +- arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c | 23 ++- arch/x86/kernel/cpu/cpufreq/speedstep-ich.c | 28 ++-- arch/x86/kernel/cpu/cpufreq/speedstep-lib.c | 43 +++--- arch/x86/kernel/cpu/cpufreq/speedstep-smi.c | 41 +++--- drivers/acpi/processor_perflib.c | 6 +- drivers/cpufreq/Kconfig | 13 -- drivers/cpufreq/cpufreq.c | 180 +++++------------------ drivers/cpufreq/cpufreq_performance.c | 5 +- drivers/cpufreq/cpufreq_powersave.c | 5 +- drivers/cpufreq/cpufreq_userspace.c | 13 +- drivers/cpufreq/freq_table.c | 19 +-- include/linux/cpufreq.h | 19 --- 26 files changed, 270 insertions(+), 474 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-davinci/cpufreq.c b/arch/arm/mach-davinci/cpufreq.c index 0a95be1512bb..41669ecc1f91 100644 --- a/arch/arm/mach-davinci/cpufreq.c +++ b/arch/arm/mach-davinci/cpufreq.c @@ -94,9 +94,7 @@ static int davinci_target(struct cpufreq_policy *policy, if (freqs.old == freqs.new) return ret; - cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, - dev_driver_string(cpufreq.dev), - "transition: %u --> %u\n", freqs.old, freqs.new); + dev_dbg(&cpufreq.dev, "transition: %u --> %u\n", freqs.old, freqs.new); ret = cpufreq_frequency_table_target(policy, pdata->freq_table, freqs.new, relation, &idx); diff --git a/arch/blackfin/mach-common/dpmc.c b/arch/blackfin/mach-common/dpmc.c index 382099fd5561..5e4112e518a9 100644 --- a/arch/blackfin/mach-common/dpmc.c +++ b/arch/blackfin/mach-common/dpmc.c @@ -19,9 +19,6 @@ #define DRIVER_NAME "bfin dpmc" -#define dprintk(msg...) \ - cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, DRIVER_NAME, msg) - struct bfin_dpmc_platform_data *pdata; /** diff --git a/arch/ia64/kernel/cpufreq/acpi-cpufreq.c b/arch/ia64/kernel/cpufreq/acpi-cpufreq.c index 22f61526a8e1..f09b174244d5 100644 --- a/arch/ia64/kernel/cpufreq/acpi-cpufreq.c +++ b/arch/ia64/kernel/cpufreq/acpi-cpufreq.c @@ -23,8 +23,6 @@ #include #include -#define dprintk(msg...) 
cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "acpi-cpufreq", msg) - MODULE_AUTHOR("Venkatesh Pallipadi"); MODULE_DESCRIPTION("ACPI Processor P-States Driver"); MODULE_LICENSE("GPL"); @@ -47,12 +45,12 @@ processor_set_pstate ( { s64 retval; - dprintk("processor_set_pstate\n"); + pr_debug("processor_set_pstate\n"); retval = ia64_pal_set_pstate((u64)value); if (retval) { - dprintk("Failed to set freq to 0x%x, with error 0x%lx\n", + pr_debug("Failed to set freq to 0x%x, with error 0x%lx\n", value, retval); return -ENODEV; } @@ -67,14 +65,14 @@ processor_get_pstate ( u64 pstate_index = 0; s64 retval; - dprintk("processor_get_pstate\n"); + pr_debug("processor_get_pstate\n"); retval = ia64_pal_get_pstate(&pstate_index, PAL_GET_PSTATE_TYPE_INSTANT); *value = (u32) pstate_index; if (retval) - dprintk("Failed to get current freq with " + pr_debug("Failed to get current freq with " "error 0x%lx, idx 0x%x\n", retval, *value); return (int)retval; @@ -90,7 +88,7 @@ extract_clock ( { unsigned long i; - dprintk("extract_clock\n"); + pr_debug("extract_clock\n"); for (i = 0; i < data->acpi_data.state_count; i++) { if (value == data->acpi_data.states[i].status) @@ -110,7 +108,7 @@ processor_get_freq ( cpumask_t saved_mask; unsigned long clock_freq; - dprintk("processor_get_freq\n"); + pr_debug("processor_get_freq\n"); saved_mask = current->cpus_allowed; set_cpus_allowed_ptr(current, cpumask_of(cpu)); @@ -148,7 +146,7 @@ processor_set_freq ( cpumask_t saved_mask; int retval; - dprintk("processor_set_freq\n"); + pr_debug("processor_set_freq\n"); saved_mask = current->cpus_allowed; set_cpus_allowed_ptr(current, cpumask_of(cpu)); @@ -159,16 +157,16 @@ processor_set_freq ( if (state == data->acpi_data.state) { if (unlikely(data->resume)) { - dprintk("Called after resume, resetting to P%d\n", state); + pr_debug("Called after resume, resetting to P%d\n", state); data->resume = 0; } else { - dprintk("Already at target state (P%d)\n", state); + pr_debug("Already at target state (P%d)\n", state); retval = 0; goto migrate_end; } } - dprintk("Transitioning from P%d to P%d\n", + pr_debug("Transitioning from P%d to P%d\n", data->acpi_data.state, state); /* cpufreq frequency struct */ @@ -186,7 +184,7 @@ processor_set_freq ( value = (u32) data->acpi_data.states[state].control; - dprintk("Transitioning to state: 0x%08x\n", value); + pr_debug("Transitioning to state: 0x%08x\n", value); ret = processor_set_pstate(value); if (ret) { @@ -219,7 +217,7 @@ acpi_cpufreq_get ( { struct cpufreq_acpi_io *data = acpi_io_data[cpu]; - dprintk("acpi_cpufreq_get\n"); + pr_debug("acpi_cpufreq_get\n"); return processor_get_freq(data, cpu); } @@ -235,7 +233,7 @@ acpi_cpufreq_target ( unsigned int next_state = 0; unsigned int result = 0; - dprintk("acpi_cpufreq_setpolicy\n"); + pr_debug("acpi_cpufreq_setpolicy\n"); result = cpufreq_frequency_table_target(policy, data->freq_table, target_freq, relation, &next_state); @@ -255,7 +253,7 @@ acpi_cpufreq_verify ( unsigned int result = 0; struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu]; - dprintk("acpi_cpufreq_verify\n"); + pr_debug("acpi_cpufreq_verify\n"); result = cpufreq_frequency_table_verify(policy, data->freq_table); @@ -273,7 +271,7 @@ acpi_cpufreq_cpu_init ( struct cpufreq_acpi_io *data; unsigned int result = 0; - dprintk("acpi_cpufreq_cpu_init\n"); + pr_debug("acpi_cpufreq_cpu_init\n"); data = kzalloc(sizeof(struct cpufreq_acpi_io), GFP_KERNEL); if (!data) @@ -288,7 +286,7 @@ acpi_cpufreq_cpu_init ( /* capability check */ if (data->acpi_data.state_count <= 1) { - dprintk("No 
P-States\n"); + pr_debug("No P-States\n"); result = -ENODEV; goto err_unreg; } @@ -297,7 +295,7 @@ acpi_cpufreq_cpu_init ( ACPI_ADR_SPACE_FIXED_HARDWARE) || (data->acpi_data.status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) { - dprintk("Unsupported address space [%d, %d]\n", + pr_debug("Unsupported address space [%d, %d]\n", (u32) (data->acpi_data.control_register.space_id), (u32) (data->acpi_data.status_register.space_id)); result = -ENODEV; @@ -348,7 +346,7 @@ acpi_cpufreq_cpu_init ( "activated.\n", cpu); for (i = 0; i < data->acpi_data.state_count; i++) - dprintk(" %cP%d: %d MHz, %d mW, %d uS, %d uS, 0x%x 0x%x\n", + pr_debug(" %cP%d: %d MHz, %d mW, %d uS, %d uS, 0x%x 0x%x\n", (i == data->acpi_data.state?'*':' '), i, (u32) data->acpi_data.states[i].core_frequency, (u32) data->acpi_data.states[i].power, @@ -383,7 +381,7 @@ acpi_cpufreq_cpu_exit ( { struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu]; - dprintk("acpi_cpufreq_cpu_exit\n"); + pr_debug("acpi_cpufreq_cpu_exit\n"); if (data) { cpufreq_frequency_table_put_attr(policy->cpu); @@ -418,7 +416,7 @@ static struct cpufreq_driver acpi_cpufreq_driver = { static int __init acpi_cpufreq_init (void) { - dprintk("acpi_cpufreq_init\n"); + pr_debug("acpi_cpufreq_init\n"); return cpufreq_register_driver(&acpi_cpufreq_driver); } @@ -427,7 +425,7 @@ acpi_cpufreq_init (void) static void __exit acpi_cpufreq_exit (void) { - dprintk("acpi_cpufreq_exit\n"); + pr_debug("acpi_cpufreq_exit\n"); cpufreq_unregister_driver(&acpi_cpufreq_driver); return; diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index a2baafb2fe6d..4e04e1274388 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -47,9 +47,6 @@ #include #include "mperf.h" -#define dprintk(msg...) 
cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ - "acpi-cpufreq", msg) - MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski"); MODULE_DESCRIPTION("ACPI Processor P-States Driver"); MODULE_LICENSE("GPL"); @@ -233,7 +230,7 @@ static u32 get_cur_val(const struct cpumask *mask) cmd.mask = mask; drv_read(&cmd); - dprintk("get_cur_val = %u\n", cmd.val); + pr_debug("get_cur_val = %u\n", cmd.val); return cmd.val; } @@ -244,7 +241,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu) unsigned int freq; unsigned int cached_freq; - dprintk("get_cur_freq_on_cpu (%d)\n", cpu); + pr_debug("get_cur_freq_on_cpu (%d)\n", cpu); if (unlikely(data == NULL || data->acpi_data == NULL || data->freq_table == NULL)) { @@ -261,7 +258,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu) data->resume = 1; } - dprintk("cur freq = %u\n", freq); + pr_debug("cur freq = %u\n", freq); return freq; } @@ -293,7 +290,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, unsigned int i; int result = 0; - dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu); + pr_debug("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu); if (unlikely(data == NULL || data->acpi_data == NULL || data->freq_table == NULL)) { @@ -313,11 +310,11 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, next_perf_state = data->freq_table[next_state].index; if (perf->state == next_perf_state) { if (unlikely(data->resume)) { - dprintk("Called after resume, resetting to P%d\n", + pr_debug("Called after resume, resetting to P%d\n", next_perf_state); data->resume = 0; } else { - dprintk("Already at target state (P%d)\n", + pr_debug("Already at target state (P%d)\n", next_perf_state); goto out; } @@ -357,7 +354,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, if (acpi_pstate_strict) { if (!check_freqs(cmd.mask, freqs.new, data)) { - dprintk("acpi_cpufreq_target failed (%d)\n", + pr_debug("acpi_cpufreq_target failed (%d)\n", policy->cpu); result = -EAGAIN; goto out; @@ -378,7 +375,7 @@ static int acpi_cpufreq_verify(struct cpufreq_policy *policy) { struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu); - dprintk("acpi_cpufreq_verify\n"); + pr_debug("acpi_cpufreq_verify\n"); return cpufreq_frequency_table_verify(policy, data->freq_table); } @@ -433,11 +430,11 @@ static void free_acpi_perf_data(void) static int __init acpi_cpufreq_early_init(void) { unsigned int i; - dprintk("acpi_cpufreq_early_init\n"); + pr_debug("acpi_cpufreq_early_init\n"); acpi_perf_data = alloc_percpu(struct acpi_processor_performance); if (!acpi_perf_data) { - dprintk("Memory allocation error for acpi_perf_data.\n"); + pr_debug("Memory allocation error for acpi_perf_data.\n"); return -ENOMEM; } for_each_possible_cpu(i) { @@ -519,7 +516,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) static int blacklisted; #endif - dprintk("acpi_cpufreq_cpu_init\n"); + pr_debug("acpi_cpufreq_cpu_init\n"); #ifdef CONFIG_SMP if (blacklisted) @@ -566,7 +563,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) /* capability check */ if (perf->state_count <= 1) { - dprintk("No P-States\n"); + pr_debug("No P-States\n"); result = -ENODEV; goto err_unreg; } @@ -578,11 +575,11 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) switch (perf->control_register.space_id) { case ACPI_ADR_SPACE_SYSTEM_IO: - dprintk("SYSTEM IO addr space\n"); + pr_debug("SYSTEM IO addr space\n"); data->cpu_feature = SYSTEM_IO_CAPABLE; break; case ACPI_ADR_SPACE_FIXED_HARDWARE: - dprintk("HARDWARE 
addr space\n"); + pr_debug("HARDWARE addr space\n"); if (!check_est_cpu(cpu)) { result = -ENODEV; goto err_unreg; @@ -590,7 +587,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE; break; default: - dprintk("Unknown addr space %d\n", + pr_debug("Unknown addr space %d\n", (u32) (perf->control_register.space_id)); result = -ENODEV; goto err_unreg; @@ -661,9 +658,9 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) if (cpu_has(c, X86_FEATURE_APERFMPERF)) acpi_cpufreq_driver.getavg = cpufreq_get_measured_perf; - dprintk("CPU%u - ACPI performance management activated.\n", cpu); + pr_debug("CPU%u - ACPI performance management activated.\n", cpu); for (i = 0; i < perf->state_count; i++) - dprintk(" %cP%d: %d MHz, %d mW, %d uS\n", + pr_debug(" %cP%d: %d MHz, %d mW, %d uS\n", (i == perf->state ? '*' : ' '), i, (u32) perf->states[i].core_frequency, (u32) perf->states[i].power, @@ -694,7 +691,7 @@ static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy) { struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu); - dprintk("acpi_cpufreq_cpu_exit\n"); + pr_debug("acpi_cpufreq_cpu_exit\n"); if (data) { cpufreq_frequency_table_put_attr(policy->cpu); @@ -712,7 +709,7 @@ static int acpi_cpufreq_resume(struct cpufreq_policy *policy) { struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu); - dprintk("acpi_cpufreq_resume\n"); + pr_debug("acpi_cpufreq_resume\n"); data->resume = 1; @@ -743,7 +740,7 @@ static int __init acpi_cpufreq_init(void) if (acpi_disabled) return 0; - dprintk("acpi_cpufreq_init\n"); + pr_debug("acpi_cpufreq_init\n"); ret = acpi_cpufreq_early_init(); if (ret) @@ -758,7 +755,7 @@ static int __init acpi_cpufreq_init(void) static void __exit acpi_cpufreq_exit(void) { - dprintk("acpi_cpufreq_exit\n"); + pr_debug("acpi_cpufreq_exit\n"); cpufreq_unregister_driver(&acpi_cpufreq_driver); diff --git a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c index 141abebc4516..7bac808804f3 100644 --- a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c +++ b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c @@ -57,8 +57,6 @@ MODULE_PARM_DESC(min_fsb, "Minimum FSB to use, if not defined: current FSB - 50"); #define PFX "cpufreq-nforce2: " -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ - "cpufreq-nforce2", msg) /** * nforce2_calc_fsb - calculate FSB @@ -270,7 +268,7 @@ static int nforce2_target(struct cpufreq_policy *policy, if (freqs.old == freqs.new) return 0; - dprintk("Old CPU frequency %d kHz, new %d kHz\n", + pr_debug("Old CPU frequency %d kHz, new %d kHz\n", freqs.old, freqs.new); cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); @@ -282,7 +280,7 @@ static int nforce2_target(struct cpufreq_policy *policy, printk(KERN_ERR PFX "Changing FSB to %d failed\n", target_fsb); else - dprintk("Changed FSB successfully to %d\n", + pr_debug("Changed FSB successfully to %d\n", target_fsb); /* Enable IRQs */ diff --git a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c index 32974cf84232..ffe1f2c92ed3 100644 --- a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c +++ b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c @@ -142,9 +142,6 @@ module_param(max_duration, int, 0444); #define POLICY_MIN_DIV 20 -#define dprintk(msg...) 
cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ - "gx-suspmod", msg) - /** * we can detect a core multipiler from dir0_lsb * from GX1 datasheet p.56, @@ -191,7 +188,7 @@ static __init struct pci_dev *gx_detect_chipset(void) /* check if CPU is a MediaGX or a Geode. */ if ((boot_cpu_data.x86_vendor != X86_VENDOR_NSC) && (boot_cpu_data.x86_vendor != X86_VENDOR_CYRIX)) { - dprintk("error: no MediaGX/Geode processor found!\n"); + pr_debug("error: no MediaGX/Geode processor found!\n"); return NULL; } @@ -201,7 +198,7 @@ static __init struct pci_dev *gx_detect_chipset(void) return gx_pci; } - dprintk("error: no supported chipset found!\n"); + pr_debug("error: no supported chipset found!\n"); return NULL; } @@ -305,14 +302,14 @@ static void gx_set_cpuspeed(unsigned int khz) break; default: local_irq_restore(flags); - dprintk("fatal: try to set unknown chipset.\n"); + pr_debug("fatal: try to set unknown chipset.\n"); return; } } else { suscfg = gx_params->pci_suscfg & ~(SUSMOD); gx_params->off_duration = 0; gx_params->on_duration = 0; - dprintk("suspend modulation disabled: cpu runs 100%% speed.\n"); + pr_debug("suspend modulation disabled: cpu runs 100%% speed.\n"); } gx_write_byte(PCI_MODOFF, gx_params->off_duration); @@ -327,9 +324,9 @@ static void gx_set_cpuspeed(unsigned int khz) cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - dprintk("suspend modulation w/ duration of ON:%d us, OFF:%d us\n", + pr_debug("suspend modulation w/ duration of ON:%d us, OFF:%d us\n", gx_params->on_duration * 32, gx_params->off_duration * 32); - dprintk("suspend modulation w/ clock speed: %d kHz.\n", freqs.new); + pr_debug("suspend modulation w/ clock speed: %d kHz.\n", freqs.new); } /**************************************************************** @@ -428,8 +425,8 @@ static int cpufreq_gx_cpu_init(struct cpufreq_policy *policy) stock_freq = maxfreq; curfreq = gx_get_cpuspeed(0); - dprintk("cpu max frequency is %d.\n", maxfreq); - dprintk("cpu current frequency is %dkHz.\n", curfreq); + pr_debug("cpu max frequency is %d.\n", maxfreq); + pr_debug("cpu current frequency is %dkHz.\n", curfreq); /* setup basic struct for cpufreq API */ policy->cpu = 0; @@ -475,7 +472,7 @@ static int __init cpufreq_gx_init(void) if (max_duration > 0xff) max_duration = 0xff; - dprintk("geode suspend modulation available.\n"); + pr_debug("geode suspend modulation available.\n"); params = kzalloc(sizeof(struct gxfreq_params), GFP_KERNEL); if (params == NULL) diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c index cf48cdd6907d..f47d26e2a135 100644 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.c +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c @@ -77,9 +77,6 @@ static int scale_voltage; static int disable_acpi_c3; static int revid_errata; -#define dprintk(msg...) 
cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ - "longhaul", msg) - /* Clock ratios multiplied by 10 */ static int mults[32]; @@ -87,7 +84,6 @@ static int eblcr[32]; static int longhaul_version; static struct cpufreq_frequency_table *longhaul_table; -#ifdef CONFIG_CPU_FREQ_DEBUG static char speedbuffer[8]; static char *print_speed(int speed) @@ -106,7 +102,6 @@ static char *print_speed(int speed) return speedbuffer; } -#endif static unsigned int calc_speed(int mult) @@ -275,7 +270,7 @@ static void longhaul_setstate(unsigned int table_index) cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - dprintk("Setting to FSB:%dMHz Mult:%d.%dx (%s)\n", + pr_debug("Setting to FSB:%dMHz Mult:%d.%dx (%s)\n", fsb, mult/10, mult%10, print_speed(speed/1000)); retry_loop: preempt_disable(); @@ -460,12 +455,12 @@ static int __cpuinit longhaul_get_ranges(void) break; } - dprintk("MinMult:%d.%dx MaxMult:%d.%dx\n", + pr_debug("MinMult:%d.%dx MaxMult:%d.%dx\n", minmult/10, minmult%10, maxmult/10, maxmult%10); highest_speed = calc_speed(maxmult); lowest_speed = calc_speed(minmult); - dprintk("FSB:%dMHz Lowest speed: %s Highest speed:%s\n", fsb, + pr_debug("FSB:%dMHz Lowest speed: %s Highest speed:%s\n", fsb, print_speed(lowest_speed/1000), print_speed(highest_speed/1000)); diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c index d9f51367666b..34ea359b370e 100644 --- a/arch/x86/kernel/cpu/cpufreq/longrun.c +++ b/arch/x86/kernel/cpu/cpufreq/longrun.c @@ -15,9 +15,6 @@ #include #include -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ - "longrun", msg) - static struct cpufreq_driver longrun_driver; /** @@ -40,14 +37,14 @@ static void __cpuinit longrun_get_policy(struct cpufreq_policy *policy) u32 msr_lo, msr_hi; rdmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi); - dprintk("longrun flags are %x - %x\n", msr_lo, msr_hi); + pr_debug("longrun flags are %x - %x\n", msr_lo, msr_hi); if (msr_lo & 0x01) policy->policy = CPUFREQ_POLICY_PERFORMANCE; else policy->policy = CPUFREQ_POLICY_POWERSAVE; rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); - dprintk("longrun ctrl is %x - %x\n", msr_lo, msr_hi); + pr_debug("longrun ctrl is %x - %x\n", msr_lo, msr_hi); msr_lo &= 0x0000007F; msr_hi &= 0x0000007F; @@ -150,7 +147,7 @@ static unsigned int longrun_get(unsigned int cpu) return 0; cpuid(0x80860007, &eax, &ebx, &ecx, &edx); - dprintk("cpuid eax is %u\n", eax); + pr_debug("cpuid eax is %u\n", eax); return eax * 1000; } @@ -196,7 +193,7 @@ static int __cpuinit longrun_determine_freqs(unsigned int *low_freq, rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi); *high_freq = msr_lo * 1000; /* to kHz */ - dprintk("longrun table interface told %u - %u kHz\n", + pr_debug("longrun table interface told %u - %u kHz\n", *low_freq, *high_freq); if (*low_freq > *high_freq) @@ -207,7 +204,7 @@ static int __cpuinit longrun_determine_freqs(unsigned int *low_freq, /* set the upper border to the value determined during TSC init */ *high_freq = (cpu_khz / 1000); *high_freq = *high_freq * 1000; - dprintk("high frequency is %u kHz\n", *high_freq); + pr_debug("high frequency is %u kHz\n", *high_freq); /* get current borders */ rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); @@ -233,7 +230,7 @@ static int __cpuinit longrun_determine_freqs(unsigned int *low_freq, /* restore values */ wrmsr(MSR_TMTA_LONGRUN_CTRL, save_lo, save_hi); } - dprintk("percentage is %u %%, freq is %u MHz\n", ecx, eax); + pr_debug("percentage is %u %%, freq is %u MHz\n", ecx, eax); /* performance_pctg = (current_freq - 
low_freq)/(high_freq - low_freq) * eqals @@ -249,7 +246,7 @@ static int __cpuinit longrun_determine_freqs(unsigned int *low_freq, edx = ((eax - ebx) * 100) / (100 - ecx); *low_freq = edx * 1000; /* back to kHz */ - dprintk("low frequency is %u kHz\n", *low_freq); + pr_debug("low frequency is %u kHz\n", *low_freq); if (*low_freq > *high_freq) *low_freq = *high_freq; diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index 52c93648e492..6be3e0760c26 100644 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c @@ -35,8 +35,6 @@ #include "speedstep-lib.h" #define PFX "p4-clockmod: " -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ - "p4-clockmod", msg) /* * Duty Cycle (3bits), note DC_DISABLE is not specified in @@ -66,7 +64,7 @@ static int cpufreq_p4_setdc(unsigned int cpu, unsigned int newstate) rdmsr_on_cpu(cpu, MSR_IA32_THERM_STATUS, &l, &h); if (l & 0x01) - dprintk("CPU#%d currently thermal throttled\n", cpu); + pr_debug("CPU#%d currently thermal throttled\n", cpu); if (has_N44_O17_errata[cpu] && (newstate == DC_25PT || newstate == DC_DFLT)) @@ -74,10 +72,10 @@ static int cpufreq_p4_setdc(unsigned int cpu, unsigned int newstate) rdmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, &l, &h); if (newstate == DC_DISABLE) { - dprintk("CPU#%d disabling modulation\n", cpu); + pr_debug("CPU#%d disabling modulation\n", cpu); wrmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, l & ~(1<<4), h); } else { - dprintk("CPU#%d setting duty cycle to %d%%\n", + pr_debug("CPU#%d setting duty cycle to %d%%\n", cpu, ((125 * newstate) / 10)); /* bits 63 - 5 : reserved * bit 4 : enable/disable @@ -217,7 +215,7 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy) case 0x0f11: case 0x0f12: has_N44_O17_errata[policy->cpu] = 1; - dprintk("has errata -- disabling low frequencies\n"); + pr_debug("has errata -- disabling low frequencies\n"); } if (speedstep_detect_processor() == SPEEDSTEP_CPU_P4D && diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c index 907c8e637ef5..7b0603eb0129 100644 --- a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c @@ -48,9 +48,6 @@ #define BUF_SZ 4 -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ - "pcc-cpufreq", msg) - struct pcc_register_resource { u8 descriptor; u16 length; @@ -152,7 +149,7 @@ static unsigned int pcc_get_freq(unsigned int cpu) spin_lock(&pcc_lock); - dprintk("get: get_freq for CPU %d\n", cpu); + pr_debug("get: get_freq for CPU %d\n", cpu); pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu); input_buffer = 0x1; @@ -170,7 +167,7 @@ static unsigned int pcc_get_freq(unsigned int cpu) status = ioread16(&pcch_hdr->status); if (status != CMD_COMPLETE) { - dprintk("get: FAILED: for CPU %d, status is %d\n", + pr_debug("get: FAILED: for CPU %d, status is %d\n", cpu, status); goto cmd_incomplete; } @@ -178,14 +175,14 @@ static unsigned int pcc_get_freq(unsigned int cpu) curr_freq = (((ioread32(&pcch_hdr->nominal) * (output_buffer & 0xff)) / 100) * 1000); - dprintk("get: SUCCESS: (virtual) output_offset for cpu %d is " - "0x%x, contains a value of: 0x%x. Speed is: %d MHz\n", + pr_debug("get: SUCCESS: (virtual) output_offset for cpu %d is " + "0x%p, contains a value of: 0x%x. 
Speed is: %d MHz\n", cpu, (pcch_virt_addr + pcc_cpu_data->output_offset), output_buffer, curr_freq); freq_limit = (output_buffer >> 8) & 0xff; if (freq_limit != 0xff) { - dprintk("get: frequency for cpu %d is being temporarily" + pr_debug("get: frequency for cpu %d is being temporarily" " capped at %d\n", cpu, curr_freq); } @@ -212,8 +209,8 @@ static int pcc_cpufreq_target(struct cpufreq_policy *policy, cpu = policy->cpu; pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu); - dprintk("target: CPU %d should go to target freq: %d " - "(virtual) input_offset is 0x%x\n", + pr_debug("target: CPU %d should go to target freq: %d " + "(virtual) input_offset is 0x%p\n", cpu, target_freq, (pcch_virt_addr + pcc_cpu_data->input_offset)); @@ -234,14 +231,14 @@ static int pcc_cpufreq_target(struct cpufreq_policy *policy, status = ioread16(&pcch_hdr->status); if (status != CMD_COMPLETE) { - dprintk("target: FAILED for cpu %d, with status: 0x%x\n", + pr_debug("target: FAILED for cpu %d, with status: 0x%x\n", cpu, status); goto cmd_incomplete; } iowrite16(0, &pcch_hdr->status); cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - dprintk("target: was SUCCESSFUL for cpu %d\n", cpu); + pr_debug("target: was SUCCESSFUL for cpu %d\n", cpu); spin_unlock(&pcc_lock); return 0; @@ -293,7 +290,7 @@ static int pcc_get_offset(int cpu) memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ); memset_io((pcch_virt_addr + pcc_cpu_data->output_offset), 0, BUF_SZ); - dprintk("pcc_get_offset: for CPU %d: pcc_cpu_data " + pr_debug("pcc_get_offset: for CPU %d: pcc_cpu_data " "input_offset: 0x%x, pcc_cpu_data output_offset: 0x%x\n", cpu, pcc_cpu_data->input_offset, pcc_cpu_data->output_offset); out_free: @@ -410,7 +407,7 @@ static int __init pcc_cpufreq_probe(void) if (ACPI_SUCCESS(status)) { ret = pcc_cpufreq_do_osc(&osc_handle); if (ret) - dprintk("probe: _OSC evaluation did not succeed\n"); + pr_debug("probe: _OSC evaluation did not succeed\n"); /* Firmware's use of _OSC is optional */ ret = 0; } @@ -433,7 +430,7 @@ static int __init pcc_cpufreq_probe(void) mem_resource = (struct pcc_memory_resource *)member->buffer.pointer; - dprintk("probe: mem_resource descriptor: 0x%x," + pr_debug("probe: mem_resource descriptor: 0x%x," " length: %d, space_id: %d, resource_usage: %d," " type_specific: %d, granularity: 0x%llx," " minimum: 0x%llx, maximum: 0x%llx," @@ -453,13 +450,13 @@ static int __init pcc_cpufreq_probe(void) pcch_virt_addr = ioremap_nocache(mem_resource->minimum, mem_resource->address_length); if (pcch_virt_addr == NULL) { - dprintk("probe: could not map shared mem region\n"); + pr_debug("probe: could not map shared mem region\n"); goto out_free; } pcch_hdr = pcch_virt_addr; - dprintk("probe: PCCH header (virtual) addr: 0x%p\n", pcch_hdr); - dprintk("probe: PCCH header is at physical address: 0x%llx," + pr_debug("probe: PCCH header (virtual) addr: 0x%p\n", pcch_hdr); + pr_debug("probe: PCCH header is at physical address: 0x%llx," " signature: 0x%x, length: %d bytes, major: %d, minor: %d," " supported features: 0x%x, command field: 0x%x," " status field: 0x%x, nominal latency: %d us\n", @@ -469,7 +466,7 @@ static int __init pcc_cpufreq_probe(void) ioread16(&pcch_hdr->command), ioread16(&pcch_hdr->status), ioread32(&pcch_hdr->latency)); - dprintk("probe: min time between commands: %d us," + pr_debug("probe: min time between commands: %d us," " max time between commands: %d us," " nominal CPU frequency: %d MHz," " minimum CPU frequency: %d MHz," @@ -494,7 +491,7 @@ static int __init pcc_cpufreq_probe(void) 
doorbell.access_width = 64; doorbell.address = reg_resource->address; - dprintk("probe: doorbell: space_id is %d, bit_width is %d, " + pr_debug("probe: doorbell: space_id is %d, bit_width is %d, " "bit_offset is %d, access_width is %d, address is 0x%llx\n", doorbell.space_id, doorbell.bit_width, doorbell.bit_offset, doorbell.access_width, reg_resource->address); @@ -515,7 +512,7 @@ static int __init pcc_cpufreq_probe(void) doorbell_write = member->integer.value; - dprintk("probe: doorbell_preserve: 0x%llx," + pr_debug("probe: doorbell_preserve: 0x%llx," " doorbell_write: 0x%llx\n", doorbell_preserve, doorbell_write); @@ -550,7 +547,7 @@ static int pcc_cpufreq_cpu_init(struct cpufreq_policy *policy) result = pcc_get_offset(cpu); if (result) { - dprintk("init: PCCP evaluation failed\n"); + pr_debug("init: PCCP evaluation failed\n"); goto out; } @@ -561,12 +558,12 @@ static int pcc_cpufreq_cpu_init(struct cpufreq_policy *policy) policy->cur = pcc_get_freq(cpu); if (!policy->cur) { - dprintk("init: Unable to get current CPU frequency\n"); + pr_debug("init: Unable to get current CPU frequency\n"); result = -EINVAL; goto out; } - dprintk("init: policy->max is %d, policy->min is %d\n", + pr_debug("init: policy->max is %d, policy->min is %d\n", policy->max, policy->min); out: return result; @@ -597,7 +594,7 @@ static int __init pcc_cpufreq_init(void) ret = pcc_cpufreq_probe(); if (ret) { - dprintk("pcc_cpufreq_init: PCCH evaluation failed\n"); + pr_debug("pcc_cpufreq_init: PCCH evaluation failed\n"); return ret; } diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c index 4a45fd6e41ba..d71d9f372359 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c @@ -68,7 +68,6 @@ union powernow_acpi_control_t { }; #endif -#ifdef CONFIG_CPU_FREQ_DEBUG /* divide by 1000 to get VCore voltage in V. */ static const int mobile_vid_table[32] = { 2000, 1950, 1900, 1850, 1800, 1750, 1700, 1650, @@ -76,7 +75,6 @@ static const int mobile_vid_table[32] = { 1275, 1250, 1225, 1200, 1175, 1150, 1125, 1100, 1075, 1050, 1025, 1000, 975, 950, 925, 0, }; -#endif /* divide by 10 to get FID. */ static const int fid_codes[32] = { @@ -103,9 +101,6 @@ static unsigned int fsb; static unsigned int latency; static char have_a0; -#define dprintk(msg...) 
cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ - "powernow-k7", msg) - static int check_fsb(unsigned int fsbspeed) { int delta; @@ -209,7 +204,7 @@ static int get_ranges(unsigned char *pst) vid = *pst++; powernow_table[j].index |= (vid << 8); /* upper 8 bits */ - dprintk(" FID: 0x%x (%d.%dx [%dMHz]) " + pr_debug(" FID: 0x%x (%d.%dx [%dMHz]) " "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10, fid_codes[fid] % 10, speed/1000, vid, mobile_vid_table[vid]/1000, @@ -367,7 +362,7 @@ static int powernow_acpi_init(void) unsigned int speed, speed_mhz; pc.val = (unsigned long) state->control; - dprintk("acpi: P%d: %d MHz %d mW %d uS control %08x SGTC %d\n", + pr_debug("acpi: P%d: %d MHz %d mW %d uS control %08x SGTC %d\n", i, (u32) state->core_frequency, (u32) state->power, @@ -401,7 +396,7 @@ static int powernow_acpi_init(void) invalidate_entry(i); } - dprintk(" FID: 0x%x (%d.%dx [%dMHz]) " + pr_debug(" FID: 0x%x (%d.%dx [%dMHz]) " "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10, fid_codes[fid] % 10, speed_mhz, vid, mobile_vid_table[vid]/1000, @@ -409,7 +404,7 @@ static int powernow_acpi_init(void) if (state->core_frequency != speed_mhz) { state->core_frequency = speed_mhz; - dprintk(" Corrected ACPI frequency to %d\n", + pr_debug(" Corrected ACPI frequency to %d\n", speed_mhz); } @@ -453,8 +448,8 @@ static int powernow_acpi_init(void) static void print_pst_entry(struct pst_s *pst, unsigned int j) { - dprintk("PST:%d (@%p)\n", j, pst); - dprintk(" cpuid: 0x%x fsb: %d maxFID: 0x%x startvid: 0x%x\n", + pr_debug("PST:%d (@%p)\n", j, pst); + pr_debug(" cpuid: 0x%x fsb: %d maxFID: 0x%x startvid: 0x%x\n", pst->cpuid, pst->fsbspeed, pst->maxfid, pst->startvid); } @@ -474,20 +469,20 @@ static int powernow_decode_bios(int maxfid, int startvid) p = phys_to_virt(i); if (memcmp(p, "AMDK7PNOW!", 10) == 0) { - dprintk("Found PSB header at %p\n", p); + pr_debug("Found PSB header at %p\n", p); psb = (struct psb_s *) p; - dprintk("Table version: 0x%x\n", psb->tableversion); + pr_debug("Table version: 0x%x\n", psb->tableversion); if (psb->tableversion != 0x12) { printk(KERN_INFO PFX "Sorry, only v1.2 tables" " supported right now\n"); return -ENODEV; } - dprintk("Flags: 0x%x\n", psb->flags); + pr_debug("Flags: 0x%x\n", psb->flags); if ((psb->flags & 1) == 0) - dprintk("Mobile voltage regulator\n"); + pr_debug("Mobile voltage regulator\n"); else - dprintk("Desktop voltage regulator\n"); + pr_debug("Desktop voltage regulator\n"); latency = psb->settlingtime; if (latency < 100) { @@ -497,9 +492,9 @@ static int powernow_decode_bios(int maxfid, int startvid) "Correcting.\n", latency); latency = 100; } - dprintk("Settling Time: %d microseconds.\n", + pr_debug("Settling Time: %d microseconds.\n", psb->settlingtime); - dprintk("Has %d PST tables. (Only dumping ones " + pr_debug("Has %d PST tables. (Only dumping ones " "relevant to this CPU).\n", psb->numpst); @@ -650,7 +645,7 @@ static int __cpuinit powernow_cpu_init(struct cpufreq_policy *policy) printk(KERN_WARNING PFX "can not determine bus frequency\n"); return -EINVAL; } - dprintk("FSB: %3dMHz\n", fsb/1000); + pr_debug("FSB: %3dMHz\n", fsb/1000); if (dmi_check_system(powernow_dmi_table) || acpi_force) { printk(KERN_INFO PFX "PSB/PST known to be broken. 
" diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 2368e38327b3..83479b6fb9a1 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -139,7 +139,7 @@ static int query_current_values_with_pending_wait(struct powernow_k8_data *data) } do { if (i++ > 10000) { - dprintk("detected change pending stuck\n"); + pr_debug("detected change pending stuck\n"); return 1; } rdmsr(MSR_FIDVID_STATUS, lo, hi); @@ -176,7 +176,7 @@ static void fidvid_msr_init(void) fid = lo & MSR_S_LO_CURRENT_FID; lo = fid | (vid << MSR_C_LO_VID_SHIFT); hi = MSR_C_HI_STP_GNT_BENIGN; - dprintk("cpu%d, init lo 0x%x, hi 0x%x\n", smp_processor_id(), lo, hi); + pr_debug("cpu%d, init lo 0x%x, hi 0x%x\n", smp_processor_id(), lo, hi); wrmsr(MSR_FIDVID_CTL, lo, hi); } @@ -196,7 +196,7 @@ static int write_new_fid(struct powernow_k8_data *data, u32 fid) lo |= (data->currvid << MSR_C_LO_VID_SHIFT); lo |= MSR_C_LO_INIT_FID_VID; - dprintk("writing fid 0x%x, lo 0x%x, hi 0x%x\n", + pr_debug("writing fid 0x%x, lo 0x%x, hi 0x%x\n", fid, lo, data->plllock * PLL_LOCK_CONVERSION); do { @@ -244,7 +244,7 @@ static int write_new_vid(struct powernow_k8_data *data, u32 vid) lo |= (vid << MSR_C_LO_VID_SHIFT); lo |= MSR_C_LO_INIT_FID_VID; - dprintk("writing vid 0x%x, lo 0x%x, hi 0x%x\n", + pr_debug("writing vid 0x%x, lo 0x%x, hi 0x%x\n", vid, lo, STOP_GRANT_5NS); do { @@ -325,7 +325,7 @@ static int transition_fid_vid(struct powernow_k8_data *data, return 1; } - dprintk("transitioned (cpu%d): new fid 0x%x, vid 0x%x\n", + pr_debug("transitioned (cpu%d): new fid 0x%x, vid 0x%x\n", smp_processor_id(), data->currfid, data->currvid); return 0; @@ -339,7 +339,7 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 savefid = data->currfid; u32 maxvid, lo, rvomult = 1; - dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, " + pr_debug("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, " "reqvid 0x%x, rvo 0x%x\n", smp_processor_id(), data->currfid, data->currvid, reqvid, data->rvo); @@ -349,12 +349,12 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data, rvosteps *= rvomult; rdmsr(MSR_FIDVID_STATUS, lo, maxvid); maxvid = 0x1f & (maxvid >> 16); - dprintk("ph1 maxvid=0x%x\n", maxvid); + pr_debug("ph1 maxvid=0x%x\n", maxvid); if (reqvid < maxvid) /* lower numbers are higher voltages */ reqvid = maxvid; while (data->currvid > reqvid) { - dprintk("ph1: curr 0x%x, req vid 0x%x\n", + pr_debug("ph1: curr 0x%x, req vid 0x%x\n", data->currvid, reqvid); if (decrease_vid_code_by_step(data, reqvid, data->vidmvs)) return 1; @@ -365,7 +365,7 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data, if (data->currvid == maxvid) { rvosteps = 0; } else { - dprintk("ph1: changing vid for rvo, req 0x%x\n", + pr_debug("ph1: changing vid for rvo, req 0x%x\n", data->currvid - 1); if (decrease_vid_code_by_step(data, data->currvid-1, 1)) return 1; @@ -382,7 +382,7 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data, return 1; } - dprintk("ph1 complete, currfid 0x%x, currvid 0x%x\n", + pr_debug("ph1 complete, currfid 0x%x, currvid 0x%x\n", data->currfid, data->currvid); return 0; @@ -400,7 +400,7 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid) return 0; } - dprintk("ph2 (cpu%d): starting, currfid 0x%x, currvid 0x%x, " + pr_debug("ph2 (cpu%d): starting, currfid 0x%x, currvid 0x%x, " "reqfid 0x%x\n", smp_processor_id(), data->currfid, data->currvid, reqfid); @@ -457,7 
+457,7 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid) return 1; } - dprintk("ph2 complete, currfid 0x%x, currvid 0x%x\n", + pr_debug("ph2 complete, currfid 0x%x, currvid 0x%x\n", data->currfid, data->currvid); return 0; @@ -470,7 +470,7 @@ static int core_voltage_post_transition(struct powernow_k8_data *data, u32 savefid = data->currfid; u32 savereqvid = reqvid; - dprintk("ph3 (cpu%d): starting, currfid 0x%x, currvid 0x%x\n", + pr_debug("ph3 (cpu%d): starting, currfid 0x%x, currvid 0x%x\n", smp_processor_id(), data->currfid, data->currvid); @@ -498,17 +498,17 @@ static int core_voltage_post_transition(struct powernow_k8_data *data, return 1; if (savereqvid != data->currvid) { - dprintk("ph3 failed, currvid 0x%x\n", data->currvid); + pr_debug("ph3 failed, currvid 0x%x\n", data->currvid); return 1; } if (savefid != data->currfid) { - dprintk("ph3 failed, currfid changed 0x%x\n", + pr_debug("ph3 failed, currfid changed 0x%x\n", data->currfid); return 1; } - dprintk("ph3 complete, currfid 0x%x, currvid 0x%x\n", + pr_debug("ph3 complete, currfid 0x%x, currvid 0x%x\n", data->currfid, data->currvid); return 0; @@ -707,7 +707,7 @@ static int fill_powernow_table(struct powernow_k8_data *data, return -EIO; } - dprintk("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid); + pr_debug("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid); data->powernow_table = powernow_table; if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu) print_basics(data); @@ -717,7 +717,7 @@ static int fill_powernow_table(struct powernow_k8_data *data, (pst[j].vid == data->currvid)) return 0; - dprintk("currfid/vid do not match PST, ignoring\n"); + pr_debug("currfid/vid do not match PST, ignoring\n"); return 0; } @@ -739,36 +739,36 @@ static int find_psb_table(struct powernow_k8_data *data) if (memcmp(psb, PSB_ID_STRING, PSB_ID_STRING_LEN) != 0) continue; - dprintk("found PSB header at 0x%p\n", psb); + pr_debug("found PSB header at 0x%p\n", psb); - dprintk("table vers: 0x%x\n", psb->tableversion); + pr_debug("table vers: 0x%x\n", psb->tableversion); if (psb->tableversion != PSB_VERSION_1_4) { printk(KERN_ERR FW_BUG PFX "PSB table is not v1.4\n"); return -ENODEV; } - dprintk("flags: 0x%x\n", psb->flags1); + pr_debug("flags: 0x%x\n", psb->flags1); if (psb->flags1) { printk(KERN_ERR FW_BUG PFX "unknown flags\n"); return -ENODEV; } data->vstable = psb->vstable; - dprintk("voltage stabilization time: %d(*20us)\n", + pr_debug("voltage stabilization time: %d(*20us)\n", data->vstable); - dprintk("flags2: 0x%x\n", psb->flags2); + pr_debug("flags2: 0x%x\n", psb->flags2); data->rvo = psb->flags2 & 3; data->irt = ((psb->flags2) >> 2) & 3; mvs = ((psb->flags2) >> 4) & 3; data->vidmvs = 1 << mvs; data->batps = ((psb->flags2) >> 6) & 3; - dprintk("ramp voltage offset: %d\n", data->rvo); - dprintk("isochronous relief time: %d\n", data->irt); - dprintk("maximum voltage step: %d - 0x%x\n", mvs, data->vidmvs); + pr_debug("ramp voltage offset: %d\n", data->rvo); + pr_debug("isochronous relief time: %d\n", data->irt); + pr_debug("maximum voltage step: %d - 0x%x\n", mvs, data->vidmvs); - dprintk("numpst: 0x%x\n", psb->num_tables); + pr_debug("numpst: 0x%x\n", psb->num_tables); cpst = psb->num_tables; if ((psb->cpuid == 0x00000fc0) || (psb->cpuid == 0x00000fe0)) { @@ -783,13 +783,13 @@ static int find_psb_table(struct powernow_k8_data *data) } data->plllock = psb->plllocktime; - dprintk("plllocktime: 0x%x (units 1us)\n", psb->plllocktime); - dprintk("maxfid: 0x%x\n", psb->maxfid); - dprintk("maxvid: 
0x%x\n", psb->maxvid); + pr_debug("plllocktime: 0x%x (units 1us)\n", psb->plllocktime); + pr_debug("maxfid: 0x%x\n", psb->maxfid); + pr_debug("maxvid: 0x%x\n", psb->maxvid); maxvid = psb->maxvid; data->numps = psb->numps; - dprintk("numpstates: 0x%x\n", data->numps); + pr_debug("numpstates: 0x%x\n", data->numps); return fill_powernow_table(data, (struct pst_s *)(psb+1), maxvid); } @@ -834,13 +834,13 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) u64 control, status; if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { - dprintk("register performance failed: bad ACPI data\n"); + pr_debug("register performance failed: bad ACPI data\n"); return -EIO; } /* verify the data contained in the ACPI structures */ if (data->acpi_data.state_count <= 1) { - dprintk("No ACPI P-States\n"); + pr_debug("No ACPI P-States\n"); goto err_out; } @@ -849,7 +849,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) if ((control != ACPI_ADR_SPACE_FIXED_HARDWARE) || (status != ACPI_ADR_SPACE_FIXED_HARDWARE)) { - dprintk("Invalid control/status registers (%x - %x)\n", + pr_debug("Invalid control/status registers (%llx - %llx)\n", control, status); goto err_out; } @@ -858,7 +858,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table) * (data->acpi_data.state_count + 1)), GFP_KERNEL); if (!powernow_table) { - dprintk("powernow_table memory alloc failure\n"); + pr_debug("powernow_table memory alloc failure\n"); goto err_out; } @@ -928,7 +928,7 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, } rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi); if (!(hi & HW_PSTATE_VALID_MASK)) { - dprintk("invalid pstate %d, ignoring\n", index); + pr_debug("invalid pstate %d, ignoring\n", index); invalidate_entry(powernow_table, i); continue; } @@ -968,7 +968,7 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, vid = (control >> VID_SHIFT) & VID_MASK; } - dprintk(" %d : fid 0x%x, vid 0x%x\n", i, fid, vid); + pr_debug(" %d : fid 0x%x, vid 0x%x\n", i, fid, vid); index = fid | (vid<<8); powernow_table[i].index = index; @@ -978,7 +978,7 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, /* verify frequency is OK */ if ((freq > (MAX_FREQ * 1000)) || (freq < (MIN_FREQ * 1000))) { - dprintk("invalid freq %u kHz, ignoring\n", freq); + pr_debug("invalid freq %u kHz, ignoring\n", freq); invalidate_entry(powernow_table, i); continue; } @@ -986,7 +986,7 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, /* verify voltage is OK - * BIOSs are using "off" to indicate invalid */ if (vid == VID_OFF) { - dprintk("invalid vid %u, ignoring\n", vid); + pr_debug("invalid vid %u, ignoring\n", vid); invalidate_entry(powernow_table, i); continue; } @@ -1047,7 +1047,7 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, int res, i; struct cpufreq_freqs freqs; - dprintk("cpu %d transition to index %u\n", smp_processor_id(), index); + pr_debug("cpu %d transition to index %u\n", smp_processor_id(), index); /* fid/vid correctness check for k8 */ /* fid are the lower 8 bits of the index we stored into @@ -1057,18 +1057,18 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, fid = data->powernow_table[index].index & 0xFF; vid = (data->powernow_table[index].index & 0xFF00) >> 8; - dprintk("table matched fid 0x%x, giving vid 0x%x\n", fid, vid); + pr_debug("table matched fid 0x%x, giving vid 0x%x\n", fid, vid); 
if (query_current_values_with_pending_wait(data)) return 1; if ((data->currvid == vid) && (data->currfid == fid)) { - dprintk("target matches current values (fid 0x%x, vid 0x%x)\n", + pr_debug("target matches current values (fid 0x%x, vid 0x%x)\n", fid, vid); return 0; } - dprintk("cpu %d, changing to fid 0x%x, vid 0x%x\n", + pr_debug("cpu %d, changing to fid 0x%x, vid 0x%x\n", smp_processor_id(), fid, vid); freqs.old = find_khz_freq_from_fid(data->currfid); freqs.new = find_khz_freq_from_fid(fid); @@ -1096,7 +1096,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, int res, i; struct cpufreq_freqs freqs; - dprintk("cpu %d transition to index %u\n", smp_processor_id(), index); + pr_debug("cpu %d transition to index %u\n", smp_processor_id(), index); /* get MSR index for hardware pstate transition */ pstate = index & HW_PSTATE_MASK; @@ -1156,14 +1156,14 @@ static int powernowk8_target(struct cpufreq_policy *pol, goto err_out; } - dprintk("targ: cpu %d, %d kHz, min %d, max %d, relation %d\n", + pr_debug("targ: cpu %d, %d kHz, min %d, max %d, relation %d\n", pol->cpu, targfreq, pol->min, pol->max, relation); if (query_current_values_with_pending_wait(data)) goto err_out; if (cpu_family != CPU_HW_PSTATE) { - dprintk("targ: curr fid 0x%x, vid 0x%x\n", + pr_debug("targ: curr fid 0x%x, vid 0x%x\n", data->currfid, data->currvid); if ((checkvid != data->currvid) || @@ -1319,7 +1319,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) data->currpstate); else pol->cur = find_khz_freq_from_fid(data->currfid); - dprintk("policy current frequency %d kHz\n", pol->cur); + pr_debug("policy current frequency %d kHz\n", pol->cur); /* min/max the cpu is capable of */ if (cpufreq_frequency_table_cpuinfo(pol, data->powernow_table)) { @@ -1337,10 +1337,10 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu); if (cpu_family == CPU_HW_PSTATE) - dprintk("cpu_init done, current pstate 0x%x\n", + pr_debug("cpu_init done, current pstate 0x%x\n", data->currpstate); else - dprintk("cpu_init done, current fid 0x%x, vid 0x%x\n", + pr_debug("cpu_init done, current fid 0x%x, vid 0x%x\n", data->currfid, data->currvid); per_cpu(powernow_data, pol->cpu) = data; @@ -1586,7 +1586,7 @@ static int __cpuinit powernowk8_init(void) /* driver entry point for term */ static void __exit powernowk8_exit(void) { - dprintk("exit\n"); + pr_debug("exit\n"); if (boot_cpu_has(X86_FEATURE_CPB)) { msrs_free(msrs); diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h index df3529b1c02d..3744d26cdc2b 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h @@ -211,8 +211,6 @@ struct pst_s { u8 vid; }; -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "powernow-k8", msg) - static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid, u32 regfid); static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvid); diff --git a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c b/arch/x86/kernel/cpu/cpufreq/sc520_freq.c index 435a996a613a..1e205e6b1727 100644 --- a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c +++ b/arch/x86/kernel/cpu/cpufreq/sc520_freq.c @@ -29,8 +29,6 @@ static __u8 __iomem *cpuctl; -#define dprintk(msg...) 
cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ - "sc520_freq", msg) #define PFX "sc520_freq: " static struct cpufreq_frequency_table sc520_freq_table[] = { @@ -66,7 +64,7 @@ static void sc520_freq_set_cpu_state(unsigned int state) cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - dprintk("attempting to set frequency to %i kHz\n", + pr_debug("attempting to set frequency to %i kHz\n", sc520_freq_table[state].frequency); local_irq_disable(); @@ -161,7 +159,7 @@ static int __init sc520_freq_init(void) /* Test if we have the right hardware */ if (c->x86_vendor != X86_VENDOR_AMD || c->x86 != 4 || c->x86_model != 9) { - dprintk("no Elan SC520 processor found!\n"); + pr_debug("no Elan SC520 processor found!\n"); return -ENODEV; } cpuctl = ioremap((unsigned long)(MMCR_BASE + OFFS_CPUCTL), 1); diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c index 9b1ff37de46a..6ea3455def21 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c @@ -29,9 +29,6 @@ #define PFX "speedstep-centrino: " #define MAINTAINER "cpufreq@vger.kernel.org" -#define dprintk(msg...) \ - cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg) - #define INTEL_MSR_RANGE (0xffff) struct cpu_id @@ -244,7 +241,7 @@ static int centrino_cpu_init_table(struct cpufreq_policy *policy) if (model->cpu_id == NULL) { /* No match at all */ - dprintk("no support for CPU model \"%s\": " + pr_debug("no support for CPU model \"%s\": " "send /proc/cpuinfo to " MAINTAINER "\n", cpu->x86_model_id); return -ENOENT; @@ -252,15 +249,15 @@ static int centrino_cpu_init_table(struct cpufreq_policy *policy) if (model->op_points == NULL) { /* Matched a non-match */ - dprintk("no table support for CPU model \"%s\"\n", + pr_debug("no table support for CPU model \"%s\"\n", cpu->x86_model_id); - dprintk("try using the acpi-cpufreq driver\n"); + pr_debug("try using the acpi-cpufreq driver\n"); return -ENOENT; } per_cpu(centrino_model, policy->cpu) = model; - dprintk("found \"%s\": max frequency: %dkHz\n", + pr_debug("found \"%s\": max frequency: %dkHz\n", model->model_name, model->max_freq); return 0; @@ -369,7 +366,7 @@ static int centrino_cpu_init(struct cpufreq_policy *policy) per_cpu(centrino_cpu, policy->cpu) = &cpu_ids[i]; if (!per_cpu(centrino_cpu, policy->cpu)) { - dprintk("found unsupported CPU with " + pr_debug("found unsupported CPU with " "Enhanced SpeedStep: send /proc/cpuinfo to " MAINTAINER "\n"); return -ENODEV; @@ -385,7 +382,7 @@ static int centrino_cpu_init(struct cpufreq_policy *policy) if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) { l |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP; - dprintk("trying to enable Enhanced SpeedStep (%x)\n", l); + pr_debug("trying to enable Enhanced SpeedStep (%x)\n", l); wrmsr(MSR_IA32_MISC_ENABLE, l, h); /* check to see if it stuck */ @@ -402,7 +399,7 @@ static int centrino_cpu_init(struct cpufreq_policy *policy) /* 10uS transition latency */ policy->cur = freq; - dprintk("centrino_cpu_init: cur=%dkHz\n", policy->cur); + pr_debug("centrino_cpu_init: cur=%dkHz\n", policy->cur); ret = cpufreq_frequency_table_cpuinfo(policy, per_cpu(centrino_model, policy->cpu)->op_points); @@ -498,7 +495,7 @@ static int centrino_target (struct cpufreq_policy *policy, good_cpu = j; if (good_cpu >= nr_cpu_ids) { - dprintk("couldn't limit to CPUs in this domain\n"); + pr_debug("couldn't limit to CPUs in this domain\n"); retval = -EAGAIN; if (first_cpu) { /* We haven't started the transition yet. 
*/ @@ -512,7 +509,7 @@ static int centrino_target (struct cpufreq_policy *policy, if (first_cpu) { rdmsr_on_cpu(good_cpu, MSR_IA32_PERF_CTL, &oldmsr, &h); if (msr == (oldmsr & 0xffff)) { - dprintk("no change needed - msr was and needs " + pr_debug("no change needed - msr was and needs " "to be %x\n", oldmsr); retval = 0; goto out; @@ -521,7 +518,7 @@ static int centrino_target (struct cpufreq_policy *policy, freqs.old = extract_clock(oldmsr, cpu, 0); freqs.new = extract_clock(msr, cpu, 0); - dprintk("target=%dkHz old=%d new=%d msr=%04x\n", + pr_debug("target=%dkHz old=%d new=%d msr=%04x\n", target_freq, freqs.old, freqs.new, msr); for_each_cpu(k, policy->cpus) { diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c index 561758e95180..a748ce782fee 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c @@ -53,10 +53,6 @@ static struct cpufreq_frequency_table speedstep_freqs[] = { }; -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ - "speedstep-ich", msg) - - /** * speedstep_find_register - read the PMBASE address * @@ -80,7 +76,7 @@ static int speedstep_find_register(void) return -ENODEV; } - dprintk("pmbase is 0x%x\n", pmbase); + pr_debug("pmbase is 0x%x\n", pmbase); return 0; } @@ -106,13 +102,13 @@ static void speedstep_set_state(unsigned int state) /* read state */ value = inb(pmbase + 0x50); - dprintk("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value); + pr_debug("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value); /* write new state */ value &= 0xFE; value |= state; - dprintk("writing 0x%x to pmbase 0x%x + 0x50\n", value, pmbase); + pr_debug("writing 0x%x to pmbase 0x%x + 0x50\n", value, pmbase); /* Disable bus master arbitration */ pm2_blk = inb(pmbase + 0x20); @@ -132,10 +128,10 @@ static void speedstep_set_state(unsigned int state) /* Enable IRQs */ local_irq_restore(flags); - dprintk("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value); + pr_debug("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value); if (state == (value & 0x1)) - dprintk("change to %u MHz succeeded\n", + pr_debug("change to %u MHz succeeded\n", speedstep_get_frequency(speedstep_processor) / 1000); else printk(KERN_ERR "cpufreq: change failed - I/O error\n"); @@ -165,7 +161,7 @@ static int speedstep_activate(void) pci_read_config_word(speedstep_chipset_dev, 0x00A0, &value); if (!(value & 0x08)) { value |= 0x08; - dprintk("activating SpeedStep (TM) registers\n"); + pr_debug("activating SpeedStep (TM) registers\n"); pci_write_config_word(speedstep_chipset_dev, 0x00A0, value); } @@ -218,7 +214,7 @@ static unsigned int speedstep_detect_chipset(void) return 2; /* 2-M */ if (hostbridge->revision < 5) { - dprintk("hostbridge does not support speedstep\n"); + pr_debug("hostbridge does not support speedstep\n"); speedstep_chipset_dev = NULL; pci_dev_put(hostbridge); return 0; @@ -246,7 +242,7 @@ static unsigned int speedstep_get(unsigned int cpu) if (smp_call_function_single(cpu, get_freq_data, &speed, 1) != 0) BUG(); - dprintk("detected %u kHz as current frequency\n", speed); + pr_debug("detected %u kHz as current frequency\n", speed); return speed; } @@ -276,7 +272,7 @@ static int speedstep_target(struct cpufreq_policy *policy, freqs.new = speedstep_freqs[newstate].frequency; freqs.cpu = policy->cpu; - dprintk("transiting from %u to %u kHz\n", freqs.old, freqs.new); + pr_debug("transiting from %u to %u kHz\n", freqs.old, freqs.new); /* no transition necessary */ if 
(freqs.old == freqs.new) @@ -351,7 +347,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) if (!speed) return -EIO; - dprintk("currently at %s speed setting - %i MHz\n", + pr_debug("currently at %s speed setting - %i MHz\n", (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) ? "low" : "high", (speed / 1000)); @@ -405,14 +401,14 @@ static int __init speedstep_init(void) /* detect processor */ speedstep_processor = speedstep_detect_processor(); if (!speedstep_processor) { - dprintk("Intel(R) SpeedStep(TM) capable processor " + pr_debug("Intel(R) SpeedStep(TM) capable processor " "not found\n"); return -ENODEV; } /* detect chipset */ if (!speedstep_detect_chipset()) { - dprintk("Intel(R) SpeedStep(TM) for this chipset not " + pr_debug("Intel(R) SpeedStep(TM) for this chipset not " "(yet) available.\n"); return -ENODEV; } diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c index a94ec6be69fa..8af2d2fd9d51 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c @@ -18,9 +18,6 @@ #include #include "speedstep-lib.h" -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ - "speedstep-lib", msg) - #define PFX "speedstep-lib: " #ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK @@ -75,7 +72,7 @@ static unsigned int pentium3_get_frequency(enum speedstep_processor processor) /* read MSR 0x2a - we only need the low 32 bits */ rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp); - dprintk("P3 - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp); + pr_debug("P3 - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp); msr_tmp = msr_lo; /* decode the FSB */ @@ -89,7 +86,7 @@ static unsigned int pentium3_get_frequency(enum speedstep_processor processor) /* decode the multiplier */ if (processor == SPEEDSTEP_CPU_PIII_C_EARLY) { - dprintk("workaround for early PIIIs\n"); + pr_debug("workaround for early PIIIs\n"); msr_lo &= 0x03c00000; } else msr_lo &= 0x0bc00000; @@ -100,7 +97,7 @@ static unsigned int pentium3_get_frequency(enum speedstep_processor processor) j++; } - dprintk("speed is %u\n", + pr_debug("speed is %u\n", (msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100)); return msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100; @@ -112,7 +109,7 @@ static unsigned int pentiumM_get_frequency(void) u32 msr_lo, msr_tmp; rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp); - dprintk("PM - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp); + pr_debug("PM - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp); /* see table B-2 of 24547212.pdf */ if (msr_lo & 0x00040000) { @@ -122,7 +119,7 @@ static unsigned int pentiumM_get_frequency(void) } msr_tmp = (msr_lo >> 22) & 0x1f; - dprintk("bits 22-26 are 0x%x, speed is %u\n", + pr_debug("bits 22-26 are 0x%x, speed is %u\n", msr_tmp, (msr_tmp * 100 * 1000)); return msr_tmp * 100 * 1000; @@ -160,11 +157,11 @@ static unsigned int pentium_core_get_frequency(void) } rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp); - dprintk("PCORE - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", + pr_debug("PCORE - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp); msr_tmp = (msr_lo >> 22) & 0x1f; - dprintk("bits 22-26 are 0x%x, speed is %u\n", + pr_debug("bits 22-26 are 0x%x, speed is %u\n", msr_tmp, (msr_tmp * fsb)); ret = (msr_tmp * fsb); @@ -190,7 +187,7 @@ static unsigned int pentium4_get_frequency(void) rdmsr(0x2c, msr_lo, msr_hi); - dprintk("P4 - MSR_EBC_FREQUENCY_ID: 0x%x 0x%x\n", msr_lo, msr_hi); + pr_debug("P4 - MSR_EBC_FREQUENCY_ID: 
0x%x 0x%x\n", msr_lo, msr_hi); /* decode the FSB: see IA-32 Intel (C) Architecture Software * Developer's Manual, Volume 3: System Prgramming Guide, @@ -217,7 +214,7 @@ static unsigned int pentium4_get_frequency(void) /* Multiplier. */ mult = msr_lo >> 24; - dprintk("P4 - FSB %u kHz; Multiplier %u; Speed %u kHz\n", + pr_debug("P4 - FSB %u kHz; Multiplier %u; Speed %u kHz\n", fsb, mult, (fsb * mult)); ret = (fsb * mult); @@ -257,7 +254,7 @@ unsigned int speedstep_detect_processor(void) struct cpuinfo_x86 *c = &cpu_data(0); u32 ebx, msr_lo, msr_hi; - dprintk("x86: %x, model: %x\n", c->x86, c->x86_model); + pr_debug("x86: %x, model: %x\n", c->x86, c->x86_model); if ((c->x86_vendor != X86_VENDOR_INTEL) || ((c->x86 != 6) && (c->x86 != 0xF))) @@ -272,7 +269,7 @@ unsigned int speedstep_detect_processor(void) ebx = cpuid_ebx(0x00000001); ebx &= 0x000000FF; - dprintk("ebx value is %x, x86_mask is %x\n", ebx, c->x86_mask); + pr_debug("ebx value is %x, x86_mask is %x\n", ebx, c->x86_mask); switch (c->x86_mask) { case 4: @@ -327,7 +324,7 @@ unsigned int speedstep_detect_processor(void) /* cpuid_ebx(1) is 0x04 for desktop PIII, * 0x06 for mobile PIII-M */ ebx = cpuid_ebx(0x00000001); - dprintk("ebx is %x\n", ebx); + pr_debug("ebx is %x\n", ebx); ebx &= 0x000000FF; @@ -344,7 +341,7 @@ unsigned int speedstep_detect_processor(void) /* all mobile PIII Coppermines have FSB 100 MHz * ==> sort out a few desktop PIIIs. */ rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_hi); - dprintk("Coppermine: MSR_IA32_EBL_CR_POWERON is 0x%x, 0x%x\n", + pr_debug("Coppermine: MSR_IA32_EBL_CR_POWERON is 0x%x, 0x%x\n", msr_lo, msr_hi); msr_lo &= 0x00c0000; if (msr_lo != 0x0080000) @@ -357,12 +354,12 @@ unsigned int speedstep_detect_processor(void) * bit 56 or 57 is set */ rdmsr(MSR_IA32_PLATFORM_ID, msr_lo, msr_hi); - dprintk("Coppermine: MSR_IA32_PLATFORM ID is 0x%x, 0x%x\n", + pr_debug("Coppermine: MSR_IA32_PLATFORM ID is 0x%x, 0x%x\n", msr_lo, msr_hi); if ((msr_hi & (1<<18)) && (relaxed_check ? 
1 : (msr_hi & (3<<24)))) { if (c->x86_mask == 0x01) { - dprintk("early PIII version\n"); + pr_debug("early PIII version\n"); return SPEEDSTEP_CPU_PIII_C_EARLY; } else return SPEEDSTEP_CPU_PIII_C; @@ -393,14 +390,14 @@ unsigned int speedstep_get_freqs(enum speedstep_processor processor, if ((!processor) || (!low_speed) || (!high_speed) || (!set_state)) return -EINVAL; - dprintk("trying to determine both speeds\n"); + pr_debug("trying to determine both speeds\n"); /* get current speed */ prev_speed = speedstep_get_frequency(processor); if (!prev_speed) return -EIO; - dprintk("previous speed is %u\n", prev_speed); + pr_debug("previous speed is %u\n", prev_speed); local_irq_save(flags); @@ -412,7 +409,7 @@ unsigned int speedstep_get_freqs(enum speedstep_processor processor, goto out; } - dprintk("low speed is %u\n", *low_speed); + pr_debug("low speed is %u\n", *low_speed); /* start latency measurement */ if (transition_latency) @@ -431,7 +428,7 @@ unsigned int speedstep_get_freqs(enum speedstep_processor processor, goto out; } - dprintk("high speed is %u\n", *high_speed); + pr_debug("high speed is %u\n", *high_speed); if (*low_speed == *high_speed) { ret = -ENODEV; @@ -445,7 +442,7 @@ unsigned int speedstep_get_freqs(enum speedstep_processor processor, if (transition_latency) { *transition_latency = (tv2.tv_sec - tv1.tv_sec) * USEC_PER_SEC + tv2.tv_usec - tv1.tv_usec; - dprintk("transition latency is %u uSec\n", *transition_latency); + pr_debug("transition latency is %u uSec\n", *transition_latency); /* convert uSec to nSec and add 20% for safety reasons */ *transition_latency *= 1200; diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c index 91bc25b67bc1..c76ead3490bf 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c @@ -55,9 +55,6 @@ static struct cpufreq_frequency_table speedstep_freqs[] = { * of DMA activity going on? */ #define SMI_TRIES 5 -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ - "speedstep-smi", msg) - /** * speedstep_smi_ownership */ @@ -70,7 +67,7 @@ static int speedstep_smi_ownership(void) command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); magic = virt_to_phys(magic_data); - dprintk("trying to obtain ownership with command %x at port %x\n", + pr_debug("trying to obtain ownership with command %x at port %x\n", command, smi_port); __asm__ __volatile__( @@ -85,7 +82,7 @@ static int speedstep_smi_ownership(void) : "memory" ); - dprintk("result is %x\n", result); + pr_debug("result is %x\n", result); return result; } @@ -106,13 +103,13 @@ static int speedstep_smi_get_freqs(unsigned int *low, unsigned int *high) u32 function = GET_SPEEDSTEP_FREQS; if (!(ist_info.event & 0xFFFF)) { - dprintk("bug #1422 -- can't read freqs from BIOS\n"); + pr_debug("bug #1422 -- can't read freqs from BIOS\n"); return -ENODEV; } command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); - dprintk("trying to determine frequencies with command %x at port %x\n", + pr_debug("trying to determine frequencies with command %x at port %x\n", command, smi_port); __asm__ __volatile__( @@ -129,7 +126,7 @@ static int speedstep_smi_get_freqs(unsigned int *low, unsigned int *high) "d" (smi_port), "S" (0), "D" (0) ); - dprintk("result %x, low_freq %u, high_freq %u\n", + pr_debug("result %x, low_freq %u, high_freq %u\n", result, low_mhz, high_mhz); /* abort if results are obviously incorrect... 
*/ @@ -154,7 +151,7 @@ static int speedstep_get_state(void) command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); - dprintk("trying to determine current setting with command %x " + pr_debug("trying to determine current setting with command %x " "at port %x\n", command, smi_port); __asm__ __volatile__( @@ -168,7 +165,7 @@ static int speedstep_get_state(void) "d" (smi_port), "S" (0), "D" (0) ); - dprintk("state is %x, result is %x\n", state, result); + pr_debug("state is %x, result is %x\n", state, result); return state & 1; } @@ -194,13 +191,13 @@ static void speedstep_set_state(unsigned int state) command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); - dprintk("trying to set frequency to state %u " + pr_debug("trying to set frequency to state %u " "with command %x at port %x\n", state, command, smi_port); do { if (retry) { - dprintk("retry %u, previous result %u, waiting...\n", + pr_debug("retry %u, previous result %u, waiting...\n", retry, result); mdelay(retry * 50); } @@ -221,7 +218,7 @@ static void speedstep_set_state(unsigned int state) local_irq_restore(flags); if (new_state == state) - dprintk("change to %u MHz succeeded after %u tries " + pr_debug("change to %u MHz succeeded after %u tries " "with result %u\n", (speedstep_freqs[new_state].frequency / 1000), retry, result); @@ -292,7 +289,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) result = speedstep_smi_ownership(); if (result) { - dprintk("fails in acquiring ownership of a SMI interface.\n"); + pr_debug("fails in acquiring ownership of a SMI interface.\n"); return -EINVAL; } @@ -304,7 +301,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) if (result) { /* fall back to speedstep_lib.c dection mechanism: * try both states out */ - dprintk("could not detect low and high frequencies " + pr_debug("could not detect low and high frequencies " "by SMI call.\n"); result = speedstep_get_freqs(speedstep_processor, low, high, @@ -312,18 +309,18 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) &speedstep_set_state); if (result) { - dprintk("could not detect two different speeds" + pr_debug("could not detect two different speeds" " -- aborting.\n"); return result; } else - dprintk("workaround worked.\n"); + pr_debug("workaround worked.\n"); } /* get current speed setting */ state = speedstep_get_state(); speed = speedstep_freqs[state].frequency; - dprintk("currently at %s speed setting - %i MHz\n", + pr_debug("currently at %s speed setting - %i MHz\n", (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) ? 
"low" : "high", (speed / 1000)); @@ -360,7 +357,7 @@ static int speedstep_resume(struct cpufreq_policy *policy) int result = speedstep_smi_ownership(); if (result) - dprintk("fails in re-acquiring ownership of a SMI interface.\n"); + pr_debug("fails in re-acquiring ownership of a SMI interface.\n"); return result; } @@ -403,12 +400,12 @@ static int __init speedstep_init(void) } if (!speedstep_processor) { - dprintk("No supported Intel CPU detected.\n"); + pr_debug("No supported Intel CPU detected.\n"); return -ENODEV; } - dprintk("signature:0x%.8lx, command:0x%.8lx, " - "event:0x%.8lx, perf_level:0x%.8lx.\n", + pr_debug("signature:0x%.8ulx, command:0x%.8ulx, " + "event:0x%.8ulx, perf_level:0x%.8ulx.\n", ist_info.signature, ist_info.command, ist_info.event, ist_info.perf_level); diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index 3a73a93596e8..85b32376dad7 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -49,10 +49,6 @@ ACPI_MODULE_NAME("processor_perflib"); static DEFINE_MUTEX(performance_mutex); -/* Use cpufreq debug layer for _PPC changes. */ -#define cpufreq_printk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_CORE, \ - "cpufreq-core", msg) - /* * _PPC support is implemented as a CPUfreq policy notifier: * This means each time a CPUfreq driver registered also with @@ -145,7 +141,7 @@ static int acpi_processor_get_platform_limit(struct acpi_processor *pr) return -ENODEV; } - cpufreq_printk("CPU %d: _PPC is %d - frequency %s limited\n", pr->id, + pr_debug("CPU %d: _PPC is %d - frequency %s limited\n", pr->id, (int)ppc, ppc ? "" : "not"); pr->performance_platform_limit = (int)ppc; diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index ca8ee8093d6c..b78baa547ef5 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -18,19 +18,6 @@ if CPU_FREQ config CPU_FREQ_TABLE tristate -config CPU_FREQ_DEBUG - bool "Enable CPUfreq debugging" - help - Say Y here to enable CPUfreq subsystem (including drivers) - debugging. You will need to activate it via the kernel - command line by passing - cpufreq.debug= - - To get , add - 1 to activate CPUfreq core debugging, - 2 to activate CPUfreq drivers debugging, and - 4 to activate CPUfreq governor debugging - config CPU_FREQ_STAT tristate "CPU frequency translation statistics" select CPU_FREQ_TABLE diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 7c10f96c5ae9..1e08af43ae72 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -32,9 +32,6 @@ #include -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_CORE, \ - "cpufreq-core", msg) - /** * The "cpufreq driver" - the arch- or hardware-dependent low * level driver of CPUFreq support, and its spinlock. This lock @@ -180,93 +177,6 @@ void cpufreq_cpu_put(struct cpufreq_policy *data) EXPORT_SYMBOL_GPL(cpufreq_cpu_put); -/********************************************************************* - * UNIFIED DEBUG HELPERS * - *********************************************************************/ -#ifdef CONFIG_CPU_FREQ_DEBUG - -/* what part(s) of the CPUfreq subsystem are debugged? */ -static unsigned int debug; - -/* is the debug output ratelimit'ed using printk_ratelimit? User can - * set or modify this value. - */ -static unsigned int debug_ratelimit = 1; - -/* is the printk_ratelimit'ing enabled? 
It's enabled after a successful - * loading of a cpufreq driver, temporarily disabled when a new policy - * is set, and disabled upon cpufreq driver removal - */ -static unsigned int disable_ratelimit = 1; -static DEFINE_SPINLOCK(disable_ratelimit_lock); - -static void cpufreq_debug_enable_ratelimit(void) -{ - unsigned long flags; - - spin_lock_irqsave(&disable_ratelimit_lock, flags); - if (disable_ratelimit) - disable_ratelimit--; - spin_unlock_irqrestore(&disable_ratelimit_lock, flags); -} - -static void cpufreq_debug_disable_ratelimit(void) -{ - unsigned long flags; - - spin_lock_irqsave(&disable_ratelimit_lock, flags); - disable_ratelimit++; - spin_unlock_irqrestore(&disable_ratelimit_lock, flags); -} - -void cpufreq_debug_printk(unsigned int type, const char *prefix, - const char *fmt, ...) -{ - char s[256]; - va_list args; - unsigned int len; - unsigned long flags; - - WARN_ON(!prefix); - if (type & debug) { - spin_lock_irqsave(&disable_ratelimit_lock, flags); - if (!disable_ratelimit && debug_ratelimit - && !printk_ratelimit()) { - spin_unlock_irqrestore(&disable_ratelimit_lock, flags); - return; - } - spin_unlock_irqrestore(&disable_ratelimit_lock, flags); - - len = snprintf(s, 256, KERN_DEBUG "%s: ", prefix); - - va_start(args, fmt); - len += vsnprintf(&s[len], (256 - len), fmt, args); - va_end(args); - - printk(s); - - WARN_ON(len < 5); - } -} -EXPORT_SYMBOL(cpufreq_debug_printk); - - -module_param(debug, uint, 0644); -MODULE_PARM_DESC(debug, "CPUfreq debugging: add 1 to debug core," - " 2 to debug drivers, and 4 to debug governors."); - -module_param(debug_ratelimit, uint, 0644); -MODULE_PARM_DESC(debug_ratelimit, "CPUfreq debugging:" - " set to 0 to disable ratelimiting."); - -#else /* !CONFIG_CPU_FREQ_DEBUG */ - -static inline void cpufreq_debug_enable_ratelimit(void) { return; } -static inline void cpufreq_debug_disable_ratelimit(void) { return; } - -#endif /* CONFIG_CPU_FREQ_DEBUG */ - - /********************************************************************* * EXTERNALLY AFFECTING FREQUENCY CHANGES * *********************************************************************/ @@ -291,7 +201,7 @@ static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci) if (!l_p_j_ref_freq) { l_p_j_ref = loops_per_jiffy; l_p_j_ref_freq = ci->old; - dprintk("saving %lu as reference value for loops_per_jiffy; " + pr_debug("saving %lu as reference value for loops_per_jiffy; " "freq is %u kHz\n", l_p_j_ref, l_p_j_ref_freq); } if ((val == CPUFREQ_PRECHANGE && ci->old < ci->new) || @@ -299,7 +209,7 @@ static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci) (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) { loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq, ci->new); - dprintk("scaling loops_per_jiffy to %lu " + pr_debug("scaling loops_per_jiffy to %lu " "for frequency %u kHz\n", loops_per_jiffy, ci->new); } } @@ -326,7 +236,7 @@ void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state) BUG_ON(irqs_disabled()); freqs->flags = cpufreq_driver->flags; - dprintk("notification %u of frequency transition to %u kHz\n", + pr_debug("notification %u of frequency transition to %u kHz\n", state, freqs->new); policy = per_cpu(cpufreq_cpu_data, freqs->cpu); @@ -340,7 +250,7 @@ void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state) if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) { if ((policy) && (policy->cpu == freqs->cpu) && (policy->cur) && (policy->cur != freqs->old)) { - dprintk("Warning: CPU frequency is" + 
pr_debug("Warning: CPU frequency is" " %u, cpufreq assumed %u kHz.\n", freqs->old, policy->cur); freqs->old = policy->cur; @@ -353,7 +263,7 @@ void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state) case CPUFREQ_POSTCHANGE: adjust_jiffies(CPUFREQ_POSTCHANGE, freqs); - dprintk("FREQ: %lu - CPU: %lu", (unsigned long)freqs->new, + pr_debug("FREQ: %lu - CPU: %lu", (unsigned long)freqs->new, (unsigned long)freqs->cpu); trace_power_frequency(POWER_PSTATE, freqs->new, freqs->cpu); trace_cpu_frequency(freqs->new, freqs->cpu); @@ -753,7 +663,7 @@ no_policy: static void cpufreq_sysfs_release(struct kobject *kobj) { struct cpufreq_policy *policy = to_policy(kobj); - dprintk("last reference is dropped\n"); + pr_debug("last reference is dropped\n"); complete(&policy->kobj_unregister); } @@ -788,7 +698,7 @@ static int cpufreq_add_dev_policy(unsigned int cpu, gov = __find_governor(per_cpu(cpufreq_cpu_governor, cpu)); if (gov) { policy->governor = gov; - dprintk("Restoring governor %s for cpu %d\n", + pr_debug("Restoring governor %s for cpu %d\n", policy->governor->name, cpu); } #endif @@ -824,7 +734,7 @@ static int cpufreq_add_dev_policy(unsigned int cpu, per_cpu(cpufreq_cpu_data, cpu) = managed_policy; spin_unlock_irqrestore(&cpufreq_driver_lock, flags); - dprintk("CPU already managed, adding link\n"); + pr_debug("CPU already managed, adding link\n"); ret = sysfs_create_link(&sys_dev->kobj, &managed_policy->kobj, "cpufreq"); @@ -865,7 +775,7 @@ static int cpufreq_add_dev_symlink(unsigned int cpu, if (!cpu_online(j)) continue; - dprintk("CPU %u already managed, adding link\n", j); + pr_debug("CPU %u already managed, adding link\n", j); managed_policy = cpufreq_cpu_get(cpu); cpu_sys_dev = get_cpu_sysdev(j); ret = sysfs_create_link(&cpu_sys_dev->kobj, &policy->kobj, @@ -941,7 +851,7 @@ static int cpufreq_add_dev_interface(unsigned int cpu, policy->user_policy.governor = policy->governor; if (ret) { - dprintk("setting policy failed\n"); + pr_debug("setting policy failed\n"); if (cpufreq_driver->exit) cpufreq_driver->exit(policy); } @@ -977,8 +887,7 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) if (cpu_is_offline(cpu)) return 0; - cpufreq_debug_disable_ratelimit(); - dprintk("adding CPU %u\n", cpu); + pr_debug("adding CPU %u\n", cpu); #ifdef CONFIG_SMP /* check whether a different CPU already registered this @@ -986,7 +895,6 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) policy = cpufreq_cpu_get(cpu); if (unlikely(policy)) { cpufreq_cpu_put(policy); - cpufreq_debug_enable_ratelimit(); return 0; } #endif @@ -1037,7 +945,7 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) */ ret = cpufreq_driver->init(policy); if (ret) { - dprintk("initialization failed\n"); + pr_debug("initialization failed\n"); goto err_unlock_policy; } policy->user_policy.min = policy->min; @@ -1063,8 +971,7 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) kobject_uevent(&policy->kobj, KOBJ_ADD); module_put(cpufreq_driver->owner); - dprintk("initialization complete\n"); - cpufreq_debug_enable_ratelimit(); + pr_debug("initialization complete\n"); return 0; @@ -1088,7 +995,6 @@ err_free_policy: nomem_out: module_put(cpufreq_driver->owner); module_out: - cpufreq_debug_enable_ratelimit(); return ret; } @@ -1112,15 +1018,13 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev) unsigned int j; #endif - cpufreq_debug_disable_ratelimit(); - dprintk("unregistering CPU %u\n", cpu); + pr_debug("unregistering CPU %u\n", cpu); spin_lock_irqsave(&cpufreq_driver_lock, flags); data = 
per_cpu(cpufreq_cpu_data, cpu); if (!data) { spin_unlock_irqrestore(&cpufreq_driver_lock, flags); - cpufreq_debug_enable_ratelimit(); unlock_policy_rwsem_write(cpu); return -EINVAL; } @@ -1132,12 +1036,11 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev) * only need to unlink, put and exit */ if (unlikely(cpu != data->cpu)) { - dprintk("removing link\n"); + pr_debug("removing link\n"); cpumask_clear_cpu(cpu, data->cpus); spin_unlock_irqrestore(&cpufreq_driver_lock, flags); kobj = &sys_dev->kobj; cpufreq_cpu_put(data); - cpufreq_debug_enable_ratelimit(); unlock_policy_rwsem_write(cpu); sysfs_remove_link(kobj, "cpufreq"); return 0; @@ -1170,7 +1073,7 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev) for_each_cpu(j, data->cpus) { if (j == cpu) continue; - dprintk("removing link for cpu %u\n", j); + pr_debug("removing link for cpu %u\n", j); #ifdef CONFIG_HOTPLUG_CPU strncpy(per_cpu(cpufreq_cpu_governor, j), data->governor->name, CPUFREQ_NAME_LEN); @@ -1199,17 +1102,15 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev) * not referenced anymore by anybody before we proceed with * unloading. */ - dprintk("waiting for dropping of refcount\n"); + pr_debug("waiting for dropping of refcount\n"); wait_for_completion(cmp); - dprintk("wait complete\n"); + pr_debug("wait complete\n"); lock_policy_rwsem_write(cpu); if (cpufreq_driver->exit) cpufreq_driver->exit(data); unlock_policy_rwsem_write(cpu); - cpufreq_debug_enable_ratelimit(); - #ifdef CONFIG_HOTPLUG_CPU /* when the CPU which is the parent of the kobj is hotplugged * offline, check for siblings, and create cpufreq sysfs interface @@ -1255,7 +1156,7 @@ static void handle_update(struct work_struct *work) struct cpufreq_policy *policy = container_of(work, struct cpufreq_policy, update); unsigned int cpu = policy->cpu; - dprintk("handle_update for cpu %u called\n", cpu); + pr_debug("handle_update for cpu %u called\n", cpu); cpufreq_update_policy(cpu); } @@ -1273,7 +1174,7 @@ static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq, { struct cpufreq_freqs freqs; - dprintk("Warning: CPU frequency out of sync: cpufreq and timing " + pr_debug("Warning: CPU frequency out of sync: cpufreq and timing " "core thinks of %u, is %u kHz.\n", old_freq, new_freq); freqs.cpu = cpu; @@ -1376,7 +1277,7 @@ static int cpufreq_bp_suspend(void) int cpu = smp_processor_id(); struct cpufreq_policy *cpu_policy; - dprintk("suspending cpu %u\n", cpu); + pr_debug("suspending cpu %u\n", cpu); /* If there's no policy for the boot CPU, we have nothing to do. */ cpu_policy = cpufreq_cpu_get(cpu); @@ -1414,7 +1315,7 @@ static void cpufreq_bp_resume(void) int cpu = smp_processor_id(); struct cpufreq_policy *cpu_policy; - dprintk("resuming cpu %u\n", cpu); + pr_debug("resuming cpu %u\n", cpu); /* If there's no policy for the boot CPU, we have nothing to do. 
*/ cpu_policy = cpufreq_cpu_get(cpu); @@ -1526,7 +1427,7 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, { int retval = -EINVAL; - dprintk("target for CPU %u: %u kHz, relation %u\n", policy->cpu, + pr_debug("target for CPU %u: %u kHz, relation %u\n", policy->cpu, target_freq, relation); if (cpu_online(policy->cpu) && cpufreq_driver->target) retval = cpufreq_driver->target(policy, target_freq, relation); @@ -1612,7 +1513,7 @@ static int __cpufreq_governor(struct cpufreq_policy *policy, if (!try_module_get(policy->governor->owner)) return -EINVAL; - dprintk("__cpufreq_governor for CPU %u, event %u\n", + pr_debug("__cpufreq_governor for CPU %u, event %u\n", policy->cpu, event); ret = policy->governor->governor(policy, event); @@ -1713,8 +1614,7 @@ static int __cpufreq_set_policy(struct cpufreq_policy *data, { int ret = 0; - cpufreq_debug_disable_ratelimit(); - dprintk("setting new policy for CPU %u: %u - %u kHz\n", policy->cpu, + pr_debug("setting new policy for CPU %u: %u - %u kHz\n", policy->cpu, policy->min, policy->max); memcpy(&policy->cpuinfo, &data->cpuinfo, @@ -1751,19 +1651,19 @@ static int __cpufreq_set_policy(struct cpufreq_policy *data, data->min = policy->min; data->max = policy->max; - dprintk("new min and max freqs are %u - %u kHz\n", + pr_debug("new min and max freqs are %u - %u kHz\n", data->min, data->max); if (cpufreq_driver->setpolicy) { data->policy = policy->policy; - dprintk("setting range\n"); + pr_debug("setting range\n"); ret = cpufreq_driver->setpolicy(policy); } else { if (policy->governor != data->governor) { /* save old, working values */ struct cpufreq_governor *old_gov = data->governor; - dprintk("governor switch\n"); + pr_debug("governor switch\n"); /* end old governor */ if (data->governor) @@ -1773,7 +1673,7 @@ static int __cpufreq_set_policy(struct cpufreq_policy *data, data->governor = policy->governor; if (__cpufreq_governor(data, CPUFREQ_GOV_START)) { /* new governor failed, so re-start old one */ - dprintk("starting governor %s failed\n", + pr_debug("starting governor %s failed\n", data->governor->name); if (old_gov) { data->governor = old_gov; @@ -1785,12 +1685,11 @@ static int __cpufreq_set_policy(struct cpufreq_policy *data, } /* might be a policy change, too, so fall through */ } - dprintk("governor: change or update limits\n"); + pr_debug("governor: change or update limits\n"); __cpufreq_governor(data, CPUFREQ_GOV_LIMITS); } error_out: - cpufreq_debug_enable_ratelimit(); return ret; } @@ -1817,7 +1716,7 @@ int cpufreq_update_policy(unsigned int cpu) goto fail; } - dprintk("updating policy for CPU %u\n", cpu); + pr_debug("updating policy for CPU %u\n", cpu); memcpy(&policy, data, sizeof(struct cpufreq_policy)); policy.min = data->user_policy.min; policy.max = data->user_policy.max; @@ -1829,7 +1728,7 @@ int cpufreq_update_policy(unsigned int cpu) if (cpufreq_driver->get) { policy.cur = cpufreq_driver->get(cpu); if (!data->cur) { - dprintk("Driver did not initialize current freq"); + pr_debug("Driver did not initialize current freq"); data->cur = policy.cur; } else { if (data->cur != policy.cur) @@ -1905,7 +1804,7 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data) ((!driver_data->setpolicy) && (!driver_data->target))) return -EINVAL; - dprintk("trying to register driver %s\n", driver_data->name); + pr_debug("trying to register driver %s\n", driver_data->name); if (driver_data->setpolicy) driver_data->flags |= CPUFREQ_CONST_LOOPS; @@ -1936,15 +1835,14 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data) /* 
if all ->init() calls failed, unregister */ if (ret) { - dprintk("no CPU initialized for driver %s\n", + pr_debug("no CPU initialized for driver %s\n", driver_data->name); goto err_sysdev_unreg; } } register_hotcpu_notifier(&cpufreq_cpu_notifier); - dprintk("driver %s up and running\n", driver_data->name); - cpufreq_debug_enable_ratelimit(); + pr_debug("driver %s up and running\n", driver_data->name); return 0; err_sysdev_unreg: @@ -1971,14 +1869,10 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver) { unsigned long flags; - cpufreq_debug_disable_ratelimit(); - - if (!cpufreq_driver || (driver != cpufreq_driver)) { - cpufreq_debug_enable_ratelimit(); + if (!cpufreq_driver || (driver != cpufreq_driver)) return -EINVAL; - } - dprintk("unregistering driver %s\n", driver->name); + pr_debug("unregistering driver %s\n", driver->name); sysdev_driver_unregister(&cpu_sysdev_class, &cpufreq_sysdev_driver); unregister_hotcpu_notifier(&cpufreq_cpu_notifier); diff --git a/drivers/cpufreq/cpufreq_performance.c b/drivers/cpufreq/cpufreq_performance.c index 7e2e515087f8..f13a8a9af6a1 100644 --- a/drivers/cpufreq/cpufreq_performance.c +++ b/drivers/cpufreq/cpufreq_performance.c @@ -15,9 +15,6 @@ #include #include -#define dprintk(msg...) \ - cpufreq_debug_printk(CPUFREQ_DEBUG_GOVERNOR, "performance", msg) - static int cpufreq_governor_performance(struct cpufreq_policy *policy, unsigned int event) @@ -25,7 +22,7 @@ static int cpufreq_governor_performance(struct cpufreq_policy *policy, switch (event) { case CPUFREQ_GOV_START: case CPUFREQ_GOV_LIMITS: - dprintk("setting to %u kHz because of event %u\n", + pr_debug("setting to %u kHz because of event %u\n", policy->max, event); __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H); diff --git a/drivers/cpufreq/cpufreq_powersave.c b/drivers/cpufreq/cpufreq_powersave.c index e6db5faf3eb1..4c2eb512f2bc 100644 --- a/drivers/cpufreq/cpufreq_powersave.c +++ b/drivers/cpufreq/cpufreq_powersave.c @@ -15,16 +15,13 @@ #include #include -#define dprintk(msg...) \ - cpufreq_debug_printk(CPUFREQ_DEBUG_GOVERNOR, "powersave", msg) - static int cpufreq_governor_powersave(struct cpufreq_policy *policy, unsigned int event) { switch (event) { case CPUFREQ_GOV_START: case CPUFREQ_GOV_LIMITS: - dprintk("setting to %u kHz because of event %u\n", + pr_debug("setting to %u kHz because of event %u\n", policy->min, event); __cpufreq_driver_target(policy, policy->min, CPUFREQ_RELATION_L); diff --git a/drivers/cpufreq/cpufreq_userspace.c b/drivers/cpufreq/cpufreq_userspace.c index 66d2d1d6c80f..f231015904c0 100644 --- a/drivers/cpufreq/cpufreq_userspace.c +++ b/drivers/cpufreq/cpufreq_userspace.c @@ -37,9 +37,6 @@ static DEFINE_PER_CPU(unsigned int, cpu_is_managed); static DEFINE_MUTEX(userspace_mutex); static int cpus_using_userspace_governor; -#define dprintk(msg...) 
\ - cpufreq_debug_printk(CPUFREQ_DEBUG_GOVERNOR, "userspace", msg) - /* keep track of frequency transitions */ static int userspace_cpufreq_notifier(struct notifier_block *nb, unsigned long val, @@ -50,7 +47,7 @@ userspace_cpufreq_notifier(struct notifier_block *nb, unsigned long val, if (!per_cpu(cpu_is_managed, freq->cpu)) return 0; - dprintk("saving cpu_cur_freq of cpu %u to be %u kHz\n", + pr_debug("saving cpu_cur_freq of cpu %u to be %u kHz\n", freq->cpu, freq->new); per_cpu(cpu_cur_freq, freq->cpu) = freq->new; @@ -73,7 +70,7 @@ static int cpufreq_set(struct cpufreq_policy *policy, unsigned int freq) { int ret = -EINVAL; - dprintk("cpufreq_set for cpu %u, freq %u kHz\n", policy->cpu, freq); + pr_debug("cpufreq_set for cpu %u, freq %u kHz\n", policy->cpu, freq); mutex_lock(&userspace_mutex); if (!per_cpu(cpu_is_managed, policy->cpu)) @@ -134,7 +131,7 @@ static int cpufreq_governor_userspace(struct cpufreq_policy *policy, per_cpu(cpu_max_freq, cpu) = policy->max; per_cpu(cpu_cur_freq, cpu) = policy->cur; per_cpu(cpu_set_freq, cpu) = policy->cur; - dprintk("managing cpu %u started " + pr_debug("managing cpu %u started " "(%u - %u kHz, currently %u kHz)\n", cpu, per_cpu(cpu_min_freq, cpu), @@ -156,12 +153,12 @@ static int cpufreq_governor_userspace(struct cpufreq_policy *policy, per_cpu(cpu_min_freq, cpu) = 0; per_cpu(cpu_max_freq, cpu) = 0; per_cpu(cpu_set_freq, cpu) = 0; - dprintk("managing cpu %u stopped\n", cpu); + pr_debug("managing cpu %u stopped\n", cpu); mutex_unlock(&userspace_mutex); break; case CPUFREQ_GOV_LIMITS: mutex_lock(&userspace_mutex); - dprintk("limit event for cpu %u: %u - %u kHz, " + pr_debug("limit event for cpu %u: %u - %u kHz, " "currently %u kHz, last set to %u kHz\n", cpu, policy->min, policy->max, per_cpu(cpu_cur_freq, cpu), diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index 05432216e224..90431cb92804 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -14,9 +14,6 @@ #include #include -#define dprintk(msg...) 
\ - cpufreq_debug_printk(CPUFREQ_DEBUG_CORE, "freq-table", msg) - /********************************************************************* * FREQUENCY TABLE HELPERS * *********************************************************************/ @@ -31,11 +28,11 @@ int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy, for (i = 0; (table[i].frequency != CPUFREQ_TABLE_END); i++) { unsigned int freq = table[i].frequency; if (freq == CPUFREQ_ENTRY_INVALID) { - dprintk("table entry %u is invalid, skipping\n", i); + pr_debug("table entry %u is invalid, skipping\n", i); continue; } - dprintk("table entry %u: %u kHz, %u index\n", + pr_debug("table entry %u: %u kHz, %u index\n", i, freq, table[i].index); if (freq < min_freq) min_freq = freq; @@ -61,7 +58,7 @@ int cpufreq_frequency_table_verify(struct cpufreq_policy *policy, unsigned int i; unsigned int count = 0; - dprintk("request for verification of policy (%u - %u kHz) for cpu %u\n", + pr_debug("request for verification of policy (%u - %u kHz) for cpu %u\n", policy->min, policy->max, policy->cpu); if (!cpu_online(policy->cpu)) @@ -86,7 +83,7 @@ int cpufreq_frequency_table_verify(struct cpufreq_policy *policy, cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, policy->cpuinfo.max_freq); - dprintk("verification lead to (%u - %u kHz) for cpu %u\n", + pr_debug("verification lead to (%u - %u kHz) for cpu %u\n", policy->min, policy->max, policy->cpu); return 0; @@ -110,7 +107,7 @@ int cpufreq_frequency_table_target(struct cpufreq_policy *policy, }; unsigned int i; - dprintk("request for target %u kHz (relation: %u) for cpu %u\n", + pr_debug("request for target %u kHz (relation: %u) for cpu %u\n", target_freq, relation, policy->cpu); switch (relation) { @@ -167,7 +164,7 @@ int cpufreq_frequency_table_target(struct cpufreq_policy *policy, } else *index = optimal.index; - dprintk("target is %u (%u kHz, %u)\n", *index, table[*index].frequency, + pr_debug("target is %u (%u kHz, %u)\n", *index, table[*index].frequency, table[*index].index); return 0; @@ -216,14 +213,14 @@ EXPORT_SYMBOL_GPL(cpufreq_freq_attr_scaling_available_freqs); void cpufreq_frequency_table_get_attr(struct cpufreq_frequency_table *table, unsigned int cpu) { - dprintk("setting show_table for cpu %u to %p\n", cpu, table); + pr_debug("setting show_table for cpu %u to %p\n", cpu, table); per_cpu(cpufreq_show_table, cpu) = table; } EXPORT_SYMBOL_GPL(cpufreq_frequency_table_get_attr); void cpufreq_frequency_table_put_attr(unsigned int cpu) { - dprintk("clearing show_table for cpu %u\n", cpu); + pr_debug("clearing show_table for cpu %u\n", cpu); per_cpu(cpufreq_show_table, cpu) = NULL; } EXPORT_SYMBOL_GPL(cpufreq_frequency_table_put_attr); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 9343dd3de858..2845f6e67221 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -397,23 +397,4 @@ void cpufreq_frequency_table_get_attr(struct cpufreq_frequency_table *table, void cpufreq_frequency_table_put_attr(unsigned int cpu); -/********************************************************************* - * UNIFIED DEBUG HELPERS * - *********************************************************************/ - -#define CPUFREQ_DEBUG_CORE 1 -#define CPUFREQ_DEBUG_DRIVER 2 -#define CPUFREQ_DEBUG_GOVERNOR 4 - -#ifdef CONFIG_CPU_FREQ_DEBUG - -extern void cpufreq_debug_printk(unsigned int type, const char *prefix, - const char *fmt, ...); - -#else - -#define cpufreq_debug_printk(msg...) 
do { } while(0) - -#endif /* CONFIG_CPU_FREQ_DEBUG */ - #endif /* _LINUX_CPUFREQ_H */ -- cgit v1.2.3-71-gd317 From 335dc3335ff692173bc766b248f7a97f3a23b30a Mon Sep 17 00:00:00 2001 From: Thiago Farina Date: Thu, 28 Apr 2011 20:42:53 -0300 Subject: [CPUFREQ] cpufreq.h: Fix some checkpatch.pl coding style issues. Before: $ scripts/checkpatch.pl --file --terse include/linux/cpufreq.h total: 14 errors, 11 warnings, 419 lines checked After: $ scripts/checkpatch.pl --file --terse include/linux/cpufreq.h total: 2 errors, 4 warnings, 422 lines checked Signed-off-by: Thiago Farina Signed-off-by: Dave Jones --- include/linux/cpufreq.h | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 2845f6e67221..11be48e0d168 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -3,7 +3,7 @@ * * Copyright (C) 2001 Russell King * (C) 2002 - 2003 Dominik Brodowski - * + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. @@ -56,9 +56,9 @@ static inline int cpufreq_unregister_notifier(struct notifier_block *nb, #define CPUFREQ_POLICY_POWERSAVE (1) #define CPUFREQ_POLICY_PERFORMANCE (2) -/* Frequency values here are CPU kHz so that hardware which doesn't run - * with some frequencies can complain without having to guess what per - * cent / per mille means. +/* Frequency values here are CPU kHz so that hardware which doesn't run + * with some frequencies can complain without having to guess what per + * cent / per mille means. * Maximum transition latency is in nanoseconds - if it's unknown, * CPUFREQ_ETERNAL shall be used. */ @@ -72,13 +72,15 @@ extern struct kobject *cpufreq_global_kobject; struct cpufreq_cpuinfo { unsigned int max_freq; unsigned int min_freq; - unsigned int transition_latency; /* in 10^(-9) s = nanoseconds */ + + /* in 10^(-9) s = nanoseconds */ + unsigned int transition_latency; }; struct cpufreq_real_policy { unsigned int min; /* in kHz */ unsigned int max; /* in kHz */ - unsigned int policy; /* see above */ + unsigned int policy; /* see above */ struct cpufreq_governor *governor; /* see below */ }; @@ -94,7 +96,7 @@ struct cpufreq_policy { unsigned int max; /* in kHz */ unsigned int cur; /* in kHz, only needed if cpufreq * governors are used */ - unsigned int policy; /* see above */ + unsigned int policy; /* see above */ struct cpufreq_governor *governor; /* see below */ struct work_struct update; /* if update_policy() needs to be @@ -167,11 +169,11 @@ static inline unsigned long cpufreq_scale(unsigned long old, u_int div, u_int mu struct cpufreq_governor { char name[CPUFREQ_NAME_LEN]; - int (*governor) (struct cpufreq_policy *policy, + int (*governor) (struct cpufreq_policy *policy, unsigned int event); ssize_t (*show_setspeed) (struct cpufreq_policy *policy, char *buf); - int (*store_setspeed) (struct cpufreq_policy *policy, + int (*store_setspeed) (struct cpufreq_policy *policy, unsigned int freq); unsigned int max_transition_latency; /* HW must be able to switch to next freq faster than this value in nano secs or we @@ -180,7 +182,8 @@ struct cpufreq_governor { struct module *owner; }; -/* pass a target to the cpufreq driver +/* + * Pass a target to the cpufreq driver. 
*/ extern int cpufreq_driver_target(struct cpufreq_policy *policy, unsigned int target_freq, @@ -237,9 +240,9 @@ struct cpufreq_driver { /* flags */ -#define CPUFREQ_STICKY 0x01 /* the driver isn't removed even if +#define CPUFREQ_STICKY 0x01 /* the driver isn't removed even if * all ->init() calls failed */ -#define CPUFREQ_CONST_LOOPS 0x02 /* loops_per_jiffy or other kernel +#define CPUFREQ_CONST_LOOPS 0x02 /* loops_per_jiffy or other kernel * "constants" aren't affected by * frequency transitions */ #define CPUFREQ_PM_NO_WARN 0x04 /* don't warn on suspend/resume speed @@ -252,7 +255,7 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver_data); void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state); -static inline void cpufreq_verify_within_limits(struct cpufreq_policy *policy, unsigned int min, unsigned int max) +static inline void cpufreq_verify_within_limits(struct cpufreq_policy *policy, unsigned int min, unsigned int max) { if (policy->min < min) policy->min = min; @@ -386,12 +389,12 @@ int cpufreq_frequency_table_target(struct cpufreq_policy *policy, /* the following 3 funtions are for cpufreq core use only */ struct cpufreq_frequency_table *cpufreq_frequency_get_table(unsigned int cpu); struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu); -void cpufreq_cpu_put (struct cpufreq_policy *data); +void cpufreq_cpu_put(struct cpufreq_policy *data); /* the following are really really optional */ extern struct freq_attr cpufreq_freq_attr_scaling_available_freqs; -void cpufreq_frequency_table_get_attr(struct cpufreq_frequency_table *table, +void cpufreq_frequency_table_get_attr(struct cpufreq_frequency_table *table, unsigned int cpu); void cpufreq_frequency_table_put_attr(unsigned int cpu); -- cgit v1.2.3-71-gd317 From 9a1b9496cd2b013f74885218947fa7120d53e74c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 4 May 2011 12:18:54 -0700 Subject: ipv4: Pass explicit saddr/daddr args to ipmr_get_route(). This eliminates the need to use rt->rt_{src,dst}. Signed-off-by: David S. 
Miller --- include/linux/mroute.h | 1 + net/ipv4/ipmr.c | 16 ++++++++-------- net/ipv4/route.c | 4 +++- 3 files changed, 12 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mroute.h b/include/linux/mroute.h index b21d567692b2..46caaf44339d 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -244,6 +244,7 @@ struct mfc_cache { #ifdef __KERNEL__ struct rtmsg; extern int ipmr_get_route(struct net *net, struct sk_buff *skb, + __be32 saddr, __be32 daddr, struct rtmsg *rtm, int nowait); #endif diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 86033b7a05ba..30a7763c400e 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2041,20 +2041,20 @@ rtattr_failure: return -EMSGSIZE; } -int ipmr_get_route(struct net *net, - struct sk_buff *skb, struct rtmsg *rtm, int nowait) +int ipmr_get_route(struct net *net, struct sk_buff *skb, + __be32 saddr, __be32 daddr, + struct rtmsg *rtm, int nowait) { - int err; - struct mr_table *mrt; struct mfc_cache *cache; - struct rtable *rt = skb_rtable(skb); + struct mr_table *mrt; + int err; mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); if (mrt == NULL) return -ENOENT; rcu_read_lock(); - cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst); + cache = ipmr_cache_find(mrt, saddr, daddr); if (cache == NULL) { struct sk_buff *skb2; @@ -2087,8 +2087,8 @@ int ipmr_get_route(struct net *net, skb_reset_network_header(skb2); iph = ip_hdr(skb2); iph->ihl = sizeof(struct iphdr) >> 2; - iph->saddr = rt->rt_src; - iph->daddr = rt->rt_dst; + iph->saddr = saddr; + iph->daddr = daddr; iph->version = 0; err = ipmr_cache_unresolved(mrt, vif, skb2); read_unlock(&mrt_lock); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 3bc685454b5b..6a83840b16af 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2857,7 +2857,9 @@ static int rt_fill_info(struct net *net, if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) && IPV4_DEVCONF_ALL(net, MC_FORWARDING)) { - int err = ipmr_get_route(net, skb, r, nowait); + int err = ipmr_get_route(net, skb, + rt->rt_src, rt->rt_dst, + r, nowait); if (err <= 0) { if (!nowait) { if (err == 0) -- cgit v1.2.3-71-gd317 From 1650629d1800bf05ad775f974e931ca2fa03b0ff Mon Sep 17 00:00:00 2001 From: Kurt Van Dijck Date: Tue, 3 May 2011 18:40:57 +0000 Subject: can: make struct can_proto const commit 53914b67993c724cec585863755c9ebc8446e83b had the same message. That commit did put everything in place but did not make can_proto const itself. Signed-off-by: Kurt Van Dijck Acked-by: Oliver Hartkopp Signed-off-by: David S. 
Miller --- include/linux/can/core.h | 4 ++-- net/can/af_can.c | 12 ++++++------ net/can/bcm.c | 2 +- net/can/raw.c | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/can/core.h b/include/linux/can/core.h index 6f70a6d3a16e..5ce6b5d62ecc 100644 --- a/include/linux/can/core.h +++ b/include/linux/can/core.h @@ -44,8 +44,8 @@ struct can_proto { /* function prototypes for the CAN networklayer core (af_can.c) */ -extern int can_proto_register(struct can_proto *cp); -extern void can_proto_unregister(struct can_proto *cp); +extern int can_proto_register(const struct can_proto *cp); +extern void can_proto_unregister(const struct can_proto *cp); extern int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask, diff --git a/net/can/af_can.c b/net/can/af_can.c index a8dcaa49675a..5b52762b9f20 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -84,7 +84,7 @@ static DEFINE_SPINLOCK(can_rcvlists_lock); static struct kmem_cache *rcv_cache __read_mostly; /* table of registered CAN protocols */ -static struct can_proto *proto_tab[CAN_NPROTO] __read_mostly; +static const struct can_proto *proto_tab[CAN_NPROTO] __read_mostly; static DEFINE_MUTEX(proto_tab_lock); struct timer_list can_stattimer; /* timer for statistics update */ @@ -115,9 +115,9 @@ static void can_sock_destruct(struct sock *sk) skb_queue_purge(&sk->sk_receive_queue); } -static struct can_proto *can_try_module_get(int protocol) +static const struct can_proto *can_try_module_get(int protocol) { - struct can_proto *cp; + const struct can_proto *cp; rcu_read_lock(); cp = rcu_dereference(proto_tab[protocol]); @@ -132,7 +132,7 @@ static int can_create(struct net *net, struct socket *sock, int protocol, int kern) { struct sock *sk; - struct can_proto *cp; + const struct can_proto *cp; int err = 0; sock->state = SS_UNCONNECTED; @@ -691,7 +691,7 @@ drop: * -EBUSY protocol already in use * -ENOBUF if proto_register() fails */ -int can_proto_register(struct can_proto *cp) +int can_proto_register(const struct can_proto *cp) { int proto = cp->protocol; int err = 0; @@ -728,7 +728,7 @@ EXPORT_SYMBOL(can_proto_register); * can_proto_unregister - unregister CAN transport protocol * @cp: pointer to CAN protocol structure */ -void can_proto_unregister(struct can_proto *cp) +void can_proto_unregister(const struct can_proto *cp) { int proto = cp->protocol; diff --git a/net/can/bcm.c b/net/can/bcm.c index 8a6a05e7c3c8..cced806098a9 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1601,7 +1601,7 @@ static struct proto bcm_proto __read_mostly = { .init = bcm_init, }; -static struct can_proto bcm_can_proto __read_mostly = { +static const struct can_proto bcm_can_proto = { .type = SOCK_DGRAM, .protocol = CAN_BCM, .ops = &bcm_ops, diff --git a/net/can/raw.c b/net/can/raw.c index 0eb39a7fdf64..dea99a6e596c 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -774,7 +774,7 @@ static struct proto raw_proto __read_mostly = { .init = raw_init, }; -static struct can_proto raw_can_proto __read_mostly = { +static const struct can_proto raw_can_proto = { .type = SOCK_RAW, .protocol = CAN_RAW, .ops = &raw_ops, -- cgit v1.2.3-71-gd317 From 30106b8ce2cc2243514116d6f29086e6deecc754 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 4 May 2011 15:38:19 +0200 Subject: slub: Fix the lockless code on 32-bit platforms with no 64-bit cmpxchg The SLUB allocator use of the cmpxchg_double logic was wrong: it actually needs the irq-safe one. 
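As an aside for readers, the race being fixed can be made concrete with a minimal, purely illustrative sketch of a generic cmpxchg-double emulation (the names below are invented; the kernel's real fallback lives in the percpu headers): without the interrupt disabling shown here, an interrupt arriving between the compare and the two stores can change the freelist/tid pair and have its update silently overwritten.

#include <linux/irqflags.h>
#include <linux/types.h>

/*
 * Illustrative sketch only, not the kernel's implementation: a generic
 * cmpxchg-double emulation is just a compare followed by two stores, so
 * it is only safe against interrupts if they are disabled around the
 * sequence, which is what the irqsafe_ variant guarantees.
 */
static bool sketch_cmpxchg_double(unsigned long *p1, unsigned long *p2,
				  unsigned long o1, unsigned long o2,
				  unsigned long n1, unsigned long n2)
{
	unsigned long flags;
	bool ok = false;

	local_irq_save(flags);	/* the unprotected variant omits this */
	if (*p1 == o1 && *p2 == o2) {
		*p1 = n1;
		*p2 = n2;
		ok = true;
	}
	local_irq_restore(flags);
	return ok;
}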
That happens automatically when we use the native unlocked 'cmpxchg8b' instruction, but when compiling the kernel for older x86 CPUs that do not support that instruction, we fall back to the generic emulation code. And if you don't specify that you want the irq-safe version, the generic code ends up just open-coding the cmpxchg8b equivalent without any protection against interrupts or preemption. Which definitely doesn't work for SLUB. This was reported by Werner Landgraf , who saw instability with his distro-kernel that was compiled to support pretty much everything under the sun. Most big Linux distributions tend to compile for PPro and later, and would never have noticed this problem. This also fixes the prototypes for the irqsafe cmpxchg_double functions to use 'bool' like they should. [ Btw, that whole "generic code defaults to no protection" design just sounds stupid - if the code needs no protection, there is no reason to use "cmpxchg_double" to begin with. So we should probably just remove the unprotected version entirely as pointless. - Linus ] Signed-off-by: Thomas Gleixner Reported-and-tested-by: werner Acked-and-tested-by: Ingo Molnar Acked-by: Christoph Lameter Cc: Pekka Enberg Cc: Jens Axboe Cc: Tejun Heo Link: http://lkml.kernel.org/r/alpine.LFD.2.02.1105041539050.3005@ionos Signed-off-by: Ingo Molnar Signed-off-by: Linus Torvalds --- include/linux/percpu.h | 2 +- mm/slub.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 3a5c4449fd36..8b97308e65df 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -948,7 +948,7 @@ do { \ irqsafe_generic_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) # endif # define irqsafe_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - __pcpu_double_call_return_int(irqsafe_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2)) + __pcpu_double_call_return_bool(irqsafe_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2)) #endif #endif /* __LINUX_PERCPU_H */ diff --git a/mm/slub.c b/mm/slub.c index 94d2a33a866e..9d2e5e46bf09 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1940,7 +1940,7 @@ redo: * Since this is without lock semantics the protection is only against * code executing on this cpu *not* from access by other cpus. */ - if (unlikely(!this_cpu_cmpxchg_double( + if (unlikely(!irqsafe_cpu_cmpxchg_double( s->cpu_slab->freelist, s->cpu_slab->tid, object, tid, get_freepointer(s, object), next_tid(tid)))) { @@ -2145,7 +2145,7 @@ redo: set_freepointer(s, object, c->freelist); #ifdef CONFIG_CMPXCHG_LOCAL - if (unlikely(!this_cpu_cmpxchg_double( + if (unlikely(!irqsafe_cpu_cmpxchg_double( s->cpu_slab->freelist, s->cpu_slab->tid, c->freelist, tid, object, next_tid(tid)))) { -- cgit v1.2.3-71-gd317 From 228e548e602061b08ee8e8966f567c12aa079682 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 2 May 2011 20:21:35 +0000 Subject: net: Add sendmmsg socket system call This patch adds a multiple message send syscall and is the send version of the existing recvmmsg syscall. This is heavily based on the patch by Arnaldo that added recvmmsg. I wrote a microbenchmark to test the performance gains of using this new syscall: http://ozlabs.org/~anton/junkcode/sendmmsg_test.c The test was run on a ppc64 box with a 10 Gbit network card. The benchmark can send both UDP and RAW ethernet packets. 
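For context on how the new call is used, a minimal userspace sketch might look like the following (illustrative only: it assumes a libc wrapper named sendmmsg(); without one, syscall(__NR_sendmmsg, ...) with the numbers assigned below serves the same purpose, and socket setup and error handling are omitted):

#define _GNU_SOURCE
#include <sys/socket.h>
#include <sys/uio.h>
#include <netinet/in.h>
#include <string.h>

/* Send two small UDP datagrams to 'dst' with a single system call. */
static int send_batch(int fd, struct sockaddr_in *dst)
{
	char payload[2][16] = { "datagram-0", "datagram-1" };
	struct iovec iov[2];
	struct mmsghdr msgs[2];
	int i;

	memset(msgs, 0, sizeof(msgs));
	for (i = 0; i < 2; i++) {
		iov[i].iov_base = payload[i];
		iov[i].iov_len = strlen(payload[i]);
		msgs[i].msg_hdr.msg_name = dst;
		msgs[i].msg_hdr.msg_namelen = sizeof(*dst);
		msgs[i].msg_hdr.msg_iov = &iov[i];
		msgs[i].msg_hdr.msg_iovlen = 1;
	}

	/* Returns how many of the two messages were sent (the byte count
	 * for each lands in msgs[i].msg_len), or -1 on error. */
	return sendmmsg(fd, msgs, 2, 0);
}

A caller that needs the whole batch delivered loops until the return value covers every entry, since a partial count is a normal result on non-blocking sockets.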
64B UDP batch pkts/sec 1 804570 2 872800 (+ 8 %) 4 916556 (+14 %) 8 939712 (+17 %) 16 952688 (+18 %) 32 956448 (+19 %) 64 964800 (+20 %) 64B raw socket batch pkts/sec 1 1201449 2 1350028 (+12 %) 4 1461416 (+22 %) 8 1513080 (+26 %) 16 1541216 (+28 %) 32 1553440 (+29 %) 64 1557888 (+30 %) We see a 20% improvement in throughput on UDP send and 30% on raw socket send. [ Add sparc syscall entries. -DaveM ] Signed-off-by: Anton Blanchard Signed-off-by: David S. Miller --- arch/powerpc/include/asm/systbl.h | 1 + arch/powerpc/include/asm/unistd.h | 3 +- arch/sparc/include/asm/unistd.h | 3 +- arch/sparc/kernel/systbls_32.S | 2 +- arch/sparc/kernel/systbls_64.S | 4 +- arch/x86/ia32/ia32entry.S | 1 + arch/x86/include/asm/unistd_32.h | 3 +- arch/x86/include/asm/unistd_64.h | 2 + arch/x86/kernel/syscall_table_32.S | 1 + include/linux/net.h | 1 + include/linux/socket.h | 2 + include/linux/syscalls.h | 2 + include/net/compat.h | 2 + kernel/sys_ni.c | 2 + net/compat.c | 16 ++- net/socket.c | 199 +++++++++++++++++++++++++++++-------- 16 files changed, 192 insertions(+), 52 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 60f64b132bd4..8489d372077f 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -352,3 +352,4 @@ SYSCALL_SPU(name_to_handle_at) COMPAT_SYS_SPU(open_by_handle_at) COMPAT_SYS_SPU(clock_adjtime) SYSCALL_SPU(syncfs) +COMPAT_SYS_SPU(sendmmsg) diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index 3c215648ce6d..6d23c8193caa 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -371,10 +371,11 @@ #define __NR_open_by_handle_at 346 #define __NR_clock_adjtime 347 #define __NR_syncfs 348 +#define __NR_sendmmsg 349 #ifdef __KERNEL__ -#define __NR_syscalls 349 +#define __NR_syscalls 350 #define __NR__exit __NR_exit #define NR_syscalls __NR_syscalls diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h index 9d897b6db983..c5387ed0add8 100644 --- a/arch/sparc/include/asm/unistd.h +++ b/arch/sparc/include/asm/unistd.h @@ -404,8 +404,9 @@ #define __NR_open_by_handle_at 333 #define __NR_clock_adjtime 334 #define __NR_syncfs 335 +#define __NR_sendmmsg 336 -#define NR_syscalls 336 +#define NR_syscalls 337 #ifdef __32bit_syscall_numbers__ /* Sparc 32-bit only has the "setresuid32", "getresuid32" variants, diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S index 47ac73c32e88..332c83ff7701 100644 --- a/arch/sparc/kernel/systbls_32.S +++ b/arch/sparc/kernel/systbls_32.S @@ -84,4 +84,4 @@ sys_call_table: /*320*/ .long sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv /*325*/ .long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init /*330*/ .long sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime -/*335*/ .long sys_syncfs +/*335*/ .long sys_syncfs, sys_sendmmsg diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S index 4f3170c1ef47..43887ca0be0e 100644 --- a/arch/sparc/kernel/systbls_64.S +++ b/arch/sparc/kernel/systbls_64.S @@ -85,7 +85,7 @@ sys_call_table32: /*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, compat_sys_preadv .word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open, compat_sys_recvmmsg, sys_fanotify_init /*330*/ .word sys32_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, compat_sys_open_by_handle_at, 
compat_sys_clock_adjtime - .word sys_syncfs + .word sys_syncfs, compat_sys_sendmmsg #endif /* CONFIG_COMPAT */ @@ -162,4 +162,4 @@ sys_call_table: /*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv .word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init /*330*/ .word sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime - .word sys_syncfs + .word sys_syncfs, sys_sendmmsg diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 849a9d23c71d..95f5826be458 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -848,4 +848,5 @@ ia32_sys_call_table: .quad compat_sys_open_by_handle_at .quad compat_sys_clock_adjtime .quad sys_syncfs + .quad compat_sys_sendmmsg /* 345 */ ia32_syscall_end: diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index a755ef5e5977..fb6a625c99bf 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h @@ -350,10 +350,11 @@ #define __NR_open_by_handle_at 342 #define __NR_clock_adjtime 343 #define __NR_syncfs 344 +#define __NR_sendmmsg 345 #ifdef __KERNEL__ -#define NR_syscalls 345 +#define NR_syscalls 346 #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index 160fa76bd578..79f90eb15aad 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h @@ -677,6 +677,8 @@ __SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at) __SYSCALL(__NR_clock_adjtime, sys_clock_adjtime) #define __NR_syncfs 306 __SYSCALL(__NR_syncfs, sys_syncfs) +#define __NR_sendmmsg 307 +__SYSCALL(__NR_sendmmsg, sys_sendmmsg) #ifndef __NO_STUBS #define __ARCH_WANT_OLD_READDIR diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index abce34d5c79d..32cbffb0c494 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S @@ -344,3 +344,4 @@ ENTRY(sys_call_table) .long sys_open_by_handle_at .long sys_clock_adjtime .long sys_syncfs + .long sys_sendmmsg /* 345 */ diff --git a/include/linux/net.h b/include/linux/net.h index 94de83c0f877..1da55e9b6f01 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -42,6 +42,7 @@ #define SYS_RECVMSG 17 /* sys_recvmsg(2) */ #define SYS_ACCEPT4 18 /* sys_accept4(2) */ #define SYS_RECVMMSG 19 /* sys_recvmmsg(2) */ +#define SYS_SENDMMSG 20 /* sys_sendmmsg(2) */ typedef enum { SS_FREE = 0, /* not allocated */ diff --git a/include/linux/socket.h b/include/linux/socket.h index d2b5e982f079..4ef98e422fde 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -333,5 +333,7 @@ struct timespec; extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, unsigned int flags, struct timespec *timeout); +extern int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, + unsigned int vlen, unsigned int flags); #endif /* not kernel and not glibc */ #endif /* _LINUX_SOCKET_H */ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 83ecc1749ef6..ab71447d0c5a 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -610,6 +610,8 @@ asmlinkage long sys_send(int, void __user *, size_t, unsigned); asmlinkage long sys_sendto(int, void __user *, size_t, unsigned, struct sockaddr __user *, int); asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags); +asmlinkage long sys_sendmmsg(int fd, struct mmsghdr __user *msg, + 
unsigned int vlen, unsigned flags); asmlinkage long sys_recv(int, void __user *, size_t, unsigned); asmlinkage long sys_recvfrom(int, void __user *, size_t, unsigned, struct sockaddr __user *, int __user *); diff --git a/include/net/compat.h b/include/net/compat.h index 28d5428ec6a2..9ee75edcc295 100644 --- a/include/net/compat.h +++ b/include/net/compat.h @@ -43,6 +43,8 @@ extern int compat_sock_get_timestampns(struct sock *, struct timespec __user *); extern int get_compat_msghdr(struct msghdr *, struct compat_msghdr __user *); extern int verify_compat_iovec(struct msghdr *, struct iovec *, struct sockaddr *, int); extern asmlinkage long compat_sys_sendmsg(int,struct compat_msghdr __user *,unsigned); +extern asmlinkage long compat_sys_sendmmsg(int, struct compat_mmsghdr __user *, + unsigned, unsigned); extern asmlinkage long compat_sys_recvmsg(int,struct compat_msghdr __user *,unsigned); extern asmlinkage long compat_sys_recvmmsg(int, struct compat_mmsghdr __user *, unsigned, unsigned, diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 25cc41cd8f33..97e966f171c6 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -46,7 +46,9 @@ cond_syscall(sys_getsockopt); cond_syscall(compat_sys_getsockopt); cond_syscall(sys_shutdown); cond_syscall(sys_sendmsg); +cond_syscall(sys_sendmmsg); cond_syscall(compat_sys_sendmsg); +cond_syscall(compat_sys_sendmmsg); cond_syscall(sys_recvmsg); cond_syscall(sys_recvmmsg); cond_syscall(compat_sys_recvmsg); diff --git a/net/compat.c b/net/compat.c index 3649d5895361..c578d9382e19 100644 --- a/net/compat.c +++ b/net/compat.c @@ -722,11 +722,11 @@ EXPORT_SYMBOL(compat_mc_getsockopt); /* Argument list sizes for compat_sys_socketcall */ #define AL(x) ((x) * sizeof(u32)) -static unsigned char nas[20] = { +static unsigned char nas[21] = { AL(0), AL(3), AL(3), AL(3), AL(2), AL(3), AL(3), AL(3), AL(4), AL(4), AL(4), AL(6), AL(6), AL(2), AL(5), AL(5), AL(3), AL(3), - AL(4), AL(5) + AL(4), AL(5), AL(4) }; #undef AL @@ -735,6 +735,13 @@ asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, uns return sys_sendmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT); } +asmlinkage long compat_sys_sendmmsg(int fd, struct compat_mmsghdr __user *mmsg, + unsigned vlen, unsigned int flags) +{ + return __sys_sendmmsg(fd, (struct mmsghdr __user *)mmsg, vlen, + flags | MSG_CMSG_COMPAT); +} + asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, unsigned int flags) { return sys_recvmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT); @@ -780,7 +787,7 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args) u32 a[6]; u32 a0, a1; - if (call < SYS_SOCKET || call > SYS_RECVMMSG) + if (call < SYS_SOCKET || call > SYS_SENDMMSG) return -EINVAL; if (copy_from_user(a, args, nas[call])) return -EFAULT; @@ -839,6 +846,9 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args) case SYS_SENDMSG: ret = compat_sys_sendmsg(a0, compat_ptr(a1), a[2]); break; + case SYS_SENDMMSG: + ret = compat_sys_sendmmsg(a0, compat_ptr(a1), a[2], a[3]); + break; case SYS_RECVMSG: ret = compat_sys_recvmsg(a0, compat_ptr(a1), a[2]); break; diff --git a/net/socket.c b/net/socket.c index d25f5a9d6fa2..ed50255143d5 100644 --- a/net/socket.c +++ b/net/socket.c @@ -551,11 +551,10 @@ int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags) } EXPORT_SYMBOL(sock_tx_timestamp); -static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size) +static inline int __sock_sendmsg_nosec(struct kiocb 
*iocb, struct socket *sock, + struct msghdr *msg, size_t size) { struct sock_iocb *si = kiocb_to_siocb(iocb); - int err; sock_update_classid(sock->sk); @@ -564,13 +563,17 @@ static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, si->msg = msg; si->size = size; - err = security_socket_sendmsg(sock, msg, size); - if (err) - return err; - return sock->ops->sendmsg(iocb, sock, msg, size); } +static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *msg, size_t size) +{ + int err = security_socket_sendmsg(sock, msg, size); + + return err ?: __sock_sendmsg_nosec(iocb, sock, msg, size); +} + int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { struct kiocb iocb; @@ -586,6 +589,20 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) } EXPORT_SYMBOL(sock_sendmsg); +int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size) +{ + struct kiocb iocb; + struct sock_iocb siocb; + int ret; + + init_sync_kiocb(&iocb, NULL); + iocb.private = &siocb; + ret = __sock_sendmsg_nosec(&iocb, sock, msg, size); + if (-EIOCBQUEUED == ret) + ret = wait_on_sync_kiocb(&iocb); + return ret; +} + int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t size) { @@ -1863,57 +1880,47 @@ SYSCALL_DEFINE2(shutdown, int, fd, int, how) #define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen) #define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags) -/* - * BSD sendmsg interface - */ - -SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags) +static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg, + struct msghdr *msg_sys, unsigned flags, int nosec) { struct compat_msghdr __user *msg_compat = (struct compat_msghdr __user *)msg; - struct socket *sock; struct sockaddr_storage address; struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; unsigned char ctl[sizeof(struct cmsghdr) + 20] __attribute__ ((aligned(sizeof(__kernel_size_t)))); /* 20 is size of ipv6_pktinfo */ unsigned char *ctl_buf = ctl; - struct msghdr msg_sys; int err, ctl_len, iov_size, total_len; - int fput_needed; err = -EFAULT; if (MSG_CMSG_COMPAT & flags) { - if (get_compat_msghdr(&msg_sys, msg_compat)) + if (get_compat_msghdr(msg_sys, msg_compat)) return -EFAULT; - } else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) + } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr))) return -EFAULT; - sock = sockfd_lookup_light(fd, &err, &fput_needed); - if (!sock) - goto out; - /* do not move before msg_sys is valid */ err = -EMSGSIZE; - if (msg_sys.msg_iovlen > UIO_MAXIOV) - goto out_put; + if (msg_sys->msg_iovlen > UIO_MAXIOV) + goto out; /* Check whether to allocate the iovec area */ err = -ENOMEM; - iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); - if (msg_sys.msg_iovlen > UIO_FASTIOV) { + iov_size = msg_sys->msg_iovlen * sizeof(struct iovec); + if (msg_sys->msg_iovlen > UIO_FASTIOV) { iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); if (!iov) - goto out_put; + goto out; } /* This will also move the address data into kernel space */ if (MSG_CMSG_COMPAT & flags) { - err = verify_compat_iovec(&msg_sys, iov, + err = verify_compat_iovec(msg_sys, iov, (struct sockaddr *)&address, VERIFY_READ); } else - err = verify_iovec(&msg_sys, iov, + err = verify_iovec(msg_sys, iov, (struct sockaddr *)&address, VERIFY_READ); if (err < 0) @@ -1922,17 +1929,17 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags) err = -ENOBUFS; - if 
(msg_sys.msg_controllen > INT_MAX) + if (msg_sys->msg_controllen > INT_MAX) goto out_freeiov; - ctl_len = msg_sys.msg_controllen; + ctl_len = msg_sys->msg_controllen; if ((MSG_CMSG_COMPAT & flags) && ctl_len) { err = - cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl, + cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl, sizeof(ctl)); if (err) goto out_freeiov; - ctl_buf = msg_sys.msg_control; - ctl_len = msg_sys.msg_controllen; + ctl_buf = msg_sys->msg_control; + ctl_len = msg_sys->msg_controllen; } else if (ctl_len) { if (ctl_len > sizeof(ctl)) { ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL); @@ -1941,21 +1948,22 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags) } err = -EFAULT; /* - * Careful! Before this, msg_sys.msg_control contains a user pointer. + * Careful! Before this, msg_sys->msg_control contains a user pointer. * Afterwards, it will be a kernel pointer. Thus the compiler-assisted * checking falls down on this. */ if (copy_from_user(ctl_buf, - (void __user __force *)msg_sys.msg_control, + (void __user __force *)msg_sys->msg_control, ctl_len)) goto out_freectl; - msg_sys.msg_control = ctl_buf; + msg_sys->msg_control = ctl_buf; } - msg_sys.msg_flags = flags; + msg_sys->msg_flags = flags; if (sock->file->f_flags & O_NONBLOCK) - msg_sys.msg_flags |= MSG_DONTWAIT; - err = sock_sendmsg(sock, &msg_sys, total_len); + msg_sys->msg_flags |= MSG_DONTWAIT; + err = (nosec ? sock_sendmsg_nosec : sock_sendmsg)(sock, msg_sys, + total_len); out_freectl: if (ctl_buf != ctl) @@ -1963,12 +1971,114 @@ out_freectl: out_freeiov: if (iov != iovstack) sock_kfree_s(sock->sk, iov, iov_size); -out_put: +out: + return err; +} + +/* + * BSD sendmsg interface + */ + +SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags) +{ + int fput_needed, err; + struct msghdr msg_sys; + struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed); + + if (!sock) + goto out; + + err = __sys_sendmsg(sock, msg, &msg_sys, flags, 0); + fput_light(sock->file, fput_needed); out: return err; } +/* + * Linux sendmmsg interface + */ + +int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, + unsigned int flags) +{ + int fput_needed, err, datagrams; + struct socket *sock; + struct mmsghdr __user *entry; + struct compat_mmsghdr __user *compat_entry; + struct msghdr msg_sys; + + datagrams = 0; + + sock = sockfd_lookup_light(fd, &err, &fput_needed); + if (!sock) + return err; + + err = sock_error(sock->sk); + if (err) + goto out_put; + + entry = mmsg; + compat_entry = (struct compat_mmsghdr __user *)mmsg; + + while (datagrams < vlen) { + /* + * No need to ask LSM for more than the first datagram. + */ + if (MSG_CMSG_COMPAT & flags) { + err = __sys_sendmsg(sock, (struct msghdr __user *)compat_entry, + &msg_sys, flags, datagrams); + if (err < 0) + break; + err = __put_user(err, &compat_entry->msg_len); + ++compat_entry; + } else { + err = __sys_sendmsg(sock, (struct msghdr __user *)entry, + &msg_sys, flags, datagrams); + if (err < 0) + break; + err = put_user(err, &entry->msg_len); + ++entry; + } + + if (err) + break; + ++datagrams; + } + +out_put: + fput_light(sock->file, fput_needed); + + if (err == 0) + return datagrams; + + if (datagrams != 0) { + /* + * We may send less entries than requested (vlen) if the + * sock is non blocking... + */ + if (err != -EAGAIN) { + /* + * ... 
or if sendmsg returns an error after we + * send some datagrams, where we record the + * error to return on the next call or if the + * app asks about it using getsockopt(SO_ERROR). + */ + sock->sk->sk_err = -err; + } + + return datagrams; + } + + return err; +} + +SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg, + unsigned int, vlen, unsigned int, flags) +{ + return __sys_sendmmsg(fd, mmsg, vlen, flags); +} + static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg, struct msghdr *msg_sys, unsigned flags, int nosec) { @@ -2214,11 +2324,11 @@ SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg, #ifdef __ARCH_WANT_SYS_SOCKETCALL /* Argument list sizes for sys_socketcall */ #define AL(x) ((x) * sizeof(unsigned long)) -static const unsigned char nargs[20] = { +static const unsigned char nargs[21] = { AL(0), AL(3), AL(3), AL(3), AL(2), AL(3), AL(3), AL(3), AL(4), AL(4), AL(4), AL(6), AL(6), AL(2), AL(5), AL(5), AL(3), AL(3), - AL(4), AL(5) + AL(4), AL(5), AL(4) }; #undef AL @@ -2238,7 +2348,7 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) int err; unsigned int len; - if (call < 1 || call > SYS_RECVMMSG) + if (call < 1 || call > SYS_SENDMMSG) return -EINVAL; len = nargs[call]; @@ -2313,6 +2423,9 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) case SYS_SENDMSG: err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]); break; + case SYS_SENDMMSG: + err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]); + break; case SYS_RECVMSG: err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]); break; -- cgit v1.2.3-71-gd317 From ff1b6e69ad4f31fb3c9c6da2665655f2e798dd70 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 4 May 2011 15:37:28 +0200 Subject: nl80211/cfg80211: WoWLAN support This is based on (but now quite far from) the original work from Luis and Eliad. Add support for configuring WoWLAN triggers, and getting the configuration out again. Changes from the original patchset are too numerous to list, but one important change needs highlighting: the suspend() callback is passed NULL for the trigger configuration if userspace has not configured WoWLAN at all. Signed-off-by: Luis R. Rodriguez Signed-off-by: Eliad Peller Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 99 ++++++++++++++++++++ include/net/cfg80211.h | 74 ++++++++++++++- net/mac80211/cfg.c | 3 +- net/wireless/core.c | 8 ++ net/wireless/core.h | 14 +++ net/wireless/nl80211.c | 243 ++++++++++++++++++++++++++++++++++++++++++++++++ net/wireless/sysfs.c | 2 +- 7 files changed, 439 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index be8df57b789d..a75dea9c416e 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -420,6 +420,14 @@ * new station with the AUTHENTICATED flag unset and maybe change it later * depending on the authentication result. * + * @NL80211_CMD_GET_WOWLAN: get Wake-on-Wireless-LAN (WoWLAN) settings. + * @NL80211_CMD_SET_WOWLAN: set Wake-on-Wireless-LAN (WoWLAN) settings. + * Since wireless is more complex than wired ethernet, it supports + * various triggers. These triggers can be configured through this + * command with the %NL80211_ATTR_WOWLAN_TRIGGERS attribute. For + * more background information, see + * http://wireless.kernel.org/en/users/Documentation/WoWLAN. 
+ * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -534,6 +542,9 @@ enum nl80211_commands { NL80211_CMD_NEW_PEER_CANDIDATE, + NL80211_CMD_GET_WOWLAN, + NL80211_CMD_SET_WOWLAN, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -903,6 +914,13 @@ enum nl80211_commands { * allows auth frames in a mesh to be passed to userspace for processing via * the @NL80211_MESH_SETUP_USERSPACE_AUTH flag. * + * @NL80211_ATTR_WOWLAN_SUPPORTED: indicates, as part of the wiphy capabilities, + * the supported WoWLAN triggers + * @NL80211_ATTR_WOWLAN_TRIGGERS: used by %NL80211_CMD_SET_WOWLAN to + * indicate which WoW triggers should be enabled. This is also + * used by %NL80211_CMD_GET_WOWLAN to get the currently enabled WoWLAN + * triggers. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1092,6 +1110,9 @@ enum nl80211_attrs { NL80211_ATTR_SUPPORT_MESH_AUTH, + NL80211_ATTR_WOWLAN_TRIGGERS, + NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -2061,4 +2082,82 @@ enum nl80211_tx_power_setting { NL80211_TX_POWER_FIXED, }; +/** + * enum nl80211_wowlan_packet_pattern_attr - WoWLAN packet pattern attribute + * @__NL80211_WOWLAN_PKTPAT_INVALID: invalid number for nested attribute + * @NL80211_WOWLAN_PKTPAT_PATTERN: the pattern, values where the mask has + * a zero bit are ignored + * @NL80211_WOWLAN_PKTPAT_MASK: pattern mask, must be long enough to have + * a bit for each byte in the pattern. The lowest-order bit corresponds + * to the first byte of the pattern, but the bytes of the pattern are + * in a little-endian-like format, i.e. the 9th byte of the pattern + * corresponds to the lowest-order bit in the second byte of the mask. + * For example: The match 00:xx:00:00:xx:00:00:00:00:xx:xx:xx (where + * xx indicates "don't care") would be represented by a pattern of + * twelve zero bytes, and a mask of "0xed,0x07". + * Note that the pattern matching is done as though frames were not + * 802.11 frames but 802.3 frames, i.e. the frame is fully unpacked + * first (including SNAP header unpacking) and then matched. + * @NUM_NL80211_WOWLAN_PKTPAT: number of attributes + * @MAX_NL80211_WOWLAN_PKTPAT: max attribute number + */ +enum nl80211_wowlan_packet_pattern_attr { + __NL80211_WOWLAN_PKTPAT_INVALID, + NL80211_WOWLAN_PKTPAT_MASK, + NL80211_WOWLAN_PKTPAT_PATTERN, + + NUM_NL80211_WOWLAN_PKTPAT, + MAX_NL80211_WOWLAN_PKTPAT = NUM_NL80211_WOWLAN_PKTPAT - 1, +}; + +/** + * struct nl80211_wowlan_pattern_support - pattern support information + * @max_patterns: maximum number of patterns supported + * @min_pattern_len: minimum length of each pattern + * @max_pattern_len: maximum length of each pattern + * + * This struct is carried in %NL80211_WOWLAN_TRIG_PKT_PATTERN when + * that is part of %NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED in the + * capability information given by the kernel to userspace. 
+ */ +struct nl80211_wowlan_pattern_support { + __u32 max_patterns; + __u32 min_pattern_len; + __u32 max_pattern_len; +} __attribute__((packed)); + +/** + * enum nl80211_wowlan_triggers - WoWLAN trigger definitions + * @__NL80211_WOWLAN_TRIG_INVALID: invalid number for nested attributes + * @NL80211_WOWLAN_TRIG_ANY: wake up on any activity, do not really put + * the chip into a special state -- works best with chips that have + * support for low-power operation already (flag) + * @NL80211_WOWLAN_TRIG_DISCONNECT: wake up on disconnect, the way disconnect + * is detected is implementation-specific (flag) + * @NL80211_WOWLAN_TRIG_MAGIC_PKT: wake up on magic packet (6x 0xff, followed + * by 16 repetitions of MAC addr, anywhere in payload) (flag) + * @NL80211_WOWLAN_TRIG_PKT_PATTERN: wake up on the specified packet patterns + * which are passed in an array of nested attributes, each nested attribute + * defining a with attributes from &struct nl80211_wowlan_trig_pkt_pattern. + * Each pattern defines a wakeup packet. The matching is done on the MSDU, + * i.e. as though the packet was an 802.3 packet, so the pattern matching + * is done after the packet is converted to the MSDU. + * + * In %NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED, it is a binary attribute + * carrying a &struct nl80211_wowlan_pattern_support. + * @NUM_NL80211_WOWLAN_TRIG: number of wake on wireless triggers + * @MAX_NL80211_WOWLAN_TRIG: highest wowlan trigger attribute number + */ +enum nl80211_wowlan_triggers { + __NL80211_WOWLAN_TRIG_INVALID, + NL80211_WOWLAN_TRIG_ANY, + NL80211_WOWLAN_TRIG_DISCONNECT, + NL80211_WOWLAN_TRIG_MAGIC_PKT, + NL80211_WOWLAN_TRIG_PKT_PATTERN, + + /* keep last */ + NUM_NL80211_WOWLAN_TRIG, + MAX_NL80211_WOWLAN_TRIG = NUM_NL80211_WOWLAN_TRIG - 1 +}; + #endif /* __LINUX_NL80211_H */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 4932dfcb72b4..0920daf36807 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1087,6 +1087,38 @@ struct cfg80211_pmksa { u8 *pmkid; }; +/** + * struct cfg80211_wowlan_trig_pkt_pattern - packet pattern + * @mask: bitmask where to match pattern and where to ignore bytes, + * one bit per byte, in same format as nl80211 + * @pattern: bytes to match where bitmask is 1 + * @pattern_len: length of pattern (in bytes) + * + * Internal note: @mask and @pattern are allocated in one chunk of + * memory, free @mask only! + */ +struct cfg80211_wowlan_trig_pkt_pattern { + u8 *mask, *pattern; + int pattern_len; +}; + +/** + * struct cfg80211_wowlan - Wake on Wireless-LAN support info + * + * This structure defines the enabled WoWLAN triggers for the device. + * @any: wake up on any activity -- special trigger if device continues + * operating as normal during suspend + * @disconnect: wake up if getting disconnected + * @magic_pkt: wake up on receiving magic packet + * @patterns: wake up on receiving packet matching a pattern + * @n_patterns: number of patterns + */ +struct cfg80211_wowlan { + bool any, disconnect, magic_pkt; + struct cfg80211_wowlan_trig_pkt_pattern *patterns; + int n_patterns; +}; + /** * struct cfg80211_ops - backend description for wireless configuration * @@ -1100,7 +1132,9 @@ struct cfg80211_pmksa { * wireless extensions but this is subject to reevaluation as soon as this * code is used more widely and we have a first user without wext. * - * @suspend: wiphy device needs to be suspended + * @suspend: wiphy device needs to be suspended. 
The variable @wow will + * be %NULL or contain the enabled Wake-on-Wireless triggers that are + * configured for the device. * @resume: wiphy device needs to be resumed * * @add_virtual_intf: create a new virtual interface with the given name, @@ -1244,7 +1278,7 @@ struct cfg80211_pmksa { * @get_ringparam: Get tx and rx ring current and maximum sizes. */ struct cfg80211_ops { - int (*suspend)(struct wiphy *wiphy); + int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); int (*resume)(struct wiphy *wiphy); struct net_device * (*add_virtual_intf)(struct wiphy *wiphy, @@ -1479,6 +1513,38 @@ struct ieee80211_txrx_stypes { u16 tx, rx; }; +/** + * enum wiphy_wowlan_support_flags - WoWLAN support flags + * @WIPHY_WOWLAN_ANY: supports wakeup for the special "any" + * trigger that keeps the device operating as-is and + * wakes up the host on any activity, for example a + * received packet that passed filtering; note that the + * packet should be preserved in that case + * @WIPHY_WOWLAN_MAGIC_PKT: supports wakeup on magic packet + * (see nl80211.h) + * @WIPHY_WOWLAN_DISCONNECT: supports wakeup on disconnect + */ +enum wiphy_wowlan_support_flags { + WIPHY_WOWLAN_ANY = BIT(0), + WIPHY_WOWLAN_MAGIC_PKT = BIT(1), + WIPHY_WOWLAN_DISCONNECT = BIT(2), +}; + +/** + * struct wiphy_wowlan_support - WoWLAN support data + * @flags: see &enum wiphy_wowlan_support_flags + * @n_patterns: number of supported wakeup patterns + * (see nl80211.h for the pattern definition) + * @pattern_max_len: maximum length of each pattern + * @pattern_min_len: minimum length of each pattern + */ +struct wiphy_wowlan_support { + u32 flags; + int n_patterns; + int pattern_max_len; + int pattern_min_len; +}; + /** * struct wiphy - wireless hardware description * @reg_notifier: the driver's regulatory notification callback, @@ -1546,6 +1612,8 @@ struct ieee80211_txrx_stypes { * * @max_remain_on_channel_duration: Maximum time a remain-on-channel operation * may request, if implemented. 
+ * + * @wowlan: WoWLAN support information */ struct wiphy { /* assign these fields before you register the wiphy */ @@ -1583,6 +1651,8 @@ struct wiphy { char fw_version[ETHTOOL_BUSINFO_LEN]; u32 hw_version; + struct wiphy_wowlan_support wowlan; + u16 max_remain_on_channel_duration; u8 max_num_pmkids; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 12d52cec9515..321d598eb8cb 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1297,7 +1297,8 @@ static int ieee80211_set_channel(struct wiphy *wiphy, } #ifdef CONFIG_PM -static int ieee80211_suspend(struct wiphy *wiphy) +static int ieee80211_suspend(struct wiphy *wiphy, + struct cfg80211_wowlan *wowlan) { return __ieee80211_suspend(wiphy_priv(wiphy)); } diff --git a/net/wireless/core.c b/net/wireless/core.c index bbf1fa11107a..bea0d80710c8 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -493,6 +493,13 @@ int wiphy_register(struct wiphy *wiphy) return -EINVAL; } + if (rdev->wiphy.wowlan.n_patterns) { + if (WARN_ON(!rdev->wiphy.wowlan.pattern_min_len || + rdev->wiphy.wowlan.pattern_min_len > + rdev->wiphy.wowlan.pattern_max_len)) + return -EINVAL; + } + /* check and set up bitrates */ ieee80211_set_bitrate_flags(wiphy); @@ -631,6 +638,7 @@ void cfg80211_dev_free(struct cfg80211_registered_device *rdev) mutex_destroy(&rdev->devlist_mtx); list_for_each_entry_safe(scan, tmp, &rdev->bss_list, list) cfg80211_put_bss(&scan->pub); + cfg80211_rdev_free_wowlan(rdev); kfree(rdev); } diff --git a/net/wireless/core.h b/net/wireless/core.h index 26a0a084e16b..7a18c10a7fb6 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -70,6 +70,8 @@ struct cfg80211_registered_device { struct work_struct conn_work; struct work_struct event_work; + struct cfg80211_wowlan *wowlan; + /* must be last because of the way we do wiphy_priv(), * and it should at least be aligned to NETDEV_ALIGN */ struct wiphy wiphy __attribute__((__aligned__(NETDEV_ALIGN))); @@ -89,6 +91,18 @@ bool wiphy_idx_valid(int wiphy_idx) return wiphy_idx >= 0; } +static inline void +cfg80211_rdev_free_wowlan(struct cfg80211_registered_device *rdev) +{ + int i; + + if (!rdev->wowlan) + return; + for (i = 0; i < rdev->wowlan->n_patterns; i++) + kfree(rdev->wowlan->patterns[i].mask); + kfree(rdev->wowlan->patterns); + kfree(rdev->wowlan); +} extern struct workqueue_struct *cfg80211_wq; extern struct mutex cfg80211_mutex; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index ab77f943dc04..0a199a1ca9b6 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -173,6 +173,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_MCAST_RATE] = { .type = NLA_U32 }, [NL80211_ATTR_OFFCHANNEL_TX_OK] = { .type = NLA_FLAG }, [NL80211_ATTR_KEY_DEFAULT_TYPES] = { .type = NLA_NESTED }, + [NL80211_ATTR_WOWLAN_TRIGGERS] = { .type = NLA_NESTED }, }; /* policy for the key attributes */ @@ -194,6 +195,15 @@ nl80211_key_default_policy[NUM_NL80211_KEY_DEFAULT_TYPES] = { [NL80211_KEY_DEFAULT_TYPE_MULTICAST] = { .type = NLA_FLAG }, }; +/* policy for WoWLAN attributes */ +static const struct nla_policy +nl80211_wowlan_policy[NUM_NL80211_WOWLAN_TRIG] = { + [NL80211_WOWLAN_TRIG_ANY] = { .type = NLA_FLAG }, + [NL80211_WOWLAN_TRIG_DISCONNECT] = { .type = NLA_FLAG }, + [NL80211_WOWLAN_TRIG_MAGIC_PKT] = { .type = NLA_FLAG }, + [NL80211_WOWLAN_TRIG_PKT_PATTERN] = { .type = NLA_NESTED }, +}; + /* ifidx get helper */ static int nl80211_get_ifidx(struct netlink_callback *cb) { @@ -821,6 +831,35 @@ static int nl80211_send_wiphy(struct sk_buff 
*msg, u32 pid, u32 seq, int flags, nla_nest_end(msg, nl_ifs); } + if (dev->wiphy.wowlan.flags || dev->wiphy.wowlan.n_patterns) { + struct nlattr *nl_wowlan; + + nl_wowlan = nla_nest_start(msg, + NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED); + if (!nl_wowlan) + goto nla_put_failure; + + if (dev->wiphy.wowlan.flags & WIPHY_WOWLAN_ANY) + NLA_PUT_FLAG(msg, NL80211_WOWLAN_TRIG_ANY); + if (dev->wiphy.wowlan.flags & WIPHY_WOWLAN_DISCONNECT) + NLA_PUT_FLAG(msg, NL80211_WOWLAN_TRIG_DISCONNECT); + if (dev->wiphy.wowlan.flags & WIPHY_WOWLAN_MAGIC_PKT) + NLA_PUT_FLAG(msg, NL80211_WOWLAN_TRIG_MAGIC_PKT); + if (dev->wiphy.wowlan.n_patterns) { + struct nl80211_wowlan_pattern_support pat = { + .max_patterns = dev->wiphy.wowlan.n_patterns, + .min_pattern_len = + dev->wiphy.wowlan.pattern_min_len, + .max_pattern_len = + dev->wiphy.wowlan.pattern_max_len, + }; + NLA_PUT(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN, + sizeof(pat), &pat); + } + + nla_nest_end(msg, nl_wowlan); + } + return genlmsg_end(msg, hdr); nla_put_failure: @@ -4808,6 +4847,194 @@ static int nl80211_leave_mesh(struct sk_buff *skb, struct genl_info *info) return cfg80211_leave_mesh(rdev, dev); } +static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct sk_buff *msg; + void *hdr; + + if (!rdev->wiphy.wowlan.flags && !rdev->wiphy.wowlan.n_patterns) + return -EOPNOTSUPP; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + NL80211_CMD_GET_WOWLAN); + if (!hdr) + goto nla_put_failure; + + if (rdev->wowlan) { + struct nlattr *nl_wowlan; + + nl_wowlan = nla_nest_start(msg, NL80211_ATTR_WOWLAN_TRIGGERS); + if (!nl_wowlan) + goto nla_put_failure; + + if (rdev->wowlan->any) + NLA_PUT_FLAG(msg, NL80211_WOWLAN_TRIG_ANY); + if (rdev->wowlan->disconnect) + NLA_PUT_FLAG(msg, NL80211_WOWLAN_TRIG_DISCONNECT); + if (rdev->wowlan->magic_pkt) + NLA_PUT_FLAG(msg, NL80211_WOWLAN_TRIG_MAGIC_PKT); + if (rdev->wowlan->n_patterns) { + struct nlattr *nl_pats, *nl_pat; + int i, pat_len; + + nl_pats = nla_nest_start(msg, + NL80211_WOWLAN_TRIG_PKT_PATTERN); + if (!nl_pats) + goto nla_put_failure; + + for (i = 0; i < rdev->wowlan->n_patterns; i++) { + nl_pat = nla_nest_start(msg, i + 1); + if (!nl_pat) + goto nla_put_failure; + pat_len = rdev->wowlan->patterns[i].pattern_len; + NLA_PUT(msg, NL80211_WOWLAN_PKTPAT_MASK, + DIV_ROUND_UP(pat_len, 8), + rdev->wowlan->patterns[i].mask); + NLA_PUT(msg, NL80211_WOWLAN_PKTPAT_PATTERN, + pat_len, + rdev->wowlan->patterns[i].pattern); + nla_nest_end(msg, nl_pat); + } + nla_nest_end(msg, nl_pats); + } + + nla_nest_end(msg, nl_wowlan); + } + + genlmsg_end(msg, hdr); + return genlmsg_reply(msg, info); + +nla_put_failure: + nlmsg_free(msg); + return -ENOBUFS; +} + +static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct nlattr *tb[NUM_NL80211_WOWLAN_TRIG]; + struct cfg80211_wowlan no_triggers = {}; + struct cfg80211_wowlan new_triggers = {}; + struct wiphy_wowlan_support *wowlan = &rdev->wiphy.wowlan; + int err, i; + + if (!rdev->wiphy.wowlan.flags && !rdev->wiphy.wowlan.n_patterns) + return -EOPNOTSUPP; + + if (!info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS]) + goto no_triggers; + + err = nla_parse(tb, MAX_NL80211_WOWLAN_TRIG, + nla_data(info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS]), + nla_len(info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS]), + nl80211_wowlan_policy); + if (err) + 
return err; + + if (tb[NL80211_WOWLAN_TRIG_ANY]) { + if (!(wowlan->flags & WIPHY_WOWLAN_ANY)) + return -EINVAL; + new_triggers.any = true; + } + + if (tb[NL80211_WOWLAN_TRIG_DISCONNECT]) { + if (!(wowlan->flags & WIPHY_WOWLAN_DISCONNECT)) + return -EINVAL; + new_triggers.disconnect = true; + } + + if (tb[NL80211_WOWLAN_TRIG_MAGIC_PKT]) { + if (!(wowlan->flags & WIPHY_WOWLAN_MAGIC_PKT)) + return -EINVAL; + new_triggers.magic_pkt = true; + } + + if (tb[NL80211_WOWLAN_TRIG_PKT_PATTERN]) { + struct nlattr *pat; + int n_patterns = 0; + int rem, pat_len, mask_len; + struct nlattr *pat_tb[NUM_NL80211_WOWLAN_PKTPAT]; + + nla_for_each_nested(pat, tb[NL80211_WOWLAN_TRIG_PKT_PATTERN], + rem) + n_patterns++; + if (n_patterns > wowlan->n_patterns) + return -EINVAL; + + new_triggers.patterns = kcalloc(n_patterns, + sizeof(new_triggers.patterns[0]), + GFP_KERNEL); + if (!new_triggers.patterns) + return -ENOMEM; + + new_triggers.n_patterns = n_patterns; + i = 0; + + nla_for_each_nested(pat, tb[NL80211_WOWLAN_TRIG_PKT_PATTERN], + rem) { + nla_parse(pat_tb, MAX_NL80211_WOWLAN_PKTPAT, + nla_data(pat), nla_len(pat), NULL); + err = -EINVAL; + if (!pat_tb[NL80211_WOWLAN_PKTPAT_MASK] || + !pat_tb[NL80211_WOWLAN_PKTPAT_PATTERN]) + goto error; + pat_len = nla_len(pat_tb[NL80211_WOWLAN_PKTPAT_PATTERN]); + mask_len = DIV_ROUND_UP(pat_len, 8); + if (nla_len(pat_tb[NL80211_WOWLAN_PKTPAT_MASK]) != + mask_len) + goto error; + if (pat_len > wowlan->pattern_max_len || + pat_len < wowlan->pattern_min_len) + goto error; + + new_triggers.patterns[i].mask = + kmalloc(mask_len + pat_len, GFP_KERNEL); + if (!new_triggers.patterns[i].mask) { + err = -ENOMEM; + goto error; + } + new_triggers.patterns[i].pattern = + new_triggers.patterns[i].mask + mask_len; + memcpy(new_triggers.patterns[i].mask, + nla_data(pat_tb[NL80211_WOWLAN_PKTPAT_MASK]), + mask_len); + new_triggers.patterns[i].pattern_len = pat_len; + memcpy(new_triggers.patterns[i].pattern, + nla_data(pat_tb[NL80211_WOWLAN_PKTPAT_PATTERN]), + pat_len); + i++; + } + } + + if (memcmp(&new_triggers, &no_triggers, sizeof(new_triggers))) { + struct cfg80211_wowlan *ntrig; + ntrig = kmemdup(&new_triggers, sizeof(new_triggers), + GFP_KERNEL); + if (!ntrig) { + err = -ENOMEM; + goto error; + } + cfg80211_rdev_free_wowlan(rdev); + rdev->wowlan = ntrig; + } else { + no_triggers: + cfg80211_rdev_free_wowlan(rdev); + rdev->wowlan = NULL; + } + + return 0; + error: + for (i = 0; i < new_triggers.n_patterns; i++) + kfree(new_triggers.patterns[i].mask); + kfree(new_triggers.patterns); + return err; +} + #define NL80211_FLAG_NEED_WIPHY 0x01 #define NL80211_FLAG_NEED_NETDEV 0x02 #define NL80211_FLAG_NEED_RTNL 0x04 @@ -5306,6 +5533,22 @@ static struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_GET_WOWLAN, + .doit = nl80211_get_wowlan, + .policy = nl80211_policy, + /* can be retrieved by unprivileged users */ + .internal_flags = NL80211_FLAG_NEED_WIPHY | + NL80211_FLAG_NEED_RTNL, + }, + { + .cmd = NL80211_CMD_SET_WOWLAN, + .doit = nl80211_set_wowlan, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_WIPHY | + NL80211_FLAG_NEED_RTNL, + }, }; static struct genl_multicast_group nl80211_mlme_mcgrp = { diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 4294fa22bb2d..c6e4ca6a7d2e 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -93,7 +93,7 @@ static int wiphy_suspend(struct device *dev, pm_message_t state) if (rdev->ops->suspend) { rtnl_lock(); - 
ret = rdev->ops->suspend(&rdev->wiphy); + ret = rdev->ops->suspend(&rdev->wiphy, rdev->wowlan); rtnl_unlock(); } -- cgit v1.2.3-71-gd317 From a26ac2455ffcf3be5c6ef92bc6df7182700f2114 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 12 Jan 2011 14:10:23 -0800 Subject: rcu: move TREE_RCU from softirq to kthread If RCU priority boosting is to be meaningful, callback invocation must be boosted in addition to preempted RCU readers. Otherwise, in the presence of CPU real-time threads, the grace period ends, but the callbacks don't get invoked. If the callbacks don't get invoked, the associated memory doesn't get freed, so the system is still subject to OOM. But it is not reasonable to priority-boost RCU_SOFTIRQ, so this commit moves the callback invocations to a kthread, which can be boosted easily. Also add comments and properly synchronize all accesses to rcu_cpu_kthread_task, as suggested by Lai Jiangshan. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- Documentation/filesystems/proc.txt | 1 - include/linux/interrupt.h | 1 - include/trace/events/irq.h | 3 +- kernel/rcutree.c | 340 +++++++++++++++++++++++++++++++++++- kernel/rcutree.h | 8 + kernel/rcutree_plugin.h | 4 +- kernel/softirq.c | 2 +- tools/perf/util/trace-event-parse.c | 1 - 8 files changed, 348 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index b0b814d75ca1..60740e8ecb37 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -836,7 +836,6 @@ Provides counts of softirq handlers serviced since boot time, for each cpu. TASKLET: 0 0 0 290 SCHED: 27035 26983 26971 26746 HRTIMER: 0 0 0 0 - RCU: 1678 1769 2178 2250 1.3 IDE devices in /proc/ide diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index bea0ac750712..6c12989839d9 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -414,7 +414,6 @@ enum TASKLET_SOFTIRQ, SCHED_SOFTIRQ, HRTIMER_SOFTIRQ, - RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */ NR_SOFTIRQS }; diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h index 1c09820df585..ae045ca7d356 100644 --- a/include/trace/events/irq.h +++ b/include/trace/events/irq.h @@ -20,8 +20,7 @@ struct softirq_action; softirq_name(BLOCK_IOPOLL), \ softirq_name(TASKLET), \ softirq_name(SCHED), \ - softirq_name(HRTIMER), \ - softirq_name(RCU)) + softirq_name(HRTIMER)) /** * irq_handler_entry - called immediately before the irq action handler diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 0ac1cc03f935..18e33313873e 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -47,6 +47,8 @@ #include #include #include +#include +#include #include "rcutree.h" @@ -82,6 +84,20 @@ DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); int rcu_scheduler_active __read_mostly; EXPORT_SYMBOL_GPL(rcu_scheduler_active); +/* + * Control variables for per-CPU and per-rcu_node kthreads. These + * handle all flavors of RCU. + */ +static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task); +static DEFINE_PER_CPU(wait_queue_head_t, rcu_cpu_wq); +static DEFINE_PER_CPU(char, rcu_cpu_has_work); +static char rcu_kthreads_spawnable; + +static void rcu_node_kthread_setaffinity(struct rcu_node *rnp); +static void invoke_rcu_kthread(void); + +#define RCU_KTHREAD_PRIO 1 /* RT priority for per-CPU kthreads. */ + /* * Return true if an RCU grace period is in progress. 
The ACCESS_ONCE()s * permit this function to be invoked without holding the root rcu_node @@ -1009,6 +1025,8 @@ static void rcu_send_cbs_to_online(struct rcu_state *rsp) /* * Remove the outgoing CPU from the bitmasks in the rcu_node hierarchy * and move all callbacks from the outgoing CPU to the current one. + * There can only be one CPU hotplug operation at a time, so no other + * CPU can be attempting to update rcu_cpu_kthread_task. */ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) { @@ -1017,6 +1035,14 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) int need_report = 0; struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); struct rcu_node *rnp; + struct task_struct *t; + + /* Stop the CPU's kthread. */ + t = per_cpu(rcu_cpu_kthread_task, cpu); + if (t != NULL) { + per_cpu(rcu_cpu_kthread_task, cpu) = NULL; + kthread_stop(t); + } /* Exclude any attempts to start a new grace period. */ raw_spin_lock_irqsave(&rsp->onofflock, flags); @@ -1054,6 +1080,19 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) raw_spin_unlock_irqrestore(&rnp->lock, flags); if (need_report & RCU_OFL_TASKS_EXP_GP) rcu_report_exp_rnp(rsp, rnp); + + /* + * If there are no more online CPUs for this rcu_node structure, + * kill the rcu_node structure's kthread. Otherwise, adjust its + * affinity. + */ + t = rnp->node_kthread_task; + if (t != NULL && + rnp->qsmaskinit == 0) { + kthread_stop(t); + rnp->node_kthread_task = NULL; + } else + rcu_node_kthread_setaffinity(rnp); } /* @@ -1151,7 +1190,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) /* Re-raise the RCU softirq if there are callbacks remaining. */ if (cpu_has_callbacks_ready_to_invoke(rdp)) - raise_softirq(RCU_SOFTIRQ); + invoke_rcu_kthread(); } /* @@ -1197,7 +1236,7 @@ void rcu_check_callbacks(int cpu, int user) } rcu_preempt_check_callbacks(cpu); if (rcu_pending(cpu)) - raise_softirq(RCU_SOFTIRQ); + invoke_rcu_kthread(); } #ifdef CONFIG_SMP @@ -1361,7 +1400,7 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) /* * Do softirq processing for the current CPU. */ -static void rcu_process_callbacks(struct softirq_action *unused) +static void rcu_process_callbacks(void) { __rcu_process_callbacks(&rcu_sched_state, &__get_cpu_var(rcu_sched_data)); @@ -1372,6 +1411,281 @@ static void rcu_process_callbacks(struct softirq_action *unused) rcu_needs_cpu_flush(); } +/* + * Wake up the current CPU's kthread. This replaces raise_softirq() + * in earlier versions of RCU. Note that because we are running on + * the current CPU with interrupts disabled, the rcu_cpu_kthread_task + * cannot disappear out from under us. + */ +static void invoke_rcu_kthread(void) +{ + unsigned long flags; + wait_queue_head_t *q; + int cpu; + + local_irq_save(flags); + cpu = smp_processor_id(); + per_cpu(rcu_cpu_has_work, cpu) = 1; + if (per_cpu(rcu_cpu_kthread_task, cpu) == NULL) { + local_irq_restore(flags); + return; + } + q = &per_cpu(rcu_cpu_wq, cpu); + wake_up(q); + local_irq_restore(flags); +} + +/* + * Timer handler to initiate the waking up of per-CPU kthreads that + * have yielded the CPU due to excess numbers of RCU callbacks. 
+ */ +static void rcu_cpu_kthread_timer(unsigned long arg) +{ + unsigned long flags; + struct rcu_data *rdp = (struct rcu_data *)arg; + struct rcu_node *rnp = rdp->mynode; + struct task_struct *t; + + raw_spin_lock_irqsave(&rnp->lock, flags); + rnp->wakemask |= rdp->grpmask; + t = rnp->node_kthread_task; + if (t == NULL) { + raw_spin_unlock_irqrestore(&rnp->lock, flags); + return; + } + wake_up_process(t); + raw_spin_unlock_irqrestore(&rnp->lock, flags); +} + +/* + * Drop to non-real-time priority and yield, but only after posting a + * timer that will cause us to regain our real-time priority if we + * remain preempted. Either way, we restore our real-time priority + * before returning. + */ +static void rcu_yield(int cpu) +{ + struct rcu_data *rdp = per_cpu_ptr(rcu_sched_state.rda, cpu); + struct sched_param sp; + struct timer_list yield_timer; + + setup_timer_on_stack(&yield_timer, rcu_cpu_kthread_timer, (unsigned long)rdp); + mod_timer(&yield_timer, jiffies + 2); + sp.sched_priority = 0; + sched_setscheduler_nocheck(current, SCHED_NORMAL, &sp); + schedule(); + sp.sched_priority = RCU_KTHREAD_PRIO; + sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); + del_timer(&yield_timer); +} + +/* + * Handle cases where the rcu_cpu_kthread() ends up on the wrong CPU. + * This can happen while the corresponding CPU is either coming online + * or going offline. We cannot wait until the CPU is fully online + * before starting the kthread, because the various notifier functions + * can wait for RCU grace periods. So we park rcu_cpu_kthread() until + * the corresponding CPU is online. + * + * Return 1 if the kthread needs to stop, 0 otherwise. + * + * Caller must disable bh. This function can momentarily enable it. + */ +static int rcu_cpu_kthread_should_stop(int cpu) +{ + while (cpu_is_offline(cpu) || + !cpumask_equal(¤t->cpus_allowed, cpumask_of(cpu)) || + smp_processor_id() != cpu) { + if (kthread_should_stop()) + return 1; + local_bh_enable(); + schedule_timeout_uninterruptible(1); + if (!cpumask_equal(¤t->cpus_allowed, cpumask_of(cpu))) + set_cpus_allowed_ptr(current, cpumask_of(cpu)); + local_bh_disable(); + } + return 0; +} + +/* + * Per-CPU kernel thread that invokes RCU callbacks. This replaces the + * earlier RCU softirq. + */ +static int rcu_cpu_kthread(void *arg) +{ + int cpu = (int)(long)arg; + unsigned long flags; + int spincnt = 0; + wait_queue_head_t *wqp = &per_cpu(rcu_cpu_wq, cpu); + char work; + char *workp = &per_cpu(rcu_cpu_has_work, cpu); + + for (;;) { + wait_event_interruptible(*wqp, + *workp != 0 || kthread_should_stop()); + local_bh_disable(); + if (rcu_cpu_kthread_should_stop(cpu)) { + local_bh_enable(); + break; + } + local_irq_save(flags); + work = *workp; + *workp = 0; + local_irq_restore(flags); + if (work) + rcu_process_callbacks(); + local_bh_enable(); + if (*workp != 0) + spincnt++; + else + spincnt = 0; + if (spincnt > 10) { + rcu_yield(cpu); + spincnt = 0; + } + } + return 0; +} + +/* + * Spawn a per-CPU kthread, setting up affinity and priority. + * Because the CPU hotplug lock is held, no other CPU will be attempting + * to manipulate rcu_cpu_kthread_task. There might be another CPU + * attempting to access it during boot, but the locking in kthread_bind() + * will enforce sufficient ordering. 
+ */ +static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu) +{ + struct sched_param sp; + struct task_struct *t; + + if (!rcu_kthreads_spawnable || + per_cpu(rcu_cpu_kthread_task, cpu) != NULL) + return 0; + t = kthread_create(rcu_cpu_kthread, (void *)(long)cpu, "rcuc%d", cpu); + if (IS_ERR(t)) + return PTR_ERR(t); + kthread_bind(t, cpu); + WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL); + per_cpu(rcu_cpu_kthread_task, cpu) = t; + wake_up_process(t); + sp.sched_priority = RCU_KTHREAD_PRIO; + sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); + return 0; +} + +/* + * Per-rcu_node kthread, which is in charge of waking up the per-CPU + * kthreads when needed. We ignore requests to wake up kthreads + * for offline CPUs, which is OK because force_quiescent_state() + * takes care of this case. + */ +static int rcu_node_kthread(void *arg) +{ + int cpu; + unsigned long flags; + unsigned long mask; + struct rcu_node *rnp = (struct rcu_node *)arg; + struct sched_param sp; + struct task_struct *t; + + for (;;) { + wait_event_interruptible(rnp->node_wq, rnp->wakemask != 0 || + kthread_should_stop()); + if (kthread_should_stop()) + break; + raw_spin_lock_irqsave(&rnp->lock, flags); + mask = rnp->wakemask; + rnp->wakemask = 0; + raw_spin_unlock_irqrestore(&rnp->lock, flags); + for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) { + if ((mask & 0x1) == 0) + continue; + preempt_disable(); + t = per_cpu(rcu_cpu_kthread_task, cpu); + if (!cpu_online(cpu) || t == NULL) { + preempt_enable(); + continue; + } + per_cpu(rcu_cpu_has_work, cpu) = 1; + sp.sched_priority = RCU_KTHREAD_PRIO; + sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); + preempt_enable(); + } + } + return 0; +} + +/* + * Set the per-rcu_node kthread's affinity to cover all CPUs that are + * served by the rcu_node in question. + */ +static void rcu_node_kthread_setaffinity(struct rcu_node *rnp) +{ + cpumask_var_t cm; + int cpu; + unsigned long mask = rnp->qsmaskinit; + + if (rnp->node_kthread_task == NULL || + rnp->qsmaskinit == 0) + return; + if (!alloc_cpumask_var(&cm, GFP_KERNEL)) + return; + cpumask_clear(cm); + for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) + if (mask & 0x1) + cpumask_set_cpu(cpu, cm); + set_cpus_allowed_ptr(rnp->node_kthread_task, cm); + free_cpumask_var(cm); +} + +/* + * Spawn a per-rcu_node kthread, setting priority and affinity. + */ +static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp, + struct rcu_node *rnp) +{ + int rnp_index = rnp - &rsp->node[0]; + struct sched_param sp; + struct task_struct *t; + + if (!rcu_kthreads_spawnable || + rnp->qsmaskinit == 0 || + rnp->node_kthread_task != NULL) + return 0; + t = kthread_create(rcu_node_kthread, (void *)rnp, "rcun%d", rnp_index); + if (IS_ERR(t)) + return PTR_ERR(t); + rnp->node_kthread_task = t; + wake_up_process(t); + sp.sched_priority = 99; + sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); + return 0; +} + +/* + * Spawn all kthreads -- called as soon as the scheduler is running. 
+ */ +static int __init rcu_spawn_kthreads(void) +{ + int cpu; + struct rcu_node *rnp; + + rcu_kthreads_spawnable = 1; + for_each_possible_cpu(cpu) { + init_waitqueue_head(&per_cpu(rcu_cpu_wq, cpu)); + per_cpu(rcu_cpu_has_work, cpu) = 0; + if (cpu_online(cpu)) + (void)rcu_spawn_one_cpu_kthread(cpu); + } + rcu_for_each_leaf_node(&rcu_sched_state, rnp) { + init_waitqueue_head(&rnp->node_wq); + (void)rcu_spawn_one_node_kthread(&rcu_sched_state, rnp); + } + return 0; +} +early_initcall(rcu_spawn_kthreads); + static void __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), struct rcu_state *rsp) @@ -1771,6 +2085,19 @@ static void __cpuinit rcu_online_cpu(int cpu) rcu_preempt_init_percpu_data(cpu); } +static void __cpuinit rcu_online_kthreads(int cpu) +{ + struct rcu_data *rdp = per_cpu_ptr(rcu_sched_state.rda, cpu); + struct rcu_node *rnp = rdp->mynode; + + /* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */ + if (rcu_kthreads_spawnable) { + (void)rcu_spawn_one_cpu_kthread(cpu); + if (rnp->node_kthread_task == NULL) + (void)rcu_spawn_one_node_kthread(&rcu_sched_state, rnp); + } +} + /* * Handle CPU online/offline notification events. */ @@ -1778,11 +2105,17 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { long cpu = (long)hcpu; + struct rcu_data *rdp = per_cpu_ptr(rcu_sched_state.rda, cpu); + struct rcu_node *rnp = rdp->mynode; switch (action) { case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: rcu_online_cpu(cpu); + rcu_online_kthreads(cpu); + break; + case CPU_ONLINE: + rcu_node_kthread_setaffinity(rnp); break; case CPU_DYING: case CPU_DYING_FROZEN: @@ -1923,7 +2256,6 @@ void __init rcu_init(void) rcu_init_one(&rcu_sched_state, &rcu_sched_data); rcu_init_one(&rcu_bh_state, &rcu_bh_data); __rcu_init_preempt(); - open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); /* * We don't need protection against CPU-hotplug here because diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 5a439c180e69..c0213802d164 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -111,6 +111,7 @@ struct rcu_node { /* elements that need to drain to allow the */ /* current expedited grace period to */ /* complete (only for TREE_PREEMPT_RCU). */ + unsigned long wakemask; /* CPUs whose kthread needs to be awakened. */ unsigned long qsmaskinit; /* Per-GP initial value for qsmask & expmask. */ unsigned long grpmask; /* Mask to apply to parent qsmask. */ @@ -134,6 +135,13 @@ struct rcu_node { /* if there is no such task. If there */ /* is no current expedited grace period, */ /* then there can cannot be any such task. */ + struct task_struct *node_kthread_task; + /* kthread that takes care of this rcu_node */ + /* structure, for example, awakening the */ + /* per-CPU kthreads as needed. */ + wait_queue_head_t node_wq; + /* Wait queue on which to park the per-node */ + /* kthread. */ } ____cacheline_internodealigned_in_smp; /* diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 774f010a4619..b9bd69a5a4fe 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -1206,7 +1206,7 @@ static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff); * * Because it is not legal to invoke rcu_process_callbacks() with irqs * disabled, we do one pass of force_quiescent_state(), then do a - * raise_softirq() to cause rcu_process_callbacks() to be invoked later. + * invoke_rcu_kthread() to cause rcu_process_callbacks() to be invoked later. * The per-cpu rcu_dyntick_drain variable controls the sequencing. 
*/ int rcu_needs_cpu(int cpu) @@ -1257,7 +1257,7 @@ int rcu_needs_cpu(int cpu) /* If RCU callbacks are still pending, RCU still needs this CPU. */ if (c) - raise_softirq(RCU_SOFTIRQ); + invoke_rcu_kthread(); return c; } diff --git a/kernel/softirq.c b/kernel/softirq.c index 174f976c2874..13960170cad4 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -58,7 +58,7 @@ DEFINE_PER_CPU(struct task_struct *, ksoftirqd); char *softirq_to_name[NR_SOFTIRQS] = { "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL", - "TASKLET", "SCHED", "HRTIMER", "RCU" + "TASKLET", "SCHED", "HRTIMER" }; /* diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 0a7ed5b5e281..1e88485c16a0 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -2187,7 +2187,6 @@ static const struct flag flags[] = { { "TASKLET_SOFTIRQ", 6 }, { "SCHED_SOFTIRQ", 7 }, { "HRTIMER_SOFTIRQ", 8 }, - { "RCU_SOFTIRQ", 9 }, { "HRTIMER_NORESTART", 0 }, { "HRTIMER_RESTART", 1 }, -- cgit v1.2.3-71-gd317 From 4a29865689dbb87a02e3b0fff4a4ae5041273173 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 3 Apr 2011 21:33:51 -0700 Subject: rcu: make rcutorture version numbers available through debugfs It is not possible to accurately correlate rcutorture output with that of debugfs. This patch therefore adds a debugfs file that prints out the rcutorture version number, permitting easy correlation. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 13 ++++++++++++- include/linux/rcutree.h | 3 +++ kernel/rcutorture.c | 8 +++++--- kernel/rcutree.c | 37 +++++++++++++++++++++++++++++++++++++ kernel/rcutree_trace.c | 28 ++++++++++++++++++++++++++++ 5 files changed, 85 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index ff422d2b7f90..9e169c2ba91f 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -47,6 +47,18 @@ extern int rcutorture_runnable; /* for sysctl */ #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ +#if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) +extern void rcutorture_record_test_transition(void); +extern void rcutorture_record_progress(unsigned long vernum); +#else +static inline void rcutorture_record_test_transition(void) +{ +} +static inline void rcutorture_record_progress(unsigned long vernum) +{ +} +#endif + #define UINT_CMP_GE(a, b) (UINT_MAX / 2 >= (a) - (b)) #define UINT_CMP_LT(a, b) (UINT_MAX / 2 < (a) - (b)) #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) @@ -68,7 +80,6 @@ extern void call_rcu_sched(struct rcu_head *head, extern void synchronize_sched(void); extern void rcu_barrier_bh(void); extern void rcu_barrier_sched(void); -extern int sched_expedited_torture_stats(char *page); static inline void __rcu_read_lock_bh(void) { diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 3a933482734a..284dad10c55b 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -58,9 +58,12 @@ static inline void synchronize_rcu_bh_expedited(void) extern void rcu_barrier(void); +extern unsigned long rcutorture_testseq; +extern unsigned long rcutorture_vernum; extern long rcu_batches_completed(void); extern long rcu_batches_completed_bh(void); extern long rcu_batches_completed_sched(void); + extern void rcu_force_quiescent_state(void); extern void rcu_bh_force_quiescent_state(void); extern void rcu_sched_force_quiescent_state(void); diff --git 
a/kernel/rcutorture.c b/kernel/rcutorture.c index 22b0e74e7d99..c2f58ec24751 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -131,7 +131,7 @@ struct rcu_torture { static LIST_HEAD(rcu_torture_freelist); static struct rcu_torture __rcu *rcu_torture_current; -static long rcu_torture_current_version; +static unsigned long rcu_torture_current_version; static struct rcu_torture rcu_tortures[10 * RCU_TORTURE_PIPE_LEN]; static DEFINE_SPINLOCK(rcu_torture_lock); static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_count) = @@ -884,7 +884,7 @@ rcu_torture_writer(void *arg) old_rp->rtort_pipe_count++; cur_ops->deferred_free(old_rp); } - rcu_torture_current_version++; + rcutorture_record_progress(++rcu_torture_current_version); oldbatch = cur_ops->completed(); rcu_stutter_wait("rcu_torture_writer"); } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); @@ -1064,7 +1064,7 @@ rcu_torture_printk(char *page) } cnt += sprintf(&page[cnt], "%s%s ", torture_type, TORTURE_FLAG); cnt += sprintf(&page[cnt], - "rtc: %p ver: %ld tfle: %d rta: %d rtaf: %d rtf: %d " + "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d " "rtmbe: %d rtbke: %ld rtbre: %ld " "rtbf: %ld rtb: %ld nt: %ld", rcu_torture_current, @@ -1325,6 +1325,7 @@ rcu_torture_cleanup(void) int i; mutex_lock(&fullstop_mutex); + rcutorture_record_test_transition(); if (fullstop == FULLSTOP_SHUTDOWN) { printk(KERN_WARNING /* but going down anyway, so... */ "Concurrent 'rmmod rcutorture' and shutdown illegal!\n"); @@ -1616,6 +1617,7 @@ rcu_torture_init(void) } } register_reboot_notifier(&rcutorture_shutdown_nb); + rcutorture_record_test_transition(); mutex_unlock(&fullstop_mutex); return 0; diff --git a/kernel/rcutree.c b/kernel/rcutree.c index d8917401cbbc..bb84deca3319 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -101,6 +101,18 @@ static void invoke_rcu_cpu_kthread(void); #define RCU_KTHREAD_PRIO 1 /* RT priority for per-CPU kthreads. */ +/* + * Track the rcutorture test sequence number and the update version + * number within a given test. The rcutorture_testseq is incremented + * on every rcutorture module load and unload, so has an odd value + * when a test is running. The rcutorture_vernum is set to zero + * when rcutorture starts and is incremented on each rcutorture update. + * These variables enable correlating rcutorture output with the + * RCU tracing information. + */ +unsigned long rcutorture_testseq; +unsigned long rcutorture_vernum; + /* * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s * permit this function to be invoked without holding the root rcu_node @@ -192,6 +204,31 @@ void rcu_bh_force_quiescent_state(void) } EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state); +/* + * Record the number of times rcutorture tests have been initiated and + * terminated. This information allows the debugfs tracing stats to be + * correlated to the rcutorture messages, even when the rcutorture module + * is being repeatedly loaded and unloaded. In other words, we cannot + * store this state in rcutorture itself. + */ +void rcutorture_record_test_transition(void) +{ + rcutorture_testseq++; + rcutorture_vernum = 0; +} +EXPORT_SYMBOL_GPL(rcutorture_record_test_transition); + +/* + * Record the number of writer passes through the current rcutorture test. + * This is also used to correlate debugfs tracing stats with the rcutorture + * messages. 
+ */ +void rcutorture_record_progress(unsigned long vernum) +{ + rcutorture_vernum++; +} +EXPORT_SYMBOL_GPL(rcutorture_record_progress); + /* * Force a quiescent state for RCU-sched. */ diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index fc40e89a028c..3baa235786b5 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c @@ -394,6 +394,29 @@ static const struct file_operations rcu_pending_fops = { .release = single_release, }; +static int show_rcutorture(struct seq_file *m, void *unused) +{ + seq_printf(m, "rcutorture test sequence: %lu %s\n", + rcutorture_testseq >> 1, + (rcutorture_testseq & 0x1) ? "(test in progress)" : ""); + seq_printf(m, "rcutorture update version number: %lu\n", + rcutorture_vernum); + return 0; +} + +static int rcutorture_open(struct inode *inode, struct file *file) +{ + return single_open(file, show_rcutorture, NULL); +} + +static const struct file_operations rcutorture_fops = { + .owner = THIS_MODULE, + .open = rcutorture_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static struct dentry *rcudir; static int __init rcutree_trace_init(void) @@ -430,6 +453,11 @@ static int __init rcutree_trace_init(void) NULL, &rcu_pending_fops); if (!retval) goto free_out; + + retval = debugfs_create_file("rcutorture", 0444, rcudir, + NULL, &rcutorture_fops); + if (!retval) + goto free_out; return 0; free_out: debugfs_remove_recursive(rcudir); -- cgit v1.2.3-71-gd317 From b0c9d7ff2793502650ad987c3f237d5fe5587a1e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 29 Mar 2011 12:56:56 -0700 Subject: rcu: add DEBUG_OBJECTS_RCU_HEAD check for alignment Verify that rcu_head structures are aligned to a four-byte boundary. This check is enabled by CONFIG_DEBUG_OBJECTS_RCU_HEAD. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 9e169c2ba91f..c7aeacf7fc98 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -785,6 +785,7 @@ extern struct debug_obj_descr rcuhead_debug_descr; static inline void debug_rcu_head_queue(struct rcu_head *head) { + WARN_ON_ONCE((unsigned long)head & 0x3); debug_object_activate(head, &rcuhead_debug_descr); debug_object_active_state(head, &rcuhead_debug_descr, STATE_RCU_HEAD_READY, -- cgit v1.2.3-71-gd317 From 9ab1544eb4196ca8d05c433b2eb56f74496b1ee3 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Fri, 18 Mar 2011 11:15:47 +0800 Subject: rcu: introduce kfree_rcu() Many rcu callbacks functions just call kfree() on the base structure. These functions are trivial, but their size adds up, and furthermore when they are used in a kernel module, that module must invoke the high-latency rcu_barrier() function at module-unload time. The kfree_rcu() function introduced by this commit addresses this issue. Rather than encoding a function address in the embedded rcu_head structure, kfree_rcu() instead encodes the offset of the rcu_head structure within the base structure. Because the functions are not allowed in the low-order 4096 bytes of kernel virtual memory, offsets up to 4095 bytes can be accommodated. If the offset is larger than 4095 bytes, a compile-time error will be generated in __kfree_rcu(). If this error is triggered, you can either fall back to use of call_rcu() or rearrange the structure to position the rcu_head structure into the first 4096 bytes. 
Note that the allowable offset might decrease in the future, for example, to allow something like kmem_cache_free_rcu(). The new kfree_rcu() function can replace code as follows: call_rcu(&p->rcu, simple_kfree_callback); where "simple_kfree_callback()" might be defined as follows: void simple_kfree_callback(struct rcu_head *p) { struct foo *q = container_of(p, struct foo, rcu); kfree(q); } with the following: kfree_rcu(&p->rcu, rcu); Note that the "rcu" is the name of a field in the structure being freed. The reason for using this rather than passing in a pointer to the base structure is that the above approach allows better type checking. This commit is based on earlier work by Lai Jiangshan and Manfred Spraul: Lai's V1 patch: http://lkml.org/lkml/2008/9/18/1 Manfred's patch: http://lkml.org/lkml/2009/1/2/115 Signed-off-by: Lai Jiangshan Signed-off-by: Manfred Spraul Signed-off-by: Paul E. McKenney Reviewed-by: David Howells Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 56 ++++++++++++++++++++++++++++++++++++++++++++++++ kernel/rcutiny.c | 2 +- kernel/rcutree.c | 2 +- 3 files changed, 58 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index c7aeacf7fc98..99f9aa7c2804 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -809,4 +809,60 @@ static inline void debug_rcu_head_unqueue(struct rcu_head *head) } #endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ +static __always_inline bool __is_kfree_rcu_offset(unsigned long offset) +{ + return offset < 4096; +} + +static __always_inline +void __kfree_rcu(struct rcu_head *head, unsigned long offset) +{ + typedef void (*rcu_callback)(struct rcu_head *); + + BUILD_BUG_ON(!__builtin_constant_p(offset)); + + /* See the kfree_rcu() header comment. */ + BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); + + call_rcu(head, (rcu_callback)offset); +} + +extern void kfree(const void *); + +static inline void __rcu_reclaim(struct rcu_head *head) +{ + unsigned long offset = (unsigned long)head->func; + + if (__is_kfree_rcu_offset(offset)) + kfree((void *)head - offset); + else + head->func(head); +} + +/** + * kfree_rcu() - kfree an object after a grace period. + * @ptr: pointer to kfree + * @rcu_head: the name of the struct rcu_head within the type of @ptr. + * + * Many rcu callbacks functions just call kfree() on the base structure. + * These functions are trivial, but their size adds up, and furthermore + * when they are used in a kernel module, that module must invoke the + * high-latency rcu_barrier() function at module-unload time. + * + * The kfree_rcu() function handles this issue. Rather than encoding a + * function address in the embedded rcu_head structure, kfree_rcu() instead + * encodes the offset of the rcu_head structure within the base structure. + * Because the functions are not allowed in the low-order 4096 bytes of + * kernel virtual memory, offsets up to 4095 bytes can be accommodated. + * If the offset is larger than 4095 bytes, a compile-time error will + * be generated in __kfree_rcu(). If this error is triggered, you can + * either fall back to use of call_rcu() or rearrange the structure to + * position the rcu_head structure into the first 4096 bytes. + * + * Note that the allowable offset might decrease in the future, for example, + * to allow something like kmem_cache_free_rcu(). 
+ */ +#define kfree_rcu(ptr, rcu_head) \ + __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head)) + #endif /* __LINUX_RCUPDATE_H */ diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 0c343b9a46d5..4d60fbc9c64c 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -167,7 +167,7 @@ static void rcu_process_callbacks(struct rcu_ctrlblk *rcp) prefetch(next); debug_rcu_head_unqueue(list); local_bh_disable(); - list->func(list); + __rcu_reclaim(list); local_bh_enable(); list = next; RCU_TRACE(cb_count++); diff --git a/kernel/rcutree.c b/kernel/rcutree.c index b579e4f97210..2c07adb97088 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1206,7 +1206,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) next = list->next; prefetch(next); debug_rcu_head_unqueue(list); - list->func(list); + __rcu_reclaim(list); list = next; if (++count >= rdp->blimit) break; -- cgit v1.2.3-71-gd317 From 29ce831000081dd757d3116bf774aafffc4b6b20 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 4 May 2011 16:31:03 +0300 Subject: rcu: provide rcu_virt_note_context_switch() function. Provide rcu_virt_note_context_switch() for virtualization use, to note a quiescent state during guest entry. Signed-off-by: Gleb Natapov Signed-off-by: Paul E. McKenney --- include/linux/rcutiny.h | 8 ++++++++ include/linux/rcutree.h | 10 ++++++++++ kernel/rcutree.c | 1 + 3 files changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 30ebd7c8d874..52b3e0281fd0 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -99,6 +99,14 @@ static inline void rcu_note_context_switch(int cpu) rcu_preempt_note_context_switch(); } +/* + * Take advantage of the fact that there is only one CPU, which + * allows us to ignore virtualization-based context switches. + */ +static inline void rcu_virt_note_context_switch(int cpu) +{ +} + /* * Return the number of grace periods. */ diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 284dad10c55b..e65d06634dd8 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -35,6 +35,16 @@ extern void rcu_note_context_switch(int cpu); extern int rcu_needs_cpu(int cpu); extern void rcu_cpu_stall_reset(void); +/* + * Note a virtualization-based context switch. This is simply a + * wrapper around rcu_note_context_switch(), which allows TINY_RCU + * to save a few bytes. + */ +static inline void rcu_virt_note_context_switch(int cpu) +{ + rcu_note_context_switch(cpu); +} + #ifdef CONFIG_TREE_PREEMPT_RCU extern void exit_rcu(void); diff --git a/kernel/rcutree.c b/kernel/rcutree.c index b2fe2a273df2..54ff7eb92819 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -157,6 +157,7 @@ void rcu_note_context_switch(int cpu) rcu_sched_qs(cpu); rcu_preempt_note_context_switch(cpu); } +EXPORT_SYMBOL_GPL(rcu_note_context_switch); #ifdef CONFIG_NO_HZ DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { -- cgit v1.2.3-71-gd317 From a3a4a5acd3bd2f6f1e102e1f1b9d2e2bb320a7fd Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Thu, 5 May 2011 23:55:18 -0400 Subject: Regression: partial revert "tracing: Remove lock_depth from event entry" This partially reverts commit e6e1e2593592a8f6f6380496655d8c6f67431266. That commit changed the structure layout of the trace structure, which in turn broke PowerTOP (1.9x generation) quite badly. 
I appreciate not wanting to expose the variable in question, and PowerTOP was not using it, so I've replaced the variable with just a padding field - that way if in the future a new field is needed it can just use this padding field. Signed-off-by: Arjan van de Ven Signed-off-by: Linus Torvalds --- include/linux/ftrace_event.h | 1 + kernel/trace/trace.c | 1 + kernel/trace/trace_events.c | 1 + 3 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 22b32af1b5ec..b5a550a39a70 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -37,6 +37,7 @@ struct trace_entry { unsigned char flags; unsigned char preempt_count; int pid; + int padding; }; #define FTRACE_MAX_EVENT \ diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d38c16a06a6f..1cb49be7c7fb 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1110,6 +1110,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, entry->preempt_count = pc & 0xff; entry->pid = (tsk) ? tsk->pid : 0; + entry->padding = 0; entry->flags = #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index e88f74fe1d4c..2fe110341359 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -116,6 +116,7 @@ static int trace_define_common_fields(void) __common_field(unsigned char, flags); __common_field(unsigned char, preempt_count); __common_field(int, pid); + __common_field(int, padding); return ret; } -- cgit v1.2.3-71-gd317 From 880ffb5c6c5c8c8c6efd9efe9355317322b4603b Mon Sep 17 00:00:00 2001 From: Wanlong Gao Date: Thu, 5 May 2011 07:55:36 +0800 Subject: driver core: Add the device driver-model structures to kerneldoc Add the comments to the structure bus_type, device_driver, device, class to device.h for generating the driver-model kerneldoc. With another patch these all removed from the files in Documentation/driver-model/ since they are out of date. That will keep things up to date and provide a better way to document this stuff. Signed-off-by: Wanlong Gao Acked-by: Harry Wei Signed-off-by: Greg Kroah-Hartman --- Documentation/DocBook/device-drivers.tmpl | 6 +- include/linux/device.h | 154 +++++++++++++++++++++++++++++- 2 files changed, 154 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/Documentation/DocBook/device-drivers.tmpl b/Documentation/DocBook/device-drivers.tmpl index 36f63d4a0a06..b638e50cf8f6 100644 --- a/Documentation/DocBook/device-drivers.tmpl +++ b/Documentation/DocBook/device-drivers.tmpl @@ -96,10 +96,10 @@ X!Iinclude/linux/kobject.h Device drivers infrastructure + The Basic Device Driver-Model Structures +!Iinclude/linux/device.h + Device Drivers Base - !Edrivers/base/driver.c !Edrivers/base/core.c !Edrivers/base/class.c diff --git a/include/linux/device.h b/include/linux/device.h index 2215d013ca96..42365067a836 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -47,6 +47,38 @@ extern int __must_check bus_create_file(struct bus_type *, struct bus_attribute *); extern void bus_remove_file(struct bus_type *, struct bus_attribute *); +/** + * struct bus_type - The bus type of the device + * + * @name: The name of the bus. + * @bus_attrs: Default attributes of the bus. + * @dev_attrs: Default attributes of the devices on the bus. + * @drv_attrs: Default attributes of the device drivers on the bus. 
+ * @match: Called, perhaps multiple times, whenever a new device or driver + * is added for this bus. It should return a nonzero value if the + * given device can be handled by the given driver. + * @uevent: Called when a device is added, removed, or a few other things + * that generate uevents to add the environment variables. + * @probe: Called when a new device or driver add to this bus, and callback + * the specific driver's probe to initial the matched device. + * @remove: Called when a device removed from this bus. + * @shutdown: Called at shut-down time to quiesce the device. + * @suspend: Called when a device on this bus wants to go to sleep mode. + * @resume: Called to bring a device on this bus out of sleep mode. + * @pm: Power management operations of this bus, callback the specific + * device driver's pm-ops. + * @p: The private data of the driver core, only the driver core can + * touch this. + * + * A bus is a channel between the processor and one or more devices. For the + * purposes of the device model, all devices are connected via a bus, even if + * it is an internal, virtual, "platform" bus. Buses can plug into each other. + * A USB controller is usually a PCI device, for example. The device model + * represents the actual connections between buses and the devices they control. + * A bus is represented by the bus_type structure. It contains the name, the + * default attributes, the bus' methods, PM operations, and the driver core's + * private data. + */ struct bus_type { const char *name; struct bus_attribute *bus_attrs; @@ -119,6 +151,37 @@ extern int bus_unregister_notifier(struct bus_type *bus, extern struct kset *bus_get_kset(struct bus_type *bus); extern struct klist *bus_get_device_klist(struct bus_type *bus); +/** + * struct device_driver - The basic device driver structure + * @name: Name of the device driver. + * @bus: The bus which the device of this driver belongs to. + * @owner: The module owner. + * @mod_name: Used for built-in modules. + * @suppress_bind_attrs: Disables bind/unbind via sysfs. + * @of_match_table: The open firmware table. + * @probe: Called to query the existence of a specific device, + * whether this driver can work with it, and bind the driver + * to a specific device. + * @remove: Called when the device is removed from the system to + * unbind a device from this driver. + * @shutdown: Called at shut-down time to quiesce the device. + * @suspend: Called to put the device to sleep mode. Usually to a + * low power state. + * @resume: Called to bring a device from sleep mode. + * @groups: Default attributes that get created by the driver core + * automatically. + * @pm: Power management operations of the device which matched + * this driver. + * @p: Driver core's private data, no one other than the driver + * core can touch this. + * + * The device driver-model tracks all of the drivers known to the system. + * The main reason for this tracking is to enable the driver core to match + * up drivers with new devices. Once drivers are known objects within the + * system, however, a number of other things become possible. Device drivers + * can export information and configuration variables that are independent + * of any specific device. 
+ */ struct device_driver { const char *name; struct bus_type *bus; @@ -185,8 +248,34 @@ struct device *driver_find_device(struct device_driver *drv, struct device *start, void *data, int (*match)(struct device *dev, void *data)); -/* - * device classes +/** + * struct class - device classes + * @name: Name of the class. + * @owner: The module owner. + * @class_attrs: Default attributes of this class. + * @dev_attrs: Default attributes of the devices belong to the class. + * @dev_bin_attrs: Default binary attributes of the devices belong to the class. + * @dev_kobj: The kobject that represents this class and links it into the hierarchy. + * @dev_uevent: Called when a device is added, removed from this class, or a + * few other things that generate uevents to add the environment + * variables. + * @devnode: Callback to provide the devtmpfs. + * @class_release: Called to release this class. + * @dev_release: Called to release the device. + * @suspend: Used to put the device to sleep mode, usually to a low power + * state. + * @resume: Used to bring the device from the sleep mode. + * @ns_type: Callbacks so sysfs can detemine namespaces. + * @namespace: Namespace of the device belongs to this class. + * @pm: The default device power management operations of this class. + * @p: The private data of the driver core, no one other than the + * driver core can touch this. + * + * A class is a higher-level view of a device that abstracts out low-level + * implementation details. Drivers may see a SCSI disk or an ATA disk, but, + * at the class level, they are all simply disks. Classes allow user space + * to work with devices based on what they do, rather than how they are + * connected or how they work. */ struct class { const char *name; @@ -401,6 +490,65 @@ struct device_dma_parameters { unsigned long segment_boundary_mask; }; +/** + * struct device - The basic device structure + * @parent: The device's "parent" device, the device to which it is attached. + * In most cases, a parent device is some sort of bus or host + * controller. If parent is NULL, the device, is a top-level device, + * which is not usually what you want. + * @p: Holds the private data of the driver core portions of the device. + * See the comment of the struct device_private for detail. + * @kobj: A top-level, abstract class from which other classes are derived. + * @init_name: Initial name of the device. + * @type: The type of device. + * This identifies the device type and carries type-specific + * information. + * @mutex: Mutex to synchronize calls to its driver. + * @bus: Type of bus device is on. + * @driver: Which driver has allocated this + * @platform_data: Platform data specific to the device. + * Example: For devices on custom boards, as typical of embedded + * and SOC based hardware, Linux often uses platform_data to point + * to board-specific structures describing devices and how they + * are wired. That can include what ports are available, chip + * variants, which GPIO pins act in what additional roles, and so + * on. This shrinks the "Board Support Packages" (BSPs) and + * minimizes board-specific #ifdefs in drivers. + * @power: For device power management. + * See Documentation/power/devices.txt for details. + * @pwr_domain: Provide callbacks that are executed during system suspend, + * hibernation, system resume and during runtime PM transitions + * along with subsystem-level and driver-level callbacks. + * @numa_node: NUMA node this device is close to. + * @dma_mask: Dma mask (if dma'ble device). 
+ * @coherent_dma_mask: Like dma_mask, but for alloc_coherent mapping as not all + * hardware supports 64-bit addresses for consistent allocations + * such descriptors. + * @dma_parms: A low level driver may set these to teach IOMMU code about + * segment limitations. + * @dma_pools: Dma pools (if dma'ble device). + * @dma_mem: Internal for coherent mem override. + * @archdata: For arch-specific additions. + * @of_node: Associated device tree node. + * @of_match: Matching of_device_id from driver. + * @devt: For creating the sysfs "dev". + * @devres_lock: Spinlock to protect the resource of the device. + * @devres_head: The resources list of the device. + * @knode_class: The node used to add the device to the class list. + * @class: The class of the device. + * @groups: Optional attribute groups. + * @release: Callback to free the device after all references have + * gone away. This should be set by the allocator of the + * device (i.e. the bus driver that discovered the device). + * + * At the lowest level, every device in a Linux system is represented by an + * instance of struct device. The device structure contains the information + * that the device model core needs to model the system. Most subsystems, + * however, track additional information about the devices they host. As a + * result, it is rare for devices to be represented by bare device structures; + * instead, that structure, like kobject structures, is usually embedded within + * a higher-level representation of the device. + */ struct device { struct device *parent; @@ -611,7 +759,7 @@ extern int (*platform_notify)(struct device *dev); extern int (*platform_notify_remove)(struct device *dev); -/** +/* * get_device - atomically increment the reference count for the device. * */ -- cgit v1.2.3-71-gd317 From 3df004532582d0cc721da0df28311bcedd639724 Mon Sep 17 00:00:00 2001 From: Anatolij Gustschin Date: Thu, 5 May 2011 12:11:21 +0200 Subject: usb: fix building musb drivers Commit 3dacdf11 "usb: factor out state_string() on otg drivers" broke building musb drivers since there is already another otg_state_string() function in musb drivers, but with different prototype. Fix musb drivers to use common otg_state_string(), too. Also provide a nop for otg_state_string() if CONFIG_USB_OTG_UTILS is not defined. Signed-off-by: Anatolij Gustschin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/am35x.c | 11 ++++--- drivers/usb/musb/blackfin.c | 7 +++-- drivers/usb/musb/da8xx.c | 11 ++++--- drivers/usb/musb/davinci.c | 5 ++-- drivers/usb/musb/musb_core.c | 65 ++++++++++++++++------------------------- drivers/usb/musb/musb_debug.h | 2 -- drivers/usb/musb/musb_gadget.c | 9 +++--- drivers/usb/musb/musb_host.c | 2 +- drivers/usb/musb/musb_virthub.c | 5 ++-- drivers/usb/musb/omap2430.c | 7 +++-- drivers/usb/musb/tusb6010.c | 16 +++++----- include/linux/usb/otg.h | 7 ++++- 12 files changed, 74 insertions(+), 73 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/musb/am35x.c b/drivers/usb/musb/am35x.c index d5a3da37c90c..9e6209f87d3b 100644 --- a/drivers/usb/musb/am35x.c +++ b/drivers/usb/musb/am35x.c @@ -151,7 +151,8 @@ static void otg_timer(unsigned long _musb) * status change events (from the transceiver) otherwise. 
*/ devctl = musb_readb(mregs, MUSB_DEVCTL); - DBG(7, "Poll devctl %02x (%s)\n", devctl, otg_state_string(musb)); + DBG(7, "Poll devctl %02x (%s)\n", devctl, + otg_state_string(musb->xceiv->state)); spin_lock_irqsave(&musb->lock, flags); switch (musb->xceiv->state) { @@ -202,7 +203,8 @@ static void am35x_musb_try_idle(struct musb *musb, unsigned long timeout) /* Never idle if active, or when VBUS timeout is not set as host */ if (musb->is_active || (musb->a_wait_bcon == 0 && musb->xceiv->state == OTG_STATE_A_WAIT_BCON)) { - DBG(4, "%s active, deleting timer\n", otg_state_string(musb)); + DBG(4, "%s active, deleting timer\n", + otg_state_string(musb->xceiv->state)); del_timer(&otg_workaround); last_timer = jiffies; return; @@ -215,7 +217,8 @@ static void am35x_musb_try_idle(struct musb *musb, unsigned long timeout) last_timer = timeout; DBG(4, "%s inactive, starting idle timer for %u ms\n", - otg_state_string(musb), jiffies_to_msecs(timeout - jiffies)); + otg_state_string(musb->xceiv->state), + jiffies_to_msecs(timeout - jiffies)); mod_timer(&otg_workaround, timeout); } @@ -304,7 +307,7 @@ static irqreturn_t am35x_musb_interrupt(int irq, void *hci) /* NOTE: this must complete power-on within 100 ms. */ DBG(2, "VBUS %s (%s)%s, devctl %02x\n", drvvbus ? "on" : "off", - otg_state_string(musb), + otg_state_string(musb->xceiv->state), err ? " ERROR" : "", devctl); ret = IRQ_HANDLED; diff --git a/drivers/usb/musb/blackfin.c b/drivers/usb/musb/blackfin.c index 8e2a1ff8a35a..e141d89656e1 100644 --- a/drivers/usb/musb/blackfin.c +++ b/drivers/usb/musb/blackfin.c @@ -279,12 +279,13 @@ static void musb_conn_timer_handler(unsigned long _musb) } break; default: - DBG(1, "%s state not handled\n", otg_state_string(musb)); + DBG(1, "%s state not handled\n", + otg_state_string(musb->xceiv->state)); break; } spin_unlock_irqrestore(&musb->lock, flags); - DBG(4, "state is %s\n", otg_state_string(musb)); + DBG(4, "state is %s\n", otg_state_string(musb->xceiv->state)); } static void bfin_musb_enable(struct musb *musb) @@ -308,7 +309,7 @@ static void bfin_musb_set_vbus(struct musb *musb, int is_on) DBG(1, "VBUS %s, devctl %02x " /* otg %3x conf %08x prcm %08x */ "\n", - otg_state_string(musb), + otg_state_string(musb->xceiv->state), musb_readb(musb->mregs, MUSB_DEVCTL)); } diff --git a/drivers/usb/musb/da8xx.c b/drivers/usb/musb/da8xx.c index 69a0da3c8f09..0d8984993a38 100644 --- a/drivers/usb/musb/da8xx.c +++ b/drivers/usb/musb/da8xx.c @@ -199,7 +199,8 @@ static void otg_timer(unsigned long _musb) * status change events (from the transceiver) otherwise. 
*/ devctl = musb_readb(mregs, MUSB_DEVCTL); - DBG(7, "Poll devctl %02x (%s)\n", devctl, otg_state_string(musb)); + DBG(7, "Poll devctl %02x (%s)\n", devctl, + otg_state_string(musb->xceiv->state)); spin_lock_irqsave(&musb->lock, flags); switch (musb->xceiv->state) { @@ -273,7 +274,8 @@ static void da8xx_musb_try_idle(struct musb *musb, unsigned long timeout) /* Never idle if active, or when VBUS timeout is not set as host */ if (musb->is_active || (musb->a_wait_bcon == 0 && musb->xceiv->state == OTG_STATE_A_WAIT_BCON)) { - DBG(4, "%s active, deleting timer\n", otg_state_string(musb)); + DBG(4, "%s active, deleting timer\n", + otg_state_string(musb->xceiv->state)); del_timer(&otg_workaround); last_timer = jiffies; return; @@ -286,7 +288,8 @@ static void da8xx_musb_try_idle(struct musb *musb, unsigned long timeout) last_timer = timeout; DBG(4, "%s inactive, starting idle timer for %u ms\n", - otg_state_string(musb), jiffies_to_msecs(timeout - jiffies)); + otg_state_string(musb->xceiv->state), + jiffies_to_msecs(timeout - jiffies)); mod_timer(&otg_workaround, timeout); } @@ -365,7 +368,7 @@ static irqreturn_t da8xx_musb_interrupt(int irq, void *hci) DBG(2, "VBUS %s (%s)%s, devctl %02x\n", drvvbus ? "on" : "off", - otg_state_string(musb), + otg_state_string(musb->xceiv->state), err ? " ERROR" : "", devctl); ret = IRQ_HANDLED; diff --git a/drivers/usb/musb/davinci.c b/drivers/usb/musb/davinci.c index e6de097fb7e8..3661b81a9571 100644 --- a/drivers/usb/musb/davinci.c +++ b/drivers/usb/musb/davinci.c @@ -220,7 +220,8 @@ static void otg_timer(unsigned long _musb) * status change events (from the transceiver) otherwise. */ devctl = musb_readb(mregs, MUSB_DEVCTL); - DBG(7, "poll devctl %02x (%s)\n", devctl, otg_state_string(musb)); + DBG(7, "poll devctl %02x (%s)\n", devctl, + otg_state_string(musb->xceiv->state)); spin_lock_irqsave(&musb->lock, flags); switch (musb->xceiv->state) { @@ -356,7 +357,7 @@ static irqreturn_t davinci_musb_interrupt(int irq, void *__hci) davinci_musb_source_power(musb, drvvbus, 0); DBG(2, "VBUS %s (%s)%s, devctl %02x\n", drvvbus ? "on" : "off", - otg_state_string(musb), + otg_state_string(musb->xceiv->state), err ? 
" ERROR" : "", devctl); retval = IRQ_HANDLED; diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index f10ff00ca09e..9a280872c2b4 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -333,26 +333,6 @@ void musb_load_testpacket(struct musb *musb) /*-------------------------------------------------------------------------*/ -const char *otg_state_string(struct musb *musb) -{ - switch (musb->xceiv->state) { - case OTG_STATE_A_IDLE: return "a_idle"; - case OTG_STATE_A_WAIT_VRISE: return "a_wait_vrise"; - case OTG_STATE_A_WAIT_BCON: return "a_wait_bcon"; - case OTG_STATE_A_HOST: return "a_host"; - case OTG_STATE_A_SUSPEND: return "a_suspend"; - case OTG_STATE_A_PERIPHERAL: return "a_peripheral"; - case OTG_STATE_A_WAIT_VFALL: return "a_wait_vfall"; - case OTG_STATE_A_VBUS_ERR: return "a_vbus_err"; - case OTG_STATE_B_IDLE: return "b_idle"; - case OTG_STATE_B_SRP_INIT: return "b_srp_init"; - case OTG_STATE_B_PERIPHERAL: return "b_peripheral"; - case OTG_STATE_B_WAIT_ACON: return "b_wait_acon"; - case OTG_STATE_B_HOST: return "b_host"; - default: return "UNDEFINED"; - } -} - #ifdef CONFIG_USB_MUSB_OTG /* @@ -373,12 +353,14 @@ void musb_otg_timer_func(unsigned long data) break; case OTG_STATE_A_SUSPEND: case OTG_STATE_A_WAIT_BCON: - DBG(1, "HNP: %s timeout\n", otg_state_string(musb)); + DBG(1, "HNP: %s timeout\n", + otg_state_string(musb->xceiv->state)); musb_platform_set_vbus(musb, 0); musb->xceiv->state = OTG_STATE_A_WAIT_VFALL; break; default: - DBG(1, "HNP: Unhandled mode %s\n", otg_state_string(musb)); + DBG(1, "HNP: Unhandled mode %s\n", + otg_state_string(musb->xceiv->state)); } musb->ignore_disconnect = 0; spin_unlock_irqrestore(&musb->lock, flags); @@ -393,12 +375,13 @@ void musb_hnp_stop(struct musb *musb) void __iomem *mbase = musb->mregs; u8 reg; - DBG(1, "HNP: stop from %s\n", otg_state_string(musb)); + DBG(1, "HNP: stop from %s\n", otg_state_string(musb->xceiv->state)); switch (musb->xceiv->state) { case OTG_STATE_A_PERIPHERAL: musb_g_disconnect(musb); - DBG(1, "HNP: back to %s\n", otg_state_string(musb)); + DBG(1, "HNP: back to %s\n", + otg_state_string(musb->xceiv->state)); break; case OTG_STATE_B_HOST: DBG(1, "HNP: Disabling HR\n"); @@ -412,7 +395,7 @@ void musb_hnp_stop(struct musb *musb) break; default: DBG(1, "HNP: Stopping in unknown state %s\n", - otg_state_string(musb)); + otg_state_string(musb->xceiv->state)); } /* @@ -451,7 +434,7 @@ static irqreturn_t musb_stage0_irq(struct musb *musb, u8 int_usb, */ if (int_usb & MUSB_INTR_RESUME) { handled = IRQ_HANDLED; - DBG(3, "RESUME (%s)\n", otg_state_string(musb)); + DBG(3, "RESUME (%s)\n", otg_state_string(musb->xceiv->state)); if (devctl & MUSB_DEVCTL_HM) { #ifdef CONFIG_USB_MUSB_HDRC_HCD @@ -492,7 +475,7 @@ static irqreturn_t musb_stage0_irq(struct musb *musb, u8 int_usb, default: WARNING("bogus %s RESUME (%s)\n", "host", - otg_state_string(musb)); + otg_state_string(musb->xceiv->state)); } #endif } else { @@ -526,7 +509,7 @@ static irqreturn_t musb_stage0_irq(struct musb *musb, u8 int_usb, default: WARNING("bogus %s RESUME (%s)\n", "peripheral", - otg_state_string(musb)); + otg_state_string(musb->xceiv->state)); } } } @@ -542,7 +525,8 @@ static irqreturn_t musb_stage0_irq(struct musb *musb, u8 int_usb, return IRQ_HANDLED; } - DBG(1, "SESSION_REQUEST (%s)\n", otg_state_string(musb)); + DBG(1, "SESSION_REQUEST (%s)\n", + otg_state_string(musb->xceiv->state)); /* IRQ arrives from ID pin sense or (later, if VBUS power * is removed) SRP. 
responses are time critical: @@ -607,7 +591,7 @@ static irqreturn_t musb_stage0_irq(struct musb *musb, u8 int_usb, } DBG(1, "VBUS_ERROR in %s (%02x, %s), retry #%d, port1 %08x\n", - otg_state_string(musb), + otg_state_string(musb->xceiv->state), devctl, ({ char *s; switch (devctl & MUSB_DEVCTL_VBUS) { @@ -633,7 +617,7 @@ static irqreturn_t musb_stage0_irq(struct musb *musb, u8 int_usb, #endif if (int_usb & MUSB_INTR_SUSPEND) { DBG(1, "SUSPEND (%s) devctl %02x power %02x\n", - otg_state_string(musb), devctl, power); + otg_state_string(musb->xceiv->state), devctl, power); handled = IRQ_HANDLED; switch (musb->xceiv->state) { @@ -758,13 +742,13 @@ b_host: usb_hcd_resume_root_hub(hcd); DBG(1, "CONNECT (%s) devctl %02x\n", - otg_state_string(musb), devctl); + otg_state_string(musb->xceiv->state), devctl); } #endif /* CONFIG_USB_MUSB_HDRC_HCD */ if ((int_usb & MUSB_INTR_DISCONNECT) && !musb->ignore_disconnect) { DBG(1, "DISCONNECT (%s) as %s, devctl %02x\n", - otg_state_string(musb), + otg_state_string(musb->xceiv->state), MUSB_MODE(musb), devctl); handled = IRQ_HANDLED; @@ -807,7 +791,7 @@ b_host: #endif /* GADGET */ default: WARNING("unhandled DISCONNECT transition (%s)\n", - otg_state_string(musb)); + otg_state_string(musb->xceiv->state)); break; } } @@ -832,7 +816,8 @@ b_host: musb_writeb(musb->mregs, MUSB_DEVCTL, 0); } } else if (is_peripheral_capable()) { - DBG(1, "BUS RESET as %s\n", otg_state_string(musb)); + DBG(1, "BUS RESET as %s\n", + otg_state_string(musb->xceiv->state)); switch (musb->xceiv->state) { #ifdef CONFIG_USB_OTG case OTG_STATE_A_SUSPEND: @@ -846,8 +831,8 @@ b_host: case OTG_STATE_A_WAIT_BCON: /* OPT TD.4.7-900ms */ /* never use invalid T(a_wait_bcon) */ DBG(1, "HNP: in %s, %d msec timeout\n", - otg_state_string(musb), - TA_WAIT_BCON(musb)); + otg_state_string(musb->xceiv->state), + TA_WAIT_BCON(musb)); mod_timer(&musb->otg_timer, jiffies + msecs_to_jiffies(TA_WAIT_BCON(musb))); break; @@ -858,7 +843,7 @@ b_host: break; case OTG_STATE_B_WAIT_ACON: DBG(1, "HNP: RESET (%s), to b_peripheral\n", - otg_state_string(musb)); + otg_state_string(musb->xceiv->state)); musb->xceiv->state = OTG_STATE_B_PERIPHERAL; musb_g_reset(musb); break; @@ -871,7 +856,7 @@ b_host: break; default: DBG(1, "Unhandled BUS RESET as %s\n", - otg_state_string(musb)); + otg_state_string(musb->xceiv->state)); } } } @@ -1713,7 +1698,7 @@ musb_mode_show(struct device *dev, struct device_attribute *attr, char *buf) int ret = -EINVAL; spin_lock_irqsave(&musb->lock, flags); - ret = sprintf(buf, "%s\n", otg_state_string(musb)); + ret = sprintf(buf, "%s\n", otg_state_string(musb->xceiv->state)); spin_unlock_irqrestore(&musb->lock, flags); return ret; diff --git a/drivers/usb/musb/musb_debug.h b/drivers/usb/musb/musb_debug.h index 94f6973cf8f7..f958a49a0124 100644 --- a/drivers/usb/musb/musb_debug.h +++ b/drivers/usb/musb/musb_debug.h @@ -54,8 +54,6 @@ static inline int _dbg_level(unsigned l) return musb_debug >= l; } -extern const char *otg_state_string(struct musb *); - #ifdef CONFIG_DEBUG_FS extern int musb_init_debugfs(struct musb *musb); extern void musb_exit_debugfs(struct musb *musb); diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c index 6dfbf9ffd7a6..3d799195c8b4 100644 --- a/drivers/usb/musb/musb_gadget.c +++ b/drivers/usb/musb/musb_gadget.c @@ -1556,7 +1556,8 @@ static int musb_gadget_wakeup(struct usb_gadget *gadget) status = 0; goto done; default: - DBG(2, "Unhandled wake: %s\n", otg_state_string(musb)); + DBG(2, "Unhandled wake: %s\n", + 
otg_state_string(musb->xceiv->state)); goto done; } @@ -2039,7 +2040,7 @@ void musb_g_resume(struct musb *musb) break; default: WARNING("unhandled RESUME transition (%s)\n", - otg_state_string(musb)); + otg_state_string(musb->xceiv->state)); } } @@ -2069,7 +2070,7 @@ void musb_g_suspend(struct musb *musb) * A_PERIPHERAL may need care too */ WARNING("unhandled SUSPEND transition (%s)\n", - otg_state_string(musb)); + otg_state_string(musb->xceiv->state)); } } @@ -2104,7 +2105,7 @@ void musb_g_disconnect(struct musb *musb) default: #ifdef CONFIG_USB_MUSB_OTG DBG(2, "Unhandled disconnect %s, setting a_idle\n", - otg_state_string(musb)); + otg_state_string(musb->xceiv->state)); musb->xceiv->state = OTG_STATE_A_IDLE; MUSB_HST_MODE(musb); break; diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c index 5eef4a8847db..07b106c7de82 100644 --- a/drivers/usb/musb/musb_host.c +++ b/drivers/usb/musb/musb_host.c @@ -2304,7 +2304,7 @@ static int musb_bus_suspend(struct usb_hcd *hcd) if (musb->is_active) { WARNING("trying to suspend as %s while active\n", - otg_state_string(musb)); + otg_state_string(musb->xceiv->state)); return -EBUSY; } else return 0; diff --git a/drivers/usb/musb/musb_virthub.c b/drivers/usb/musb/musb_virthub.c index 489104a5ae14..bb1247d5fd04 100644 --- a/drivers/usb/musb/musb_virthub.c +++ b/drivers/usb/musb/musb_virthub.c @@ -98,7 +98,7 @@ static void musb_port_suspend(struct musb *musb, bool do_suspend) #endif default: DBG(1, "bogus rh suspend? %s\n", - otg_state_string(musb)); + otg_state_string(musb->xceiv->state)); } } else if (power & MUSB_POWER_SUSPENDM) { power &= ~MUSB_POWER_SUSPENDM; @@ -208,7 +208,8 @@ void musb_root_disconnect(struct musb *musb) musb->xceiv->state = OTG_STATE_B_IDLE; break; default: - DBG(1, "host disconnect (%s)\n", otg_state_string(musb)); + DBG(1, "host disconnect (%s)\n", + otg_state_string(musb->xceiv->state)); } } diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c index 57a27fa954b4..d51b15adc0b0 100644 --- a/drivers/usb/musb/omap2430.c +++ b/drivers/usb/musb/omap2430.c @@ -114,7 +114,8 @@ static void omap2430_musb_try_idle(struct musb *musb, unsigned long timeout) /* Never idle if active, or when VBUS timeout is not set as host */ if (musb->is_active || ((musb->a_wait_bcon == 0) && (musb->xceiv->state == OTG_STATE_A_WAIT_BCON))) { - DBG(4, "%s active, deleting timer\n", otg_state_string(musb)); + DBG(4, "%s active, deleting timer\n", + otg_state_string(musb->xceiv->state)); del_timer(&musb_idle_timer); last_timer = jiffies; return; @@ -131,7 +132,7 @@ static void omap2430_musb_try_idle(struct musb *musb, unsigned long timeout) last_timer = timeout; DBG(4, "%s inactive, for idle timer for %lu ms\n", - otg_state_string(musb), + otg_state_string(musb->xceiv->state), (unsigned long)jiffies_to_msecs(timeout - jiffies)); mod_timer(&musb_idle_timer, timeout); } @@ -195,7 +196,7 @@ static void omap2430_musb_set_vbus(struct musb *musb, int is_on) DBG(1, "VBUS %s, devctl %02x " /* otg %3x conf %08x prcm %08x */ "\n", - otg_state_string(musb), + otg_state_string(musb->xceiv->state), musb_readb(musb->mregs, MUSB_DEVCTL)); } diff --git a/drivers/usb/musb/tusb6010.c b/drivers/usb/musb/tusb6010.c index c47aac4a1f98..221feaaded72 100644 --- a/drivers/usb/musb/tusb6010.c +++ b/drivers/usb/musb/tusb6010.c @@ -422,7 +422,7 @@ static void musb_do_idle(unsigned long _musb) && (musb->idle_timeout == 0 || time_after(jiffies, musb->idle_timeout))) { DBG(4, "Nothing connected %s, turning off VBUS\n", - otg_state_string(musb)); + 
otg_state_string(musb->xceiv->state)); } /* FALLTHROUGH */ case OTG_STATE_A_IDLE: @@ -481,7 +481,8 @@ static void tusb_musb_try_idle(struct musb *musb, unsigned long timeout) /* Never idle if active, or when VBUS timeout is not set as host */ if (musb->is_active || ((musb->a_wait_bcon == 0) && (musb->xceiv->state == OTG_STATE_A_WAIT_BCON))) { - DBG(4, "%s active, deleting timer\n", otg_state_string(musb)); + DBG(4, "%s active, deleting timer\n", + otg_state_string(musb->xceiv->state)); del_timer(&musb_idle_timer); last_timer = jiffies; return; @@ -498,7 +499,7 @@ static void tusb_musb_try_idle(struct musb *musb, unsigned long timeout) last_timer = timeout; DBG(4, "%s inactive, for idle timer for %lu ms\n", - otg_state_string(musb), + otg_state_string(musb->xceiv->state), (unsigned long)jiffies_to_msecs(timeout - jiffies)); mod_timer(&musb_idle_timer, timeout); } @@ -573,7 +574,7 @@ static void tusb_musb_set_vbus(struct musb *musb, int is_on) musb_writeb(musb->mregs, MUSB_DEVCTL, devctl); DBG(1, "VBUS %s, devctl %02x otg %3x conf %08x prcm %08x\n", - otg_state_string(musb), + otg_state_string(musb->xceiv->state), musb_readb(musb->mregs, MUSB_DEVCTL), musb_readl(tbase, TUSB_DEV_OTG_STAT), conf, prcm); @@ -702,13 +703,13 @@ tusb_otg_ints(struct musb *musb, u32 int_src, void __iomem *tbase) musb->is_active = 0; } DBG(2, "vbus change, %s, otg %03x\n", - otg_state_string(musb), otg_stat); + otg_state_string(musb->xceiv->state), otg_stat); idle_timeout = jiffies + (1 * HZ); schedule_work(&musb->irq_work); } else /* A-dev state machine */ { DBG(2, "vbus change, %s, otg %03x\n", - otg_state_string(musb), otg_stat); + otg_state_string(musb->xceiv->state), otg_stat); switch (musb->xceiv->state) { case OTG_STATE_A_IDLE: @@ -756,7 +757,8 @@ tusb_otg_ints(struct musb *musb, u32 int_src, void __iomem *tbase) if (int_src & TUSB_INT_SRC_OTG_TIMEOUT) { u8 devctl; - DBG(4, "%s timer, %03x\n", otg_state_string(musb), otg_stat); + DBG(4, "%s timer, %03x\n", + otg_state_string(musb->xceiv->state), otg_stat); switch (musb->xceiv->state) { case OTG_STATE_A_WAIT_VRISE: diff --git a/include/linux/usb/otg.h b/include/linux/usb/otg.h index bc84858b3a4d..d87f44f5b04e 100644 --- a/include/linux/usb/otg.h +++ b/include/linux/usb/otg.h @@ -168,6 +168,7 @@ otg_shutdown(struct otg_transceiver *otg) #ifdef CONFIG_USB_OTG_UTILS extern struct otg_transceiver *otg_get_transceiver(void); extern void otg_put_transceiver(struct otg_transceiver *); +extern const char *otg_state_string(enum usb_otg_state state); #else static inline struct otg_transceiver *otg_get_transceiver(void) { @@ -177,6 +178,11 @@ static inline struct otg_transceiver *otg_get_transceiver(void) static inline void otg_put_transceiver(struct otg_transceiver *x) { } + +static inline const char *otg_state_string(enum usb_otg_state state) +{ + return NULL; +} #endif /* Context: can sleep */ @@ -246,6 +252,5 @@ otg_unregister_notifier(struct otg_transceiver *otg, struct notifier_block *nb) /* for OTG controller drivers (and maybe other stuff) */ extern int usb_bus_start_enum(struct usb_bus *bus, unsigned port_num); -extern const char *otg_state_string(enum usb_otg_state state); #endif /* __LINUX_USB_OTG_H */ -- cgit v1.2.3-71-gd317 From 0f73cac8e41723d600c91a0f5b481dc3202f4f82 Mon Sep 17 00:00:00 2001 From: Anji jonnala Date: Wed, 4 May 2011 10:19:46 +0530 Subject: USB: OTG: msm: vote for dayatona fabric clock HSUSB core clock is derived from daytona fabric clock and for HSUSB operational require minimum core clock at 55MHz. 
Since, HSUSB cannot tolerate daytona fabric clock change in the middle of HSUSB operational, vote for maximum Daytona fabric clock while usb is operational Signed-off-by: Anji jonnala Signed-off-by: Pavankumar Kondeti Signed-off-by: Greg Kroah-Hartman --- drivers/usb/otg/msm_otg.c | 36 +++++++++++++++++++++++++++++++++++- include/linux/usb/msm_hsusb.h | 7 +++++-- 2 files changed, 40 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/otg/msm_otg.c b/drivers/usb/otg/msm_otg.c index f58b7dab75aa..7792fef0be5e 100644 --- a/drivers/usb/otg/msm_otg.c +++ b/drivers/usb/otg/msm_otg.c @@ -324,6 +324,9 @@ static int msm_otg_suspend(struct msm_otg *motg) if (motg->core_clk) clk_disable(motg->core_clk); + if (!IS_ERR(motg->pclk_src)) + clk_disable(motg->pclk_src); + if (device_may_wakeup(otg->dev)) enable_irq_wake(motg->irq); if (bus) @@ -347,6 +350,9 @@ static int msm_otg_resume(struct msm_otg *motg) if (!atomic_read(&motg->in_lpm)) return 0; + if (!IS_ERR(motg->pclk_src)) + clk_enable(motg->pclk_src); + clk_enable(motg->pclk); clk_enable(motg->clk); if (motg->core_clk) @@ -862,12 +868,31 @@ static int __init msm_otg_probe(struct platform_device *pdev) ret = PTR_ERR(motg->clk); goto put_phy_reset_clk; } + clk_set_rate(motg->clk, 60000000); + + /* + * If USB Core is running its protocol engine based on CORE CLK, + * CORE CLK must be running at >55Mhz for correct HSUSB + * operation and USB core cannot tolerate frequency changes on + * CORE CLK. For such USB cores, vote for maximum clk frequency + * on pclk source + */ + if (motg->pdata->pclk_src_name) { + motg->pclk_src = clk_get(&pdev->dev, + motg->pdata->pclk_src_name); + if (IS_ERR(motg->pclk_src)) + goto put_clk; + clk_set_rate(motg->pclk_src, INT_MAX); + clk_enable(motg->pclk_src); + } else + motg->pclk_src = ERR_PTR(-ENOENT); + motg->pclk = clk_get(&pdev->dev, "usb_hs_pclk"); if (IS_ERR(motg->pclk)) { dev_err(&pdev->dev, "failed to get usb_hs_pclk\n"); ret = PTR_ERR(motg->pclk); - goto put_clk; + goto put_pclk_src; } /* @@ -955,6 +980,11 @@ put_core_clk: if (motg->core_clk) clk_put(motg->core_clk); clk_put(motg->pclk); +put_pclk_src: + if (!IS_ERR(motg->pclk_src)) { + clk_disable(motg->pclk_src); + clk_put(motg->pclk_src); + } put_clk: clk_put(motg->clk); put_phy_reset_clk: @@ -1004,6 +1034,10 @@ static int __devexit msm_otg_remove(struct platform_device *pdev) clk_disable(motg->clk); if (motg->core_clk) clk_disable(motg->core_clk); + if (!IS_ERR(motg->pclk_src)) { + clk_disable(motg->pclk_src); + clk_put(motg->pclk_src); + } iounmap(motg->regs); pm_runtime_set_suspended(&pdev->dev); diff --git a/include/linux/usb/msm_hsusb.h b/include/linux/usb/msm_hsusb.h index 3657403eac18..31ef1853f93c 100644 --- a/include/linux/usb/msm_hsusb.h +++ b/include/linux/usb/msm_hsusb.h @@ -64,7 +64,8 @@ enum otg_control_type { * @otg_control: OTG switch controlled by user/Id pin * @default_mode: Default operational mode. Applicable only if * OTG switch is controller by user. - * + * @pclk_src_name: pclk is derived from ebi1_usb_clk in case of 7x27 and 8k + * dfab_usb_hs_clk in case of 8660 and 8960. */ struct msm_otg_platform_data { int *phy_init_seq; @@ -74,6 +75,7 @@ struct msm_otg_platform_data { enum otg_control_type otg_control; enum usb_mode_type default_mode; void (*setup_gpio)(enum usb_otg_state state); + char *pclk_src_name; }; /** @@ -83,6 +85,7 @@ struct msm_otg_platform_data { * @irq: IRQ number assigned for HSUSB controller. * @clk: clock struct of usb_hs_clk. * @pclk: clock struct of usb_hs_pclk. 
+ * @pclk_src: pclk source for voting. * @phy_reset_clk: clock struct of usb_phy_clk. * @core_clk: clock struct of usb_hs_core_clk. * @regs: ioremapped register base address. @@ -90,7 +93,6 @@ struct msm_otg_platform_data { * @sm_work: OTG state machine work. * @in_lpm: indicates low power mode (LPM) state. * @async_int: Async interrupt arrived. - * */ struct msm_otg { struct otg_transceiver otg; @@ -98,6 +100,7 @@ struct msm_otg { int irq; struct clk *clk; struct clk *pclk; + struct clk *pclk_src; struct clk *phy_reset_clk; struct clk *core_clk; void __iomem *regs; -- cgit v1.2.3-71-gd317 From d860852e087eed7eadbea64f1a8db9a231c5e9b3 Mon Sep 17 00:00:00 2001 From: Pavankumar Kondeti Date: Wed, 4 May 2011 10:19:47 +0530 Subject: USB: OTG: msm: Implement charger detection Implement good battery algorithm defined in the battery charging V1.2 spec for detecting different charging ports. USB hardware is put into low power mode when connected to a dedicated charging port. vbus_draw and set_power methods are implemented for determining the allowed current from Host in different states (un-configured/suspend/configured). The charger block is implemented using vendor specific registers and the PHY used in MSM8960(28nm PHY) different from older targets like MSM8x60 and MSM7x30(45nm PHY). The PHY vendor and product id registers are not implemented in the above chipsets. Hence PHY type is passed via platform data. Signed-off-by: Pavankumar Kondeti Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/ci13xxx_udc.c | 10 ++ drivers/usb/otg/msm_otg.c | 380 ++++++++++++++++++++++++++++++++++++++- include/linux/usb/msm_hsusb.h | 72 +++++++- 3 files changed, 457 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/gadget/ci13xxx_udc.c b/drivers/usb/gadget/ci13xxx_udc.c index 68123ee01392..baaf87ed7685 100644 --- a/drivers/usb/gadget/ci13xxx_udc.c +++ b/drivers/usb/gadget/ci13xxx_udc.c @@ -2506,6 +2506,15 @@ out: return ret; } +static int ci13xxx_vbus_draw(struct usb_gadget *_gadget, unsigned mA) +{ + struct ci13xxx *udc = container_of(_gadget, struct ci13xxx, gadget); + + if (udc->transceiver) + return otg_set_power(udc->transceiver, mA); + return -ENOTSUPP; +} + /** * Device operations part of the API to the USB controller hardware, * which don't involve endpoints (or i/o) @@ -2514,6 +2523,7 @@ out: static const struct usb_gadget_ops usb_gadget_ops = { .vbus_session = ci13xxx_vbus_session, .wakeup = ci13xxx_wakeup, + .vbus_draw = ci13xxx_vbus_draw, }; /** diff --git a/drivers/usb/otg/msm_otg.c b/drivers/usb/otg/msm_otg.c index 7792fef0be5e..854b7e3413dd 100644 --- a/drivers/usb/otg/msm_otg.c +++ b/drivers/usb/otg/msm_otg.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2009-2010, Code Aurora Forum. All rights reserved. +/* Copyright (c) 2009-2011, Code Aurora Forum. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -409,6 +409,33 @@ skip_phy_resume: } #endif +static void msm_otg_notify_charger(struct msm_otg *motg, unsigned mA) +{ + if (motg->cur_power == mA) + return; + + /* TODO: Notify PMIC about available current */ + dev_info(motg->otg.dev, "Avail curr from USB = %u\n", mA); + motg->cur_power = mA; +} + +static int msm_otg_set_power(struct otg_transceiver *otg, unsigned mA) +{ + struct msm_otg *motg = container_of(otg, struct msm_otg, otg); + + /* + * Gadget driver uses set_power method to notify about the + * available current based on suspend/configured states. 
+ * + * IDEV_CHG can be drawn irrespective of suspend/un-configured + * states when CDP/ACA is connected. + */ + if (motg->chg_type == USB_SDP_CHARGER) + msm_otg_notify_charger(motg, mA); + + return 0; +} + static void msm_otg_start_host(struct otg_transceiver *otg, int on) { struct msm_otg *motg = container_of(otg, struct msm_otg, otg); @@ -563,6 +590,306 @@ static int msm_otg_set_peripheral(struct otg_transceiver *otg, return 0; } +static bool msm_chg_check_secondary_det(struct msm_otg *motg) +{ + struct otg_transceiver *otg = &motg->otg; + u32 chg_det; + bool ret = false; + + switch (motg->pdata->phy_type) { + case CI_45NM_INTEGRATED_PHY: + chg_det = ulpi_read(otg, 0x34); + ret = chg_det & (1 << 4); + break; + case SNPS_28NM_INTEGRATED_PHY: + chg_det = ulpi_read(otg, 0x87); + ret = chg_det & 1; + break; + default: + break; + } + return ret; +} + +static void msm_chg_enable_secondary_det(struct msm_otg *motg) +{ + struct otg_transceiver *otg = &motg->otg; + u32 chg_det; + + switch (motg->pdata->phy_type) { + case CI_45NM_INTEGRATED_PHY: + chg_det = ulpi_read(otg, 0x34); + /* Turn off charger block */ + chg_det |= ~(1 << 1); + ulpi_write(otg, chg_det, 0x34); + udelay(20); + /* control chg block via ULPI */ + chg_det &= ~(1 << 3); + ulpi_write(otg, chg_det, 0x34); + /* put it in host mode for enabling D- source */ + chg_det &= ~(1 << 2); + ulpi_write(otg, chg_det, 0x34); + /* Turn on chg detect block */ + chg_det &= ~(1 << 1); + ulpi_write(otg, chg_det, 0x34); + udelay(20); + /* enable chg detection */ + chg_det &= ~(1 << 0); + ulpi_write(otg, chg_det, 0x34); + break; + case SNPS_28NM_INTEGRATED_PHY: + /* + * Configure DM as current source, DP as current sink + * and enable battery charging comparators. + */ + ulpi_write(otg, 0x8, 0x85); + ulpi_write(otg, 0x2, 0x85); + ulpi_write(otg, 0x1, 0x85); + break; + default: + break; + } +} + +static bool msm_chg_check_primary_det(struct msm_otg *motg) +{ + struct otg_transceiver *otg = &motg->otg; + u32 chg_det; + bool ret = false; + + switch (motg->pdata->phy_type) { + case CI_45NM_INTEGRATED_PHY: + chg_det = ulpi_read(otg, 0x34); + ret = chg_det & (1 << 4); + break; + case SNPS_28NM_INTEGRATED_PHY: + chg_det = ulpi_read(otg, 0x87); + ret = chg_det & 1; + break; + default: + break; + } + return ret; +} + +static void msm_chg_enable_primary_det(struct msm_otg *motg) +{ + struct otg_transceiver *otg = &motg->otg; + u32 chg_det; + + switch (motg->pdata->phy_type) { + case CI_45NM_INTEGRATED_PHY: + chg_det = ulpi_read(otg, 0x34); + /* enable chg detection */ + chg_det &= ~(1 << 0); + ulpi_write(otg, chg_det, 0x34); + break; + case SNPS_28NM_INTEGRATED_PHY: + /* + * Configure DP as current source, DM as current sink + * and enable battery charging comparators. 
+ */ + ulpi_write(otg, 0x2, 0x85); + ulpi_write(otg, 0x1, 0x85); + break; + default: + break; + } +} + +static bool msm_chg_check_dcd(struct msm_otg *motg) +{ + struct otg_transceiver *otg = &motg->otg; + u32 line_state; + bool ret = false; + + switch (motg->pdata->phy_type) { + case CI_45NM_INTEGRATED_PHY: + line_state = ulpi_read(otg, 0x15); + ret = !(line_state & 1); + break; + case SNPS_28NM_INTEGRATED_PHY: + line_state = ulpi_read(otg, 0x87); + ret = line_state & 2; + break; + default: + break; + } + return ret; +} + +static void msm_chg_disable_dcd(struct msm_otg *motg) +{ + struct otg_transceiver *otg = &motg->otg; + u32 chg_det; + + switch (motg->pdata->phy_type) { + case CI_45NM_INTEGRATED_PHY: + chg_det = ulpi_read(otg, 0x34); + chg_det &= ~(1 << 5); + ulpi_write(otg, chg_det, 0x34); + break; + case SNPS_28NM_INTEGRATED_PHY: + ulpi_write(otg, 0x10, 0x86); + break; + default: + break; + } +} + +static void msm_chg_enable_dcd(struct msm_otg *motg) +{ + struct otg_transceiver *otg = &motg->otg; + u32 chg_det; + + switch (motg->pdata->phy_type) { + case CI_45NM_INTEGRATED_PHY: + chg_det = ulpi_read(otg, 0x34); + /* Turn on D+ current source */ + chg_det |= (1 << 5); + ulpi_write(otg, chg_det, 0x34); + break; + case SNPS_28NM_INTEGRATED_PHY: + /* Data contact detection enable */ + ulpi_write(otg, 0x10, 0x85); + break; + default: + break; + } +} + +static void msm_chg_block_on(struct msm_otg *motg) +{ + struct otg_transceiver *otg = &motg->otg; + u32 func_ctrl, chg_det; + + /* put the controller in non-driving mode */ + func_ctrl = ulpi_read(otg, ULPI_FUNC_CTRL); + func_ctrl &= ~ULPI_FUNC_CTRL_OPMODE_MASK; + func_ctrl |= ULPI_FUNC_CTRL_OPMODE_NONDRIVING; + ulpi_write(otg, func_ctrl, ULPI_FUNC_CTRL); + + switch (motg->pdata->phy_type) { + case CI_45NM_INTEGRATED_PHY: + chg_det = ulpi_read(otg, 0x34); + /* control chg block via ULPI */ + chg_det &= ~(1 << 3); + ulpi_write(otg, chg_det, 0x34); + /* Turn on chg detect block */ + chg_det &= ~(1 << 1); + ulpi_write(otg, chg_det, 0x34); + udelay(20); + break; + case SNPS_28NM_INTEGRATED_PHY: + /* Clear charger detecting control bits */ + ulpi_write(otg, 0x3F, 0x86); + /* Clear alt interrupt latch and enable bits */ + ulpi_write(otg, 0x1F, 0x92); + ulpi_write(otg, 0x1F, 0x95); + udelay(100); + break; + default: + break; + } +} + +static void msm_chg_block_off(struct msm_otg *motg) +{ + struct otg_transceiver *otg = &motg->otg; + u32 func_ctrl, chg_det; + + switch (motg->pdata->phy_type) { + case CI_45NM_INTEGRATED_PHY: + chg_det = ulpi_read(otg, 0x34); + /* Turn off charger block */ + chg_det |= ~(1 << 1); + ulpi_write(otg, chg_det, 0x34); + break; + case SNPS_28NM_INTEGRATED_PHY: + /* Clear charger detecting control bits */ + ulpi_write(otg, 0x3F, 0x86); + /* Clear alt interrupt latch and enable bits */ + ulpi_write(otg, 0x1F, 0x92); + ulpi_write(otg, 0x1F, 0x95); + break; + default: + break; + } + + /* put the controller in normal mode */ + func_ctrl = ulpi_read(otg, ULPI_FUNC_CTRL); + func_ctrl &= ~ULPI_FUNC_CTRL_OPMODE_MASK; + func_ctrl |= ULPI_FUNC_CTRL_OPMODE_NORMAL; + ulpi_write(otg, func_ctrl, ULPI_FUNC_CTRL); +} + +#define MSM_CHG_DCD_POLL_TIME (100 * HZ/1000) /* 100 msec */ +#define MSM_CHG_DCD_MAX_RETRIES 6 /* Tdcd_tmout = 6 * 100 msec */ +#define MSM_CHG_PRIMARY_DET_TIME (40 * HZ/1000) /* TVDPSRC_ON */ +#define MSM_CHG_SECONDARY_DET_TIME (40 * HZ/1000) /* TVDMSRC_ON */ +static void msm_chg_detect_work(struct work_struct *w) +{ + struct msm_otg *motg = container_of(w, struct msm_otg, chg_work.work); + struct otg_transceiver *otg = 
&motg->otg; + bool is_dcd, tmout, vout; + unsigned long delay; + + dev_dbg(otg->dev, "chg detection work\n"); + switch (motg->chg_state) { + case USB_CHG_STATE_UNDEFINED: + pm_runtime_get_sync(otg->dev); + msm_chg_block_on(motg); + msm_chg_enable_dcd(motg); + motg->chg_state = USB_CHG_STATE_WAIT_FOR_DCD; + motg->dcd_retries = 0; + delay = MSM_CHG_DCD_POLL_TIME; + break; + case USB_CHG_STATE_WAIT_FOR_DCD: + is_dcd = msm_chg_check_dcd(motg); + tmout = ++motg->dcd_retries == MSM_CHG_DCD_MAX_RETRIES; + if (is_dcd || tmout) { + msm_chg_disable_dcd(motg); + msm_chg_enable_primary_det(motg); + delay = MSM_CHG_PRIMARY_DET_TIME; + motg->chg_state = USB_CHG_STATE_DCD_DONE; + } else { + delay = MSM_CHG_DCD_POLL_TIME; + } + break; + case USB_CHG_STATE_DCD_DONE: + vout = msm_chg_check_primary_det(motg); + if (vout) { + msm_chg_enable_secondary_det(motg); + delay = MSM_CHG_SECONDARY_DET_TIME; + motg->chg_state = USB_CHG_STATE_PRIMARY_DONE; + } else { + motg->chg_type = USB_SDP_CHARGER; + motg->chg_state = USB_CHG_STATE_DETECTED; + delay = 0; + } + break; + case USB_CHG_STATE_PRIMARY_DONE: + vout = msm_chg_check_secondary_det(motg); + if (vout) + motg->chg_type = USB_DCP_CHARGER; + else + motg->chg_type = USB_CDP_CHARGER; + motg->chg_state = USB_CHG_STATE_SECONDARY_DONE; + /* fall through */ + case USB_CHG_STATE_SECONDARY_DONE: + motg->chg_state = USB_CHG_STATE_DETECTED; + case USB_CHG_STATE_DETECTED: + msm_chg_block_off(motg); + dev_dbg(otg->dev, "charger = %d\n", motg->chg_type); + schedule_work(&motg->sm_work); + return; + default: + return; + } + + schedule_delayed_work(&motg->chg_work, delay); +} + /* * We support OTG, Peripheral only and Host only configurations. In case * of OTG, mode switch (host-->peripheral/peripheral-->host) can happen @@ -633,9 +960,48 @@ static void msm_otg_sm_work(struct work_struct *w) writel(readl(USB_OTGSC) & ~OTGSC_BSVIE, USB_OTGSC); msm_otg_start_host(otg, 1); otg->state = OTG_STATE_A_HOST; - } else if (test_bit(B_SESS_VLD, &motg->inputs) && otg->gadget) { - msm_otg_start_peripheral(otg, 1); - otg->state = OTG_STATE_B_PERIPHERAL; + } else if (test_bit(B_SESS_VLD, &motg->inputs)) { + switch (motg->chg_state) { + case USB_CHG_STATE_UNDEFINED: + msm_chg_detect_work(&motg->chg_work.work); + break; + case USB_CHG_STATE_DETECTED: + switch (motg->chg_type) { + case USB_DCP_CHARGER: + msm_otg_notify_charger(motg, + IDEV_CHG_MAX); + break; + case USB_CDP_CHARGER: + msm_otg_notify_charger(motg, + IDEV_CHG_MAX); + msm_otg_start_peripheral(otg, 1); + otg->state = OTG_STATE_B_PERIPHERAL; + break; + case USB_SDP_CHARGER: + msm_otg_notify_charger(motg, IUNIT); + msm_otg_start_peripheral(otg, 1); + otg->state = OTG_STATE_B_PERIPHERAL; + break; + default: + break; + } + break; + default: + break; + } + } else { + /* + * If charger detection work is pending, decrement + * the pm usage counter to balance with the one that + * is incremented in charger detection work. 
+ */ + if (cancel_delayed_work_sync(&motg->chg_work)) { + pm_runtime_put_sync(otg->dev); + msm_otg_reset(otg); + } + msm_otg_notify_charger(motg, 0); + motg->chg_state = USB_CHG_STATE_UNDEFINED; + motg->chg_type = USB_INVALID_CHARGER; } pm_runtime_put_sync(otg->dev); break; @@ -643,7 +1009,10 @@ static void msm_otg_sm_work(struct work_struct *w) dev_dbg(otg->dev, "OTG_STATE_B_PERIPHERAL state\n"); if (!test_bit(B_SESS_VLD, &motg->inputs) || !test_bit(ID, &motg->inputs)) { + msm_otg_notify_charger(motg, 0); msm_otg_start_peripheral(otg, 0); + motg->chg_state = USB_CHG_STATE_UNDEFINED; + motg->chg_type = USB_INVALID_CHARGER; otg->state = OTG_STATE_B_IDLE; msm_otg_reset(otg); schedule_work(w); @@ -935,6 +1304,7 @@ static int __init msm_otg_probe(struct platform_device *pdev) writel(0, USB_OTGSC); INIT_WORK(&motg->sm_work, msm_otg_sm_work); + INIT_DELAYED_WORK(&motg->chg_work, msm_chg_detect_work); ret = request_irq(motg->irq, msm_otg_irq, IRQF_SHARED, "msm_otg", motg); if (ret) { @@ -945,6 +1315,7 @@ static int __init msm_otg_probe(struct platform_device *pdev) otg->init = msm_otg_reset; otg->set_host = msm_otg_set_host; otg->set_peripheral = msm_otg_set_peripheral; + otg->set_power = msm_otg_set_power; otg->io_ops = &msm_otg_io_ops; @@ -1004,6 +1375,7 @@ static int __devexit msm_otg_remove(struct platform_device *pdev) return -EBUSY; msm_otg_debugfs_cleanup(); + cancel_delayed_work_sync(&motg->chg_work); cancel_work_sync(&motg->sm_work); pm_runtime_resume(&pdev->dev); diff --git a/include/linux/usb/msm_hsusb.h b/include/linux/usb/msm_hsusb.h index 31ef1853f93c..00311fe9d0df 100644 --- a/include/linux/usb/msm_hsusb.h +++ b/include/linux/usb/msm_hsusb.h @@ -2,7 +2,7 @@ * * Copyright (C) 2008 Google, Inc. * Author: Brian Swetland - * Copyright (c) 2009-2010, Code Aurora Forum. All rights reserved. + * Copyright (c) 2009-2011, Code Aurora Forum. All rights reserved. * * This software is licensed under the terms of the GNU General Public * License version 2, as published by the Free Software Foundation, and @@ -53,6 +53,64 @@ enum otg_control_type { OTG_USER_CONTROL, }; +/** + * PHY used in + * + * INVALID_PHY Unsupported PHY + * CI_45NM_INTEGRATED_PHY Chipidea 45nm integrated PHY + * SNPS_28NM_INTEGRATED_PHY Synopsis 28nm integrated PHY + * + */ +enum msm_usb_phy_type { + INVALID_PHY = 0, + CI_45NM_INTEGRATED_PHY, + SNPS_28NM_INTEGRATED_PHY, +}; + +#define IDEV_CHG_MAX 1500 +#define IUNIT 100 + +/** + * Different states involved in USB charger detection. + * + * USB_CHG_STATE_UNDEFINED USB charger is not connected or detection + * process is not yet started. + * USB_CHG_STATE_WAIT_FOR_DCD Waiting for Data pins contact. + * USB_CHG_STATE_DCD_DONE Data pin contact is detected. + * USB_CHG_STATE_PRIMARY_DONE Primary detection is completed (Detects + * between SDP and DCP/CDP). + * USB_CHG_STATE_SECONDARY_DONE Secondary detection is completed (Detects + * between DCP and CDP). + * USB_CHG_STATE_DETECTED USB charger type is determined. + * + */ +enum usb_chg_state { + USB_CHG_STATE_UNDEFINED = 0, + USB_CHG_STATE_WAIT_FOR_DCD, + USB_CHG_STATE_DCD_DONE, + USB_CHG_STATE_PRIMARY_DONE, + USB_CHG_STATE_SECONDARY_DONE, + USB_CHG_STATE_DETECTED, +}; + +/** + * USB charger types + * + * USB_INVALID_CHARGER Invalid USB charger. + * USB_SDP_CHARGER Standard downstream port. Refers to a downstream port + * on USB2.0 compliant host/hub. + * USB_DCP_CHARGER Dedicated charger port (AC charger/ Wall charger). + * USB_CDP_CHARGER Charging downstream port. 
Enumeration can happen and + * IDEV_CHG_MAX can be drawn irrespective of USB state. + * + */ +enum usb_chg_type { + USB_INVALID_CHARGER = 0, + USB_SDP_CHARGER, + USB_DCP_CHARGER, + USB_CDP_CHARGER, +}; + /** * struct msm_otg_platform_data - platform device data * for msm_otg driver. @@ -74,6 +132,7 @@ struct msm_otg_platform_data { enum usb_mode_type mode; enum otg_control_type otg_control; enum usb_mode_type default_mode; + enum msm_usb_phy_type phy_type; void (*setup_gpio)(enum usb_otg_state state); char *pclk_src_name; }; @@ -93,6 +152,12 @@ struct msm_otg_platform_data { * @sm_work: OTG state machine work. * @in_lpm: indicates low power mode (LPM) state. * @async_int: Async interrupt arrived. + * @cur_power: The amount of mA available from downstream port. + * @chg_work: Charger detection work. + * @chg_state: The state of charger detection process. + * @chg_type: The type of charger attached. + * @dcd_retires: The retry count used to track Data contact + * detection process. */ struct msm_otg { struct otg_transceiver otg; @@ -110,6 +175,11 @@ struct msm_otg { struct work_struct sm_work; atomic_t in_lpm; int async_int; + unsigned cur_power; + struct delayed_work chg_work; + enum usb_chg_state chg_state; + enum usb_chg_type chg_type; + u8 dcd_retries; }; #endif -- cgit v1.2.3-71-gd317 From 04aebcbb1b6dccadc8862b2765265f65a946db57 Mon Sep 17 00:00:00 2001 From: Pavankumar Kondeti Date: Wed, 4 May 2011 10:19:49 +0530 Subject: USB: OTG: msm: Add PHY suspend support for MSM8960 Signed-off-by: Pavankumar Kondeti Signed-off-by: Greg Kroah-Hartman --- drivers/usb/otg/msm_otg.c | 64 +++++++++++++++++++++++++++++++++------- include/linux/usb/msm_hsusb_hw.h | 2 ++ 2 files changed, 56 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/otg/msm_otg.c b/drivers/usb/otg/msm_otg.c index 628ba7d943da..1cdda6c05238 100644 --- a/drivers/usb/otg/msm_otg.c +++ b/drivers/usb/otg/msm_otg.c @@ -163,6 +163,32 @@ put_3p3: return rc; } +#ifdef CONFIG_PM_SLEEP +#define USB_PHY_SUSP_DIG_VOL 500000 +static int msm_hsusb_config_vddcx(int high) +{ + int max_vol = USB_PHY_VDD_DIG_VOL_MAX; + int min_vol; + int ret; + + if (high) + min_vol = USB_PHY_VDD_DIG_VOL_MIN; + else + min_vol = USB_PHY_SUSP_DIG_VOL; + + ret = regulator_set_voltage(hsusb_vddcx, min_vol, max_vol); + if (ret) { + pr_err("%s: unable to set the voltage for regulator " + "HSUSB_VDDCX\n", __func__); + return ret; + } + + pr_debug("%s: min_vol:%d max_vol:%d\n", __func__, min_vol, max_vol); + + return ret; +} +#endif + static int msm_hsusb_ldo_set_mode(int on) { int ret = 0; @@ -434,27 +460,28 @@ static int msm_otg_suspend(struct msm_otg *motg) disable_irq(motg->irq); /* + * Chipidea 45-nm PHY suspend sequence: + * * Interrupt Latch Register auto-clear feature is not present * in all PHY versions. Latch register is clear on read type. * Clear latch register to avoid spurious wakeup from * low power mode (LPM). - */ - ulpi_read(otg, 0x14); - - /* + * * PHY comparators are disabled when PHY enters into low power * mode (LPM). Keep PHY comparators ON in LPM only when we expect * VBUS/Id notifications from USB PHY. Otherwise turn off USB * PHY comparators. This save significant amount of power. - */ - if (pdata->otg_control == OTG_PHY_CONTROL) - ulpi_write(otg, 0x01, 0x30); - - /* + * * PLL is not turned off when PHY enters into low power mode (LPM). * Disable PLL for maximum power savings. 
*/ - ulpi_write(otg, 0x08, 0x09); + + if (motg->pdata->phy_type == CI_45NM_INTEGRATED_PHY) { + ulpi_read(otg, 0x14); + if (pdata->otg_control == OTG_PHY_CONTROL) + ulpi_write(otg, 0x01, 0x30); + ulpi_write(otg, 0x08, 0x09); + } /* * PHY may take some time or even fail to enter into low power @@ -485,6 +512,10 @@ static int msm_otg_suspend(struct msm_otg *motg) */ writel(readl(USB_USBCMD) | ASYNC_INTR_CTRL | ULPI_STP_CTRL, USB_USBCMD); + if (motg->pdata->phy_type == SNPS_28NM_INTEGRATED_PHY && + motg->pdata->otg_control == OTG_PMIC_CONTROL) + writel(readl(USB_PHY_CTRL) | PHY_RETEN, USB_PHY_CTRL); + clk_disable(motg->pclk); clk_disable(motg->clk); if (motg->core_clk) @@ -493,6 +524,12 @@ static int msm_otg_suspend(struct msm_otg *motg) if (!IS_ERR(motg->pclk_src)) clk_disable(motg->pclk_src); + if (motg->pdata->phy_type == SNPS_28NM_INTEGRATED_PHY && + motg->pdata->otg_control == OTG_PMIC_CONTROL) { + msm_hsusb_ldo_set_mode(0); + msm_hsusb_config_vddcx(0); + } + if (device_may_wakeup(otg->dev)) enable_irq_wake(motg->irq); if (bus) @@ -524,6 +561,13 @@ static int msm_otg_resume(struct msm_otg *motg) if (motg->core_clk) clk_enable(motg->core_clk); + if (motg->pdata->phy_type == SNPS_28NM_INTEGRATED_PHY && + motg->pdata->otg_control == OTG_PMIC_CONTROL) { + msm_hsusb_ldo_set_mode(1); + msm_hsusb_config_vddcx(1); + writel(readl(USB_PHY_CTRL) & ~PHY_RETEN, USB_PHY_CTRL); + } + temp = readl(USB_USBCMD); temp &= ~ASYNC_INTR_CTRL; temp &= ~ULPI_STP_CTRL; diff --git a/include/linux/usb/msm_hsusb_hw.h b/include/linux/usb/msm_hsusb_hw.h index 7d1babbff071..6e97a2d3d39f 100644 --- a/include/linux/usb/msm_hsusb_hw.h +++ b/include/linux/usb/msm_hsusb_hw.h @@ -24,6 +24,7 @@ #define USB_PORTSC (MSM_USB_BASE + 0x0184) #define USB_OTGSC (MSM_USB_BASE + 0x01A4) #define USB_USBMODE (MSM_USB_BASE + 0x01A8) +#define USB_PHY_CTRL (MSM_USB_BASE + 0x0240) #define USBCMD_RESET 2 #define USB_USBINTR (MSM_USB_BASE + 0x0148) @@ -42,6 +43,7 @@ #define ASYNC_INTR_CTRL (1 << 29) /* Enable async interrupt */ #define ULPI_STP_CTRL (1 << 30) /* Block communication with PHY */ +#define PHY_RETEN (1 << 1) /* PHY retention enable/disable */ /* OTG definitions */ #define OTGSC_INTSTS_MASK (0x7f << 16) -- cgit v1.2.3-71-gd317 From 1759415e630e5db0dd2390df9f94892cbfb9a8a2 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 5 May 2011 15:23:54 -0500 Subject: slub: Remove CONFIG_CMPXCHG_LOCAL ifdeffery Remove the #ifdefs. This means that the irqsafe_cpu_cmpxchg_double() is used everywhere. There may be performance implications since: A. We now have to manage a transaction ID for all arches B. The interrupt holdoff for arches not supporting CONFIG_CMPXCHG_LOCAL is reduced to a very short irqoff section. There are no multiple irqoff/irqon sequences as a result of this change. Even in the fallback case we only have to do one disable and enable like before. 
Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- include/linux/slub_def.h | 2 -- mm/slub.c | 56 ------------------------------------------------ 2 files changed, 58 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 45ca123e8002..ca0c076b2374 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -37,9 +37,7 @@ enum stat_item { struct kmem_cache_cpu { void **freelist; /* Pointer to next available object */ -#ifdef CONFIG_CMPXCHG_LOCAL unsigned long tid; /* Globally unique transaction id */ -#endif struct page *page; /* The slab from which we are allocating */ int node; /* The node of the page (or -1 for debug) */ #ifdef CONFIG_SLUB_STATS diff --git a/mm/slub.c b/mm/slub.c index c952fac112e8..461199f019d6 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1551,7 +1551,6 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) } } -#ifdef CONFIG_CMPXCHG_LOCAL #ifdef CONFIG_PREEMPT /* * Calculate the next globally unique transaction for disambiguiation @@ -1611,17 +1610,12 @@ static inline void note_cmpxchg_failure(const char *n, stat(s, CMPXCHG_DOUBLE_CPU_FAIL); } -#endif - void init_kmem_cache_cpus(struct kmem_cache *s) { -#ifdef CONFIG_CMPXCHG_LOCAL int cpu; for_each_possible_cpu(cpu) per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu); -#endif - } /* * Remove the cpu slab @@ -1654,9 +1648,7 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) page->inuse--; } c->page = NULL; -#ifdef CONFIG_CMPXCHG_LOCAL c->tid = next_tid(c->tid); -#endif unfreeze_slab(s, page, tail); } @@ -1791,7 +1783,6 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, { void **object; struct page *page; -#ifdef CONFIG_CMPXCHG_LOCAL unsigned long flags; local_irq_save(flags); @@ -1802,7 +1793,6 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, * pointer. */ c = this_cpu_ptr(s->cpu_slab); -#endif #endif /* We handle __GFP_ZERO in the caller */ @@ -1831,10 +1821,8 @@ load_freelist: unlock_out: slab_unlock(page); -#ifdef CONFIG_CMPXCHG_LOCAL c->tid = next_tid(c->tid); local_irq_restore(flags); -#endif stat(s, ALLOC_SLOWPATH); return object; @@ -1873,9 +1861,7 @@ load_from_page: } if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit()) slab_out_of_memory(s, gfpflags, node); -#ifdef CONFIG_CMPXCHG_LOCAL local_irq_restore(flags); -#endif return NULL; debug: if (!alloc_debug_processing(s, page, object, addr)) @@ -1902,20 +1888,12 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, { void **object; struct kmem_cache_cpu *c; -#ifdef CONFIG_CMPXCHG_LOCAL unsigned long tid; -#else - unsigned long flags; -#endif if (slab_pre_alloc_hook(s, gfpflags)) return NULL; -#ifndef CONFIG_CMPXCHG_LOCAL - local_irq_save(flags); -#else redo: -#endif /* * Must read kmem_cache cpu data via this cpu ptr. Preemption is @@ -1925,7 +1903,6 @@ redo: */ c = __this_cpu_ptr(s->cpu_slab); -#ifdef CONFIG_CMPXCHG_LOCAL /* * The transaction ids are globally unique per cpu and per operation on * a per cpu queue. Thus they can be guarantee that the cmpxchg_double @@ -1934,7 +1911,6 @@ redo: */ tid = c->tid; barrier(); -#endif object = c->freelist; if (unlikely(!object || !node_match(c, node))) @@ -1942,7 +1918,6 @@ redo: object = __slab_alloc(s, gfpflags, node, addr, c); else { -#ifdef CONFIG_CMPXCHG_LOCAL /* * The cmpxchg will only match if there was no additonal * operation and if we are on the right processor. 
@@ -1963,16 +1938,9 @@ redo: note_cmpxchg_failure("slab_alloc", s, tid); goto redo; } -#else - c->freelist = get_freepointer(s, object); -#endif stat(s, ALLOC_FASTPATH); } -#ifndef CONFIG_CMPXCHG_LOCAL - local_irq_restore(flags); -#endif - if (unlikely(gfpflags & __GFP_ZERO) && object) memset(object, 0, s->objsize); @@ -2049,11 +2017,9 @@ static void __slab_free(struct kmem_cache *s, struct page *page, { void *prior; void **object = (void *)x; -#ifdef CONFIG_CMPXCHG_LOCAL unsigned long flags; local_irq_save(flags); -#endif slab_lock(page); stat(s, FREE_SLOWPATH); @@ -2084,9 +2050,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, out_unlock: slab_unlock(page); -#ifdef CONFIG_CMPXCHG_LOCAL local_irq_restore(flags); -#endif return; slab_empty: @@ -2098,9 +2062,7 @@ slab_empty: stat(s, FREE_REMOVE_PARTIAL); } slab_unlock(page); -#ifdef CONFIG_CMPXCHG_LOCAL local_irq_restore(flags); -#endif stat(s, FREE_SLAB); discard_slab(s, page); } @@ -2121,20 +2083,11 @@ static __always_inline void slab_free(struct kmem_cache *s, { void **object = (void *)x; struct kmem_cache_cpu *c; -#ifdef CONFIG_CMPXCHG_LOCAL unsigned long tid; -#else - unsigned long flags; -#endif slab_free_hook(s, x); -#ifndef CONFIG_CMPXCHG_LOCAL - local_irq_save(flags); - -#else redo: -#endif /* * Determine the currently cpus per cpu slab. @@ -2144,15 +2097,12 @@ redo: */ c = __this_cpu_ptr(s->cpu_slab); -#ifdef CONFIG_CMPXCHG_LOCAL tid = c->tid; barrier(); -#endif if (likely(page == c->page && c->node != NUMA_NO_NODE)) { set_freepointer(s, object, c->freelist); -#ifdef CONFIG_CMPXCHG_LOCAL if (unlikely(!this_cpu_cmpxchg_double( s->cpu_slab->freelist, s->cpu_slab->tid, c->freelist, tid, @@ -2161,16 +2111,10 @@ redo: note_cmpxchg_failure("slab_free", s, tid); goto redo; } -#else - c->freelist = object; -#endif stat(s, FREE_FASTPATH); } else __slab_free(s, page, x, addr); -#ifndef CONFIG_CMPXCHG_LOCAL - local_irq_restore(flags); -#endif } void kmem_cache_free(struct kmem_cache *s, void *x) -- cgit v1.2.3-71-gd317 From 1ab7b6ac2709d0eb05a7144cd0e14faa3a7ea162 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 14 Apr 2011 23:46:06 -0700 Subject: ethtool: remove phys_id from ethtool_ops After that all the upstream kernel drivers now use phys_id, and the old ethtool_ops interface (phys_id) can be removed. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/ethtool.h | 7 ------- net/core/ethtool.c | 6 +----- 2 files changed, 1 insertion(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 4194a2067a14..d659fdc77eb3 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -814,12 +814,6 @@ bool ethtool_invalid_flags(struct net_device *dev, u32 data, u32 supported); * the indicator accordingly. Finally, it is called with the argument * %ETHTOOL_ID_INACTIVE and must deactivate the indicator. Returns a * negative error code or zero. - * @phys_id: Deprecated in favour of @set_phys_id. - * Identify the physical device, e.g. by flashing an LED - * attached to it until interrupted by a signal or the given time - * (in seconds) elapses. If the given time is zero, use a default - * time limit. Returns a negative error code or zero. Being - * interrupted by a signal is not an error. * @get_ethtool_stats: Return extended statistics about the device. * This is only useful if the device maintains statistics not * included in &struct rtnl_link_stats64. 
@@ -908,7 +902,6 @@ struct ethtool_ops { void (*self_test)(struct net_device *, struct ethtool_test *, u64 *); void (*get_strings)(struct net_device *, u32 stringset, u8 *); int (*set_phys_id)(struct net_device *, enum ethtool_phys_id_state); - int (*phys_id)(struct net_device *, u32); void (*get_ethtool_stats)(struct net_device *, struct ethtool_stats *, u64 *); int (*begin)(struct net_device *); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index d8b1a8d85a96..927819d92248 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1655,7 +1655,7 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) static bool busy; int rc; - if (!dev->ethtool_ops->set_phys_id && !dev->ethtool_ops->phys_id) + if (!dev->ethtool_ops->set_phys_id) return -EOPNOTSUPP; if (busy) @@ -1664,10 +1664,6 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) if (copy_from_user(&id, useraddr, sizeof(id))) return -EFAULT; - if (!dev->ethtool_ops->set_phys_id) - /* Do it the old way */ - return dev->ethtool_ops->phys_id(dev, id.data); - rc = dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_ACTIVE); if (rc < 0) return rc; -- cgit v1.2.3-71-gd317 From 57cc71bc3c0cf18bfd1b7bc8cd0eb6c303da24c5 Mon Sep 17 00:00:00 2001 From: Yaniv Rosner Date: Mon, 2 May 2011 21:30:08 +0000 Subject: ethtool: Add 20G bit definitions Add 20G supported and advertising bit definitions. 20G will be supported with the 57840 chips. Signed-off-by: Yaniv Rosner Signed-off-by: Eilon Greenstein ------ include/linux/ethtool.h | 4 ++++ 1 files changed, 4 insertions(+), 0 deletions(-) Signed-off-by: David S. Miller --- include/linux/ethtool.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index d659fdc77eb3..bd0b50b85f06 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -1025,6 +1025,8 @@ struct ethtool_ops { #define SUPPORTED_10000baseKX4_Full (1 << 18) #define SUPPORTED_10000baseKR_Full (1 << 19) #define SUPPORTED_10000baseR_FEC (1 << 20) +#define SUPPORTED_20000baseMLD2_Full (1 << 21) +#define SUPPORTED_20000baseKR2_Full (1 << 22) /* Indicates what features are advertised by the interface. */ #define ADVERTISED_10baseT_Half (1 << 0) @@ -1048,6 +1050,8 @@ struct ethtool_ops { #define ADVERTISED_10000baseKX4_Full (1 << 18) #define ADVERTISED_10000baseKR_Full (1 << 19) #define ADVERTISED_10000baseR_FEC (1 << 20) +#define ADVERTISED_20000baseMLD2_Full (1 << 21) +#define ADVERTISED_20000baseKR2_Full (1 << 22) /* The following are all involved in forcing a particular link * mode for the device for setting things. When getting the -- cgit v1.2.3-71-gd317 From eed2a12f1ed9aabf0676f4d0db34aad51976c5c6 Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Wed, 4 May 2011 15:30:11 +0000 Subject: net: Allow ethtool to set interface in loopback mode. This patch enables ethtool to set the loopback mode on a given interface. By configuring the interface in loopback mode in conjunction with a policy route / rule, a userland application can stress the egress / ingress path exposing the flows of the change in progress and potentially help developer(s) understand the impact of those changes without even sending a packet out on the network. 
Following set of commands illustrates one such example - a) ip -4 addr add 192.168.1.1/24 dev eth1 b) ip -4 rule add from all iif eth1 lookup 250 c) ip -4 route add local 0/0 dev lo proto kernel scope host table 250 d) arp -Ds 192.168.1.100 eth1 e) arp -Ds 192.168.1.200 eth1 f) sysctl -w net.ipv4.ip_nonlocal_bind=1 g) sysctl -w net.ipv4.conf.all.accept_local=1 # Assuming that the machine has 8 cores h) taskset 000f netserver -L 192.168.1.200 i) taskset 00f0 netperf -t TCP_CRR -L 192.168.1.100 -H 192.168.1.200 -l 30 Signed-off-by: Mahesh Bandewar Acked-by: Ben Hutchings Signed-off-by: David S. Miller --- drivers/net/loopback.c | 3 ++- include/linux/netdevice.h | 3 ++- net/core/ethtool.c | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index d70fb76edb77..4ce9e5f2c069 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -174,7 +174,8 @@ static void loopback_setup(struct net_device *dev) | NETIF_F_HIGHDMA | NETIF_F_LLTX | NETIF_F_NETNS_LOCAL - | NETIF_F_VLAN_CHALLENGED; + | NETIF_F_VLAN_CHALLENGED + | NETIF_F_LOOPBACK; dev->ethtool_ops = &loopback_ethtool_ops; dev->header_ops = ð_header_ops; dev->netdev_ops = &loopback_ops; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d5de66af46f9..e7244ed1f9a8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1067,6 +1067,7 @@ struct net_device { #define NETIF_F_RXHASH (1 << 28) /* Receive hashing offload */ #define NETIF_F_RXCSUM (1 << 29) /* Receive checksumming offload */ #define NETIF_F_NOCACHE_COPY (1 << 30) /* Use no-cache copyfromuser */ +#define NETIF_F_LOOPBACK (1 << 31) /* Enable loopback */ /* Segmentation offload features */ #define NETIF_F_GSO_SHIFT 16 @@ -1082,7 +1083,7 @@ struct net_device { /* = all defined minus driver/device-class-related */ #define NETIF_F_NEVER_CHANGE (NETIF_F_VLAN_CHALLENGED | \ NETIF_F_LLTX | NETIF_F_NETNS_LOCAL) -#define NETIF_F_ETHTOOL_BITS (0x7f3fffff & ~NETIF_F_NEVER_CHANGE) +#define NETIF_F_ETHTOOL_BITS (0xff3fffff & ~NETIF_F_NEVER_CHANGE) /* List of features with software fallbacks. */ #define NETIF_F_GSO_SOFTWARE (NETIF_F_TSO | NETIF_F_TSO_ECN | \ diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 927819d92248..b6f405888538 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -362,7 +362,7 @@ static const char netdev_features_strings[ETHTOOL_DEV_FEATURE_WORDS * 32][ETH_GS /* NETIF_F_RXHASH */ "rx-hashing", /* NETIF_F_RXCSUM */ "rx-checksum", /* NETIF_F_NOCACHE_COPY */ "tx-nocache-copy" - "", + /* NETIF_F_LOOPBACK */ "loopback", }; static int __ethtool_get_sset_count(struct net_device *dev, int sset) -- cgit v1.2.3-71-gd317 From 245e37182b7dead255bdc9e333e0bcc128360675 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sun, 8 May 2011 16:41:45 -0700 Subject: ide: Use linux/mutex.h The IDE code is still including asm/mutex.h instead of linux/mutex.h Signed-off-by: Anton Blanchard Signed-off-by: David S. 
Miller --- include/linux/ide.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 072fe8c93e6f..42557851b12e 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -18,13 +18,13 @@ #include #include #include +#include #ifdef CONFIG_BLK_DEV_IDEACPI #include #endif #include #include #include -#include /* for request_sense */ #include -- cgit v1.2.3-71-gd317 From 48752e1b1802231ef2a076f34d861918b7d571c3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 9 May 2011 04:40:44 +0000 Subject: vlan: remove one synchronize_net() call MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit At VLAN dismantle phase, unregister_vlan_dev() makes one synchronize_net() call after vlan_group_set_device(grp, vlan_id, NULL). This call can be safely removed because we are calling unregister_netdevice_queue() to queue device for deletion, and this process needs at least one rcu grace period to complete. Signed-off-by: Eric Dumazet Cc: Ben Greear Cc: Patrick McHardy Cc: Paul E. McKenney Cc: Jesse Gross Cc: MichaÅ‚ MirosÅ‚aw Acked-by: Jesse Gross Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 1 - net/8021q/vlan.c | 10 ++++------ 2 files changed, 4 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 546d9d35fbd4..290bd8ac94cf 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -86,7 +86,6 @@ struct vlan_group { * the vlan is attached to. */ unsigned int nr_vlans; - int killall; struct hlist_node hlist; /* linked list */ struct net_device **vlan_devices_arrays[VLAN_GROUP_ARRAY_SPLIT_PARTS]; struct rcu_head rcu; diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 969e7004cf86..718d635d3379 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -120,9 +120,10 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) grp->nr_vlans--; vlan_group_set_device(grp, vlan_id, NULL); - if (!grp->killall) - synchronize_net(); - + /* Because unregister_netdevice_queue() makes sure at least one rcu + * grace period is respected before device freeing, + * we dont need to call synchronize_net() here. + */ unregister_netdevice_queue(dev, head); /* If the group is now empty, kill off the group. */ @@ -478,9 +479,6 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, if (dev->reg_state != NETREG_UNREGISTERING) break; - /* Delete all VLANs for this dev. */ - grp->killall = 1; - for (i = 0; i < VLAN_N_VID; i++) { vlandev = vlan_group_get_device(grp, i); if (!vlandev) -- cgit v1.2.3-71-gd317 From 4940fc889e1e63667a15243028ddcd84d471cd8e Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 7 May 2011 23:00:07 +0000 Subject: net: add mac_pton() for parsing MAC address mac_pton() parses MAC address in form XX:XX:XX:XX:XX:XX and only in that form. mac_pton() doesn't dirty result until it's sure string representation is valid. mac_pton() doesn't care about characters _after_ last octet, it's up to caller to deal with it. mac_pton() diverges from 0/-E return value convention. Target usage: if (!mac_pton(str, whatever->mac)) return -EINVAL; /* ->mac being u8 [ETH_ALEN] is filled at this point. */ /* optionally check str[3 * ETH_ALEN - 1] for termination */ Use mac_pton() in pktgen and netconsole for start. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. 
Miller --- drivers/net/netconsole.c | 20 ++++-------------- include/linux/if_ether.h | 1 + net/core/netpoll.c | 26 +----------------------- net/core/pktgen.c | 53 ++++++++---------------------------------------- net/core/utils.c | 24 ++++++++++++++++++++++ 5 files changed, 38 insertions(+), 86 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index 62fdbaa1fb60..a83e101440fd 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -429,8 +429,6 @@ static ssize_t store_remote_mac(struct netconsole_target *nt, size_t count) { u8 remote_mac[ETH_ALEN]; - char *p = (char *) buf; - int i; if (nt->enabled) { printk(KERN_ERR "netconsole: target (%s) is enabled, " @@ -439,23 +437,13 @@ static ssize_t store_remote_mac(struct netconsole_target *nt, return -EINVAL; } - for (i = 0; i < ETH_ALEN - 1; i++) { - remote_mac[i] = simple_strtoul(p, &p, 16); - if (*p != ':') - goto invalid; - p++; - } - remote_mac[ETH_ALEN - 1] = simple_strtoul(p, &p, 16); - if (*p && (*p != '\n')) - goto invalid; - + if (!mac_pton(buf, remote_mac)) + return -EINVAL; + if (buf[3 * ETH_ALEN - 1] && buf[3 * ETH_ALEN - 1] != '\n') + return -EINVAL; memcpy(nt->np.remote_mac, remote_mac, ETH_ALEN); return strnlen(buf, count); - -invalid: - printk(KERN_ERR "netconsole: invalid input\n"); - return -EINVAL; } /* diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index be69043d2896..0f1325d98295 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -136,6 +136,7 @@ int eth_header_parse(const struct sk_buff *skb, unsigned char *haddr); extern struct ctl_table ether_table[]; #endif +int mac_pton(const char *s, u8 *mac); extern ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len); #endif diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 46d9c3a4de2f..2d7d6d473781 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -698,32 +698,8 @@ int netpoll_parse_options(struct netpoll *np, char *opt) if (*cur != 0) { /* MAC address */ - if ((delim = strchr(cur, ':')) == NULL) + if (!mac_pton(cur, np->remote_mac)) goto parse_failed; - *delim = 0; - np->remote_mac[0] = simple_strtol(cur, NULL, 16); - cur = delim + 1; - if ((delim = strchr(cur, ':')) == NULL) - goto parse_failed; - *delim = 0; - np->remote_mac[1] = simple_strtol(cur, NULL, 16); - cur = delim + 1; - if ((delim = strchr(cur, ':')) == NULL) - goto parse_failed; - *delim = 0; - np->remote_mac[2] = simple_strtol(cur, NULL, 16); - cur = delim + 1; - if ((delim = strchr(cur, ':')) == NULL) - goto parse_failed; - *delim = 0; - np->remote_mac[3] = simple_strtol(cur, NULL, 16); - cur = delim + 1; - if ((delim = strchr(cur, ':')) == NULL) - goto parse_failed; - *delim = 0; - np->remote_mac[4] = simple_strtol(cur, NULL, 16); - cur = delim + 1; - np->remote_mac[5] = simple_strtol(cur, NULL, 16); } netpoll_print_options(np); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index d41d88b53e18..379270f14771 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -1420,11 +1420,6 @@ static ssize_t pktgen_if_write(struct file *file, return count; } if (!strcmp(name, "dst_mac")) { - char *v = valstr; - unsigned char old_dmac[ETH_ALEN]; - unsigned char *m = pkt_dev->dst_mac; - memcpy(old_dmac, pkt_dev->dst_mac, ETH_ALEN); - len = strn_len(&user_buffer[i], sizeof(valstr) - 1); if (len < 0) return len; @@ -1432,35 +1427,16 @@ static ssize_t pktgen_if_write(struct file *file, memset(valstr, 0, sizeof(valstr)); if (copy_from_user(valstr, &user_buffer[i], len)) return 
-EFAULT; - i += len; - - for (*m = 0; *v && m < pkt_dev->dst_mac + 6; v++) { - int value; - - value = hex_to_bin(*v); - if (value >= 0) - *m = *m * 16 + value; - - if (*v == ':') { - m++; - *m = 0; - } - } + if (!mac_pton(valstr, pkt_dev->dst_mac)) + return -EINVAL; /* Set up Dest MAC */ - if (compare_ether_addr(old_dmac, pkt_dev->dst_mac)) - memcpy(&(pkt_dev->hh[0]), pkt_dev->dst_mac, ETH_ALEN); + memcpy(&pkt_dev->hh[0], pkt_dev->dst_mac, ETH_ALEN); - sprintf(pg_result, "OK: dstmac"); + sprintf(pg_result, "OK: dstmac %pM", pkt_dev->dst_mac); return count; } if (!strcmp(name, "src_mac")) { - char *v = valstr; - unsigned char old_smac[ETH_ALEN]; - unsigned char *m = pkt_dev->src_mac; - - memcpy(old_smac, pkt_dev->src_mac, ETH_ALEN); - len = strn_len(&user_buffer[i], sizeof(valstr) - 1); if (len < 0) return len; @@ -1468,26 +1444,13 @@ static ssize_t pktgen_if_write(struct file *file, memset(valstr, 0, sizeof(valstr)); if (copy_from_user(valstr, &user_buffer[i], len)) return -EFAULT; - i += len; - - for (*m = 0; *v && m < pkt_dev->src_mac + 6; v++) { - int value; - - value = hex_to_bin(*v); - if (value >= 0) - *m = *m * 16 + value; - - if (*v == ':') { - m++; - *m = 0; - } - } + if (!mac_pton(valstr, pkt_dev->src_mac)) + return -EINVAL; /* Set up Src MAC */ - if (compare_ether_addr(old_smac, pkt_dev->src_mac)) - memcpy(&(pkt_dev->hh[6]), pkt_dev->src_mac, ETH_ALEN); + memcpy(&pkt_dev->hh[6], pkt_dev->src_mac, ETH_ALEN); - sprintf(pg_result, "OK: srcmac"); + sprintf(pg_result, "OK: srcmac %pM", pkt_dev->src_mac); return count; } diff --git a/net/core/utils.c b/net/core/utils.c index 5fea0ab21902..2012bc797f9c 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -296,3 +296,27 @@ void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb, csum_unfold(*sum))); } EXPORT_SYMBOL(inet_proto_csum_replace4); + +int mac_pton(const char *s, u8 *mac) +{ + int i; + + /* XX:XX:XX:XX:XX:XX */ + if (strlen(s) < 3 * ETH_ALEN - 1) + return 0; + + /* Don't dirty result unless string is valid MAC. */ + for (i = 0; i < ETH_ALEN; i++) { + if (!strchr("0123456789abcdefABCDEF", s[i * 3])) + return 0; + if (!strchr("0123456789abcdefABCDEF", s[i * 3 + 1])) + return 0; + if (i != ETH_ALEN - 1 && s[i * 3 + 2] != ':') + return 0; + } + for (i = 0; i < ETH_ALEN; i++) { + mac[i] = (hex_to_bin(s[i * 3]) << 4) | hex_to_bin(s[i * 3 + 1]); + } + return 1; +} +EXPORT_SYMBOL(mac_pton); -- cgit v1.2.3-71-gd317 From a09a79f66874c905af35d5bb5e5f2fdc7b6b894d Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 9 May 2011 13:01:09 +0200 Subject: Don't lock guardpage if the stack is growing up Linux kernel excludes guard page when performing mlock on a VMA with down-growing stack. However, some architectures have up-growing stack and locking the guard page should be excluded in this case too. This patch fixes lvm2 on PA-RISC (and possibly other architectures with up-growing stack). lvm2 calculates number of used pages when locking and when unlocking and reports an internal error if the numbers mismatch. [ Patch changed fairly extensively to also fix /proc//maps for the grows-up case, and to move things around a bit to clean it all up and share the infrstructure with the /proc bits. 
Tested on ia64 that has both grow-up and grow-down segments - Linus ] Signed-off-by: Mikulas Patocka Tested-by: Tony Luck Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 12 +++++++----- include/linux/mm.h | 24 +++++++++++++++++++++++- mm/memory.c | 16 +++++++--------- 3 files changed, 37 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 2e7addfd9803..318d8654989b 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -214,7 +214,7 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) int flags = vma->vm_flags; unsigned long ino = 0; unsigned long long pgoff = 0; - unsigned long start; + unsigned long start, end; dev_t dev = 0; int len; @@ -227,13 +227,15 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) /* We don't show the stack guard page in /proc/maps */ start = vma->vm_start; - if (vma->vm_flags & VM_GROWSDOWN) - if (!vma_stack_continue(vma->vm_prev, vma->vm_start)) - start += PAGE_SIZE; + if (stack_guard_page_start(vma, start)) + start += PAGE_SIZE; + end = vma->vm_end; + if (stack_guard_page_end(vma, end)) + end -= PAGE_SIZE; seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", start, - vma->vm_end, + end, flags & VM_READ ? 'r' : '-', flags & VM_WRITE ? 'w' : '-', flags & VM_EXEC ? 'x' : '-', diff --git a/include/linux/mm.h b/include/linux/mm.h index 2348db26bc3d..6507dde38b16 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1011,11 +1011,33 @@ int set_page_dirty_lock(struct page *page); int clear_page_dirty_for_io(struct page *page); /* Is the vma a continuation of the stack vma above it? */ -static inline int vma_stack_continue(struct vm_area_struct *vma, unsigned long addr) +static inline int vma_growsdown(struct vm_area_struct *vma, unsigned long addr) { return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN); } +static inline int stack_guard_page_start(struct vm_area_struct *vma, + unsigned long addr) +{ + return (vma->vm_flags & VM_GROWSDOWN) && + (vma->vm_start == addr) && + !vma_growsdown(vma->vm_prev, addr); +} + +/* Is the vma a continuation of the stack vma below it? */ +static inline int vma_growsup(struct vm_area_struct *vma, unsigned long addr) +{ + return vma && (vma->vm_start == addr) && (vma->vm_flags & VM_GROWSUP); +} + +static inline int stack_guard_page_end(struct vm_area_struct *vma, + unsigned long addr) +{ + return (vma->vm_flags & VM_GROWSUP) && + (vma->vm_end == addr) && + !vma_growsup(vma->vm_next, addr); +} + extern unsigned long move_page_tables(struct vm_area_struct *vma, unsigned long old_addr, struct vm_area_struct *new_vma, unsigned long new_addr, unsigned long len); diff --git a/mm/memory.c b/mm/memory.c index 27f425378112..61e66f026563 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1412,9 +1412,8 @@ no_page_table: static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr) { - return (vma->vm_flags & VM_GROWSDOWN) && - (vma->vm_start == addr) && - !vma_stack_continue(vma->vm_prev, addr); + return stack_guard_page_start(vma, addr) || + stack_guard_page_end(vma, addr+PAGE_SIZE); } /** @@ -1551,12 +1550,6 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, continue; } - /* - * For mlock, just skip the stack guard page. 
- */ - if ((gup_flags & FOLL_MLOCK) && stack_guard_page(vma, start)) - goto next_page; - do { struct page *page; unsigned int foll_flags = gup_flags; @@ -1573,6 +1566,11 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, int ret; unsigned int fault_flags = 0; + /* For mlock, just skip the stack guard page. */ + if (foll_flags & FOLL_MLOCK) { + if (stack_guard_page(vma, start)) + goto next_page; + } if (foll_flags & FOLL_WRITE) fault_flags |= FAULT_FLAG_WRITE; if (nonblocking) -- cgit v1.2.3-71-gd317 From 70f23fd66bc821a0e99647f70a809e277cc93c4c Mon Sep 17 00:00:00 2001 From: "Justin P. Mattock" Date: Tue, 10 May 2011 10:16:21 +0200 Subject: treewide: fix a few typos in comments - kenrel -> kernel - whetehr -> whether - ttt -> tt - sss -> ss Signed-off-by: Justin P. Mattock Signed-off-by: Jiri Kosina --- Documentation/devicetree/bindings/powerpc/nintendo/wii.txt | 2 +- Documentation/feature-removal-schedule.txt | 2 +- arch/arm/mach-msm/include/mach/msm_iomap.h | 2 +- arch/arm/mach-omap2/control.h | 2 +- arch/ia64/hp/common/sba_iommu.c | 2 +- arch/powerpc/include/asm/pte-hash64-64k.h | 2 +- arch/powerpc/kernel/kgdb.c | 2 +- arch/x86/xen/pci-swiotlb-xen.c | 2 +- drivers/acpi/acpica/utresrc.c | 2 +- drivers/acpi/video.c | 2 +- drivers/firmware/efivars.c | 2 +- drivers/macintosh/therm_pm72.c | 4 ++-- drivers/media/dvb/dvb-usb/dw2102.c | 2 +- drivers/media/video/msp3400-driver.c | 2 +- drivers/media/video/saa7164/saa7164-encoder.c | 2 +- drivers/media/video/saa7164/saa7164-vbi.c | 2 +- drivers/message/i2o/README.ioctl | 2 +- drivers/net/can/pch_can.c | 2 +- drivers/net/irda/ali-ircc.c | 2 +- drivers/net/s2io.c | 2 +- drivers/net/ucc_geth_ethtool.c | 2 +- drivers/net/usb/usbnet.c | 2 +- drivers/net/wan/pc300_drv.c | 2 +- drivers/net/wireless/ath/ath9k/ar9003_eeprom.c | 8 ++++---- drivers/net/wireless/ipw2x00/ipw2200.c | 2 +- drivers/scsi/aic7xxx/aicasm/aicasm_symbol.c | 2 +- drivers/scsi/aic7xxx/aicasm/aicasm_symbol.h | 2 +- drivers/scsi/constants.c | 2 +- drivers/scsi/esp_scsi.c | 2 +- drivers/scsi/lpfc/lpfc_attr.c | 2 +- drivers/scsi/lpfc/lpfc_hw.h | 2 +- drivers/scsi/lpfc/lpfc_sli.c | 2 +- drivers/scsi/nsp32_debug.c | 2 +- drivers/scsi/pcmcia/nsp_debug.c | 2 +- drivers/scsi/qla4xxx/ql4_mbx.c | 2 +- drivers/tty/n_gsm.c | 2 +- drivers/usb/host/imx21-dbg.c | 2 +- drivers/usb/misc/usbtest.c | 2 +- fs/btrfs/relocation.c | 2 +- fs/freevxfs/vxfs_inode.c | 2 +- fs/nfsd/stats.c | 2 +- fs/squashfs/Kconfig | 4 ++-- fs/squashfs/cache.c | 2 +- fs/xfs/xfs_inode.c | 2 +- include/linux/posix-clock.h | 2 +- include/net/genetlink.h | 2 +- include/net/netlink.h | 2 +- net/sunrpc/xprtrdma/svc_rdma_transport.c | 2 +- 48 files changed, 53 insertions(+), 53 deletions(-) (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/powerpc/nintendo/wii.txt b/Documentation/devicetree/bindings/powerpc/nintendo/wii.txt index a7e155a023b8..36afa322b04b 100644 --- a/Documentation/devicetree/bindings/powerpc/nintendo/wii.txt +++ b/Documentation/devicetree/bindings/powerpc/nintendo/wii.txt @@ -127,7 +127,7 @@ Nintendo Wii device tree - reg : should contain the SDHCI registers location and length - interrupts : should contain the SDHCI interrupt -1.j) The Inter-Processsor Communication (IPC) node +1.j) The Inter-Processor Communication (IPC) node Represent the Inter-Processor Communication interface. This interface enables communications between the Broadway and the Starlet processors. 
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 492e81df2968..9536652eadbf 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -226,7 +226,7 @@ Who: Zhang Rui What: CONFIG_ACPI_PROCFS_POWER When: 2.6.39 Why: sysfs I/F for ACPI power devices, including AC and Battery, - has been working in upstream kenrel since 2.6.24, Sep 2007. + has been working in upstream kernel since 2.6.24, Sep 2007. In 2.6.37, we make the sysfs I/F always built in and this option disabled by default. Remove this option and the ACPI power procfs interface in 2.6.39. diff --git a/arch/arm/mach-msm/include/mach/msm_iomap.h b/arch/arm/mach-msm/include/mach/msm_iomap.h index c98c7591f3b8..2f494b6a9d0a 100644 --- a/arch/arm/mach-msm/include/mach/msm_iomap.h +++ b/arch/arm/mach-msm/include/mach/msm_iomap.h @@ -55,7 +55,7 @@ #include "msm_iomap-8960.h" -/* Virtual addressses shared across all MSM targets. */ +/* Virtual addresses shared across all MSM targets. */ #define MSM_CSR_BASE IOMEM(0xE0001000) #define MSM_QGIC_DIST_BASE IOMEM(0xF0000000) #define MSM_QGIC_CPU_BASE IOMEM(0xF0001000) diff --git a/arch/arm/mach-omap2/control.h b/arch/arm/mach-omap2/control.h index c2804c1c4efd..a016c8b59e00 100644 --- a/arch/arm/mach-omap2/control.h +++ b/arch/arm/mach-omap2/control.h @@ -236,7 +236,7 @@ #define OMAP343X_CONTROL_WKUP_DEBOBS3 (OMAP343X_CONTROL_GENERAL_WKUP + 0x014) #define OMAP343X_CONTROL_WKUP_DEBOBS4 (OMAP343X_CONTROL_GENERAL_WKUP + 0x018) -/* 36xx-only RTA - Retention till Accesss control registers and bits */ +/* 36xx-only RTA - Retention till Access control registers and bits */ #define OMAP36XX_CONTROL_MEM_RTA_CTRL 0x40C #define OMAP36XX_RTA_DISABLE 0x0 diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c index 4ce8d1358fee..ab3ccab132ce 100644 --- a/arch/ia64/hp/common/sba_iommu.c +++ b/arch/ia64/hp/common/sba_iommu.c @@ -1063,7 +1063,7 @@ static void sba_unmap_page(struct device *dev, dma_addr_t iova, size_t size, /* ** Address does not fall w/in IOVA, must be bypassing */ - DBG_BYPASS("sba_unmap_single_atttrs() bypass addr: 0x%lx\n", + DBG_BYPASS("sba_unmap_single_attrs() bypass addr: 0x%lx\n", iova); #ifdef ENABLE_MARK_CLEAN diff --git a/arch/powerpc/include/asm/pte-hash64-64k.h b/arch/powerpc/include/asm/pte-hash64-64k.h index c4490f9c67c4..59247e816ac5 100644 --- a/arch/powerpc/include/asm/pte-hash64-64k.h +++ b/arch/powerpc/include/asm/pte-hash64-64k.h @@ -22,7 +22,7 @@ #define _PAGE_HASHPTE _PAGE_HPTE_SUB /* Note the full page bits must be in the same location as for normal - * 4k pages as the same asssembly will be used to insert 64K pages + * 4k pages as the same assembly will be used to insert 64K pages * wether the kernel has CONFIG_PPC_64K_PAGES or not */ #define _PAGE_F_SECOND 0x00008000 /* full page: hidx bits */ diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 42850ee00ada..9411747bbcaf 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -142,7 +142,7 @@ static int kgdb_singlestep(struct pt_regs *regs) return 0; /* - * On Book E and perhaps other processsors, singlestep is handled on + * On Book E and perhaps other processors, singlestep is handled on * the critical exception stack. This causes current_thread_info() * to fail, since it it locates the thread_info by masking off * the low bits of the current stack pointer. 
We work around diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c index bfd0632fe65e..b480d4207a4c 100644 --- a/arch/x86/xen/pci-swiotlb-xen.c +++ b/arch/x86/xen/pci-swiotlb-xen.c @@ -36,7 +36,7 @@ int __init pci_xen_swiotlb_detect(void) /* If running as PV guest, either iommu=soft, or swiotlb=force will * activate this IOMMU. If running as PV privileged, activate it - * irregardlesss. + * irregardless. */ if ((xen_initial_domain() || swiotlb || swiotlb_force) && (xen_pv_domain())) diff --git a/drivers/acpi/acpica/utresrc.c b/drivers/acpi/acpica/utresrc.c index 84e051844247..6ffd3a8bdaa5 100644 --- a/drivers/acpi/acpica/utresrc.c +++ b/drivers/acpi/acpica/utresrc.c @@ -50,7 +50,7 @@ ACPI_MODULE_NAME("utresrc") #if defined(ACPI_DISASSEMBLER) || defined (ACPI_DEBUGGER) /* * Strings used to decode resource descriptors. - * Used by both the disasssembler and the debugger resource dump routines + * Used by both the disassembler and the debugger resource dump routines */ const char *acpi_gbl_bm_decode[] = { "NotBusMaster", diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c index ec574fc8fbc6..db39e9e607d8 100644 --- a/drivers/acpi/video.c +++ b/drivers/acpi/video.c @@ -1521,7 +1521,7 @@ static void acpi_video_device_notify(acpi_handle handle, u32 event, void *data) acpi_bus_generate_proc_event(device, event, 0); keycode = KEY_BRIGHTNESSDOWN; break; - case ACPI_VIDEO_NOTIFY_ZERO_BRIGHTNESS: /* zero brightnesss */ + case ACPI_VIDEO_NOTIFY_ZERO_BRIGHTNESS: /* zero brightness */ if (brightness_switch_enabled) acpi_video_switch_brightness(video_device, event); acpi_bus_generate_proc_event(device, event, 0); diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index ff0c373e3bbf..5629a0c56f61 100644 --- a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -321,7 +321,7 @@ efivar_show_raw(struct efivar_entry *entry, char *buf) /* * Generic read/write functions that call the specific functions of - * the atttributes... + * the attributes... */ static ssize_t efivar_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) diff --git a/drivers/macintosh/therm_pm72.c b/drivers/macintosh/therm_pm72.c index bb8b722a9783..0ff92c208005 100644 --- a/drivers/macintosh/therm_pm72.c +++ b/drivers/macintosh/therm_pm72.c @@ -44,11 +44,11 @@ * TODO: - Check MPU structure version/signature * - Add things like /sbin/overtemp for non-critical * overtemp conditions so userland can take some policy - * decisions, like slewing down CPUs + * decisions, like slowing down CPUs * - Deal with fan and i2c failures in a better way * - Maybe do a generic PID based on params used for * U3 and Drives ? Definitely need to factor code a bit - * bettter... also make sensor detection more robust using + * better... 
also make sensor detection more robust using * the device-tree to probe for them * - Figure out how to get the slots consumption and set the * slots fan accordingly diff --git a/drivers/media/dvb/dvb-usb/dw2102.c b/drivers/media/dvb/dvb-usb/dw2102.c index f5b9da18f611..d312323504a4 100644 --- a/drivers/media/dvb/dvb-usb/dw2102.c +++ b/drivers/media/dvb/dvb-usb/dw2102.c @@ -1377,7 +1377,7 @@ static struct rc_map_table rc_map_su3000_table[] = { { 0x0f, KEY_BLUE }, /* bottom yellow button */ { 0x14, KEY_AUDIO }, /* Snapshot */ { 0x38, KEY_TV }, /* TV/Radio */ - { 0x0c, KEY_ESC } /* upper Red buttton */ + { 0x0c, KEY_ESC } /* upper Red button */ }; static struct rc_map_dvb_usb_table_table keys_tables[] = { diff --git a/drivers/media/video/msp3400-driver.c b/drivers/media/video/msp3400-driver.c index 8126622fb4f5..de5d481b0328 100644 --- a/drivers/media/video/msp3400-driver.c +++ b/drivers/media/video/msp3400-driver.c @@ -96,7 +96,7 @@ MODULE_PARM_DESC(debug, "Enable debug messages [0-3]"); MODULE_PARM_DESC(stereo_threshold, "Sets signal threshold to activate stereo"); MODULE_PARM_DESC(standard, "Specify audio standard: 32 = NTSC, 64 = radio, Default: Autodetect"); MODULE_PARM_DESC(amsound, "Hardwire AM sound at 6.5Hz (France), FM can autoscan"); -MODULE_PARM_DESC(dolby, "Activates Dolby processsing"); +MODULE_PARM_DESC(dolby, "Activates Dolby processing"); /* ---------------------------------------------------------------------- */ diff --git a/drivers/media/video/saa7164/saa7164-encoder.c b/drivers/media/video/saa7164/saa7164-encoder.c index f9d594698832..400364569c8d 100644 --- a/drivers/media/video/saa7164/saa7164-encoder.c +++ b/drivers/media/video/saa7164/saa7164-encoder.c @@ -177,7 +177,7 @@ static int saa7164_encoder_buffers_alloc(struct saa7164_port *port) } } - /* Allocate some kenrel kernel buffers for copying + /* Allocate some kernel buffers for copying * to userpsace. */ len = params->numberoflines * params->pitch; diff --git a/drivers/media/video/saa7164/saa7164-vbi.c b/drivers/media/video/saa7164/saa7164-vbi.c index 9e5b01c29cf5..bc1fcedba874 100644 --- a/drivers/media/video/saa7164/saa7164-vbi.c +++ b/drivers/media/video/saa7164/saa7164-vbi.c @@ -148,7 +148,7 @@ static int saa7164_vbi_buffers_alloc(struct saa7164_port *port) } } - /* Allocate some kenrel kernel buffers for copying + /* Allocate some kernel buffers for copying * to userpsace. */ len = params->numberoflines * params->pitch; diff --git a/drivers/message/i2o/README.ioctl b/drivers/message/i2o/README.ioctl index 65c0c47aeb79..5fb195af43e2 100644 --- a/drivers/message/i2o/README.ioctl +++ b/drivers/message/i2o/README.ioctl @@ -110,7 +110,7 @@ V. Getting Logical Configuration Table ENOBUFS Buffer not large enough. If this occurs, the required buffer length is written into *(lct->reslen) -VI. Settting Parameters +VI. 
Setting Parameters SYNOPSIS diff --git a/drivers/net/can/pch_can.c b/drivers/net/can/pch_can.c index e54712b22c27..d11fbb2b95ff 100644 --- a/drivers/net/can/pch_can.c +++ b/drivers/net/can/pch_can.c @@ -653,7 +653,7 @@ static int pch_can_rx_normal(struct net_device *ndev, u32 obj_num, int quota) u16 data_reg; do { - /* Reading the messsage object from the Message RAM */ + /* Reading the message object from the Message RAM */ iowrite32(PCH_CMASK_RX_TX_GET, &priv->regs->ifregs[0].cmask); pch_can_rw_msg_obj(&priv->regs->ifregs[0].creq, obj_num); diff --git a/drivers/net/irda/ali-ircc.c b/drivers/net/irda/ali-ircc.c index 872183f29ec4..d532dde5120f 100644 --- a/drivers/net/irda/ali-ircc.c +++ b/drivers/net/irda/ali-ircc.c @@ -1800,7 +1800,7 @@ static int ali_ircc_dma_receive_complete(struct ali_ircc_cb *self) MessageCount = inb(iobase+ FIR_LSR)&0x07; if (MessageCount > 0) - IRDA_DEBUG(0, "%s(), Messsage count = %d,\n", __func__ , MessageCount); + IRDA_DEBUG(0, "%s(), Message count = %d,\n", __func__ , MessageCount); for (i=0; i<=MessageCount; i++) { diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c index 337bdcd5abc9..665c3d832034 100644 --- a/drivers/net/s2io.c +++ b/drivers/net/s2io.c @@ -7231,7 +7231,7 @@ static void do_s2io_card_down(struct s2io_nic *sp, int do_io) /* As per the HW requirement we need to replenish the * receive buffer to avoid the ring bump. Since there is * no intention of processing the Rx frame at this pointwe are - * just settting the ownership bit of rxd in Each Rx + * just setting the ownership bit of rxd in Each Rx * ring to HW and set the appropriate buffer size * based on the ring mode */ diff --git a/drivers/net/ucc_geth_ethtool.c b/drivers/net/ucc_geth_ethtool.c index 6f92e48f02d3..cfd8881452ac 100644 --- a/drivers/net/ucc_geth_ethtool.c +++ b/drivers/net/ucc_geth_ethtool.c @@ -6,7 +6,7 @@ * Author: Li Yang * * Limitation: - * Can only get/set setttings of the first queue. + * Can only get/set settings of the first queue. * Need to re-open the interface manually after changing some parameters. * * This program is free software; you can redistribute it and/or modify it diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 069c1cf0fdf7..fef9ecc7b7aa 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -109,7 +109,7 @@ int usbnet_get_endpoints(struct usbnet *dev, struct usb_interface *intf) /* take the first altsetting with in-bulk + out-bulk; * remember any status endpoint, just in case; - * ignore other endpoints and altsetttings. + * ignore other endpoints and altsettings. */ for (ep = 0; ep < alt->desc.bNumEndpoints; ep++) { struct usb_host_endpoint *e; diff --git a/drivers/net/wan/pc300_drv.c b/drivers/net/wan/pc300_drv.c index f875cfae3093..737b59f1a8dc 100644 --- a/drivers/net/wan/pc300_drv.c +++ b/drivers/net/wan/pc300_drv.c @@ -1445,7 +1445,7 @@ static void falc_update_stats(pc300_t * card, int ch) * Description: In the remote loopback mode the clock and data recovered * from the line inputs RL1/2 or RDIP/RDIN are routed back * to the line outputs XL1/2 or XDOP/XDON via the analog - * transmitter. As in normal mode they are processsed by + * transmitter. As in normal mode they are processed by * the synchronizer and then sent to the system interface. 
*---------------------------------------------------------------------------- */ diff --git a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c index 6eadf975ae48..aa1ba5dbfebb 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c +++ b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c @@ -3329,26 +3329,26 @@ static int ar9300_eeprom_restore_internal(struct ath_hw *ah, else cptr = AR9300_BASE_ADDR; ath_dbg(common, ATH_DBG_EEPROM, - "Trying EEPROM accesss at Address 0x%04x\n", cptr); + "Trying EEPROM access at Address 0x%04x\n", cptr); if (ar9300_check_eeprom_header(ah, read, cptr)) goto found; cptr = AR9300_BASE_ADDR_512; ath_dbg(common, ATH_DBG_EEPROM, - "Trying EEPROM accesss at Address 0x%04x\n", cptr); + "Trying EEPROM access at Address 0x%04x\n", cptr); if (ar9300_check_eeprom_header(ah, read, cptr)) goto found; read = ar9300_read_otp; cptr = AR9300_BASE_ADDR; ath_dbg(common, ATH_DBG_EEPROM, - "Trying OTP accesss at Address 0x%04x\n", cptr); + "Trying OTP access at Address 0x%04x\n", cptr); if (ar9300_check_eeprom_header(ah, read, cptr)) goto found; cptr = AR9300_BASE_ADDR_512; ath_dbg(common, ATH_DBG_EEPROM, - "Trying OTP accesss at Address 0x%04x\n", cptr); + "Trying OTP access at Address 0x%04x\n", cptr); if (ar9300_check_eeprom_header(ah, read, cptr)) goto found; diff --git a/drivers/net/wireless/ipw2x00/ipw2200.c b/drivers/net/wireless/ipw2x00/ipw2200.c index 42c3fe37af64..87813c33bdc2 100644 --- a/drivers/net/wireless/ipw2x00/ipw2200.c +++ b/drivers/net/wireless/ipw2x00/ipw2200.c @@ -7430,7 +7430,7 @@ static int ipw_associate_network(struct ipw_priv *priv, priv->assoc_request.capability &= ~cpu_to_le16(WLAN_CAPABILITY_SHORT_SLOT_TIME); - IPW_DEBUG_ASSOC("%sssocation attempt: '%s', channel %d, " + IPW_DEBUG_ASSOC("%ssociation attempt: '%s', channel %d, " "802.11%c [%d], %s[:%s], enc=%s%s%s%c%c\n", roaming ? "Rea" : "A", print_ssid(ssid, priv->essid, priv->essid_len), diff --git a/drivers/scsi/aic7xxx/aicasm/aicasm_symbol.c b/drivers/scsi/aic7xxx/aicasm/aicasm_symbol.c index 078ed600f47a..232aff1fe784 100644 --- a/drivers/scsi/aic7xxx/aicasm/aicasm_symbol.c +++ b/drivers/scsi/aic7xxx/aicasm/aicasm_symbol.c @@ -1,5 +1,5 @@ /* - * Aic7xxx SCSI host adapter firmware asssembler symbol table implementation + * Aic7xxx SCSI host adapter firmware assembler symbol table implementation * * Copyright (c) 1997 Justin T. Gibbs. * Copyright (c) 2002 Adaptec Inc. diff --git a/drivers/scsi/aic7xxx/aicasm/aicasm_symbol.h b/drivers/scsi/aic7xxx/aicasm/aicasm_symbol.h index 2ba73ae7c777..34bbcad7f83f 100644 --- a/drivers/scsi/aic7xxx/aicasm/aicasm_symbol.h +++ b/drivers/scsi/aic7xxx/aicasm/aicasm_symbol.h @@ -1,5 +1,5 @@ /* - * Aic7xxx SCSI host adapter firmware asssembler symbol table definitions + * Aic7xxx SCSI host adapter firmware assembler symbol table definitions * * Copyright (c) 1997 Justin T. Gibbs. * Copyright (c) 2002 Adaptec Inc. 
diff --git a/drivers/scsi/constants.c b/drivers/scsi/constants.c index d0c82340f0e2..d0cdde57d7d2 100644 --- a/drivers/scsi/constants.c +++ b/drivers/scsi/constants.c @@ -34,7 +34,7 @@ static const char * cdb_byte0_names[] = { /* 00-03 */ "Test Unit Ready", "Rezero Unit/Rewind", NULL, "Request Sense", /* 04-07 */ "Format Unit/Medium", "Read Block Limits", NULL, - "Reasssign Blocks", + "Reassign Blocks", /* 08-0d */ "Read(6)", NULL, "Write(6)", "Seek(6)", NULL, NULL, /* 0e-12 */ NULL, "Read Reverse", "Write Filemarks", "Space", "Inquiry", /* 13-16 */ "Verify(6)", "Recover Buffered Data", "Mode Select(6)", diff --git a/drivers/scsi/esp_scsi.c b/drivers/scsi/esp_scsi.c index 57558523c1b8..182831e1e0b2 100644 --- a/drivers/scsi/esp_scsi.c +++ b/drivers/scsi/esp_scsi.c @@ -1059,7 +1059,7 @@ static struct esp_cmd_entry *esp_reconnect_with_tag(struct esp *esp, esp->ops->send_dma_cmd(esp, esp->command_block_dma, 2, 2, 1, ESP_CMD_DMA | ESP_CMD_TI); - /* ACK the msssage. */ + /* ACK the message. */ scsi_esp_cmd(esp, ESP_CMD_MOK); for (i = 0; i < ESP_RESELECT_TAG_LIMIT; i++) { diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 17d789325f40..8dcbf8fff673 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -4532,7 +4532,7 @@ lpfc_set_vport_symbolic_name(struct fc_vport *fc_vport) * * This function is called by the lpfc_get_cfgparam() routine to set the * module lpfc_log_verbose into the @phba cfg_log_verbose for use with - * log messsage according to the module's lpfc_log_verbose parameter setting + * log message according to the module's lpfc_log_verbose parameter setting * before hba port or vport created. **/ static void diff --git a/drivers/scsi/lpfc/lpfc_hw.h b/drivers/scsi/lpfc/lpfc_hw.h index 95f11ed79463..86b6f7e6686a 100644 --- a/drivers/scsi/lpfc/lpfc_hw.h +++ b/drivers/scsi/lpfc/lpfc_hw.h @@ -1002,7 +1002,7 @@ typedef struct _ELS_PKT { /* Structure is in Big Endian format */ #define SLI_MGMT_GRPL 0x102 /* Get registered Port list */ #define SLI_MGMT_GPAT 0x110 /* Get Port attributes */ #define SLI_MGMT_RHBA 0x200 /* Register HBA */ -#define SLI_MGMT_RHAT 0x201 /* Register HBA atttributes */ +#define SLI_MGMT_RHAT 0x201 /* Register HBA attributes */ #define SLI_MGMT_RPRT 0x210 /* Register Port */ #define SLI_MGMT_RPA 0x211 /* Register Port attributes */ #define SLI_MGMT_DHBA 0x300 /* De-register HBA */ diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index dacabbe0a586..63c5d627a5a3 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -3040,7 +3040,7 @@ lpfc_sli_sp_handle_rspiocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, list_add_tail(&rspiocbp->list, &(pring->iocb_continueq)); pring->iocb_continueq_cnt++; - /* Now, determine whetehr the list is completed for processing */ + /* Now, determine whether the list is completed for processing */ irsp = &rspiocbp->iocb; if (irsp->ulpLe) { /* diff --git a/drivers/scsi/nsp32_debug.c b/drivers/scsi/nsp32_debug.c index 2fb3fb58858d..58806f432a16 100644 --- a/drivers/scsi/nsp32_debug.c +++ b/drivers/scsi/nsp32_debug.c @@ -13,7 +13,7 @@ static const char unknown[] = "UNKNOWN"; static const char * group_0_commands[] = { /* 00-03 */ "Test Unit Ready", "Rezero Unit", unknown, "Request Sense", -/* 04-07 */ "Format Unit", "Read Block Limits", unknown, "Reasssign Blocks", +/* 04-07 */ "Format Unit", "Read Block Limits", unknown, "Reassign Blocks", /* 08-0d */ "Read (6)", unknown, "Write (6)", "Seek (6)", unknown, unknown, /* 0e-12 */ unknown, 
"Read Reverse", "Write Filemarks", "Space", "Inquiry", /* 13-16 */ unknown, "Recover Buffered Data", "Mode Select", "Reserve", diff --git a/drivers/scsi/pcmcia/nsp_debug.c b/drivers/scsi/pcmcia/nsp_debug.c index 3c6ef64fcbff..6aa7d269d3b3 100644 --- a/drivers/scsi/pcmcia/nsp_debug.c +++ b/drivers/scsi/pcmcia/nsp_debug.c @@ -15,7 +15,7 @@ static const char unknown[] = "UNKNOWN"; static const char * group_0_commands[] = { /* 00-03 */ "Test Unit Ready", "Rezero Unit", unknown, "Request Sense", -/* 04-07 */ "Format Unit", "Read Block Limits", unknown, "Reasssign Blocks", +/* 04-07 */ "Format Unit", "Read Block Limits", unknown, "Reassign Blocks", /* 08-0d */ "Read (6)", unknown, "Write (6)", "Seek (6)", unknown, unknown, /* 0e-12 */ unknown, "Read Reverse", "Write Filemarks", "Space", "Inquiry", /* 13-16 */ unknown, "Recover Buffered Data", "Mode Select", "Reserve", diff --git a/drivers/scsi/qla4xxx/ql4_mbx.c b/drivers/scsi/qla4xxx/ql4_mbx.c index f9d81c8372c3..d78b58dc5011 100644 --- a/drivers/scsi/qla4xxx/ql4_mbx.c +++ b/drivers/scsi/qla4xxx/ql4_mbx.c @@ -19,7 +19,7 @@ * @mbx_cmd: data pointer for mailbox in registers. * @mbx_sts: data pointer for mailbox out registers. * - * This routine isssue mailbox commands and waits for completion. + * This routine issue mailbox commands and waits for completion. * If outCount is 0, this routine completes successfully WITHOUT waiting * for the mailbox command to complete. **/ diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index 74273e638c0d..77623b936538 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -2128,7 +2128,7 @@ static int gsmld_attach_gsm(struct tty_struct *tty, struct gsm_mux *gsm) /** * gsmld_detach_gsm - stop doing 0710 mux - * @tty: tty atttached to the mux + * @tty: tty attached to the mux * @gsm: mux * * Shutdown and then clean up the resources used by the line discipline diff --git a/drivers/usb/host/imx21-dbg.c b/drivers/usb/host/imx21-dbg.c index 512f647448ca..6d7533427163 100644 --- a/drivers/usb/host/imx21-dbg.c +++ b/drivers/usb/host/imx21-dbg.c @@ -384,7 +384,7 @@ static void debug_isoc_show_one(struct seq_file *s, seq_printf(s, "%s %d:\n" "cc=0X%02X\n" "scheduled frame %d (%d)\n" - "submittted frame %d (%d)\n" + "submitted frame %d (%d)\n" "completed frame %d (%d)\n" "requested length=%d\n" "completed length=%d\n\n", diff --git a/drivers/usb/misc/usbtest.c b/drivers/usb/misc/usbtest.c index 388cc128072a..ff9a01f8d405 100644 --- a/drivers/usb/misc/usbtest.c +++ b/drivers/usb/misc/usbtest.c @@ -104,7 +104,7 @@ get_endpoints(struct usbtest_dev *dev, struct usb_interface *intf) alt = intf->altsetting + tmp; /* take the first altsetting with in-bulk + out-bulk; - * ignore other endpoints and altsetttings. + * ignore other endpoints and altsettings. 
*/ for (ep = 0; ep < alt->desc.bNumEndpoints; ep++) { struct usb_host_endpoint *e; diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 199a80134312..f340f7c99d09 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -709,7 +709,7 @@ again: WARN_ON(cur->checked); if (!list_empty(&cur->upper)) { /* - * the backref was added previously when processsing + * the backref was added previously when processing * backref of type BTRFS_TREE_BLOCK_REF_KEY */ BUG_ON(!list_is_singular(&cur->upper)); diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c index 2ba6719ac612..1a4311437a8b 100644 --- a/fs/freevxfs/vxfs_inode.c +++ b/fs/freevxfs/vxfs_inode.c @@ -272,7 +272,7 @@ vxfs_get_fake_inode(struct super_block *sbp, struct vxfs_inode_info *vip) * *ip: VFS inode * * Description: - * vxfs_put_fake_inode frees all data asssociated with @ip. + * vxfs_put_fake_inode frees all data associated with @ip. */ void vxfs_put_fake_inode(struct inode *ip) diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c index 5232d3e8fb2f..a2e2402b2afb 100644 --- a/fs/nfsd/stats.c +++ b/fs/nfsd/stats.c @@ -8,7 +8,7 @@ * Statistsics for the reply cache * fh * statistics for filehandle lookup - * io + * io * statistics for IO throughput * th <10%-20%> <20%-30%> ... <90%-100%> <100%> * time (seconds) when nfsd thread usage above thresholds diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig index efc309fa3035..7797218d0b30 100644 --- a/fs/squashfs/Kconfig +++ b/fs/squashfs/Kconfig @@ -42,7 +42,7 @@ config SQUASHFS_LZO select LZO_DECOMPRESS help Saying Y here includes support for reading Squashfs file systems - compressed with LZO compresssion. LZO compression is mainly + compressed with LZO compression. LZO compression is mainly aimed at embedded systems with slower CPUs where the overheads of zlib are too high. @@ -57,7 +57,7 @@ config SQUASHFS_XZ select XZ_DEC help Saying Y here includes support for reading Squashfs file systems - compressed with XZ compresssion. XZ gives better compression than + compressed with XZ compression. XZ gives better compression than the default zlib compression, at the expense of greater CPU and memory overhead. diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c index c37b520132ff..4b5a3fbb1f1f 100644 --- a/fs/squashfs/cache.c +++ b/fs/squashfs/cache.c @@ -29,7 +29,7 @@ * plus functions layered ontop of the generic cache implementation to * access the metadata and fragment caches. * - * To avoid out of memory and fragmentation isssues with vmalloc the cache + * To avoid out of memory and fragmentation issues with vmalloc the cache * uses sequences of kmalloced PAGE_CACHE_SIZE buffers. * * It should be noted that the cache is not used for file datablocks, these diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index a37480a6e023..d11ce613d692 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1470,7 +1470,7 @@ xfs_itruncate_finish( * file but the log buffers containing the free and reallocation * don't, then we'd end up with garbage in the blocks being freed. * As long as we make the new_size permanent before actually - * freeing any blocks it doesn't matter if they get writtten to. + * freeing any blocks it doesn't matter if they get written to. * * The callers must signal into us whether or not the size * setting here must be synchronous. 
There are a few cases diff --git a/include/linux/posix-clock.h b/include/linux/posix-clock.h index 7f1183dcd119..34c4498b800f 100644 --- a/include/linux/posix-clock.h +++ b/include/linux/posix-clock.h @@ -45,7 +45,7 @@ struct posix_clock; * @timer_create: Create a new timer * @timer_delete: Remove a previously created timer * @timer_gettime: Get remaining time and interval of a timer - * @timer_setttime: Set a timer's initial expiration and interval + * @timer_settime: Set a timer's initial expiration and interval * @fasync: Optional character device fasync method * @mmap: Optional character device mmap method * @open: Optional character device open method diff --git a/include/net/genetlink.h b/include/net/genetlink.h index b4c7c1cbcf40..d420f28b6d60 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -260,7 +260,7 @@ static inline int genlmsg_reply(struct sk_buff *skb, struct genl_info *info) /** * gennlmsg_data - head of message payload - * @gnlh: genetlink messsage header + * @gnlh: genetlink message header */ static inline void *genlmsg_data(const struct genlmsghdr *gnlh) { diff --git a/include/net/netlink.h b/include/net/netlink.h index 8a3906a08f5f..02740a94f108 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -290,7 +290,7 @@ static inline int nlmsg_padlen(int payload) /** * nlmsg_data - head of message payload - * @nlh: netlink messsage header + * @nlh: netlink message header */ static inline void *nlmsg_data(const struct nlmsghdr *nlh) { diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 1a10dcd999ea..6c014dd3a20b 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -333,7 +333,7 @@ static void rq_cq_reap(struct svcxprt_rdma *xprt) } /* - * Processs a completion context + * Process a completion context */ static void process_context(struct svcxprt_rdma *xprt, struct svc_rdma_op_ctxt *ctxt) -- cgit v1.2.3-71-gd317 From 6c6de1aa65189c37cc3c9af78da756c06a99899b Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Sat, 30 Apr 2011 18:56:12 +0900 Subject: nilfs2: super root size should change depending on inode size The size of super root structure depends on inode size, so NILFS_SR_BYTES macro should be a function of the inode size. This fixes the issue. Even though a different size value will be written for a possible future filesystem with extended inode, but fortunately this does not break disk format compatibility. 
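As an editor's note (not from the patch), assuming the usual super root header layout (sr_sum, sr_bytes, sr_flags and sr_nongc_ctime ahead of the DAT, cpfile and sufile inodes), the new macro works out as follows:

/*
 * NILFS_SR_BYTES(inode_size) now expands to
 * NILFS_SR_MDT_OFFSET(inode_size, 3): the fixed 16-byte header plus
 * three on-disk inodes.  For the default 128-byte inode this is
 * 16 + 3 * 128 = 400 bytes, the same value the old
 * sizeof(struct nilfs_super_root) produced, which is why existing
 * disk images keep working; a 256-byte inode would yield
 * 16 + 3 * 256 = 784 bytes instead.
 */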
Signed-off-by: Ryusuke Konishi --- fs/nilfs2/segbuf.c | 5 ++++- fs/nilfs2/segment.c | 2 +- include/linux/nilfs2_fs.h | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index 410ec2b1af4f..850a7c0228fb 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -239,12 +239,15 @@ nilfs_segbuf_fill_in_super_root_crc(struct nilfs_segment_buffer *segbuf, u32 seed) { struct nilfs_super_root *raw_sr; + struct the_nilfs *nilfs = segbuf->sb_super->s_fs_info; + unsigned srsize; u32 crc; raw_sr = (struct nilfs_super_root *)segbuf->sb_super_root->b_data; + srsize = NILFS_SR_BYTES(nilfs->ns_inode_size); crc = crc32_le(seed, (unsigned char *)raw_sr + sizeof(raw_sr->sr_sum), - NILFS_SR_BYTES - sizeof(raw_sr->sr_sum)); + srsize - sizeof(raw_sr->sr_sum)); raw_sr->sr_sum = cpu_to_le32(crc); } diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index abbfab974700..8006d0cd4440 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -894,7 +894,7 @@ static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci, bh_sr = NILFS_LAST_SEGBUF(&sci->sc_segbufs)->sb_super_root; raw_sr = (struct nilfs_super_root *)bh_sr->b_data; - raw_sr->sr_bytes = cpu_to_le16(NILFS_SR_BYTES); + raw_sr->sr_bytes = cpu_to_le16(NILFS_SR_BYTES(isz)); raw_sr->sr_nongc_ctime = cpu_to_le64(nilfs_doing_gc() ? nilfs->ns_nongc_ctime : sci->sc_seg_ctime); diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h index 8768c469e93e..bd8678f8a421 100644 --- a/include/linux/nilfs2_fs.h +++ b/include/linux/nilfs2_fs.h @@ -107,7 +107,7 @@ struct nilfs_super_root { #define NILFS_SR_DAT_OFFSET(inode_size) NILFS_SR_MDT_OFFSET(inode_size, 0) #define NILFS_SR_CPFILE_OFFSET(inode_size) NILFS_SR_MDT_OFFSET(inode_size, 1) #define NILFS_SR_SUFILE_OFFSET(inode_size) NILFS_SR_MDT_OFFSET(inode_size, 2) -#define NILFS_SR_BYTES (sizeof(struct nilfs_super_root)) +#define NILFS_SR_BYTES(inode_size) NILFS_SR_MDT_OFFSET(inode_size, 3) /* * Maximal mount counts -- cgit v1.2.3-71-gd317 From 619205da5b567504310daf829dede1187fa29bbc Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Thu, 5 May 2011 01:23:57 +0900 Subject: nilfs2: add ioctl which limits range of segment to be allocated This adds a new ioctl command which limits range of segment to be allocated. This is intended to gather data whithin a range of the partition before shrinking the filesystem, or to control new log location for some purpose. If a range is specified by the ioctl, segment allocator of nilfs tries to allocate new segments from the range unless no free segments are available there. 
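For illustration only (an editor's sketch, not part of the patch): the new command takes two __u64 byte offsets into the partition and must be issued by a CAP_SYS_ADMIN caller on a file descriptor belonging to the mounted filesystem; the mount point and the 16 GiB upper bound below are made-up values, and the nilfs2_fs.h header is assumed to be available to userspace. The kernel converts the byte range to whole segments in nilfs_ioctl_set_alloc_range() shown below.

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/types.h>
#include <linux/nilfs2_fs.h>

int main(void)
{
	/* Keep new segment allocation within the first 16 GiB of the device. */
	__u64 range[2] = { 0, 16ULL << 30 };
	int fd = open("/mnt/nilfs", O_RDONLY);	/* any fd on the filesystem */

	if (fd < 0)
		return 1;
	if (ioctl(fd, NILFS_IOCTL_SET_ALLOC_RANGE, range) < 0)
		perror("NILFS_IOCTL_SET_ALLOC_RANGE");
	close(fd);
	return 0;
}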
Signed-off-by: Ryusuke Konishi --- fs/nilfs2/ioctl.c | 34 ++++++++++++++++++++++ fs/nilfs2/sufile.c | 73 ++++++++++++++++++++++++++++++++++++++++------- fs/nilfs2/sufile.h | 1 + include/linux/nilfs2_fs.h | 2 ++ 4 files changed, 100 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index f2469ba6246b..6f617773a7f7 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -698,6 +698,38 @@ static int nilfs_ioctl_sync(struct inode *inode, struct file *filp, return 0; } +static int nilfs_ioctl_set_alloc_range(struct inode *inode, void __user *argp) +{ + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; + __u64 range[2]; + __u64 minseg, maxseg; + unsigned long segbytes; + int ret = -EPERM; + + if (!capable(CAP_SYS_ADMIN)) + goto out; + + ret = -EFAULT; + if (copy_from_user(range, argp, sizeof(__u64[2]))) + goto out; + + ret = -ERANGE; + if (range[1] > i_size_read(inode->i_sb->s_bdev->bd_inode)) + goto out; + + segbytes = nilfs->ns_blocks_per_segment * nilfs->ns_blocksize; + + minseg = range[0] + segbytes - 1; + do_div(minseg, segbytes); + maxseg = NILFS_SB2_OFFSET_BYTES(range[1]); + do_div(maxseg, segbytes); + maxseg--; + + ret = nilfs_sufile_set_alloc_range(nilfs->ns_sufile, minseg, maxseg); +out: + return ret; +} + static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp, size_t membsz, @@ -763,6 +795,8 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return nilfs_ioctl_clean_segments(inode, filp, cmd, argp); case NILFS_IOCTL_SYNC: return nilfs_ioctl_sync(inode, filp, cmd, argp); + case NILFS_IOCTL_SET_ALLOC_RANGE: + return nilfs_ioctl_set_alloc_range(inode, argp); default: return -ENOTTY; } diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index 1d6f488ccae8..f4374df00ad5 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -33,7 +33,9 @@ struct nilfs_sufile_info { struct nilfs_mdt_info mi; - unsigned long ncleansegs; + unsigned long ncleansegs;/* number of clean segments */ + __u64 allocmin; /* lower limit of allocatable segment range */ + __u64 allocmax; /* upper limit of allocatable segment range */ }; static inline struct nilfs_sufile_info *NILFS_SUI(struct inode *sufile) @@ -247,6 +249,35 @@ int nilfs_sufile_update(struct inode *sufile, __u64 segnum, int create, return ret; } +/** + * nilfs_sufile_set_alloc_range - limit range of segment to be allocated + * @sufile: inode of segment usage file + * @start: minimum segment number of allocatable region (inclusive) + * @end: maximum segment number of allocatable region (inclusive) + * + * Return Value: On success, 0 is returned. On error, one of the + * following negative error codes is returned. 
+ * + * %-ERANGE - invalid segment region + */ +int nilfs_sufile_set_alloc_range(struct inode *sufile, __u64 start, __u64 end) +{ + struct nilfs_sufile_info *sui = NILFS_SUI(sufile); + __u64 nsegs; + int ret = -ERANGE; + + down_write(&NILFS_MDT(sufile)->mi_sem); + nsegs = nilfs_sufile_get_nsegments(sufile); + + if (start <= end && end < nsegs) { + sui->allocmin = start; + sui->allocmax = end; + ret = 0; + } + up_write(&NILFS_MDT(sufile)->mi_sem); + return ret; +} + /** * nilfs_sufile_alloc - allocate a segment * @sufile: inode of segment usage file @@ -269,11 +300,12 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) struct buffer_head *header_bh, *su_bh; struct nilfs_sufile_header *header; struct nilfs_segment_usage *su; + struct nilfs_sufile_info *sui = NILFS_SUI(sufile); size_t susz = NILFS_MDT(sufile)->mi_entry_size; __u64 segnum, maxsegnum, last_alloc; void *kaddr; - unsigned long nsegments, ncleansegs, nsus; - int ret, i, j; + unsigned long nsegments, ncleansegs, nsus, cnt; + int ret, j; down_write(&NILFS_MDT(sufile)->mi_sem); @@ -287,13 +319,31 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) kunmap_atomic(kaddr, KM_USER0); nsegments = nilfs_sufile_get_nsegments(sufile); + maxsegnum = sui->allocmax; segnum = last_alloc + 1; - maxsegnum = nsegments - 1; - for (i = 0; i < nsegments; i += nsus) { - if (segnum >= nsegments) { - /* wrap around */ - segnum = 0; - maxsegnum = last_alloc; + if (segnum < sui->allocmin || segnum > sui->allocmax) + segnum = sui->allocmin; + + for (cnt = 0; cnt < nsegments; cnt += nsus) { + if (segnum > maxsegnum) { + if (cnt < sui->allocmax - sui->allocmin + 1) { + /* + * wrap around in the limited region. + * if allocation started from + * sui->allocmin, this never happens. + */ + segnum = sui->allocmin; + maxsegnum = last_alloc; + } else if (segnum > sui->allocmin && + sui->allocmax + 1 < nsegments) { + segnum = sui->allocmax + 1; + maxsegnum = nsegments - 1; + } else if (sui->allocmin > 0) { + segnum = 0; + maxsegnum = sui->allocmin - 1; + } else { + break; /* never happens */ + } } ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 1, &su_bh); @@ -319,7 +369,7 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) header->sh_last_alloc = cpu_to_le64(segnum); kunmap_atomic(kaddr, KM_USER0); - NILFS_SUI(sufile)->ncleansegs--; + sui->ncleansegs--; nilfs_mdt_mark_buffer_dirty(header_bh); nilfs_mdt_mark_buffer_dirty(su_bh); nilfs_mdt_mark_dirty(sufile); @@ -679,6 +729,9 @@ int nilfs_sufile_read(struct super_block *sb, size_t susize, kunmap_atomic(kaddr, KM_USER0); brelse(header_bh); + sui->allocmax = nilfs_sufile_get_nsegments(sufile) - 1; + sui->allocmin = 0; + unlock_new_inode(sufile); out: *inodep = sufile; diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h index a943fbacb45b..57bfee9cd02d 100644 --- a/fs/nilfs2/sufile.h +++ b/fs/nilfs2/sufile.h @@ -36,6 +36,7 @@ static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile) unsigned long nilfs_sufile_get_ncleansegs(struct inode *sufile); +int nilfs_sufile_set_alloc_range(struct inode *sufile, __u64 start, __u64 end); int nilfs_sufile_alloc(struct inode *, __u64 *); int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum); int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum, diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h index bd8678f8a421..7454ad7451b4 100644 --- a/include/linux/nilfs2_fs.h +++ b/include/linux/nilfs2_fs.h @@ -845,5 +845,7 @@ struct nilfs_bdesc { _IOR(NILFS_IOCTL_IDENT, 0x8A, __u64) 
#define NILFS_IOCTL_RESIZE \ _IOW(NILFS_IOCTL_IDENT, 0x8B, __u64) +#define NILFS_IOCTL_SET_ALLOC_RANGE \ + _IOW(NILFS_IOCTL_IDENT, 0x8C, __u64[2]) #endif /* _LINUX_NILFS_FS_H */ -- cgit v1.2.3-71-gd317 From 8369ae33b705222aa05ab53c7d6b4458f4ed161b Mon Sep 17 00:00:00 2001 From: RafaÅ‚ MiÅ‚ecki Date: Mon, 9 May 2011 18:56:46 +0200 Subject: bcma: add Broadcom specific AMBA bus driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Broadcom has released cards based on a new AMBA-based bus type. From a programming point of view, this new bus type differs from AMBA and does not use AMBA common registers. It also differs enough from SSB. We decided that a new bus driver is needed to keep the code clean. In its current form, the driver detects devices present on the bus and registers them in the system. It allows registering BCMA drivers for specified bus devices and provides them basic operations. The bus driver itself includes two important bus managing drivers: ChipCommon core driver and PCI(c) core driver. They are early used to allow correct initialization. Currently code is limited to supporting buses on PCI(e) devices, however the driver is designed to be used also on other hosts. The host abstraction layer is implemented and already used for PCI(e). Support for PCI(e) hosts is working and seems to be stable (access to 80211 core was tested successfully on a few devices). We can still optimize it by using some fixed windows, but this can be done later without affecting any external code. Windows are just ranges in MMIO used for accessing cores on the bus. Cc: Greg KH Cc: Michael Büsch Cc: Larry Finger Cc: George Kashperko Cc: Arend van Spriel Cc: linux-arm-kernel@lists.infradead.org Cc: Russell King Cc: Arnd Bergmann Cc: Andy Botting Cc: linuxdriverproject Cc: linux-kernel@vger.kernel.org Signed-off-by: RafaÅ‚ MiÅ‚ecki Signed-off-by: John W. 
Linville --- Documentation/ABI/testing/sysfs-bus-bcma | 31 +++ MAINTAINERS | 7 + drivers/Kconfig | 2 + drivers/Makefile | 1 + drivers/bcma/Kconfig | 33 +++ drivers/bcma/Makefile | 7 + drivers/bcma/README | 19 ++ drivers/bcma/TODO | 3 + drivers/bcma/bcma_private.h | 28 +++ drivers/bcma/core.c | 51 ++++ drivers/bcma/driver_chipcommon.c | 87 +++++++ drivers/bcma/driver_chipcommon_pmu.c | 134 +++++++++++ drivers/bcma/driver_pci.c | 163 +++++++++++++ drivers/bcma/host_pci.c | 196 +++++++++++++++ drivers/bcma/main.c | 247 +++++++++++++++++++ drivers/bcma/scan.c | 360 ++++++++++++++++++++++++++++ drivers/bcma/scan.h | 56 +++++ include/linux/bcma/bcma.h | 224 +++++++++++++++++ include/linux/bcma/bcma_driver_chipcommon.h | 297 +++++++++++++++++++++++ include/linux/bcma/bcma_driver_pci.h | 89 +++++++ include/linux/bcma/bcma_regs.h | 34 +++ include/linux/mod_devicetable.h | 17 ++ scripts/mod/file2alias.c | 22 ++ 23 files changed, 2108 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-bus-bcma create mode 100644 drivers/bcma/Kconfig create mode 100644 drivers/bcma/Makefile create mode 100644 drivers/bcma/README create mode 100644 drivers/bcma/TODO create mode 100644 drivers/bcma/bcma_private.h create mode 100644 drivers/bcma/core.c create mode 100644 drivers/bcma/driver_chipcommon.c create mode 100644 drivers/bcma/driver_chipcommon_pmu.c create mode 100644 drivers/bcma/driver_pci.c create mode 100644 drivers/bcma/host_pci.c create mode 100644 drivers/bcma/main.c create mode 100644 drivers/bcma/scan.c create mode 100644 drivers/bcma/scan.h create mode 100644 include/linux/bcma/bcma.h create mode 100644 include/linux/bcma/bcma_driver_chipcommon.h create mode 100644 include/linux/bcma/bcma_driver_pci.h create mode 100644 include/linux/bcma/bcma_regs.h (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-bus-bcma b/Documentation/ABI/testing/sysfs-bus-bcma new file mode 100644 index 000000000000..06b62badddd1 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-bus-bcma @@ -0,0 +1,31 @@ +What: /sys/bus/bcma/devices/.../manuf +Date: May 2011 +KernelVersion: 2.6.40 +Contact: RafaÅ‚ MiÅ‚ecki +Description: + Each BCMA core has it's manufacturer id. See + include/linux/bcma/bcma.h for possible values. + +What: /sys/bus/bcma/devices/.../id +Date: May 2011 +KernelVersion: 2.6.40 +Contact: RafaÅ‚ MiÅ‚ecki +Description: + There are a few types of BCMA cores, they can be identified by + id field. + +What: /sys/bus/bcma/devices/.../rev +Date: May 2011 +KernelVersion: 2.6.40 +Contact: RafaÅ‚ MiÅ‚ecki +Description: + BCMA cores of the same type can still slightly differ depending + on their revision. Use it for detailed programming. + +What: /sys/bus/bcma/devices/.../class +Date: May 2011 +KernelVersion: 2.6.40 +Contact: RafaÅ‚ MiÅ‚ecki +Description: + Each BCMA core is identified by few fields, including class it + belongs to. See include/linux/bcma/bcma.h for possible values. 
diff --git a/MAINTAINERS b/MAINTAINERS index 9f9104987a73..df5585819a62 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5810,6 +5810,13 @@ S: Maintained F: drivers/ssb/ F: include/linux/ssb/ +BROADCOM SPECIFIC AMBA DRIVER (BCMA) +M: RafaÅ‚ MiÅ‚ecki +L: linux-wireless@vger.kernel.org +S: Maintained +F: drivers/bcma/ +F: include/linux/bcma/ + SONY VAIO CONTROL DEVICE DRIVER M: Mattia Dongili L: platform-driver-x86@vger.kernel.org diff --git a/drivers/Kconfig b/drivers/Kconfig index 177c7d156933..aca706751469 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -68,6 +68,8 @@ source "drivers/watchdog/Kconfig" source "drivers/ssb/Kconfig" +source "drivers/bcma/Kconfig" + source "drivers/mfd/Kconfig" source "drivers/regulator/Kconfig" diff --git a/drivers/Makefile b/drivers/Makefile index 3f135b6fb014..a29527f4ded6 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -110,6 +110,7 @@ obj-$(CONFIG_HID) += hid/ obj-$(CONFIG_PPC_PS3) += ps3/ obj-$(CONFIG_OF) += of/ obj-$(CONFIG_SSB) += ssb/ +obj-$(CONFIG_BCMA) += bcma/ obj-$(CONFIG_VHOST_NET) += vhost/ obj-$(CONFIG_VLYNQ) += vlynq/ obj-$(CONFIG_STAGING) += staging/ diff --git a/drivers/bcma/Kconfig b/drivers/bcma/Kconfig new file mode 100644 index 000000000000..353781b5b78b --- /dev/null +++ b/drivers/bcma/Kconfig @@ -0,0 +1,33 @@ +config BCMA_POSSIBLE + bool + depends on HAS_IOMEM && HAS_DMA + default y + +menu "Broadcom specific AMBA" + depends on BCMA_POSSIBLE + +config BCMA + tristate "BCMA support" + depends on BCMA_POSSIBLE + help + Bus driver for Broadcom specific Advanced Microcontroller Bus + Architecture. + +config BCMA_HOST_PCI_POSSIBLE + bool + depends on BCMA && PCI = y + default y + +config BCMA_HOST_PCI + bool "Support for BCMA on PCI-host bus" + depends on BCMA_HOST_PCI_POSSIBLE + +config BCMA_DEBUG + bool "BCMA debugging" + depends on BCMA + help + This turns on additional debugging messages. + + If unsure, say N + +endmenu diff --git a/drivers/bcma/Makefile b/drivers/bcma/Makefile new file mode 100644 index 000000000000..0d56245bcb79 --- /dev/null +++ b/drivers/bcma/Makefile @@ -0,0 +1,7 @@ +bcma-y += main.o scan.o core.o +bcma-y += driver_chipcommon.o driver_chipcommon_pmu.o +bcma-y += driver_pci.o +bcma-$(CONFIG_BCMA_HOST_PCI) += host_pci.o +obj-$(CONFIG_BCMA) += bcma.o + +ccflags-$(CONFIG_BCMA_DEBUG) := -DDEBUG diff --git a/drivers/bcma/README b/drivers/bcma/README new file mode 100644 index 000000000000..f7e7ce46c603 --- /dev/null +++ b/drivers/bcma/README @@ -0,0 +1,19 @@ +Broadcom introduced new bus as replacement for older SSB. It is based on AMBA, +however from programming point of view there is nothing AMBA specific we use. + +Standard AMBA drivers are platform specific, have hardcoded addresses and use +AMBA standard fields like CID and PID. + +In case of Broadcom's cards every device consists of: +1) Broadcom specific AMBA device. It is put on AMBA bus, but can not be treated + as standard AMBA device. Reading it's CID or PID can cause machine lockup. +2) AMBA standard devices called ports or wrappers. They have CIDs (AMBA_CID) + and PIDs (0x103BB369), but we do not use that info for anything. One of that + devices is used for managing Broadcom specific core. + +Addresses of AMBA devices are not hardcoded in driver and have to be read from +EPROM. + +In this situation we decided to introduce separated bus. It can contain up to +16 devices identified by Broadcom specific fields: manufacturer, id, revision +and class. 
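To make the registration model described above concrete, here is an editor's sketch of a minimal client driver, assuming the bcma_driver/bcma_driver_register interface and the ID macros exported by the new include/linux/bcma/bcma.h; the choice of the 802.11 core and the probe body are illustrative assumptions, not code from this patch.

#include <linux/module.h>
#include <linux/bcma/bcma.h>

/* Bind to the Broadcom 802.11 core, any revision, any class. */
static const struct bcma_device_id demo_bcma_tbl[] = {
	BCMA_CORE(BCMA_MANUF_BCM, BCMA_CORE_80211, BCMA_ANY_REV, BCMA_ANY_CLASS),
	BCMA_CORETABLE_END
};
MODULE_DEVICE_TABLE(bcma, demo_bcma_tbl);	/* relies on the new file2alias support */

static int demo_bcma_probe(struct bcma_device *core)
{
	int err;

	/* Bring the core out of reset and peek at its I/O control register. */
	err = bcma_core_enable(core, 0);
	if (err)
		return err;
	pr_info("demo: core 0x%03X rev %u ioctl 0x%08X\n",
		core->id.id, core->id.rev, bcma_aread32(core, BCMA_IOCTL));
	return 0;
}

static void demo_bcma_remove(struct bcma_device *core)
{
	/* Nothing to tear down in this sketch. */
}

static struct bcma_driver demo_bcma_driver = {
	.name		= KBUILD_MODNAME,
	.id_table	= demo_bcma_tbl,
	.probe		= demo_bcma_probe,
	.remove		= demo_bcma_remove,
};

static int __init demo_bcma_init(void)
{
	return bcma_driver_register(&demo_bcma_driver);
}
module_init(demo_bcma_init);

static void __exit demo_bcma_exit(void)
{
	bcma_driver_unregister(&demo_bcma_driver);
}
module_exit(demo_bcma_exit);

MODULE_LICENSE("GPL");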
diff --git a/drivers/bcma/TODO b/drivers/bcma/TODO new file mode 100644 index 000000000000..da7aa99fe81c --- /dev/null +++ b/drivers/bcma/TODO @@ -0,0 +1,3 @@ +- Interrupts +- Defines for PCI core driver +- Create kernel Documentation (use info from README) diff --git a/drivers/bcma/bcma_private.h b/drivers/bcma/bcma_private.h new file mode 100644 index 000000000000..2f72e9c585fd --- /dev/null +++ b/drivers/bcma/bcma_private.h @@ -0,0 +1,28 @@ +#ifndef LINUX_BCMA_PRIVATE_H_ +#define LINUX_BCMA_PRIVATE_H_ + +#ifndef pr_fmt +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#endif + +#include +#include + +#define BCMA_CORE_SIZE 0x1000 + +struct bcma_bus; + +/* main.c */ +extern int bcma_bus_register(struct bcma_bus *bus); +extern void bcma_bus_unregister(struct bcma_bus *bus); + +/* scan.c */ +int bcma_bus_scan(struct bcma_bus *bus); + +#ifdef CONFIG_BCMA_HOST_PCI +/* host_pci.c */ +extern int __init bcma_host_pci_init(void); +extern void __exit bcma_host_pci_exit(void); +#endif /* CONFIG_BCMA_HOST_PCI */ + +#endif diff --git a/drivers/bcma/core.c b/drivers/bcma/core.c new file mode 100644 index 000000000000..ced379f7b371 --- /dev/null +++ b/drivers/bcma/core.c @@ -0,0 +1,51 @@ +/* + * Broadcom specific AMBA + * Core ops + * + * Licensed under the GNU/GPL. See COPYING for details. + */ + +#include "bcma_private.h" +#include + +bool bcma_core_is_enabled(struct bcma_device *core) +{ + if ((bcma_aread32(core, BCMA_IOCTL) & (BCMA_IOCTL_CLK | BCMA_IOCTL_FGC)) + != BCMA_IOCTL_CLK) + return false; + if (bcma_aread32(core, BCMA_RESET_CTL) & BCMA_RESET_CTL_RESET) + return false; + return true; +} +EXPORT_SYMBOL_GPL(bcma_core_is_enabled); + +static void bcma_core_disable(struct bcma_device *core, u32 flags) +{ + if (bcma_aread32(core, BCMA_RESET_CTL) & BCMA_RESET_CTL_RESET) + return; + + bcma_awrite32(core, BCMA_IOCTL, flags); + bcma_aread32(core, BCMA_IOCTL); + udelay(10); + + bcma_awrite32(core, BCMA_RESET_CTL, BCMA_RESET_CTL_RESET); + udelay(1); +} + +int bcma_core_enable(struct bcma_device *core, u32 flags) +{ + bcma_core_disable(core, flags); + + bcma_awrite32(core, BCMA_IOCTL, (BCMA_IOCTL_CLK | BCMA_IOCTL_FGC | flags)); + bcma_aread32(core, BCMA_IOCTL); + + bcma_awrite32(core, BCMA_RESET_CTL, 0); + udelay(1); + + bcma_awrite32(core, BCMA_IOCTL, (BCMA_IOCTL_CLK | flags)); + bcma_aread32(core, BCMA_IOCTL); + udelay(1); + + return 0; +} +EXPORT_SYMBOL_GPL(bcma_core_enable); diff --git a/drivers/bcma/driver_chipcommon.c b/drivers/bcma/driver_chipcommon.c new file mode 100644 index 000000000000..caf596091d4d --- /dev/null +++ b/drivers/bcma/driver_chipcommon.c @@ -0,0 +1,87 @@ +/* + * Broadcom specific AMBA + * ChipCommon core driver + * + * Copyright 2005, Broadcom Corporation + * Copyright 2006, 2007, Michael Buesch + * + * Licensed under the GNU/GPL. See COPYING for details. 
+ */ + +#include "bcma_private.h" +#include + +static inline u32 bcma_cc_write32_masked(struct bcma_drv_cc *cc, u16 offset, + u32 mask, u32 value) +{ + value &= mask; + value |= bcma_cc_read32(cc, offset) & ~mask; + bcma_cc_write32(cc, offset, value); + + return value; +} + +void bcma_core_chipcommon_init(struct bcma_drv_cc *cc) +{ + if (cc->core->id.rev >= 11) + cc->status = bcma_cc_read32(cc, BCMA_CC_CHIPSTAT); + cc->capabilities = bcma_cc_read32(cc, BCMA_CC_CAP); + if (cc->core->id.rev >= 35) + cc->capabilities_ext = bcma_cc_read32(cc, BCMA_CC_CAP_EXT); + + bcma_cc_write32(cc, 0x58, 0); + bcma_cc_write32(cc, 0x5C, 0); + + if (cc->capabilities & BCMA_CC_CAP_PMU) + bcma_pmu_init(cc); + if (cc->capabilities & BCMA_CC_CAP_PCTL) + pr_err("Power control not implemented!\n"); +} + +/* Set chip watchdog reset timer to fire in 'ticks' backplane cycles */ +void bcma_chipco_watchdog_timer_set(struct bcma_drv_cc *cc, u32 ticks) +{ + /* instant NMI */ + bcma_cc_write32(cc, BCMA_CC_WATCHDOG, ticks); +} + +void bcma_chipco_irq_mask(struct bcma_drv_cc *cc, u32 mask, u32 value) +{ + bcma_cc_write32_masked(cc, BCMA_CC_IRQMASK, mask, value); +} + +u32 bcma_chipco_irq_status(struct bcma_drv_cc *cc, u32 mask) +{ + return bcma_cc_read32(cc, BCMA_CC_IRQSTAT) & mask; +} + +u32 bcma_chipco_gpio_in(struct bcma_drv_cc *cc, u32 mask) +{ + return bcma_cc_read32(cc, BCMA_CC_GPIOIN) & mask; +} + +u32 bcma_chipco_gpio_out(struct bcma_drv_cc *cc, u32 mask, u32 value) +{ + return bcma_cc_write32_masked(cc, BCMA_CC_GPIOOUT, mask, value); +} + +u32 bcma_chipco_gpio_outen(struct bcma_drv_cc *cc, u32 mask, u32 value) +{ + return bcma_cc_write32_masked(cc, BCMA_CC_GPIOOUTEN, mask, value); +} + +u32 bcma_chipco_gpio_control(struct bcma_drv_cc *cc, u32 mask, u32 value) +{ + return bcma_cc_write32_masked(cc, BCMA_CC_GPIOCTL, mask, value); +} +EXPORT_SYMBOL_GPL(bcma_chipco_gpio_control); + +u32 bcma_chipco_gpio_intmask(struct bcma_drv_cc *cc, u32 mask, u32 value) +{ + return bcma_cc_write32_masked(cc, BCMA_CC_GPIOIRQ, mask, value); +} + +u32 bcma_chipco_gpio_polarity(struct bcma_drv_cc *cc, u32 mask, u32 value) +{ + return bcma_cc_write32_masked(cc, BCMA_CC_GPIOPOL, mask, value); +} diff --git a/drivers/bcma/driver_chipcommon_pmu.c b/drivers/bcma/driver_chipcommon_pmu.c new file mode 100644 index 000000000000..f44177a644c7 --- /dev/null +++ b/drivers/bcma/driver_chipcommon_pmu.c @@ -0,0 +1,134 @@ +/* + * Broadcom specific AMBA + * ChipCommon Power Management Unit driver + * + * Copyright 2009, Michael Buesch + * Copyright 2007, Broadcom Corporation + * + * Licensed under the GNU/GPL. See COPYING for details. 
+ */ + +#include "bcma_private.h" +#include + +static void bcma_chipco_chipctl_maskset(struct bcma_drv_cc *cc, + u32 offset, u32 mask, u32 set) +{ + u32 value; + + bcma_cc_read32(cc, BCMA_CC_CHIPCTL_ADDR); + bcma_cc_write32(cc, BCMA_CC_CHIPCTL_ADDR, offset); + bcma_cc_read32(cc, BCMA_CC_CHIPCTL_ADDR); + value = bcma_cc_read32(cc, BCMA_CC_CHIPCTL_DATA); + value &= mask; + value |= set; + bcma_cc_write32(cc, BCMA_CC_CHIPCTL_DATA, value); + bcma_cc_read32(cc, BCMA_CC_CHIPCTL_DATA); +} + +static void bcma_pmu_pll_init(struct bcma_drv_cc *cc) +{ + struct bcma_bus *bus = cc->core->bus; + + switch (bus->chipinfo.id) { + case 0x4313: + case 0x4331: + case 43224: + case 43225: + break; + default: + pr_err("PLL init unknown for device 0x%04X\n", + bus->chipinfo.id); + } +} + +static void bcma_pmu_resources_init(struct bcma_drv_cc *cc) +{ + struct bcma_bus *bus = cc->core->bus; + u32 min_msk = 0, max_msk = 0; + + switch (bus->chipinfo.id) { + case 0x4313: + min_msk = 0x200D; + max_msk = 0xFFFF; + break; + case 43224: + break; + default: + pr_err("PMU resource config unknown for device 0x%04X\n", + bus->chipinfo.id); + } + + /* Set the resource masks. */ + if (min_msk) + bcma_cc_write32(cc, BCMA_CC_PMU_MINRES_MSK, min_msk); + if (max_msk) + bcma_cc_write32(cc, BCMA_CC_PMU_MAXRES_MSK, max_msk); +} + +void bcma_pmu_swreg_init(struct bcma_drv_cc *cc) +{ + struct bcma_bus *bus = cc->core->bus; + + switch (bus->chipinfo.id) { + case 0x4313: + case 0x4331: + case 43224: + break; + default: + pr_err("PMU switch/regulators init unknown for device " + "0x%04X\n", bus->chipinfo.id); + } +} + +void bcma_pmu_workarounds(struct bcma_drv_cc *cc) +{ + struct bcma_bus *bus = cc->core->bus; + + switch (bus->chipinfo.id) { + case 0x4313: + bcma_chipco_chipctl_maskset(cc, 0, ~0, 0x7); + break; + case 0x4331: + pr_err("Enabling Ext PA lines not implemented\n"); + break; + case 43224: + if (bus->chipinfo.rev == 0) { + pr_err("Workarounds for 43224 rev 0 not fully " + "implemented\n"); + bcma_chipco_chipctl_maskset(cc, 0, ~0, 0xF0); + } else { + bcma_chipco_chipctl_maskset(cc, 0, ~0, 0xF0); + } + break; + default: + pr_err("Workarounds unknown for device 0x%04X\n", + bus->chipinfo.id); + } +} + +void bcma_pmu_init(struct bcma_drv_cc *cc) +{ + u32 pmucap; + + pmucap = bcma_cc_read32(cc, BCMA_CC_PMU_CAP); + cc->pmu.rev = (pmucap & BCMA_CC_PMU_CAP_REVISION); + + pr_debug("Found rev %u PMU (capabilities 0x%08X)\n", cc->pmu.rev, + pmucap); + + if (cc->pmu.rev == 1) + bcma_cc_mask32(cc, BCMA_CC_PMU_CTL, + ~BCMA_CC_PMU_CTL_NOILPONW); + else + bcma_cc_set32(cc, BCMA_CC_PMU_CTL, + BCMA_CC_PMU_CTL_NOILPONW); + + if (cc->core->id.id == 0x4329 && cc->core->id.rev == 2) + pr_err("Fix for 4329b0 bad LPOM state not implemented!\n"); + + bcma_pmu_pll_init(cc); + bcma_pmu_resources_init(cc); + bcma_pmu_swreg_init(cc); + bcma_pmu_workarounds(cc); +} diff --git a/drivers/bcma/driver_pci.c b/drivers/bcma/driver_pci.c new file mode 100644 index 000000000000..b98b8359bef5 --- /dev/null +++ b/drivers/bcma/driver_pci.c @@ -0,0 +1,163 @@ +/* + * Broadcom specific AMBA + * PCI Core + * + * Copyright 2005, Broadcom Corporation + * Copyright 2006, 2007, Michael Buesch + * + * Licensed under the GNU/GPL. See COPYING for details. + */ + +#include "bcma_private.h" +#include + +/************************************************** + * R/W ops. 
+ **************************************************/ + +static u32 bcma_pcie_read(struct bcma_drv_pci *pc, u32 address) +{ + pcicore_write32(pc, 0x130, address); + pcicore_read32(pc, 0x130); + return pcicore_read32(pc, 0x134); +} + +#if 0 +static void bcma_pcie_write(struct bcma_drv_pci *pc, u32 address, u32 data) +{ + pcicore_write32(pc, 0x130, address); + pcicore_read32(pc, 0x130); + pcicore_write32(pc, 0x134, data); +} +#endif + +static void bcma_pcie_mdio_set_phy(struct bcma_drv_pci *pc, u8 phy) +{ + const u16 mdio_control = 0x128; + const u16 mdio_data = 0x12C; + u32 v; + int i; + + v = (1 << 30); /* Start of Transaction */ + v |= (1 << 28); /* Write Transaction */ + v |= (1 << 17); /* Turnaround */ + v |= (0x1F << 18); + v |= (phy << 4); + pcicore_write32(pc, mdio_data, v); + + udelay(10); + for (i = 0; i < 200; i++) { + v = pcicore_read32(pc, mdio_control); + if (v & 0x100 /* Trans complete */) + break; + msleep(1); + } +} + +static u16 bcma_pcie_mdio_read(struct bcma_drv_pci *pc, u8 device, u8 address) +{ + const u16 mdio_control = 0x128; + const u16 mdio_data = 0x12C; + int max_retries = 10; + u16 ret = 0; + u32 v; + int i; + + v = 0x80; /* Enable Preamble Sequence */ + v |= 0x2; /* MDIO Clock Divisor */ + pcicore_write32(pc, mdio_control, v); + + if (pc->core->id.rev >= 10) { + max_retries = 200; + bcma_pcie_mdio_set_phy(pc, device); + } + + v = (1 << 30); /* Start of Transaction */ + v |= (1 << 29); /* Read Transaction */ + v |= (1 << 17); /* Turnaround */ + if (pc->core->id.rev < 10) + v |= (u32)device << 22; + v |= (u32)address << 18; + pcicore_write32(pc, mdio_data, v); + /* Wait for the device to complete the transaction */ + udelay(10); + for (i = 0; i < 200; i++) { + v = pcicore_read32(pc, mdio_control); + if (v & 0x100 /* Trans complete */) { + udelay(10); + ret = pcicore_read32(pc, mdio_data); + break; + } + msleep(1); + } + pcicore_write32(pc, mdio_control, 0); + return ret; +} + +static void bcma_pcie_mdio_write(struct bcma_drv_pci *pc, u8 device, + u8 address, u16 data) +{ + const u16 mdio_control = 0x128; + const u16 mdio_data = 0x12C; + int max_retries = 10; + u32 v; + int i; + + v = 0x80; /* Enable Preamble Sequence */ + v |= 0x2; /* MDIO Clock Divisor */ + pcicore_write32(pc, mdio_control, v); + + if (pc->core->id.rev >= 10) { + max_retries = 200; + bcma_pcie_mdio_set_phy(pc, device); + } + + v = (1 << 30); /* Start of Transaction */ + v |= (1 << 28); /* Write Transaction */ + v |= (1 << 17); /* Turnaround */ + if (pc->core->id.rev < 10) + v |= (u32)device << 22; + v |= (u32)address << 18; + v |= data; + pcicore_write32(pc, mdio_data, v); + /* Wait for the device to complete the transaction */ + udelay(10); + for (i = 0; i < max_retries; i++) { + v = pcicore_read32(pc, mdio_control); + if (v & 0x100 /* Trans complete */) + break; + msleep(1); + } + pcicore_write32(pc, mdio_control, 0); +} + +/************************************************** + * Workarounds. + **************************************************/ + +static u8 bcma_pcicore_polarity_workaround(struct bcma_drv_pci *pc) +{ + return (bcma_pcie_read(pc, 0x204) & 0x10) ? 
0xC0 : 0x80; +} + +static void bcma_pcicore_serdes_workaround(struct bcma_drv_pci *pc) +{ + const u8 serdes_pll_device = 0x1D; + const u8 serdes_rx_device = 0x1F; + u16 tmp; + + bcma_pcie_mdio_write(pc, serdes_rx_device, 1 /* Control */, + bcma_pcicore_polarity_workaround(pc)); + tmp = bcma_pcie_mdio_read(pc, serdes_pll_device, 1 /* Control */); + if (tmp & 0x4000) + bcma_pcie_mdio_write(pc, serdes_pll_device, 1, tmp & ~0x4000); +} + +/************************************************** + * Init. + **************************************************/ + +void bcma_core_pci_init(struct bcma_drv_pci *pc) +{ + bcma_pcicore_serdes_workaround(pc); +} diff --git a/drivers/bcma/host_pci.c b/drivers/bcma/host_pci.c new file mode 100644 index 000000000000..99dd36e8500b --- /dev/null +++ b/drivers/bcma/host_pci.c @@ -0,0 +1,196 @@ +/* + * Broadcom specific AMBA + * PCI Host + * + * Licensed under the GNU/GPL. See COPYING for details. + */ + +#include "bcma_private.h" +#include +#include + +static void bcma_host_pci_switch_core(struct bcma_device *core) +{ + pci_write_config_dword(core->bus->host_pci, BCMA_PCI_BAR0_WIN, + core->addr); + pci_write_config_dword(core->bus->host_pci, BCMA_PCI_BAR0_WIN2, + core->wrap); + core->bus->mapped_core = core; + pr_debug("Switched to core: 0x%X\n", core->id.id); +} + +static u8 bcma_host_pci_read8(struct bcma_device *core, u16 offset) +{ + if (core->bus->mapped_core != core) + bcma_host_pci_switch_core(core); + return ioread8(core->bus->mmio + offset); +} + +static u16 bcma_host_pci_read16(struct bcma_device *core, u16 offset) +{ + if (core->bus->mapped_core != core) + bcma_host_pci_switch_core(core); + return ioread16(core->bus->mmio + offset); +} + +static u32 bcma_host_pci_read32(struct bcma_device *core, u16 offset) +{ + if (core->bus->mapped_core != core) + bcma_host_pci_switch_core(core); + return ioread32(core->bus->mmio + offset); +} + +static void bcma_host_pci_write8(struct bcma_device *core, u16 offset, + u8 value) +{ + if (core->bus->mapped_core != core) + bcma_host_pci_switch_core(core); + iowrite8(value, core->bus->mmio + offset); +} + +static void bcma_host_pci_write16(struct bcma_device *core, u16 offset, + u16 value) +{ + if (core->bus->mapped_core != core) + bcma_host_pci_switch_core(core); + iowrite16(value, core->bus->mmio + offset); +} + +static void bcma_host_pci_write32(struct bcma_device *core, u16 offset, + u32 value) +{ + if (core->bus->mapped_core != core) + bcma_host_pci_switch_core(core); + iowrite32(value, core->bus->mmio + offset); +} + +static u32 bcma_host_pci_aread32(struct bcma_device *core, u16 offset) +{ + if (core->bus->mapped_core != core) + bcma_host_pci_switch_core(core); + return ioread32(core->bus->mmio + (1 * BCMA_CORE_SIZE) + offset); +} + +static void bcma_host_pci_awrite32(struct bcma_device *core, u16 offset, + u32 value) +{ + if (core->bus->mapped_core != core) + bcma_host_pci_switch_core(core); + iowrite32(value, core->bus->mmio + (1 * BCMA_CORE_SIZE) + offset); +} + +const struct bcma_host_ops bcma_host_pci_ops = { + .read8 = bcma_host_pci_read8, + .read16 = bcma_host_pci_read16, + .read32 = bcma_host_pci_read32, + .write8 = bcma_host_pci_write8, + .write16 = bcma_host_pci_write16, + .write32 = bcma_host_pci_write32, + .aread32 = bcma_host_pci_aread32, + .awrite32 = bcma_host_pci_awrite32, +}; + +static int bcma_host_pci_probe(struct pci_dev *dev, + const struct pci_device_id *id) +{ + struct bcma_bus *bus; + int err = -ENOMEM; + const char *name; + u32 val; + + /* Alloc */ + bus = kzalloc(sizeof(*bus), GFP_KERNEL); 
+ if (!bus) + goto out; + + /* Basic PCI configuration */ + err = pci_enable_device(dev); + if (err) + goto err_kfree_bus; + + name = dev_name(&dev->dev); + if (dev->driver && dev->driver->name) + name = dev->driver->name; + err = pci_request_regions(dev, name); + if (err) + goto err_pci_disable; + pci_set_master(dev); + + /* Disable the RETRY_TIMEOUT register (0x41) to keep + * PCI Tx retries from interfering with C3 CPU state */ + pci_read_config_dword(dev, 0x40, &val); + if ((val & 0x0000ff00) != 0) + pci_write_config_dword(dev, 0x40, val & 0xffff00ff); + + /* SSB needed additional powering up, do we have any AMBA PCI cards? */ + if (!pci_is_pcie(dev)) + pr_err("PCI card detected, report problems.\n"); + + /* Map MMIO */ + err = -ENOMEM; + bus->mmio = pci_iomap(dev, 0, ~0UL); + if (!bus->mmio) + goto err_pci_release_regions; + + /* Host specific */ + bus->host_pci = dev; + bus->hosttype = BCMA_HOSTTYPE_PCI; + bus->ops = &bcma_host_pci_ops; + + /* Register */ + err = bcma_bus_register(bus); + if (err) + goto err_pci_unmap_mmio; + + pci_set_drvdata(dev, bus); + +out: + return err; + +err_pci_unmap_mmio: + pci_iounmap(dev, bus->mmio); +err_pci_release_regions: + pci_release_regions(dev); +err_pci_disable: + pci_disable_device(dev); +err_kfree_bus: + kfree(bus); + return err; +} + +static void bcma_host_pci_remove(struct pci_dev *dev) +{ + struct bcma_bus *bus = pci_get_drvdata(dev); + + bcma_bus_unregister(bus); + pci_iounmap(dev, bus->mmio); + pci_release_regions(dev); + pci_disable_device(dev); + kfree(bus); + pci_set_drvdata(dev, NULL); +} + +static DEFINE_PCI_DEVICE_TABLE(bcma_pci_bridge_tbl) = { + { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4331) }, + { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4353) }, + { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4727) }, + { 0, }, +}; +MODULE_DEVICE_TABLE(pci, bcma_pci_bridge_tbl); + +static struct pci_driver bcma_pci_bridge_driver = { + .name = "bcma-pci-bridge", + .id_table = bcma_pci_bridge_tbl, + .probe = bcma_host_pci_probe, + .remove = bcma_host_pci_remove, +}; + +int __init bcma_host_pci_init(void) +{ + return pci_register_driver(&bcma_pci_bridge_driver); +} + +void __exit bcma_host_pci_exit(void) +{ + pci_unregister_driver(&bcma_pci_bridge_driver); +} diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c new file mode 100644 index 000000000000..be52344ed19d --- /dev/null +++ b/drivers/bcma/main.c @@ -0,0 +1,247 @@ +/* + * Broadcom specific AMBA + * Bus subsystem + * + * Licensed under the GNU/GPL. See COPYING for details. 
+ */ + +#include "bcma_private.h" +#include + +MODULE_DESCRIPTION("Broadcom's specific AMBA driver"); +MODULE_LICENSE("GPL"); + +static int bcma_bus_match(struct device *dev, struct device_driver *drv); +static int bcma_device_probe(struct device *dev); +static int bcma_device_remove(struct device *dev); + +static ssize_t manuf_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct bcma_device *core = container_of(dev, struct bcma_device, dev); + return sprintf(buf, "0x%03X\n", core->id.manuf); +} +static ssize_t id_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct bcma_device *core = container_of(dev, struct bcma_device, dev); + return sprintf(buf, "0x%03X\n", core->id.id); +} +static ssize_t rev_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct bcma_device *core = container_of(dev, struct bcma_device, dev); + return sprintf(buf, "0x%02X\n", core->id.rev); +} +static ssize_t class_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct bcma_device *core = container_of(dev, struct bcma_device, dev); + return sprintf(buf, "0x%X\n", core->id.class); +} +static struct device_attribute bcma_device_attrs[] = { + __ATTR_RO(manuf), + __ATTR_RO(id), + __ATTR_RO(rev), + __ATTR_RO(class), + __ATTR_NULL, +}; + +static struct bus_type bcma_bus_type = { + .name = "bcma", + .match = bcma_bus_match, + .probe = bcma_device_probe, + .remove = bcma_device_remove, + .dev_attrs = bcma_device_attrs, +}; + +static struct bcma_device *bcma_find_core(struct bcma_bus *bus, u16 coreid) +{ + struct bcma_device *core; + + list_for_each_entry(core, &bus->cores, list) { + if (core->id.id == coreid) + return core; + } + return NULL; +} + +static void bcma_release_core_dev(struct device *dev) +{ + struct bcma_device *core = container_of(dev, struct bcma_device, dev); + kfree(core); +} + +static int bcma_register_cores(struct bcma_bus *bus) +{ + struct bcma_device *core; + int err, dev_id = 0; + + list_for_each_entry(core, &bus->cores, list) { + /* We support that cores ourself */ + switch (core->id.id) { + case BCMA_CORE_CHIPCOMMON: + case BCMA_CORE_PCI: + case BCMA_CORE_PCIE: + continue; + } + + core->dev.release = bcma_release_core_dev; + core->dev.bus = &bcma_bus_type; + dev_set_name(&core->dev, "bcma%d:%d", 0/*bus->num*/, dev_id); + + switch (bus->hosttype) { + case BCMA_HOSTTYPE_PCI: + core->dev.parent = &bus->host_pci->dev; + break; + case BCMA_HOSTTYPE_NONE: + case BCMA_HOSTTYPE_SDIO: + break; + } + + err = device_register(&core->dev); + if (err) { + pr_err("Could not register dev for core 0x%03X\n", + core->id.id); + continue; + } + core->dev_registered = true; + dev_id++; + } + + return 0; +} + +static void bcma_unregister_cores(struct bcma_bus *bus) +{ + struct bcma_device *core; + + list_for_each_entry(core, &bus->cores, list) { + if (core->dev_registered) + device_unregister(&core->dev); + } +} + +int bcma_bus_register(struct bcma_bus *bus) +{ + int err; + struct bcma_device *core; + + /* Scan for devices (cores) */ + err = bcma_bus_scan(bus); + if (err) { + pr_err("Failed to scan: %d\n", err); + return -1; + } + + /* Init CC core */ + core = bcma_find_core(bus, BCMA_CORE_CHIPCOMMON); + if (core) { + bus->drv_cc.core = core; + bcma_core_chipcommon_init(&bus->drv_cc); + } + + /* Init PCIE core */ + core = bcma_find_core(bus, BCMA_CORE_PCIE); + if (core) { + bus->drv_pci.core = core; + bcma_core_pci_init(&bus->drv_pci); + } + + /* Register found cores */ + bcma_register_cores(bus); + + pr_info("Bus 
registered\n"); + + return 0; +} +EXPORT_SYMBOL_GPL(bcma_bus_register); + +void bcma_bus_unregister(struct bcma_bus *bus) +{ + bcma_unregister_cores(bus); +} +EXPORT_SYMBOL_GPL(bcma_bus_unregister); + +int __bcma_driver_register(struct bcma_driver *drv, struct module *owner) +{ + drv->drv.name = drv->name; + drv->drv.bus = &bcma_bus_type; + drv->drv.owner = owner; + + return driver_register(&drv->drv); +} +EXPORT_SYMBOL_GPL(__bcma_driver_register); + +void bcma_driver_unregister(struct bcma_driver *drv) +{ + driver_unregister(&drv->drv); +} +EXPORT_SYMBOL_GPL(bcma_driver_unregister); + +static int bcma_bus_match(struct device *dev, struct device_driver *drv) +{ + struct bcma_device *core = container_of(dev, struct bcma_device, dev); + struct bcma_driver *adrv = container_of(drv, struct bcma_driver, drv); + const struct bcma_device_id *cid = &core->id; + const struct bcma_device_id *did; + + for (did = adrv->id_table; did->manuf || did->id || did->rev; did++) { + if ((did->manuf == cid->manuf || did->manuf == BCMA_ANY_MANUF) && + (did->id == cid->id || did->id == BCMA_ANY_ID) && + (did->rev == cid->rev || did->rev == BCMA_ANY_REV) && + (did->class == cid->class || did->class == BCMA_ANY_CLASS)) + return 1; + } + return 0; +} + +static int bcma_device_probe(struct device *dev) +{ + struct bcma_device *core = container_of(dev, struct bcma_device, dev); + struct bcma_driver *adrv = container_of(dev->driver, struct bcma_driver, + drv); + int err = 0; + + if (adrv->probe) + err = adrv->probe(core); + + return err; +} + +static int bcma_device_remove(struct device *dev) +{ + struct bcma_device *core = container_of(dev, struct bcma_device, dev); + struct bcma_driver *adrv = container_of(dev->driver, struct bcma_driver, + drv); + + if (adrv->remove) + adrv->remove(core); + + return 0; +} + +static int __init bcma_modinit(void) +{ + int err; + + err = bus_register(&bcma_bus_type); + if (err) + return err; + +#ifdef CONFIG_BCMA_HOST_PCI + err = bcma_host_pci_init(); + if (err) { + pr_err("PCI host initialization failed\n"); + err = 0; + } +#endif + + return err; +} +fs_initcall(bcma_modinit); + +static void __exit bcma_modexit(void) +{ +#ifdef CONFIG_BCMA_HOST_PCI + bcma_host_pci_exit(); +#endif + bus_unregister(&bcma_bus_type); +} +module_exit(bcma_modexit) diff --git a/drivers/bcma/scan.c b/drivers/bcma/scan.c new file mode 100644 index 000000000000..40d7dcce8933 --- /dev/null +++ b/drivers/bcma/scan.c @@ -0,0 +1,360 @@ +/* + * Broadcom specific AMBA + * Bus scanning + * + * Licensed under the GNU/GPL. See COPYING for details. 
+ */ + +#include "scan.h" +#include "bcma_private.h" + +#include +#include +#include +#include +#include +#include + +struct bcma_device_id_name { + u16 id; + const char *name; +}; +struct bcma_device_id_name bcma_device_names[] = { + { BCMA_CORE_OOB_ROUTER, "OOB Router" }, + { BCMA_CORE_INVALID, "Invalid" }, + { BCMA_CORE_CHIPCOMMON, "ChipCommon" }, + { BCMA_CORE_ILINE20, "ILine 20" }, + { BCMA_CORE_SRAM, "SRAM" }, + { BCMA_CORE_SDRAM, "SDRAM" }, + { BCMA_CORE_PCI, "PCI" }, + { BCMA_CORE_MIPS, "MIPS" }, + { BCMA_CORE_ETHERNET, "Fast Ethernet" }, + { BCMA_CORE_V90, "V90" }, + { BCMA_CORE_USB11_HOSTDEV, "USB 1.1 Hostdev" }, + { BCMA_CORE_ADSL, "ADSL" }, + { BCMA_CORE_ILINE100, "ILine 100" }, + { BCMA_CORE_IPSEC, "IPSEC" }, + { BCMA_CORE_UTOPIA, "UTOPIA" }, + { BCMA_CORE_PCMCIA, "PCMCIA" }, + { BCMA_CORE_INTERNAL_MEM, "Internal Memory" }, + { BCMA_CORE_MEMC_SDRAM, "MEMC SDRAM" }, + { BCMA_CORE_OFDM, "OFDM" }, + { BCMA_CORE_EXTIF, "EXTIF" }, + { BCMA_CORE_80211, "IEEE 802.11" }, + { BCMA_CORE_PHY_A, "PHY A" }, + { BCMA_CORE_PHY_B, "PHY B" }, + { BCMA_CORE_PHY_G, "PHY G" }, + { BCMA_CORE_MIPS_3302, "MIPS 3302" }, + { BCMA_CORE_USB11_HOST, "USB 1.1 Host" }, + { BCMA_CORE_USB11_DEV, "USB 1.1 Device" }, + { BCMA_CORE_USB20_HOST, "USB 2.0 Host" }, + { BCMA_CORE_USB20_DEV, "USB 2.0 Device" }, + { BCMA_CORE_SDIO_HOST, "SDIO Host" }, + { BCMA_CORE_ROBOSWITCH, "Roboswitch" }, + { BCMA_CORE_PARA_ATA, "PATA" }, + { BCMA_CORE_SATA_XORDMA, "SATA XOR-DMA" }, + { BCMA_CORE_ETHERNET_GBIT, "GBit Ethernet" }, + { BCMA_CORE_PCIE, "PCIe" }, + { BCMA_CORE_PHY_N, "PHY N" }, + { BCMA_CORE_SRAM_CTL, "SRAM Controller" }, + { BCMA_CORE_MINI_MACPHY, "Mini MACPHY" }, + { BCMA_CORE_ARM_1176, "ARM 1176" }, + { BCMA_CORE_ARM_7TDMI, "ARM 7TDMI" }, + { BCMA_CORE_PHY_LP, "PHY LP" }, + { BCMA_CORE_PMU, "PMU" }, + { BCMA_CORE_PHY_SSN, "PHY SSN" }, + { BCMA_CORE_SDIO_DEV, "SDIO Device" }, + { BCMA_CORE_ARM_CM3, "ARM CM3" }, + { BCMA_CORE_PHY_HT, "PHY HT" }, + { BCMA_CORE_MIPS_74K, "MIPS 74K" }, + { BCMA_CORE_MAC_GBIT, "GBit MAC" }, + { BCMA_CORE_DDR12_MEM_CTL, "DDR1/DDR2 Memory Controller" }, + { BCMA_CORE_PCIE_RC, "PCIe Root Complex" }, + { BCMA_CORE_OCP_OCP_BRIDGE, "OCP to OCP Bridge" }, + { BCMA_CORE_SHARED_COMMON, "Common Shared" }, + { BCMA_CORE_OCP_AHB_BRIDGE, "OCP to AHB Bridge" }, + { BCMA_CORE_SPI_HOST, "SPI Host" }, + { BCMA_CORE_I2S, "I2S" }, + { BCMA_CORE_SDR_DDR1_MEM_CTL, "SDR/DDR1 Memory Controller" }, + { BCMA_CORE_SHIM, "SHIM" }, + { BCMA_CORE_DEFAULT, "Default" }, +}; +const char *bcma_device_name(struct bcma_device_id *id) +{ + int i; + + if (id->manuf == BCMA_MANUF_BCM) { + for (i = 0; i < ARRAY_SIZE(bcma_device_names); i++) { + if (bcma_device_names[i].id == id->id) + return bcma_device_names[i].name; + } + } + return "UNKNOWN"; +} + +static u32 bcma_scan_read32(struct bcma_bus *bus, u8 current_coreidx, + u16 offset) +{ + return readl(bus->mmio + offset); +} + +static void bcma_scan_switch_core(struct bcma_bus *bus, u32 addr) +{ + if (bus->hosttype == BCMA_HOSTTYPE_PCI) + pci_write_config_dword(bus->host_pci, BCMA_PCI_BAR0_WIN, + addr); +} + +static u32 bcma_erom_get_ent(struct bcma_bus *bus, u32 **eromptr) +{ + u32 ent = readl(*eromptr); + (*eromptr)++; + return ent; +} + +static void bcma_erom_push_ent(u32 **eromptr) +{ + (*eromptr)--; +} + +static s32 bcma_erom_get_ci(struct bcma_bus *bus, u32 **eromptr) +{ + u32 ent = bcma_erom_get_ent(bus, eromptr); + if (!(ent & SCAN_ER_VALID)) + return -ENOENT; + if ((ent & SCAN_ER_TAG) != SCAN_ER_TAG_CI) + return -ENOENT; + return ent; +} + +static bool 
bcma_erom_is_end(struct bcma_bus *bus, u32 **eromptr)
+{
+        u32 ent = bcma_erom_get_ent(bus, eromptr);
+        bcma_erom_push_ent(eromptr);
+        return (ent == (SCAN_ER_TAG_END | SCAN_ER_VALID));
+}
+
+static bool bcma_erom_is_bridge(struct bcma_bus *bus, u32 **eromptr)
+{
+        u32 ent = bcma_erom_get_ent(bus, eromptr);
+        bcma_erom_push_ent(eromptr);
+        return (((ent & SCAN_ER_VALID)) &&
+                ((ent & SCAN_ER_TAGX) == SCAN_ER_TAG_ADDR) &&
+                ((ent & SCAN_ADDR_TYPE) == SCAN_ADDR_TYPE_BRIDGE));
+}
+
+static void bcma_erom_skip_component(struct bcma_bus *bus, u32 **eromptr)
+{
+        u32 ent;
+        while (1) {
+                ent = bcma_erom_get_ent(bus, eromptr);
+                if ((ent & SCAN_ER_VALID) &&
+                    ((ent & SCAN_ER_TAG) == SCAN_ER_TAG_CI))
+                        break;
+                if (ent == (SCAN_ER_TAG_END | SCAN_ER_VALID))
+                        break;
+        }
+        bcma_erom_push_ent(eromptr);
+}
+
+static s32 bcma_erom_get_mst_port(struct bcma_bus *bus, u32 **eromptr)
+{
+        u32 ent = bcma_erom_get_ent(bus, eromptr);
+        if (!(ent & SCAN_ER_VALID))
+                return -ENOENT;
+        if ((ent & SCAN_ER_TAG) != SCAN_ER_TAG_MP)
+                return -ENOENT;
+        return ent;
+}
+
+static s32 bcma_erom_get_addr_desc(struct bcma_bus *bus, u32 **eromptr,
+                                   u32 type, u8 port)
+{
+        u32 addrl, addrh, sizel, sizeh = 0;
+        u32 size;
+
+        u32 ent = bcma_erom_get_ent(bus, eromptr);
+        if ((!(ent & SCAN_ER_VALID)) ||
+            ((ent & SCAN_ER_TAGX) != SCAN_ER_TAG_ADDR) ||
+            ((ent & SCAN_ADDR_TYPE) != type) ||
+            (((ent & SCAN_ADDR_PORT) >> SCAN_ADDR_PORT_SHIFT) != port)) {
+                bcma_erom_push_ent(eromptr);
+                return -EINVAL;
+        }
+
+        addrl = ent & SCAN_ADDR_ADDR;
+        if (ent & SCAN_ADDR_AG32)
+                addrh = bcma_erom_get_ent(bus, eromptr);
+        else
+                addrh = 0;
+
+        if ((ent & SCAN_ADDR_SZ) == SCAN_ADDR_SZ_SZD) {
+                size = bcma_erom_get_ent(bus, eromptr);
+                sizel = size & SCAN_SIZE_SZ;
+                if (size & SCAN_SIZE_SG32)
+                        sizeh = bcma_erom_get_ent(bus, eromptr);
+        } else
+                sizel = SCAN_ADDR_SZ_BASE <<
+                        ((ent & SCAN_ADDR_SZ) >> SCAN_ADDR_SZ_SHIFT);
+
+        return addrl;
+}
+
+int bcma_bus_scan(struct bcma_bus *bus)
+{
+        u32 erombase;
+        u32 __iomem *eromptr, *eromend;
+
+        s32 cia, cib;
+        u8 ports[2], wrappers[2];
+
+        s32 tmp;
+        u8 i, j;
+
+        int err;
+
+        INIT_LIST_HEAD(&bus->cores);
+        bus->nr_cores = 0;
+
+        bcma_scan_switch_core(bus, BCMA_ADDR_BASE);
+
+        tmp = bcma_scan_read32(bus, 0, BCMA_CC_ID);
+        bus->chipinfo.id = (tmp & BCMA_CC_ID_ID) >> BCMA_CC_ID_ID_SHIFT;
+        bus->chipinfo.rev = (tmp & BCMA_CC_ID_REV) >> BCMA_CC_ID_REV_SHIFT;
+        bus->chipinfo.pkg = (tmp & BCMA_CC_ID_PKG) >> BCMA_CC_ID_PKG_SHIFT;
+
+        erombase = bcma_scan_read32(bus, 0, BCMA_CC_EROM);
+        eromptr = bus->mmio;
+        eromend = eromptr + BCMA_CORE_SIZE / sizeof(u32);
+
+        bcma_scan_switch_core(bus, erombase);
+
+        while (eromptr < eromend) {
+                struct bcma_device *core = kzalloc(sizeof(*core), GFP_KERNEL);
+                if (!core)
+                        return -ENOMEM;
+                INIT_LIST_HEAD(&core->list);
+                core->bus = bus;
+
+                /* get CIs */
+                cia = bcma_erom_get_ci(bus, &eromptr);
+                if (cia < 0) {
+                        bcma_erom_push_ent(&eromptr);
+                        if (bcma_erom_is_end(bus, &eromptr))
+                                break;
+                        err = -EILSEQ;
+                        goto out;
+                }
+                cib = bcma_erom_get_ci(bus, &eromptr);
+                if (cib < 0) {
+                        err = -EILSEQ;
+                        goto out;
+                }
+
+                /* parse CIs */
+                core->id.class = (cia & SCAN_CIA_CLASS) >> SCAN_CIA_CLASS_SHIFT;
+                core->id.id = (cia & SCAN_CIA_ID) >> SCAN_CIA_ID_SHIFT;
+                core->id.manuf = (cia & SCAN_CIA_MANUF) >> SCAN_CIA_MANUF_SHIFT;
+                ports[0] = (cib & SCAN_CIB_NMP) >> SCAN_CIB_NMP_SHIFT;
+                ports[1] = (cib & SCAN_CIB_NSP) >> SCAN_CIB_NSP_SHIFT;
+                wrappers[0] = (cib & SCAN_CIB_NMW) >> SCAN_CIB_NMW_SHIFT;
+                wrappers[1] = (cib & SCAN_CIB_NSW) >> SCAN_CIB_NSW_SHIFT;
+                core->id.rev = (cib & 
SCAN_CIB_REV) >> SCAN_CIB_REV_SHIFT; + + if (((core->id.manuf == BCMA_MANUF_ARM) && + (core->id.id == 0xFFF)) || + (ports[1] == 0)) { + bcma_erom_skip_component(bus, &eromptr); + continue; + } + + /* check if component is a core at all */ + if (wrappers[0] + wrappers[1] == 0) { + /* we could save addrl of the router + if (cid == BCMA_CORE_OOB_ROUTER) + */ + bcma_erom_skip_component(bus, &eromptr); + continue; + } + + if (bcma_erom_is_bridge(bus, &eromptr)) { + bcma_erom_skip_component(bus, &eromptr); + continue; + } + + /* get & parse master ports */ + for (i = 0; i < ports[0]; i++) { + u32 mst_port_d = bcma_erom_get_mst_port(bus, &eromptr); + if (mst_port_d < 0) { + err= -EILSEQ; + goto out; + } + } + + /* get & parse slave ports */ + for (i = 0; i < ports[1]; i++) { + for (j = 0; ; j++) { + tmp = bcma_erom_get_addr_desc(bus, &eromptr, + SCAN_ADDR_TYPE_SLAVE, i); + if (tmp < 0) { + /* no more entries for port _i_ */ + /* pr_debug("erom: slave port %d " + * "has %d descriptors\n", i, j); */ + break; + } else { + if (i == 0 && j == 0) + core->addr = tmp; + } + } + } + + /* get & parse master wrappers */ + for (i = 0; i < wrappers[0]; i++) { + for (j = 0; ; j++) { + tmp = bcma_erom_get_addr_desc(bus, &eromptr, + SCAN_ADDR_TYPE_MWRAP, i); + if (tmp < 0) { + /* no more entries for port _i_ */ + /* pr_debug("erom: master wrapper %d " + * "has %d descriptors\n", i, j); */ + break; + } else { + if (i == 0 && j == 0) + core->wrap = tmp; + } + } + } + + /* get & parse slave wrappers */ + for (i = 0; i < wrappers[1]; i++) { + u8 hack = (ports[1] == 1) ? 0 : 1; + for (j = 0; ; j++) { + tmp = bcma_erom_get_addr_desc(bus, &eromptr, + SCAN_ADDR_TYPE_SWRAP, i + hack); + if (tmp < 0) { + /* no more entries for port _i_ */ + /* pr_debug("erom: master wrapper %d " + * has %d descriptors\n", i, j); */ + break; + } else { + if (wrappers[0] == 0 && !i && !j) + core->wrap = tmp; + } + } + } + + pr_info("Core %d found: %s " + "(manuf 0x%03X, id 0x%03X, rev 0x%02X, class 0x%X)\n", + bus->nr_cores, bcma_device_name(&core->id), + core->id.manuf, core->id.id, core->id.rev, + core->id.class); + + core->core_index = bus->nr_cores++; + list_add(&core->list, &bus->cores); + continue; +out: + return err; + } + + return 0; +} diff --git a/drivers/bcma/scan.h b/drivers/bcma/scan.h new file mode 100644 index 000000000000..113e6a66884c --- /dev/null +++ b/drivers/bcma/scan.h @@ -0,0 +1,56 @@ +#ifndef BCMA_SCAN_H_ +#define BCMA_SCAN_H_ + +#define BCMA_ADDR_BASE 0x18000000 +#define BCMA_WRAP_BASE 0x18100000 + +#define SCAN_ER_VALID 0x00000001 +#define SCAN_ER_TAGX 0x00000006 /* we have to ignore 0x8 bit when checking tag for SCAN_ER_TAG_ADDR */ +#define SCAN_ER_TAG 0x0000000E +#define SCAN_ER_TAG_CI 0x00000000 +#define SCAN_ER_TAG_MP 0x00000002 +#define SCAN_ER_TAG_ADDR 0x00000004 +#define SCAN_ER_TAG_END 0x0000000E +#define SCAN_ER_BAD 0xFFFFFFFF + +#define SCAN_CIA_CLASS 0x000000F0 +#define SCAN_CIA_CLASS_SHIFT 4 +#define SCAN_CIA_ID 0x000FFF00 +#define SCAN_CIA_ID_SHIFT 8 +#define SCAN_CIA_MANUF 0xFFF00000 +#define SCAN_CIA_MANUF_SHIFT 20 + +#define SCAN_CIB_NMP 0x000001F0 +#define SCAN_CIB_NMP_SHIFT 4 +#define SCAN_CIB_NSP 0x00003E00 +#define SCAN_CIB_NSP_SHIFT 9 +#define SCAN_CIB_NMW 0x0007C000 +#define SCAN_CIB_NMW_SHIFT 14 +#define SCAN_CIB_NSW 0x00F80000 +#define SCAN_CIB_NSW_SHIFT 17 +#define SCAN_CIB_REV 0xFF000000 +#define SCAN_CIB_REV_SHIFT 24 + +#define SCAN_ADDR_AG32 0x00000008 +#define SCAN_ADDR_SZ 0x00000030 +#define SCAN_ADDR_SZ_SHIFT 4 +#define SCAN_ADDR_SZ_4K 0x00000000 +#define SCAN_ADDR_SZ_8K 0x00000010 
+#define SCAN_ADDR_SZ_16K 0x00000020 +#define SCAN_ADDR_SZ_SZD 0x00000030 +#define SCAN_ADDR_TYPE 0x000000C0 +#define SCAN_ADDR_TYPE_SLAVE 0x00000000 +#define SCAN_ADDR_TYPE_BRIDGE 0x00000040 +#define SCAN_ADDR_TYPE_SWRAP 0x00000080 +#define SCAN_ADDR_TYPE_MWRAP 0x000000C0 +#define SCAN_ADDR_PORT 0x00000F00 +#define SCAN_ADDR_PORT_SHIFT 8 +#define SCAN_ADDR_ADDR 0xFFFFF000 + +#define SCAN_ADDR_SZ_BASE 0x00001000 /* 4KB */ + +#define SCAN_SIZE_SZ_ALIGN 0x00000FFF +#define SCAN_SIZE_SZ 0xFFFFF000 +#define SCAN_SIZE_SG32 0x00000008 + +#endif /* BCMA_SCAN_H_ */ diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h new file mode 100644 index 000000000000..08763e4e848f --- /dev/null +++ b/include/linux/bcma/bcma.h @@ -0,0 +1,224 @@ +#ifndef LINUX_BCMA_H_ +#define LINUX_BCMA_H_ + +#include +#include + +#include +#include + +#include "bcma_regs.h" + +struct bcma_device; +struct bcma_bus; + +enum bcma_hosttype { + BCMA_HOSTTYPE_NONE, + BCMA_HOSTTYPE_PCI, + BCMA_HOSTTYPE_SDIO, +}; + +struct bcma_chipinfo { + u16 id; + u8 rev; + u8 pkg; +}; + +struct bcma_host_ops { + u8 (*read8)(struct bcma_device *core, u16 offset); + u16 (*read16)(struct bcma_device *core, u16 offset); + u32 (*read32)(struct bcma_device *core, u16 offset); + void (*write8)(struct bcma_device *core, u16 offset, u8 value); + void (*write16)(struct bcma_device *core, u16 offset, u16 value); + void (*write32)(struct bcma_device *core, u16 offset, u32 value); + /* Agent ops */ + u32 (*aread32)(struct bcma_device *core, u16 offset); + void (*awrite32)(struct bcma_device *core, u16 offset, u32 value); +}; + +/* Core manufacturers */ +#define BCMA_MANUF_ARM 0x43B +#define BCMA_MANUF_MIPS 0x4A7 +#define BCMA_MANUF_BCM 0x4BF + +/* Core class values. */ +#define BCMA_CL_SIM 0x0 +#define BCMA_CL_EROM 0x1 +#define BCMA_CL_CORESIGHT 0x9 +#define BCMA_CL_VERIF 0xB +#define BCMA_CL_OPTIMO 0xD +#define BCMA_CL_GEN 0xE +#define BCMA_CL_PRIMECELL 0xF + +/* Core-ID values. 
*/ +#define BCMA_CORE_OOB_ROUTER 0x367 /* Out of band */ +#define BCMA_CORE_INVALID 0x700 +#define BCMA_CORE_CHIPCOMMON 0x800 +#define BCMA_CORE_ILINE20 0x801 +#define BCMA_CORE_SRAM 0x802 +#define BCMA_CORE_SDRAM 0x803 +#define BCMA_CORE_PCI 0x804 +#define BCMA_CORE_MIPS 0x805 +#define BCMA_CORE_ETHERNET 0x806 +#define BCMA_CORE_V90 0x807 +#define BCMA_CORE_USB11_HOSTDEV 0x808 +#define BCMA_CORE_ADSL 0x809 +#define BCMA_CORE_ILINE100 0x80A +#define BCMA_CORE_IPSEC 0x80B +#define BCMA_CORE_UTOPIA 0x80C +#define BCMA_CORE_PCMCIA 0x80D +#define BCMA_CORE_INTERNAL_MEM 0x80E +#define BCMA_CORE_MEMC_SDRAM 0x80F +#define BCMA_CORE_OFDM 0x810 +#define BCMA_CORE_EXTIF 0x811 +#define BCMA_CORE_80211 0x812 +#define BCMA_CORE_PHY_A 0x813 +#define BCMA_CORE_PHY_B 0x814 +#define BCMA_CORE_PHY_G 0x815 +#define BCMA_CORE_MIPS_3302 0x816 +#define BCMA_CORE_USB11_HOST 0x817 +#define BCMA_CORE_USB11_DEV 0x818 +#define BCMA_CORE_USB20_HOST 0x819 +#define BCMA_CORE_USB20_DEV 0x81A +#define BCMA_CORE_SDIO_HOST 0x81B +#define BCMA_CORE_ROBOSWITCH 0x81C +#define BCMA_CORE_PARA_ATA 0x81D +#define BCMA_CORE_SATA_XORDMA 0x81E +#define BCMA_CORE_ETHERNET_GBIT 0x81F +#define BCMA_CORE_PCIE 0x820 +#define BCMA_CORE_PHY_N 0x821 +#define BCMA_CORE_SRAM_CTL 0x822 +#define BCMA_CORE_MINI_MACPHY 0x823 +#define BCMA_CORE_ARM_1176 0x824 +#define BCMA_CORE_ARM_7TDMI 0x825 +#define BCMA_CORE_PHY_LP 0x826 +#define BCMA_CORE_PMU 0x827 +#define BCMA_CORE_PHY_SSN 0x828 +#define BCMA_CORE_SDIO_DEV 0x829 +#define BCMA_CORE_ARM_CM3 0x82A +#define BCMA_CORE_PHY_HT 0x82B +#define BCMA_CORE_MIPS_74K 0x82C +#define BCMA_CORE_MAC_GBIT 0x82D +#define BCMA_CORE_DDR12_MEM_CTL 0x82E +#define BCMA_CORE_PCIE_RC 0x82F /* PCIe Root Complex */ +#define BCMA_CORE_OCP_OCP_BRIDGE 0x830 +#define BCMA_CORE_SHARED_COMMON 0x831 +#define BCMA_CORE_OCP_AHB_BRIDGE 0x832 +#define BCMA_CORE_SPI_HOST 0x833 +#define BCMA_CORE_I2S 0x834 +#define BCMA_CORE_SDR_DDR1_MEM_CTL 0x835 /* SDR/DDR1 memory controller core */ +#define BCMA_CORE_SHIM 0x837 /* SHIM component in ubus/6362 */ +#define BCMA_CORE_DEFAULT 0xFFF + +#define BCMA_MAX_NR_CORES 16 + +struct bcma_device { + struct bcma_bus *bus; + struct bcma_device_id id; + + struct device dev; + bool dev_registered; + + u8 core_index; + + u32 addr; + u32 wrap; + + void *drvdata; + struct list_head list; +}; + +static inline void *bcma_get_drvdata(struct bcma_device *core) +{ + return core->drvdata; +} +static inline void bcma_set_drvdata(struct bcma_device *core, void *drvdata) +{ + core->drvdata = drvdata; +} + +struct bcma_driver { + const char *name; + const struct bcma_device_id *id_table; + + int (*probe)(struct bcma_device *dev); + void (*remove)(struct bcma_device *dev); + int (*suspend)(struct bcma_device *dev, pm_message_t state); + int (*resume)(struct bcma_device *dev); + void (*shutdown)(struct bcma_device *dev); + + struct device_driver drv; +}; +extern +int __bcma_driver_register(struct bcma_driver *drv, struct module *owner); +static inline int bcma_driver_register(struct bcma_driver *drv) +{ + return __bcma_driver_register(drv, THIS_MODULE); +} +extern void bcma_driver_unregister(struct bcma_driver *drv); + +struct bcma_bus { + /* The MMIO area. 
*/ + void __iomem *mmio; + + const struct bcma_host_ops *ops; + + enum bcma_hosttype hosttype; + union { + /* Pointer to the PCI bus (only for BCMA_HOSTTYPE_PCI) */ + struct pci_dev *host_pci; + /* Pointer to the SDIO device (only for BCMA_HOSTTYPE_SDIO) */ + struct sdio_func *host_sdio; + }; + + struct bcma_chipinfo chipinfo; + + struct bcma_device *mapped_core; + struct list_head cores; + u8 nr_cores; + + struct bcma_drv_cc drv_cc; + struct bcma_drv_pci drv_pci; +}; + +extern inline u32 bcma_read8(struct bcma_device *core, u16 offset) +{ + return core->bus->ops->read8(core, offset); +} +extern inline u32 bcma_read16(struct bcma_device *core, u16 offset) +{ + return core->bus->ops->read16(core, offset); +} +extern inline u32 bcma_read32(struct bcma_device *core, u16 offset) +{ + return core->bus->ops->read32(core, offset); +} +extern inline +void bcma_write8(struct bcma_device *core, u16 offset, u32 value) +{ + core->bus->ops->write8(core, offset, value); +} +extern inline +void bcma_write16(struct bcma_device *core, u16 offset, u32 value) +{ + core->bus->ops->write16(core, offset, value); +} +extern inline +void bcma_write32(struct bcma_device *core, u16 offset, u32 value) +{ + core->bus->ops->write32(core, offset, value); +} +extern inline u32 bcma_aread32(struct bcma_device *core, u16 offset) +{ + return core->bus->ops->aread32(core, offset); +} +extern inline +void bcma_awrite32(struct bcma_device *core, u16 offset, u32 value) +{ + core->bus->ops->awrite32(core, offset, value); +} + +extern bool bcma_core_is_enabled(struct bcma_device *core); +extern int bcma_core_enable(struct bcma_device *core, u32 flags); + +#endif /* LINUX_BCMA_H_ */ diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h new file mode 100644 index 000000000000..4f8fd6a4c1e6 --- /dev/null +++ b/include/linux/bcma/bcma_driver_chipcommon.h @@ -0,0 +1,297 @@ +#ifndef LINUX_BCMA_DRIVER_CC_H_ +#define LINUX_BCMA_DRIVER_CC_H_ + +/** ChipCommon core registers. 
**/ +#define BCMA_CC_ID 0x0000 +#define BCMA_CC_ID_ID 0x0000FFFF +#define BCMA_CC_ID_ID_SHIFT 0 +#define BCMA_CC_ID_REV 0x000F0000 +#define BCMA_CC_ID_REV_SHIFT 16 +#define BCMA_CC_ID_PKG 0x00F00000 +#define BCMA_CC_ID_PKG_SHIFT 20 +#define BCMA_CC_ID_NRCORES 0x0F000000 +#define BCMA_CC_ID_NRCORES_SHIFT 24 +#define BCMA_CC_ID_TYPE 0xF0000000 +#define BCMA_CC_ID_TYPE_SHIFT 28 +#define BCMA_CC_CAP 0x0004 /* Capabilities */ +#define BCMA_CC_CAP_NRUART 0x00000003 /* # of UARTs */ +#define BCMA_CC_CAP_MIPSEB 0x00000004 /* MIPS in BigEndian Mode */ +#define BCMA_CC_CAP_UARTCLK 0x00000018 /* UART clock select */ +#define BCMA_CC_CAP_UARTCLK_INT 0x00000008 /* UARTs are driven by internal divided clock */ +#define BCMA_CC_CAP_UARTGPIO 0x00000020 /* UARTs on GPIO 15-12 */ +#define BCMA_CC_CAP_EXTBUS 0x000000C0 /* External buses present */ +#define BCMA_CC_CAP_FLASHT 0x00000700 /* Flash Type */ +#define BCMA_CC_FLASHT_NONE 0x00000000 /* No flash */ +#define BCMA_CC_FLASHT_STSER 0x00000100 /* ST serial flash */ +#define BCMA_CC_FLASHT_ATSER 0x00000200 /* Atmel serial flash */ +#define BCMA_CC_FLASHT_PARA 0x00000700 /* Parallel flash */ +#define BCMA_CC_CAP_PLLT 0x00038000 /* PLL Type */ +#define BCMA_PLLTYPE_NONE 0x00000000 +#define BCMA_PLLTYPE_1 0x00010000 /* 48Mhz base, 3 dividers */ +#define BCMA_PLLTYPE_2 0x00020000 /* 48Mhz, 4 dividers */ +#define BCMA_PLLTYPE_3 0x00030000 /* 25Mhz, 2 dividers */ +#define BCMA_PLLTYPE_4 0x00008000 /* 48Mhz, 4 dividers */ +#define BCMA_PLLTYPE_5 0x00018000 /* 25Mhz, 4 dividers */ +#define BCMA_PLLTYPE_6 0x00028000 /* 100/200 or 120/240 only */ +#define BCMA_PLLTYPE_7 0x00038000 /* 25Mhz, 4 dividers */ +#define BCMA_CC_CAP_PCTL 0x00040000 /* Power Control */ +#define BCMA_CC_CAP_OTPS 0x00380000 /* OTP size */ +#define BCMA_CC_CAP_OTPS_SHIFT 19 +#define BCMA_CC_CAP_OTPS_BASE 5 +#define BCMA_CC_CAP_JTAGM 0x00400000 /* JTAG master present */ +#define BCMA_CC_CAP_BROM 0x00800000 /* Internal boot ROM active */ +#define BCMA_CC_CAP_64BIT 0x08000000 /* 64-bit Backplane */ +#define BCMA_CC_CAP_PMU 0x10000000 /* PMU available (rev >= 20) */ +#define BCMA_CC_CAP_ECI 0x20000000 /* ECI available (rev >= 20) */ +#define BCMA_CC_CAP_SPROM 0x40000000 /* SPROM present */ +#define BCMA_CC_CORECTL 0x0008 +#define BCMA_CC_CORECTL_UARTCLK0 0x00000001 /* Drive UART with internal clock */ +#define BCMA_CC_CORECTL_SE 0x00000002 /* sync clk out enable (corerev >= 3) */ +#define BCMA_CC_CORECTL_UARTCLKEN 0x00000008 /* UART clock enable (rev >= 21) */ +#define BCMA_CC_BIST 0x000C +#define BCMA_CC_OTPS 0x0010 /* OTP status */ +#define BCMA_CC_OTPS_PROGFAIL 0x80000000 +#define BCMA_CC_OTPS_PROTECT 0x00000007 +#define BCMA_CC_OTPS_HW_PROTECT 0x00000001 +#define BCMA_CC_OTPS_SW_PROTECT 0x00000002 +#define BCMA_CC_OTPS_CID_PROTECT 0x00000004 +#define BCMA_CC_OTPC 0x0014 /* OTP control */ +#define BCMA_CC_OTPC_RECWAIT 0xFF000000 +#define BCMA_CC_OTPC_PROGWAIT 0x00FFFF00 +#define BCMA_CC_OTPC_PRW_SHIFT 8 +#define BCMA_CC_OTPC_MAXFAIL 0x00000038 +#define BCMA_CC_OTPC_VSEL 0x00000006 +#define BCMA_CC_OTPC_SELVL 0x00000001 +#define BCMA_CC_OTPP 0x0018 /* OTP prog */ +#define BCMA_CC_OTPP_COL 0x000000FF +#define BCMA_CC_OTPP_ROW 0x0000FF00 +#define BCMA_CC_OTPP_ROW_SHIFT 8 +#define BCMA_CC_OTPP_READERR 0x10000000 +#define BCMA_CC_OTPP_VALUE 0x20000000 +#define BCMA_CC_OTPP_READ 0x40000000 +#define BCMA_CC_OTPP_START 0x80000000 +#define BCMA_CC_OTPP_BUSY 0x80000000 +#define BCMA_CC_IRQSTAT 0x0020 +#define BCMA_CC_IRQMASK 0x0024 +#define BCMA_CC_IRQ_GPIO 0x00000001 /* gpio intr */ +#define 
BCMA_CC_IRQ_EXT 0x00000002 /* ro: ext intr pin (corerev >= 3) */ +#define BCMA_CC_IRQ_WDRESET 0x80000000 /* watchdog reset occurred */ +#define BCMA_CC_CHIPCTL 0x0028 /* Rev >= 11 only */ +#define BCMA_CC_CHIPSTAT 0x002C /* Rev >= 11 only */ +#define BCMA_CC_JCMD 0x0030 /* Rev >= 10 only */ +#define BCMA_CC_JCMD_START 0x80000000 +#define BCMA_CC_JCMD_BUSY 0x80000000 +#define BCMA_CC_JCMD_PAUSE 0x40000000 +#define BCMA_CC_JCMD0_ACC_MASK 0x0000F000 +#define BCMA_CC_JCMD0_ACC_IRDR 0x00000000 +#define BCMA_CC_JCMD0_ACC_DR 0x00001000 +#define BCMA_CC_JCMD0_ACC_IR 0x00002000 +#define BCMA_CC_JCMD0_ACC_RESET 0x00003000 +#define BCMA_CC_JCMD0_ACC_IRPDR 0x00004000 +#define BCMA_CC_JCMD0_ACC_PDR 0x00005000 +#define BCMA_CC_JCMD0_IRW_MASK 0x00000F00 +#define BCMA_CC_JCMD_ACC_MASK 0x000F0000 /* Changes for corerev 11 */ +#define BCMA_CC_JCMD_ACC_IRDR 0x00000000 +#define BCMA_CC_JCMD_ACC_DR 0x00010000 +#define BCMA_CC_JCMD_ACC_IR 0x00020000 +#define BCMA_CC_JCMD_ACC_RESET 0x00030000 +#define BCMA_CC_JCMD_ACC_IRPDR 0x00040000 +#define BCMA_CC_JCMD_ACC_PDR 0x00050000 +#define BCMA_CC_JCMD_IRW_MASK 0x00001F00 +#define BCMA_CC_JCMD_IRW_SHIFT 8 +#define BCMA_CC_JCMD_DRW_MASK 0x0000003F +#define BCMA_CC_JIR 0x0034 /* Rev >= 10 only */ +#define BCMA_CC_JDR 0x0038 /* Rev >= 10 only */ +#define BCMA_CC_JCTL 0x003C /* Rev >= 10 only */ +#define BCMA_CC_JCTL_FORCE_CLK 4 /* Force clock */ +#define BCMA_CC_JCTL_EXT_EN 2 /* Enable external targets */ +#define BCMA_CC_JCTL_EN 1 /* Enable Jtag master */ +#define BCMA_CC_FLASHCTL 0x0040 +#define BCMA_CC_FLASHCTL_START 0x80000000 +#define BCMA_CC_FLASHCTL_BUSY BCMA_CC_FLASHCTL_START +#define BCMA_CC_FLASHADDR 0x0044 +#define BCMA_CC_FLASHDATA 0x0048 +#define BCMA_CC_BCAST_ADDR 0x0050 +#define BCMA_CC_BCAST_DATA 0x0054 +#define BCMA_CC_GPIOIN 0x0060 +#define BCMA_CC_GPIOOUT 0x0064 +#define BCMA_CC_GPIOOUTEN 0x0068 +#define BCMA_CC_GPIOCTL 0x006C +#define BCMA_CC_GPIOPOL 0x0070 +#define BCMA_CC_GPIOIRQ 0x0074 +#define BCMA_CC_WATCHDOG 0x0080 +#define BCMA_CC_GPIOTIMER 0x0088 /* LED powersave (corerev >= 16) */ +#define BCMA_CC_GPIOTIMER_ONTIME_SHIFT 16 +#define BCMA_CC_GPIOTOUTM 0x008C /* LED powersave (corerev >= 16) */ +#define BCMA_CC_CLOCK_N 0x0090 +#define BCMA_CC_CLOCK_SB 0x0094 +#define BCMA_CC_CLOCK_PCI 0x0098 +#define BCMA_CC_CLOCK_M2 0x009C +#define BCMA_CC_CLOCK_MIPS 0x00A0 +#define BCMA_CC_CLKDIV 0x00A4 /* Rev >= 3 only */ +#define BCMA_CC_CLKDIV_SFLASH 0x0F000000 +#define BCMA_CC_CLKDIV_SFLASH_SHIFT 24 +#define BCMA_CC_CLKDIV_OTP 0x000F0000 +#define BCMA_CC_CLKDIV_OTP_SHIFT 16 +#define BCMA_CC_CLKDIV_JTAG 0x00000F00 +#define BCMA_CC_CLKDIV_JTAG_SHIFT 8 +#define BCMA_CC_CLKDIV_UART 0x000000FF +#define BCMA_CC_CAP_EXT 0x00AC /* Capabilities */ +#define BCMA_CC_PLLONDELAY 0x00B0 /* Rev >= 4 only */ +#define BCMA_CC_FREFSELDELAY 0x00B4 /* Rev >= 4 only */ +#define BCMA_CC_SLOWCLKCTL 0x00B8 /* 6 <= Rev <= 9 only */ +#define BCMA_CC_SLOWCLKCTL_SRC 0x00000007 /* slow clock source mask */ +#define BCMA_CC_SLOWCLKCTL_SRC_LPO 0x00000000 /* source of slow clock is LPO */ +#define BCMA_CC_SLOWCLKCTL_SRC_XTAL 0x00000001 /* source of slow clock is crystal */ +#define BCMA_CC_SLOECLKCTL_SRC_PCI 0x00000002 /* source of slow clock is PCI */ +#define BCMA_CC_SLOWCLKCTL_LPOFREQ 0x00000200 /* LPOFreqSel, 1: 160Khz, 0: 32KHz */ +#define BCMA_CC_SLOWCLKCTL_LPOPD 0x00000400 /* LPOPowerDown, 1: LPO is disabled, 0: LPO is enabled */ +#define BCMA_CC_SLOWCLKCTL_FSLOW 0x00000800 /* ForceSlowClk, 1: sb/cores running on slow clock, 0: power logic control */ +#define 
BCMA_CC_SLOWCLKCTL_IPLL 0x00001000 /* IgnorePllOffReq, 1/0: power logic ignores/honors PLL clock disable requests from core */ +#define BCMA_CC_SLOWCLKCTL_ENXTAL 0x00002000 /* XtalControlEn, 1/0: power logic does/doesn't disable crystal when appropriate */ +#define BCMA_CC_SLOWCLKCTL_XTALPU 0x00004000 /* XtalPU (RO), 1/0: crystal running/disabled */ +#define BCMA_CC_SLOWCLKCTL_CLKDIV 0xFFFF0000 /* ClockDivider (SlowClk = 1/(4+divisor)) */ +#define BCMA_CC_SLOWCLKCTL_CLKDIV_SHIFT 16 +#define BCMA_CC_SYSCLKCTL 0x00C0 /* Rev >= 3 only */ +#define BCMA_CC_SYSCLKCTL_IDLPEN 0x00000001 /* ILPen: Enable Idle Low Power */ +#define BCMA_CC_SYSCLKCTL_ALPEN 0x00000002 /* ALPen: Enable Active Low Power */ +#define BCMA_CC_SYSCLKCTL_PLLEN 0x00000004 /* ForcePLLOn */ +#define BCMA_CC_SYSCLKCTL_FORCEALP 0x00000008 /* Force ALP (or HT if ALPen is not set */ +#define BCMA_CC_SYSCLKCTL_FORCEHT 0x00000010 /* Force HT */ +#define BCMA_CC_SYSCLKCTL_CLKDIV 0xFFFF0000 /* ClkDiv (ILP = 1/(4+divisor)) */ +#define BCMA_CC_SYSCLKCTL_CLKDIV_SHIFT 16 +#define BCMA_CC_CLKSTSTR 0x00C4 /* Rev >= 3 only */ +#define BCMA_CC_EROM 0x00FC +#define BCMA_CC_PCMCIA_CFG 0x0100 +#define BCMA_CC_PCMCIA_MEMWAIT 0x0104 +#define BCMA_CC_PCMCIA_ATTRWAIT 0x0108 +#define BCMA_CC_PCMCIA_IOWAIT 0x010C +#define BCMA_CC_IDE_CFG 0x0110 +#define BCMA_CC_IDE_MEMWAIT 0x0114 +#define BCMA_CC_IDE_ATTRWAIT 0x0118 +#define BCMA_CC_IDE_IOWAIT 0x011C +#define BCMA_CC_PROG_CFG 0x0120 +#define BCMA_CC_PROG_WAITCNT 0x0124 +#define BCMA_CC_FLASH_CFG 0x0128 +#define BCMA_CC_FLASH_WAITCNT 0x012C +#define BCMA_CC_CLKCTLST 0x01E0 /* Clock control and status (rev >= 20) */ +#define BCMA_CC_CLKCTLST_FORCEALP 0x00000001 /* Force ALP request */ +#define BCMA_CC_CLKCTLST_FORCEHT 0x00000002 /* Force HT request */ +#define BCMA_CC_CLKCTLST_FORCEILP 0x00000004 /* Force ILP request */ +#define BCMA_CC_CLKCTLST_HAVEALPREQ 0x00000008 /* ALP available request */ +#define BCMA_CC_CLKCTLST_HAVEHTREQ 0x00000010 /* HT available request */ +#define BCMA_CC_CLKCTLST_HWCROFF 0x00000020 /* Force HW clock request off */ +#define BCMA_CC_CLKCTLST_HAVEHT 0x00010000 /* HT available */ +#define BCMA_CC_CLKCTLST_HAVEALP 0x00020000 /* APL available */ +#define BCMA_CC_HW_WORKAROUND 0x01E4 /* Hardware workaround (rev >= 20) */ +#define BCMA_CC_UART0_DATA 0x0300 +#define BCMA_CC_UART0_IMR 0x0304 +#define BCMA_CC_UART0_FCR 0x0308 +#define BCMA_CC_UART0_LCR 0x030C +#define BCMA_CC_UART0_MCR 0x0310 +#define BCMA_CC_UART0_LSR 0x0314 +#define BCMA_CC_UART0_MSR 0x0318 +#define BCMA_CC_UART0_SCRATCH 0x031C +#define BCMA_CC_UART1_DATA 0x0400 +#define BCMA_CC_UART1_IMR 0x0404 +#define BCMA_CC_UART1_FCR 0x0408 +#define BCMA_CC_UART1_LCR 0x040C +#define BCMA_CC_UART1_MCR 0x0410 +#define BCMA_CC_UART1_LSR 0x0414 +#define BCMA_CC_UART1_MSR 0x0418 +#define BCMA_CC_UART1_SCRATCH 0x041C +/* PMU registers (rev >= 20) */ +#define BCMA_CC_PMU_CTL 0x0600 /* PMU control */ +#define BCMA_CC_PMU_CTL_ILP_DIV 0xFFFF0000 /* ILP div mask */ +#define BCMA_CC_PMU_CTL_ILP_DIV_SHIFT 16 +#define BCMA_CC_PMU_CTL_NOILPONW 0x00000200 /* No ILP on wait */ +#define BCMA_CC_PMU_CTL_HTREQEN 0x00000100 /* HT req enable */ +#define BCMA_CC_PMU_CTL_ALPREQEN 0x00000080 /* ALP req enable */ +#define BCMA_CC_PMU_CTL_XTALFREQ 0x0000007C /* Crystal freq */ +#define BCMA_CC_PMU_CTL_XTALFREQ_SHIFT 2 +#define BCMA_CC_PMU_CTL_ILPDIVEN 0x00000002 /* ILP div enable */ +#define BCMA_CC_PMU_CTL_LPOSEL 0x00000001 /* LPO sel */ +#define BCMA_CC_PMU_CAP 0x0604 /* PMU capabilities */ +#define BCMA_CC_PMU_CAP_REVISION 0x000000FF /* Revision mask 
*/ +#define BCMA_CC_PMU_STAT 0x0608 /* PMU status */ +#define BCMA_CC_PMU_STAT_INTPEND 0x00000040 /* Interrupt pending */ +#define BCMA_CC_PMU_STAT_SBCLKST 0x00000030 /* Backplane clock status? */ +#define BCMA_CC_PMU_STAT_HAVEALP 0x00000008 /* ALP available */ +#define BCMA_CC_PMU_STAT_HAVEHT 0x00000004 /* HT available */ +#define BCMA_CC_PMU_STAT_RESINIT 0x00000003 /* Res init */ +#define BCMA_CC_PMU_RES_STAT 0x060C /* PMU res status */ +#define BCMA_CC_PMU_RES_PEND 0x0610 /* PMU res pending */ +#define BCMA_CC_PMU_TIMER 0x0614 /* PMU timer */ +#define BCMA_CC_PMU_MINRES_MSK 0x0618 /* PMU min res mask */ +#define BCMA_CC_PMU_MAXRES_MSK 0x061C /* PMU max res mask */ +#define BCMA_CC_PMU_RES_TABSEL 0x0620 /* PMU res table sel */ +#define BCMA_CC_PMU_RES_DEPMSK 0x0624 /* PMU res dep mask */ +#define BCMA_CC_PMU_RES_UPDNTM 0x0628 /* PMU res updown timer */ +#define BCMA_CC_PMU_RES_TIMER 0x062C /* PMU res timer */ +#define BCMA_CC_PMU_CLKSTRETCH 0x0630 /* PMU clockstretch */ +#define BCMA_CC_PMU_WATCHDOG 0x0634 /* PMU watchdog */ +#define BCMA_CC_PMU_RES_REQTS 0x0640 /* PMU res req timer sel */ +#define BCMA_CC_PMU_RES_REQT 0x0644 /* PMU res req timer */ +#define BCMA_CC_PMU_RES_REQM 0x0648 /* PMU res req mask */ +#define BCMA_CC_CHIPCTL_ADDR 0x0650 +#define BCMA_CC_CHIPCTL_DATA 0x0654 +#define BCMA_CC_REGCTL_ADDR 0x0658 +#define BCMA_CC_REGCTL_DATA 0x065C +#define BCMA_CC_PLLCTL_ADDR 0x0660 +#define BCMA_CC_PLLCTL_DATA 0x0664 + +/* Data for the PMU, if available. + * Check availability with ((struct bcma_chipcommon)->capabilities & BCMA_CC_CAP_PMU) + */ +struct bcma_chipcommon_pmu { + u8 rev; /* PMU revision */ + u32 crystalfreq; /* The active crystal frequency (in kHz) */ +}; + +struct bcma_drv_cc { + struct bcma_device *core; + u32 status; + u32 capabilities; + u32 capabilities_ext; + /* Fast Powerup Delay constant */ + u16 fast_pwrup_delay; + struct bcma_chipcommon_pmu pmu; +}; + +/* Register access */ +#define bcma_cc_read32(cc, offset) \ + bcma_read32((cc)->core, offset) +#define bcma_cc_write32(cc, offset, val) \ + bcma_write32((cc)->core, offset, val) + +#define bcma_cc_mask32(cc, offset, mask) \ + bcma_cc_write32(cc, offset, bcma_cc_read32(cc, offset) & (mask)) +#define bcma_cc_set32(cc, offset, set) \ + bcma_cc_write32(cc, offset, bcma_cc_read32(cc, offset) | (set)) +#define bcma_cc_maskset32(cc, offset, mask, set) \ + bcma_cc_write32(cc, offset, (bcma_cc_read32(cc, offset) & (mask)) | (set)) + +extern void bcma_core_chipcommon_init(struct bcma_drv_cc *cc); + +extern void bcma_chipco_suspend(struct bcma_drv_cc *cc); +extern void bcma_chipco_resume(struct bcma_drv_cc *cc); + +extern void bcma_chipco_watchdog_timer_set(struct bcma_drv_cc *cc, + u32 ticks); + +void bcma_chipco_irq_mask(struct bcma_drv_cc *cc, u32 mask, u32 value); + +u32 bcma_chipco_irq_status(struct bcma_drv_cc *cc, u32 mask); + +/* Chipcommon GPIO pin access. 
*/ +u32 bcma_chipco_gpio_in(struct bcma_drv_cc *cc, u32 mask); +u32 bcma_chipco_gpio_out(struct bcma_drv_cc *cc, u32 mask, u32 value); +u32 bcma_chipco_gpio_outen(struct bcma_drv_cc *cc, u32 mask, u32 value); +u32 bcma_chipco_gpio_control(struct bcma_drv_cc *cc, u32 mask, u32 value); +u32 bcma_chipco_gpio_intmask(struct bcma_drv_cc *cc, u32 mask, u32 value); +u32 bcma_chipco_gpio_polarity(struct bcma_drv_cc *cc, u32 mask, u32 value); + +/* PMU support */ +extern void bcma_pmu_init(struct bcma_drv_cc *cc); + +#endif /* LINUX_BCMA_DRIVER_CC_H_ */ diff --git a/include/linux/bcma/bcma_driver_pci.h b/include/linux/bcma/bcma_driver_pci.h new file mode 100644 index 000000000000..b7e191cf00ec --- /dev/null +++ b/include/linux/bcma/bcma_driver_pci.h @@ -0,0 +1,89 @@ +#ifndef LINUX_BCMA_DRIVER_PCI_H_ +#define LINUX_BCMA_DRIVER_PCI_H_ + +#include + +struct pci_dev; + +/** PCI core registers. **/ +#define BCMA_CORE_PCI_CTL 0x0000 /* PCI Control */ +#define BCMA_CORE_PCI_CTL_RST_OE 0x00000001 /* PCI_RESET Output Enable */ +#define BCMA_CORE_PCI_CTL_RST 0x00000002 /* PCI_RESET driven out to pin */ +#define BCMA_CORE_PCI_CTL_CLK_OE 0x00000004 /* Clock gate Output Enable */ +#define BCMA_CORE_PCI_CTL_CLK 0x00000008 /* Gate for clock driven out to pin */ +#define BCMA_CORE_PCI_ARBCTL 0x0010 /* PCI Arbiter Control */ +#define BCMA_CORE_PCI_ARBCTL_INTERN 0x00000001 /* Use internal arbiter */ +#define BCMA_CORE_PCI_ARBCTL_EXTERN 0x00000002 /* Use external arbiter */ +#define BCMA_CORE_PCI_ARBCTL_PARKID 0x00000006 /* Mask, selects which agent is parked on an idle bus */ +#define BCMA_CORE_PCI_ARBCTL_PARKID_LAST 0x00000000 /* Last requestor */ +#define BCMA_CORE_PCI_ARBCTL_PARKID_4710 0x00000002 /* 4710 */ +#define BCMA_CORE_PCI_ARBCTL_PARKID_EXT0 0x00000004 /* External requestor 0 */ +#define BCMA_CORE_PCI_ARBCTL_PARKID_EXT1 0x00000006 /* External requestor 1 */ +#define BCMA_CORE_PCI_ISTAT 0x0020 /* Interrupt status */ +#define BCMA_CORE_PCI_ISTAT_INTA 0x00000001 /* PCI INTA# */ +#define BCMA_CORE_PCI_ISTAT_INTB 0x00000002 /* PCI INTB# */ +#define BCMA_CORE_PCI_ISTAT_SERR 0x00000004 /* PCI SERR# (write to clear) */ +#define BCMA_CORE_PCI_ISTAT_PERR 0x00000008 /* PCI PERR# (write to clear) */ +#define BCMA_CORE_PCI_ISTAT_PME 0x00000010 /* PCI PME# */ +#define BCMA_CORE_PCI_IMASK 0x0024 /* Interrupt mask */ +#define BCMA_CORE_PCI_IMASK_INTA 0x00000001 /* PCI INTA# */ +#define BCMA_CORE_PCI_IMASK_INTB 0x00000002 /* PCI INTB# */ +#define BCMA_CORE_PCI_IMASK_SERR 0x00000004 /* PCI SERR# */ +#define BCMA_CORE_PCI_IMASK_PERR 0x00000008 /* PCI PERR# */ +#define BCMA_CORE_PCI_IMASK_PME 0x00000010 /* PCI PME# */ +#define BCMA_CORE_PCI_MBOX 0x0028 /* Backplane to PCI Mailbox */ +#define BCMA_CORE_PCI_MBOX_F0_0 0x00000100 /* PCI function 0, INT 0 */ +#define BCMA_CORE_PCI_MBOX_F0_1 0x00000200 /* PCI function 0, INT 1 */ +#define BCMA_CORE_PCI_MBOX_F1_0 0x00000400 /* PCI function 1, INT 0 */ +#define BCMA_CORE_PCI_MBOX_F1_1 0x00000800 /* PCI function 1, INT 1 */ +#define BCMA_CORE_PCI_MBOX_F2_0 0x00001000 /* PCI function 2, INT 0 */ +#define BCMA_CORE_PCI_MBOX_F2_1 0x00002000 /* PCI function 2, INT 1 */ +#define BCMA_CORE_PCI_MBOX_F3_0 0x00004000 /* PCI function 3, INT 0 */ +#define BCMA_CORE_PCI_MBOX_F3_1 0x00008000 /* PCI function 3, INT 1 */ +#define BCMA_CORE_PCI_BCAST_ADDR 0x0050 /* Backplane Broadcast Address */ +#define BCMA_CORE_PCI_BCAST_ADDR_MASK 0x000000FF +#define BCMA_CORE_PCI_BCAST_DATA 0x0054 /* Backplane Broadcast Data */ +#define BCMA_CORE_PCI_GPIO_IN 0x0060 /* rev >= 2 only */ +#define 
BCMA_CORE_PCI_GPIO_OUT 0x0064 /* rev >= 2 only */ +#define BCMA_CORE_PCI_GPIO_ENABLE 0x0068 /* rev >= 2 only */ +#define BCMA_CORE_PCI_GPIO_CTL 0x006C /* rev >= 2 only */ +#define BCMA_CORE_PCI_SBTOPCI0 0x0100 /* Backplane to PCI translation 0 (sbtopci0) */ +#define BCMA_CORE_PCI_SBTOPCI0_MASK 0xFC000000 +#define BCMA_CORE_PCI_SBTOPCI1 0x0104 /* Backplane to PCI translation 1 (sbtopci1) */ +#define BCMA_CORE_PCI_SBTOPCI1_MASK 0xFC000000 +#define BCMA_CORE_PCI_SBTOPCI2 0x0108 /* Backplane to PCI translation 2 (sbtopci2) */ +#define BCMA_CORE_PCI_SBTOPCI2_MASK 0xC0000000 +#define BCMA_CORE_PCI_PCICFG0 0x0400 /* PCI config space 0 (rev >= 8) */ +#define BCMA_CORE_PCI_PCICFG1 0x0500 /* PCI config space 1 (rev >= 8) */ +#define BCMA_CORE_PCI_PCICFG2 0x0600 /* PCI config space 2 (rev >= 8) */ +#define BCMA_CORE_PCI_PCICFG3 0x0700 /* PCI config space 3 (rev >= 8) */ +#define BCMA_CORE_PCI_SPROM(wordoffset) (0x0800 + ((wordoffset) * 2)) /* SPROM shadow area (72 bytes) */ + +/* SBtoPCIx */ +#define BCMA_CORE_PCI_SBTOPCI_MEM 0x00000000 +#define BCMA_CORE_PCI_SBTOPCI_IO 0x00000001 +#define BCMA_CORE_PCI_SBTOPCI_CFG0 0x00000002 +#define BCMA_CORE_PCI_SBTOPCI_CFG1 0x00000003 +#define BCMA_CORE_PCI_SBTOPCI_PREF 0x00000004 /* Prefetch enable */ +#define BCMA_CORE_PCI_SBTOPCI_BURST 0x00000008 /* Burst enable */ +#define BCMA_CORE_PCI_SBTOPCI_MRM 0x00000020 /* Memory Read Multiple */ +#define BCMA_CORE_PCI_SBTOPCI_RC 0x00000030 /* Read Command mask (rev >= 11) */ +#define BCMA_CORE_PCI_SBTOPCI_RC_READ 0x00000000 /* Memory read */ +#define BCMA_CORE_PCI_SBTOPCI_RC_READL 0x00000010 /* Memory read line */ +#define BCMA_CORE_PCI_SBTOPCI_RC_READM 0x00000020 /* Memory read multiple */ + +/* PCIcore specific boardflags */ +#define BCMA_CORE_PCI_BFL_NOPCI 0x00000400 /* Board leaves PCI floating */ + +struct bcma_drv_pci { + struct bcma_device *core; + u8 setup_done:1; +}; + +/* Register access */ +#define pcicore_read32(pc, offset) bcma_read32((pc)->core, offset) +#define pcicore_write32(pc, offset, val) bcma_write32((pc)->core, offset, val) + +extern void bcma_core_pci_init(struct bcma_drv_pci *pc); + +#endif /* LINUX_BCMA_DRIVER_PCI_H_ */ diff --git a/include/linux/bcma/bcma_regs.h b/include/linux/bcma/bcma_regs.h new file mode 100644 index 000000000000..f82d88a960ce --- /dev/null +++ b/include/linux/bcma/bcma_regs.h @@ -0,0 +1,34 @@ +#ifndef LINUX_BCMA_REGS_H_ +#define LINUX_BCMA_REGS_H_ + +/* Agent registers (common for every core) */ +#define BCMA_IOCTL 0x0408 +#define BCMA_IOCTL_CLK 0x0001 +#define BCMA_IOCTL_FGC 0x0002 +#define BCMA_IOCTL_CORE_BITS 0x3FFC +#define BCMA_IOCTL_PME_EN 0x4000 +#define BCMA_IOCTL_BIST_EN 0x8000 +#define BCMA_RESET_CTL 0x0800 +#define BCMA_RESET_CTL_RESET 0x0001 + +/* BCMA PCI config space registers. 
*/ +#define BCMA_PCI_PMCSR 0x44 +#define BCMA_PCI_PE 0x100 +#define BCMA_PCI_BAR0_WIN 0x80 /* Backplane address space 0 */ +#define BCMA_PCI_BAR1_WIN 0x84 /* Backplane address space 1 */ +#define BCMA_PCI_SPROMCTL 0x88 /* SPROM control */ +#define BCMA_PCI_SPROMCTL_WE 0x10 /* SPROM write enable */ +#define BCMA_PCI_BAR1_CONTROL 0x8c /* Address space 1 burst control */ +#define BCMA_PCI_IRQS 0x90 /* PCI interrupts */ +#define BCMA_PCI_IRQMASK 0x94 /* PCI IRQ control and mask (pcirev >= 6 only) */ +#define BCMA_PCI_BACKPLANE_IRQS 0x98 /* Backplane Interrupts */ +#define BCMA_PCI_BAR0_WIN2 0xAC +#define BCMA_PCI_GPIO_IN 0xB0 /* GPIO Input (pcirev >= 3 only) */ +#define BCMA_PCI_GPIO_OUT 0xB4 /* GPIO Output (pcirev >= 3 only) */ +#define BCMA_PCI_GPIO_OUT_ENABLE 0xB8 /* GPIO Output Enable/Disable (pcirev >= 3 only) */ +#define BCMA_PCI_GPIO_SCS 0x10 /* PCI config space bit 4 for 4306c0 slow clock source */ +#define BCMA_PCI_GPIO_HWRAD 0x20 /* PCI config space GPIO 13 for hw radio disable */ +#define BCMA_PCI_GPIO_XTAL 0x40 /* PCI config space GPIO 14 for Xtal powerup */ +#define BCMA_PCI_GPIO_PLL 0x80 /* PCI config space GPIO 15 for PLL powerdown */ + +#endif /* LINUX_BCMA_REGS_H_ */ diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 48c007dae476..ae28e93fd072 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -382,6 +382,23 @@ struct ssb_device_id { #define SSB_ANY_ID 0xFFFF #define SSB_ANY_REV 0xFF +/* Broadcom's specific AMBA core, see drivers/bcma/ */ +struct bcma_device_id { + __u16 manuf; + __u16 id; + __u8 rev; + __u8 class; +}; +#define BCMA_CORE(_manuf, _id, _rev, _class) \ + { .manuf = _manuf, .id = _id, .rev = _rev, .class = _class, } +#define BCMA_CORETABLE_END \ + { 0, }, + +#define BCMA_ANY_MANUF 0xFFFF +#define BCMA_ANY_ID 0xFFFF +#define BCMA_ANY_REV 0xFF +#define BCMA_ANY_CLASS 0xFF + struct virtio_device_id { __u32 device; __u32 vendor; diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index 88f3f07205f8..e26e2fb462d4 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -702,6 +702,24 @@ static int do_ssb_entry(const char *filename, return 1; } +/* Looks like: bcma:mNidNrevNclN. 
*/ +static int do_bcma_entry(const char *filename, + struct bcma_device_id *id, char *alias) +{ + id->manuf = TO_NATIVE(id->manuf); + id->id = TO_NATIVE(id->id); + id->rev = TO_NATIVE(id->rev); + id->class = TO_NATIVE(id->class); + + strcpy(alias, "bcma:"); + ADD(alias, "m", id->manuf != BCMA_ANY_MANUF, id->manuf); + ADD(alias, "id", id->id != BCMA_ANY_ID, id->id); + ADD(alias, "rev", id->rev != BCMA_ANY_REV, id->rev); + ADD(alias, "cl", id->class != BCMA_ANY_CLASS, id->class); + add_wildcard(alias); + return 1; +} + /* Looks like: virtio:dNvN */ static int do_virtio_entry(const char *filename, struct virtio_device_id *id, char *alias) @@ -968,6 +986,10 @@ void handle_moddevtable(struct module *mod, struct elf_info *info, do_table(symval, sym->st_size, sizeof(struct ssb_device_id), "ssb", do_ssb_entry, mod); + else if (sym_is(symname, "__mod_bcma_device_table")) + do_table(symval, sym->st_size, + sizeof(struct bcma_device_id), "bcma", + do_bcma_entry, mod); else if (sym_is(symname, "__mod_virtio_device_table")) do_table(symval, sym->st_size, sizeof(struct virtio_device_id), "virtio", -- cgit v1.2.3-71-gd317 From c29c3f70c9eb6f18090da5af9dbe9dcb4adece8c Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Tue, 20 Apr 2010 17:58:24 -0400 Subject: tipc: Abort excessive send requests as early as possible Adds checks to TIPC's socket send routines to promptly detect and abort attempts to send more than 66,000 bytes in a single TIPC message or more than 2**31-1 bytes in a single TIPC byte stream request. In addition, this ensures that the number of iovecs in a send request does not exceed the limits of a standard integer variable. Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker --- include/linux/tipc.h | 2 +- net/tipc/socket.c | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tipc.h b/include/linux/tipc.h index a5b994a204d2..f2d90091cc20 100644 --- a/include/linux/tipc.h +++ b/include/linux/tipc.h @@ -101,7 +101,7 @@ static inline unsigned int tipc_node(__u32 addr) * Limiting values for messages */ -#define TIPC_MAX_USER_MSG_SIZE 66000 +#define TIPC_MAX_USER_MSG_SIZE 66000U /* * Message importance levels diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 29d94d53198d..e1c791798ba1 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -535,6 +535,9 @@ static int send_msg(struct kiocb *iocb, struct socket *sock, if (unlikely((m->msg_namelen < sizeof(*dest)) || (dest->family != AF_TIPC))) return -EINVAL; + if ((total_len > TIPC_MAX_USER_MSG_SIZE) || + (m->msg_iovlen > (unsigned)INT_MAX)) + return -EMSGSIZE; if (iocb) lock_sock(sk); @@ -640,6 +643,10 @@ static int send_packet(struct kiocb *iocb, struct socket *sock, if (unlikely(dest)) return send_msg(iocb, sock, m, total_len); + if ((total_len > TIPC_MAX_USER_MSG_SIZE) || + (m->msg_iovlen > (unsigned)INT_MAX)) + return -EMSGSIZE; + if (iocb) lock_sock(sk); @@ -723,6 +730,12 @@ static int send_stream(struct kiocb *iocb, struct socket *sock, goto exit; } + if ((total_len > (unsigned)INT_MAX) || + (m->msg_iovlen > (unsigned)INT_MAX)) { + res = -EMSGSIZE; + goto exit; + } + /* * Send each iovec entry using one or more messages * -- cgit v1.2.3-71-gd317 From f30e6d3e419bfb5540fa82ba7eca01d578556e6b Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Fri, 22 Apr 2011 15:13:54 +0200 Subject: firewire: octlet AT payloads can be stack-allocated We do not need slab allocations anymore in order to satisfy streaming DMA mapping constraints, thanks to commit 
da28947e7e36 "firewire: ohci: avoid separate DMA mapping for small AT payloads". (Besides, the slab-allocated buffers that firewire-core, firewire-sbp2, and firedtv used to provide for 8-byte write and lock requests were still not fully portable since they crossed cacheline boundaries or shared a cacheline with unrelated CPU-accessed data. snd-firewire-lib got this aspect right by using an extra kmalloc/ kfree just for the 8-byte transaction buffer.) This change replaces kmalloc'ed lock transaction scratch buffers in firewire-core, firedtv, and snd-firewire-lib by local stack allocations. Perhaps the most notable result of the change is simpler locking because there is no need to serialize usages of preallocated per-device buffers anymore. Also, allocations and deallocations are simpler. Signed-off-by: Stefan Richter Acked-by: Clemens Ladisch --- drivers/firewire/core-card.c | 16 ++++++++-------- drivers/firewire/core-cdev.c | 4 +--- drivers/firewire/core-iso.c | 21 +++++++++++---------- drivers/firewire/core-transaction.c | 7 ++++--- drivers/media/dvb/firewire/firedtv-avc.c | 15 +-------------- include/linux/firewire.h | 3 +-- sound/firewire/cmp.c | 3 +-- sound/firewire/iso-resources.c | 12 +++--------- sound/firewire/iso-resources.h | 1 - 9 files changed, 30 insertions(+), 52 deletions(-) (limited to 'include/linux') diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c index 3c44fbc81acb..e119f1e6ba47 100644 --- a/drivers/firewire/core-card.c +++ b/drivers/firewire/core-card.c @@ -258,8 +258,7 @@ static void allocate_broadcast_channel(struct fw_card *card, int generation) if (!card->broadcast_channel_allocated) { fw_iso_resource_manage(card, generation, 1ULL << 31, - &channel, &bandwidth, true, - card->bm_transaction_data); + &channel, &bandwidth, true); if (channel != 31) { fw_notify("failed to allocate broadcast channel\n"); return; @@ -294,6 +293,7 @@ static void bm_work(struct work_struct *work) bool root_device_is_cmc; bool irm_is_1394_1995_only; bool keep_this_irm; + __be32 transaction_data[2]; spin_lock_irq(&card->lock); @@ -355,21 +355,21 @@ static void bm_work(struct work_struct *work) goto pick_me; } - card->bm_transaction_data[0] = cpu_to_be32(0x3f); - card->bm_transaction_data[1] = cpu_to_be32(local_id); + transaction_data[0] = cpu_to_be32(0x3f); + transaction_data[1] = cpu_to_be32(local_id); spin_unlock_irq(&card->lock); rcode = fw_run_transaction(card, TCODE_LOCK_COMPARE_SWAP, irm_id, generation, SCODE_100, CSR_REGISTER_BASE + CSR_BUS_MANAGER_ID, - card->bm_transaction_data, 8); + transaction_data, 8); if (rcode == RCODE_GENERATION) /* Another bus reset, BM work has been rescheduled. */ goto out; - bm_id = be32_to_cpu(card->bm_transaction_data[0]); + bm_id = be32_to_cpu(transaction_data[0]); spin_lock_irq(&card->lock); if (rcode == RCODE_COMPLETE && generation == card->generation) @@ -490,11 +490,11 @@ static void bm_work(struct work_struct *work) /* * Make sure that the cycle master sends cycle start packets. 
*/ - card->bm_transaction_data[0] = cpu_to_be32(CSR_STATE_BIT_CMSTR); + transaction_data[0] = cpu_to_be32(CSR_STATE_BIT_CMSTR); rcode = fw_run_transaction(card, TCODE_WRITE_QUADLET_REQUEST, root_id, generation, SCODE_100, CSR_REGISTER_BASE + CSR_STATE_SET, - card->bm_transaction_data, 4); + transaction_data, 4); if (rcode == RCODE_GENERATION) goto out; } diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c index 62ac111af243..2a3f1c4d6906 100644 --- a/drivers/firewire/core-cdev.c +++ b/drivers/firewire/core-cdev.c @@ -141,7 +141,6 @@ struct iso_resource { int generation; u64 channels; s32 bandwidth; - __be32 transaction_data[2]; struct iso_resource_event *e_alloc, *e_dealloc; }; @@ -1229,8 +1228,7 @@ static void iso_resource_work(struct work_struct *work) r->channels, &channel, &bandwidth, todo == ISO_RES_ALLOC || todo == ISO_RES_REALLOC || - todo == ISO_RES_ALLOC_ONCE, - r->transaction_data); + todo == ISO_RES_ALLOC_ONCE); /* * Is this generation outdated already? As long as this resource sticks * in the idr, it will be scheduled again for a newer generation or at diff --git a/drivers/firewire/core-iso.c b/drivers/firewire/core-iso.c index 481056df9268..f872ede5af37 100644 --- a/drivers/firewire/core-iso.c +++ b/drivers/firewire/core-iso.c @@ -196,9 +196,10 @@ EXPORT_SYMBOL(fw_iso_context_stop); */ static int manage_bandwidth(struct fw_card *card, int irm_id, int generation, - int bandwidth, bool allocate, __be32 data[2]) + int bandwidth, bool allocate) { int try, new, old = allocate ? BANDWIDTH_AVAILABLE_INITIAL : 0; + __be32 data[2]; /* * On a 1394a IRM with low contention, try < 1 is enough. @@ -233,9 +234,10 @@ static int manage_bandwidth(struct fw_card *card, int irm_id, int generation, } static int manage_channel(struct fw_card *card, int irm_id, int generation, - u32 channels_mask, u64 offset, bool allocate, __be32 data[2]) + u32 channels_mask, u64 offset, bool allocate) { __be32 bit, all, old; + __be32 data[2]; int channel, ret = -EIO, retry = 5; old = all = allocate ? cpu_to_be32(~0) : 0; @@ -284,7 +286,7 @@ static int manage_channel(struct fw_card *card, int irm_id, int generation, } static void deallocate_channel(struct fw_card *card, int irm_id, - int generation, int channel, __be32 buffer[2]) + int generation, int channel) { u32 mask; u64 offset; @@ -293,7 +295,7 @@ static void deallocate_channel(struct fw_card *card, int irm_id, offset = channel < 32 ? 
CSR_REGISTER_BASE + CSR_CHANNELS_AVAILABLE_HI : CSR_REGISTER_BASE + CSR_CHANNELS_AVAILABLE_LO; - manage_channel(card, irm_id, generation, mask, offset, false, buffer); + manage_channel(card, irm_id, generation, mask, offset, false); } /** @@ -322,7 +324,7 @@ static void deallocate_channel(struct fw_card *card, int irm_id, */ void fw_iso_resource_manage(struct fw_card *card, int generation, u64 channels_mask, int *channel, int *bandwidth, - bool allocate, __be32 buffer[2]) + bool allocate) { u32 channels_hi = channels_mask; /* channels 31...0 */ u32 channels_lo = channels_mask >> 32; /* channels 63...32 */ @@ -335,11 +337,11 @@ void fw_iso_resource_manage(struct fw_card *card, int generation, if (channels_hi) c = manage_channel(card, irm_id, generation, channels_hi, CSR_REGISTER_BASE + CSR_CHANNELS_AVAILABLE_HI, - allocate, buffer); + allocate); if (channels_lo && c < 0) { c = manage_channel(card, irm_id, generation, channels_lo, CSR_REGISTER_BASE + CSR_CHANNELS_AVAILABLE_LO, - allocate, buffer); + allocate); if (c >= 0) c += 32; } @@ -351,14 +353,13 @@ void fw_iso_resource_manage(struct fw_card *card, int generation, if (*bandwidth == 0) return; - ret = manage_bandwidth(card, irm_id, generation, *bandwidth, - allocate, buffer); + ret = manage_bandwidth(card, irm_id, generation, *bandwidth, allocate); if (ret < 0) *bandwidth = 0; if (allocate && ret < 0) { if (c >= 0) - deallocate_channel(card, irm_id, generation, c, buffer); + deallocate_channel(card, irm_id, generation, c); *channel = ret; } } diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index d00f8ce902cc..77275fdf6c1f 100644 --- a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -326,8 +326,8 @@ static int allocate_tlabel(struct fw_card *card) * It will contain tag, channel, and sy data instead of a node ID then. * * The payload buffer at @data is going to be DMA-mapped except in case of - * quadlet-sized payload or of local (loopback) requests. Hence make sure that - * the buffer complies with the restrictions for DMA-mapped memory. The + * @length <= 8 or of local (loopback) requests. Hence make sure that the + * buffer complies with the restrictions of the streaming DMA mapping API. * @payload must not be freed before the @callback is called. * * In case of request types without payload, @data is NULL and @length is 0. @@ -411,7 +411,8 @@ static void transaction_callback(struct fw_card *card, int rcode, * * Returns the RCODE. See fw_send_request() for parameter documentation. * Unlike fw_send_request(), @data points to the payload of the request or/and - * to the payload of the response. + * to the payload of the response. DMA mapping restrictions apply to outbound + * request payloads of >= 8 bytes but not to inbound response payloads. 
*/ int fw_run_transaction(struct fw_card *card, int tcode, int destination_id, int generation, int speed, unsigned long long offset, diff --git a/drivers/media/dvb/firewire/firedtv-avc.c b/drivers/media/dvb/firewire/firedtv-avc.c index fc5ccd8c923a..21c52e3b522e 100644 --- a/drivers/media/dvb/firewire/firedtv-avc.c +++ b/drivers/media/dvb/firewire/firedtv-avc.c @@ -1320,14 +1320,10 @@ static int cmp_read(struct firedtv *fdtv, u64 addr, __be32 *data) { int ret; - mutex_lock(&fdtv->avc_mutex); - ret = fdtv_read(fdtv, addr, data); if (ret < 0) dev_err(fdtv->device, "CMP: read I/O error\n"); - mutex_unlock(&fdtv->avc_mutex); - return ret; } @@ -1335,18 +1331,9 @@ static int cmp_lock(struct firedtv *fdtv, u64 addr, __be32 data[]) { int ret; - mutex_lock(&fdtv->avc_mutex); - - /* data[] is stack-allocated and should not be DMA-mapped. */ - memcpy(fdtv->avc_data, data, 8); - - ret = fdtv_lock(fdtv, addr, fdtv->avc_data); + ret = fdtv_lock(fdtv, addr, data); if (ret < 0) dev_err(fdtv->device, "CMP: lock I/O error\n"); - else - memcpy(data, fdtv->avc_data, 8); - - mutex_unlock(&fdtv->avc_mutex); return ret; } diff --git a/include/linux/firewire.h b/include/linux/firewire.h index c64f3680d4f1..de90e1ff8488 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -125,7 +125,6 @@ struct fw_card { struct delayed_work bm_work; /* bus manager job */ int bm_retries; int bm_generation; - __be32 bm_transaction_data[2]; int bm_node_id; bool bm_abdicate; @@ -447,6 +446,6 @@ int fw_iso_context_stop(struct fw_iso_context *ctx); void fw_iso_context_destroy(struct fw_iso_context *ctx); void fw_iso_resource_manage(struct fw_card *card, int generation, u64 channels_mask, int *channel, int *bandwidth, - bool allocate, __be32 buffer[2]); + bool allocate); #endif /* _LINUX_FIREWIRE_H */ diff --git a/sound/firewire/cmp.c b/sound/firewire/cmp.c index 4a37f3a6fab9..14cacbc655dd 100644 --- a/sound/firewire/cmp.c +++ b/sound/firewire/cmp.c @@ -49,10 +49,9 @@ static int pcr_modify(struct cmp_connection *c, enum bus_reset_handling bus_reset_handling) { struct fw_device *device = fw_parent_device(c->resources.unit); - __be32 *buffer = c->resources.buffer; int generation = c->resources.generation; int rcode, errors = 0; - __be32 old_arg; + __be32 old_arg, buffer[2]; int err; buffer[0] = c->last_pcr_value; diff --git a/sound/firewire/iso-resources.c b/sound/firewire/iso-resources.c index 775dbd5f3445..bb9c0c1fb529 100644 --- a/sound/firewire/iso-resources.c +++ b/sound/firewire/iso-resources.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include "iso-resources.h" @@ -25,10 +24,6 @@ */ int fw_iso_resources_init(struct fw_iso_resources *r, struct fw_unit *unit) { - r->buffer = kmalloc(2 * 4, GFP_KERNEL); - if (!r->buffer) - return -ENOMEM; - r->channels_mask = ~0uLL; r->unit = fw_unit_get(unit); mutex_init(&r->mutex); @@ -44,7 +39,6 @@ int fw_iso_resources_init(struct fw_iso_resources *r, struct fw_unit *unit) void fw_iso_resources_destroy(struct fw_iso_resources *r) { WARN_ON(r->allocated); - kfree(r->buffer); mutex_destroy(&r->mutex); fw_unit_put(r->unit); } @@ -131,7 +125,7 @@ retry_after_bus_reset: bandwidth = r->bandwidth + r->bandwidth_overhead; fw_iso_resource_manage(card, r->generation, r->channels_mask, - &channel, &bandwidth, true, r->buffer); + &channel, &bandwidth, true); if (channel == -EAGAIN) { mutex_unlock(&r->mutex); goto retry_after_bus_reset; @@ -184,7 +178,7 @@ int fw_iso_resources_update(struct fw_iso_resources *r) bandwidth = r->bandwidth + r->bandwidth_overhead; 
fw_iso_resource_manage(card, r->generation, 1uLL << r->channel, - &channel, &bandwidth, true, r->buffer); + &channel, &bandwidth, true); /* * When another bus reset happens, pretend that the allocation * succeeded; we will try again for the new generation later. @@ -220,7 +214,7 @@ void fw_iso_resources_free(struct fw_iso_resources *r) if (r->allocated) { bandwidth = r->bandwidth + r->bandwidth_overhead; fw_iso_resource_manage(card, r->generation, 1uLL << r->channel, - &channel, &bandwidth, false, r->buffer); + &channel, &bandwidth, false); if (channel < 0) dev_err(&r->unit->device, "isochronous resource deallocation failed\n"); diff --git a/sound/firewire/iso-resources.h b/sound/firewire/iso-resources.h index 3f0730e4d841..5a9af7c61657 100644 --- a/sound/firewire/iso-resources.h +++ b/sound/firewire/iso-resources.h @@ -24,7 +24,6 @@ struct fw_iso_resources { unsigned int bandwidth_overhead; int generation; /* in which allocation is valid */ bool allocated; - __be32 *buffer; }; int fw_iso_resources_init(struct fw_iso_resources *r, -- cgit v1.2.3-71-gd317 From 13882a82ee1646336c3996c93b4a560a55d2a419 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Mon, 2 May 2011 09:33:56 +0200 Subject: firewire: optimize iso queueing by setting wake only after the last packet When queueing iso packets, the run time is dominated by the two MMIO accesses that set the DMA context's wake bit. Because most drivers submit packets in batches, we can save much time by removing all but the last wakeup. The internal kernel API is changed to require a call to fw_iso_context_queue_flush() after a batch of queued packets. The user space API does not change, so one call to FW_CDEV_IOC_QUEUE_ISO must specify multiple packets to take advantage of this optimization. In my measurements, this patch reduces the time needed to queue fifty skip packets from userspace to one sixth on a 2.5 GHz CPU, or to one third at 800 MHz. 
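For illustration, a minimal driver-side sketch of the batching pattern this change enables (the helper name and error handling are hypothetical; only fw_iso_context_queue() and fw_iso_context_queue_flush() are part of the interface touched here):

    #include <linux/firewire.h>

    /* Hypothetical helper: queue a batch of packets, then wake the DMA
     * context once instead of once per packet. */
    static int queue_iso_batch(struct fw_iso_context *ctx,
                               struct fw_iso_buffer *buffer,
                               struct fw_iso_packet *packets,
                               unsigned long *payloads, int count)
    {
        int i, err;

        for (i = 0; i < count; i++) {
            err = fw_iso_context_queue(ctx, &packets[i], buffer, payloads[i]);
            if (err < 0)
                return err;
        }

        /* One CONTEXT_WAKE MMIO write for the whole batch. */
        fw_iso_context_queue_flush(ctx);
        return 0;
    }

The same consideration applies to userspace: a single FW_CDEV_IOC_QUEUE_ISO call carrying many packets benefits from the single flush, while one packet per ioctl does not.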
Signed-off-by: Clemens Ladisch Signed-off-by: Stefan Richter --- drivers/firewire/core-card.c | 5 +++++ drivers/firewire/core-cdev.c | 1 + drivers/firewire/core-iso.c | 6 ++++++ drivers/firewire/core.h | 2 ++ drivers/firewire/net.c | 4 +++- drivers/firewire/ohci.c | 19 +++++++++++++++---- drivers/media/dvb/firewire/firedtv-fw.c | 1 + include/linux/firewire.h | 1 + sound/firewire/amdtp.c | 1 + 9 files changed, 35 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c index e119f1e6ba47..f05fc7bfceeb 100644 --- a/drivers/firewire/core-card.c +++ b/drivers/firewire/core-card.c @@ -630,6 +630,10 @@ static int dummy_queue_iso(struct fw_iso_context *ctx, struct fw_iso_packet *p, return -ENODEV; } +static void dummy_flush_queue_iso(struct fw_iso_context *ctx) +{ +} + static const struct fw_card_driver dummy_driver_template = { .read_phy_reg = dummy_read_phy_reg, .update_phy_reg = dummy_update_phy_reg, @@ -641,6 +645,7 @@ static const struct fw_card_driver dummy_driver_template = { .start_iso = dummy_start_iso, .set_iso_channels = dummy_set_iso_channels, .queue_iso = dummy_queue_iso, + .flush_queue_iso = dummy_flush_queue_iso, }; void fw_card_release(struct kref *kref) diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c index 2a3f1c4d6906..64768c2194f1 100644 --- a/drivers/firewire/core-cdev.c +++ b/drivers/firewire/core-cdev.c @@ -1107,6 +1107,7 @@ static int ioctl_queue_iso(struct client *client, union ioctl_arg *arg) payload += u.packet.payload_length; count++; } + fw_iso_context_queue_flush(ctx); a->size -= uptr_to_u64(p) - a->packets; a->packets = uptr_to_u64(p); diff --git a/drivers/firewire/core-iso.c b/drivers/firewire/core-iso.c index f872ede5af37..57c3973093ad 100644 --- a/drivers/firewire/core-iso.c +++ b/drivers/firewire/core-iso.c @@ -185,6 +185,12 @@ int fw_iso_context_queue(struct fw_iso_context *ctx, } EXPORT_SYMBOL(fw_iso_context_queue); +void fw_iso_context_queue_flush(struct fw_iso_context *ctx) +{ + ctx->card->driver->flush_queue_iso(ctx); +} +EXPORT_SYMBOL(fw_iso_context_queue_flush); + int fw_iso_context_stop(struct fw_iso_context *ctx) { return ctx->card->driver->stop_iso(ctx); diff --git a/drivers/firewire/core.h b/drivers/firewire/core.h index 25e729cde2f7..0fe4e4e6eda7 100644 --- a/drivers/firewire/core.h +++ b/drivers/firewire/core.h @@ -97,6 +97,8 @@ struct fw_card_driver { struct fw_iso_buffer *buffer, unsigned long payload); + void (*flush_queue_iso)(struct fw_iso_context *ctx); + int (*stop_iso)(struct fw_iso_context *ctx); }; diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c index 3f04dd3681cf..b9762d07198d 100644 --- a/drivers/firewire/net.c +++ b/drivers/firewire/net.c @@ -881,7 +881,9 @@ static void fwnet_receive_broadcast(struct fw_iso_context *context, spin_unlock_irqrestore(&dev->lock, flags); - if (retval < 0) + if (retval >= 0) + fw_iso_context_queue_flush(dev->broadcast_rcv_context); + else fw_error("requeue failed\n"); } diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index f9f55703375e..438e6c831170 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -1192,9 +1192,6 @@ static void context_append(struct context *ctx, wmb(); /* finish init of new descriptors before branch_address update */ ctx->prev->branch_address = cpu_to_le32(d_bus | z); ctx->prev = find_branch_descriptor(d, z); - - reg_write(ctx->ohci, CONTROL_SET(ctx->regs), CONTEXT_WAKE); - flush_writes(ctx->ohci); } static void context_stop(struct context *ctx) 
@@ -1348,8 +1345,12 @@ static int at_context_queue_packet(struct context *ctx, context_append(ctx, d, z, 4 - z); - if (!ctx->running) + if (ctx->running) { + reg_write(ohci, CONTROL_SET(ctx->regs), CONTEXT_WAKE); + flush_writes(ohci); + } else { context_run(ctx, 0); + } return 0; } @@ -3121,6 +3122,15 @@ static int ohci_queue_iso(struct fw_iso_context *base, return ret; } +static void ohci_flush_queue_iso(struct fw_iso_context *base) +{ + struct context *ctx = + &container_of(base, struct iso_context, base)->context; + + reg_write(ctx->ohci, CONTROL_SET(ctx->regs), CONTEXT_WAKE); + flush_writes(ctx->ohci); +} + static const struct fw_card_driver ohci_driver = { .enable = ohci_enable, .read_phy_reg = ohci_read_phy_reg, @@ -3137,6 +3147,7 @@ static const struct fw_card_driver ohci_driver = { .free_iso_context = ohci_free_iso_context, .set_iso_channels = ohci_set_iso_channels, .queue_iso = ohci_queue_iso, + .flush_queue_iso = ohci_flush_queue_iso, .start_iso = ohci_start_iso, .stop_iso = ohci_stop_iso, }; diff --git a/drivers/media/dvb/firewire/firedtv-fw.c b/drivers/media/dvb/firewire/firedtv-fw.c index 8022b743af91..864b6274c729 100644 --- a/drivers/media/dvb/firewire/firedtv-fw.c +++ b/drivers/media/dvb/firewire/firedtv-fw.c @@ -125,6 +125,7 @@ static void handle_iso(struct fw_iso_context *context, u32 cycle, i = (i + 1) & (N_PACKETS - 1); } + fw_iso_context_queue_flush(ctx->context); ctx->current_packet = i; } diff --git a/include/linux/firewire.h b/include/linux/firewire.h index de90e1ff8488..c0fb405bb435 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -440,6 +440,7 @@ int fw_iso_context_queue(struct fw_iso_context *ctx, struct fw_iso_packet *packet, struct fw_iso_buffer *buffer, unsigned long payload); +void fw_iso_context_queue_flush(struct fw_iso_context *ctx); int fw_iso_context_start(struct fw_iso_context *ctx, int cycle, int sync, int tags); int fw_iso_context_stop(struct fw_iso_context *ctx); diff --git a/sound/firewire/amdtp.c b/sound/firewire/amdtp.c index b18140ff2b93..87657dd7714c 100644 --- a/sound/firewire/amdtp.c +++ b/sound/firewire/amdtp.c @@ -396,6 +396,7 @@ static void out_packet_callback(struct fw_iso_context *context, u32 cycle, for (i = 0; i < packets; ++i) queue_out_packet(s, ++cycle); + fw_iso_context_queue_flush(s->context); } static int queue_initial_skip_packets(struct amdtp_out_stream *s) -- cgit v1.2.3-71-gd317 From 105e53f863c04e1d9e5bb34bf753c9fdbce6a60c Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sun, 1 May 2011 20:50:31 +0200 Subject: firewire: sbp2: parallelize login, reconnect, logout The struct sbp2_logical_unit.work items can all be executed in parallel but are not reentrant. Furthermore, reconnect or re-login work must be executed in a WQ_MEM_RECLAIM workqueue. Hence replace the old single-threaded firewire-sbp2 workqueue by a concurrency-managed but non-reentrant workqueue with rescuer. firewire-core already maintains one, hence use this one. In earlier versions of this change, I observed occasional failures of parallel INQUIRY to an Initio INIC-2430 FireWire 800 to dual IDE bridge. More testing indicates that parallel INQUIRY is not actually a problem, but too quick successions of logout and login + INQUIRY, e.g. a quick sequence of cable plugout and plugin, can result in failed INQUIRY. This does not seem to be something that should or could be addressed by serialization. 
Another dual-LU device to which I currently have access to, an OXUF924DSB FireWire 800 to dual SATA bridge with firmware from MacPower, has been successfully tested with this too. This change is beneficial to environments with two or more FireWire storage devices, especially if they are located on the same bus. Management tasks that should be performed as soon and as quickly as possible, especially reconnect, are no longer held up by tasks on other devices that may take a long time, especially login with INQUIRY and sd or sr driver probe. Signed-off-by: Stefan Richter --- drivers/firewire/core-card.c | 4 ++-- drivers/firewire/core-cdev.c | 2 +- drivers/firewire/core-device.c | 5 +++-- drivers/firewire/core-transaction.c | 12 ++++++------ drivers/firewire/core.h | 2 -- drivers/firewire/sbp2.c | 9 +-------- include/linux/firewire.h | 2 ++ 7 files changed, 15 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c index bb8c4d22b03e..29d2423fae6d 100644 --- a/drivers/firewire/core-card.c +++ b/drivers/firewire/core-card.c @@ -228,7 +228,7 @@ void fw_schedule_bus_reset(struct fw_card *card, bool delayed, bool short_reset) /* Use an arbitrary short delay to combine multiple reset requests. */ fw_card_get(card); - if (!queue_delayed_work(fw_wq, &card->br_work, + if (!queue_delayed_work(fw_workqueue, &card->br_work, delayed ? DIV_ROUND_UP(HZ, 100) : 0)) fw_card_put(card); } @@ -241,7 +241,7 @@ static void br_work(struct work_struct *work) /* Delay for 2s after last reset per IEEE 1394 clause 8.2.1. */ if (card->reset_jiffies != 0 && time_before64(get_jiffies_64(), card->reset_jiffies + 2 * HZ)) { - if (!queue_delayed_work(fw_wq, &card->br_work, 2 * HZ)) + if (!queue_delayed_work(fw_workqueue, &card->br_work, 2 * HZ)) fw_card_put(card); return; } diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c index aa1131d26e30..b1c11775839c 100644 --- a/drivers/firewire/core-cdev.c +++ b/drivers/firewire/core-cdev.c @@ -149,7 +149,7 @@ static void release_iso_resource(struct client *, struct client_resource *); static void schedule_iso_resource(struct iso_resource *r, unsigned long delay) { client_get(r->client); - if (!queue_delayed_work(fw_wq, &r->work, delay)) + if (!queue_delayed_work(fw_workqueue, &r->work, delay)) client_put(r->client); } diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c index ef900d923f15..95a471401892 100644 --- a/drivers/firewire/core-device.c +++ b/drivers/firewire/core-device.c @@ -725,12 +725,13 @@ struct fw_device *fw_device_get_by_devt(dev_t devt) return device; } -struct workqueue_struct *fw_wq; +struct workqueue_struct *fw_workqueue; +EXPORT_SYMBOL(fw_workqueue); static void fw_schedule_device_work(struct fw_device *device, unsigned long delay) { - queue_delayed_work(fw_wq, &device->work, delay); + queue_delayed_work(fw_workqueue, &device->work, delay); } /* diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index d4c28a217b2c..334b82a3542c 100644 --- a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -1214,21 +1214,21 @@ static int __init fw_core_init(void) { int ret; - fw_wq = alloc_workqueue(KBUILD_MODNAME, - WQ_NON_REENTRANT | WQ_MEM_RECLAIM, 0); - if (!fw_wq) + fw_workqueue = alloc_workqueue("firewire", + WQ_NON_REENTRANT | WQ_MEM_RECLAIM, 0); + if (!fw_workqueue) return -ENOMEM; ret = bus_register(&fw_bus_type); if (ret < 0) { - destroy_workqueue(fw_wq); + 
destroy_workqueue(fw_workqueue); return ret; } fw_cdev_major = register_chrdev(0, "firewire", &fw_device_ops); if (fw_cdev_major < 0) { bus_unregister(&fw_bus_type); - destroy_workqueue(fw_wq); + destroy_workqueue(fw_workqueue); return fw_cdev_major; } @@ -1244,7 +1244,7 @@ static void __exit fw_core_cleanup(void) { unregister_chrdev(fw_cdev_major, "firewire"); bus_unregister(&fw_bus_type); - destroy_workqueue(fw_wq); + destroy_workqueue(fw_workqueue); idr_destroy(&fw_device_idr); } diff --git a/drivers/firewire/core.h b/drivers/firewire/core.h index 00ea7730c6a7..0fe4e4e6eda7 100644 --- a/drivers/firewire/core.h +++ b/drivers/firewire/core.h @@ -140,8 +140,6 @@ void fw_cdev_handle_phy_packet(struct fw_card *card, struct fw_packet *p); extern struct rw_semaphore fw_device_rwsem; extern struct idr fw_device_idr; extern int fw_cdev_major; -struct workqueue_struct; -extern struct workqueue_struct *fw_wq; struct fw_device *fw_device_get_by_devt(dev_t devt); int fw_device_set_broadcast_channel(struct device *dev, void *gen); diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c index 2aafc614ae14..41841a3e3f99 100644 --- a/drivers/firewire/sbp2.c +++ b/drivers/firewire/sbp2.c @@ -826,8 +826,6 @@ static void sbp2_target_put(struct sbp2_target *tgt) kref_put(&tgt->kref, sbp2_release_target); } -static struct workqueue_struct *sbp2_wq; - /* * Always get the target's kref when scheduling work on one its units. * Each workqueue job is responsible to call sbp2_target_put() upon return. @@ -835,7 +833,7 @@ static struct workqueue_struct *sbp2_wq; static void sbp2_queue_work(struct sbp2_logical_unit *lu, unsigned long delay) { sbp2_target_get(lu->tgt); - if (!queue_delayed_work(sbp2_wq, &lu->work, delay)) + if (!queue_delayed_work(fw_workqueue, &lu->work, delay)) sbp2_target_put(lu->tgt); } @@ -1645,17 +1643,12 @@ MODULE_ALIAS("sbp2"); static int __init sbp2_init(void) { - sbp2_wq = create_singlethread_workqueue(KBUILD_MODNAME); - if (!sbp2_wq) - return -ENOMEM; - return driver_register(&sbp2_driver.driver); } static void __exit sbp2_cleanup(void) { driver_unregister(&sbp2_driver.driver); - destroy_workqueue(sbp2_wq); } module_init(sbp2_init); diff --git a/include/linux/firewire.h b/include/linux/firewire.h index c0fb405bb435..5e6f42789afe 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -449,4 +449,6 @@ void fw_iso_resource_manage(struct fw_card *card, int generation, u64 channels_mask, int *channel, int *bandwidth, bool allocate); +extern struct workqueue_struct *fw_workqueue; + #endif /* _LINUX_FIREWIRE_H */ -- cgit v1.2.3-71-gd317 From 1b9ba000177ee47bcc5b44c7c34e48e735f5f9b1 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Mon, 9 May 2011 13:08:06 +0300 Subject: usb: gadget: composite: Allow function drivers to pause control transfers Some USB function drivers (e.g. f_mass_storage.c) need to delay or defer the data/status stages of standard control requests like SET_CONFIGURATION or SET_INTERFACE till they are done with their bookkeeping and are actually ready for accepting new commands to their interface. They can now achieve this functionality by returning USB_GADGET_DELAYED_STATUS in their setup handlers (e.g. set_alt()). The composite framework will then defer completion of the control transfer by not completing the data/status stages. This ensures that the host does not send new packets to the interface till the function driver is ready to take them. 
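As a rough sketch of the function-driver side (the structure, workqueue plumbing, and names below are made up for illustration; only USB_GADGET_DELAYED_STATUS and usb_composite_setup_continue(), described in the next paragraph, come from this patch):

    #include <linux/workqueue.h>
    #include <linux/usb/composite.h>

    struct my_function {
        struct usb_function       func;
        struct work_struct        work;
        struct usb_composite_dev *cdev;   /* saved in the bind() callback */
    };

    static void my_function_work(struct work_struct *work)
    {
        struct my_function *mf = container_of(work, struct my_function, work);

        /* ... finish bookkeeping: reconfigure endpoints, flush queues ... */

        /* Tell the composite framework to complete the status stage now. */
        usb_composite_setup_continue(mf->cdev);
    }

    static int my_function_set_alt(struct usb_function *f,
                                   unsigned intf, unsigned alt)
    {
        struct my_function *mf = container_of(f, struct my_function, func);

        schedule_work(&mf->work);

        /* Defer the (0-byte) data/status stage until the work item runs. */
        return USB_GADGET_DELAYED_STATUS;
    }
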
When the function driver that requested for USB_GADGET_DELAYED_STATUS is done with its bookkeeping, it should signal the composite framework to continue with the data/status stages of the control transfer. It can do so by invoking the new API usb_composite_setup_continue(). This is where the control transfer's data/status stages are completed and host can initiate new transfers. The DELAYED_STATUS mechanism is currently only supported if the expected data phase is 0 bytes (i.e. w_length == 0). Since SET_CONFIGURATION and SET_INTERFACE are the only cases that will use this mechanism, this is not a limitation. Signed-off-by: Roger Quadros Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/composite.c | 62 +++++++++++++++++++++++++++++++++++++++++- include/linux/usb/composite.h | 16 ++++++++++- 2 files changed, 76 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 82314ed22506..5cbb1a41c223 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -461,12 +461,23 @@ static int set_config(struct usb_composite_dev *cdev, reset_config(cdev); goto done; } + + if (result == USB_GADGET_DELAYED_STATUS) { + DBG(cdev, + "%s: interface %d (%s) requested delayed status\n", + __func__, tmp, f->name); + cdev->delayed_status++; + DBG(cdev, "delayed_status count %d\n", + cdev->delayed_status); + } } /* when we return, be sure our power usage is valid */ power = c->bMaxPower ? (2 * c->bMaxPower) : CONFIG_USB_GADGET_VBUS_DRAW; done: usb_gadget_vbus_draw(gadget, power); + if (result >= 0 && cdev->delayed_status) + result = USB_GADGET_DELAYED_STATUS; return result; } @@ -895,6 +906,14 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) if (w_value && !f->set_alt) break; value = f->set_alt(f, w_index, w_value); + if (value == USB_GADGET_DELAYED_STATUS) { + DBG(cdev, + "%s: interface %d (%s) requested delayed status\n", + __func__, intf, f->name); + cdev->delayed_status++; + DBG(cdev, "delayed_status count %d\n", + cdev->delayed_status); + } break; case USB_REQ_GET_INTERFACE: if (ctrl->bRequestType != (USB_DIR_IN|USB_RECIP_INTERFACE)) @@ -958,7 +977,7 @@ unknown: } /* respond with data transfer before status phase? */ - if (value >= 0) { + if (value >= 0 && value != USB_GADGET_DELAYED_STATUS) { req->length = value; req->zero = value < w_length; value = usb_ep_queue(gadget->ep0, req, GFP_ATOMIC); @@ -967,6 +986,10 @@ unknown: req->status = 0; composite_setup_complete(gadget->ep0, req); } + } else if (value == USB_GADGET_DELAYED_STATUS && w_length != 0) { + WARN(cdev, + "%s: Delayed status not supported for w_length != 0", + __func__); } done: @@ -1289,3 +1312,40 @@ void usb_composite_unregister(struct usb_composite_driver *driver) return; usb_gadget_unregister_driver(&composite_driver); } + +/** + * usb_composite_setup_continue() - Continue with the control transfer + * @cdev: the composite device who's control transfer was kept waiting + * + * This function must be called by the USB function driver to continue + * with the control transfer's data/status stage in case it had requested to + * delay the data/status stages. A USB function's setup handler (e.g. set_alt()) + * can request the composite framework to delay the setup request's data/status + * stages by returning USB_GADGET_DELAYED_STATUS. 
+ */ +void usb_composite_setup_continue(struct usb_composite_dev *cdev) +{ + int value; + struct usb_request *req = cdev->req; + unsigned long flags; + + DBG(cdev, "%s\n", __func__); + spin_lock_irqsave(&cdev->lock, flags); + + if (cdev->delayed_status == 0) { + WARN(cdev, "%s: Unexpected call\n", __func__); + + } else if (--cdev->delayed_status == 0) { + DBG(cdev, "%s: Completing delayed status\n", __func__); + req->length = 0; + value = usb_ep_queue(cdev->gadget->ep0, req, GFP_ATOMIC); + if (value < 0) { + DBG(cdev, "ep_queue --> %d\n", value); + req->status = 0; + composite_setup_complete(cdev->gadget->ep0, req); + } + } + + spin_unlock_irqrestore(&cdev->lock, flags); +} + diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index 882a084a8411..b78cba466d3d 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -37,6 +37,14 @@ #include #include +/* + * USB function drivers should return USB_GADGET_DELAYED_STATUS if they + * wish to delay the data/status stages of the control transfer till they + * are ready. The control transfer will then be kept from completing till + * all the function drivers that requested for USB_GADGET_DELAYED_STAUS + * invoke usb_composite_setup_continue(). + */ +#define USB_GADGET_DELAYED_STATUS 0x7fff /* Impossibly large value */ struct usb_configuration; @@ -285,6 +293,7 @@ struct usb_composite_driver { extern int usb_composite_probe(struct usb_composite_driver *driver, int (*bind)(struct usb_composite_dev *cdev)); extern void usb_composite_unregister(struct usb_composite_driver *driver); +extern void usb_composite_setup_continue(struct usb_composite_dev *cdev); /** @@ -342,7 +351,12 @@ struct usb_composite_dev { */ unsigned deactivations; - /* protects at least deactivation count */ + /* the composite driver won't complete the control transfer's + * data/status stages till delayed_status is zero. + */ + int delayed_status; + + /* protects deactivations and delayed_status counts*/ spinlock_t lock; }; -- cgit v1.2.3-71-gd317 From c0a86a9bea55d505574120f3e9775e3844276505 Mon Sep 17 00:00:00 2001 From: Seth Heasley Date: Tue, 19 Apr 2011 16:35:15 -0700 Subject: x86/PCI: irq and pci_ids patch for Intel Panther Point DeviceIDs This patch adds the LPC Controller DeviceIDs for the Intel Panther Point PCH. 
Acked-by: Jean Delvare Signed-off-by: Seth Heasley Signed-off-by: Jesse Barnes --- arch/x86/pci/irq.c | 4 +++- include/linux/pci_ids.h | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 8201165bae28..372e9b8989b3 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -602,7 +602,9 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route || (device >= PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MIN && device <= PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MAX) || (device >= PCI_DEVICE_ID_INTEL_DH89XXCC_LPC_MIN && - device <= PCI_DEVICE_ID_INTEL_DH89XXCC_LPC_MAX)) { + device <= PCI_DEVICE_ID_INTEL_DH89XXCC_LPC_MAX) + || (device >= PCI_DEVICE_ID_INTEL_PANTHERPOINT_LPC_MIN && + device <= PCI_DEVICE_ID_INTEL_PANTHERPOINT_LPC_MAX)) { r->name = "PIIX/ICH"; r->get = pirq_piix_get; r->set = pirq_piix_set; diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 4e2c9150a785..52f4ed4de490 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2480,6 +2480,9 @@ #define PCI_DEVICE_ID_INTEL_COUGARPOINT_SMBUS 0x1c22 #define PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MIN 0x1c41 #define PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MAX 0x1c5f +#define PCI_DEVICE_ID_INTEL_PANTHERPOINT_SMBUS 0x1e22 +#define PCI_DEVICE_ID_INTEL_PANTHERPOINT_LPC_MIN 0x1e40 +#define PCI_DEVICE_ID_INTEL_PANTHERPOINT_LPC_MAX 0x1e5f #define PCI_DEVICE_ID_INTEL_PATSBURG_SMBUS 0x1d22 #define PCI_DEVICE_ID_INTEL_PATSBURG_LPC_0 0x1d40 #define PCI_DEVICE_ID_INTEL_PATSBURG_LPC_1 0x1d41 -- cgit v1.2.3-71-gd317 From 0ee8dcb87e403397e575674d0e79272b06dea12e Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Wed, 9 Mar 2011 15:41:59 +0800 Subject: KVM: cleanup memslot_id function We can get memslot id from memslot->id directly Signed-off-by: Xiao Guangrong Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 6 +++++- virt/kvm/kvm_main.c | 17 ----------------- 2 files changed, 5 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ab428552af8e..57d7092d7717 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -365,7 +365,6 @@ pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, bool *writable); pfn_t gfn_to_pfn_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn); -int memslot_id(struct kvm *kvm, gfn_t gfn); void kvm_release_pfn_dirty(pfn_t); void kvm_release_pfn_clean(pfn_t pfn); void kvm_set_pfn_dirty(pfn_t pfn); @@ -597,6 +596,11 @@ static inline void kvm_guest_exit(void) current->flags &= ~PF_VCPU; } +static inline int memslot_id(struct kvm *kvm, gfn_t gfn) +{ + return gfn_to_memslot(kvm, gfn)->id; +} + static inline unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn) { diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 6330653480e4..58146457bf97 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -996,23 +996,6 @@ out: return size; } -int memslot_id(struct kvm *kvm, gfn_t gfn) -{ - int i; - struct kvm_memslots *slots = kvm_memslots(kvm); - struct kvm_memory_slot *memslot = NULL; - - for (i = 0; i < slots->nmemslots; ++i) { - memslot = &slots->memslots[i]; - - if (gfn >= memslot->base_gfn - && gfn < memslot->base_gfn + memslot->npages) - break; - } - - return memslot - slots->memslots; -} - static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn, gfn_t *nr_pages) { -- cgit v1.2.3-71-gd317 From 
c761e5868e6737abe0464636ebd7fcbb6814c626 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Fri, 1 Apr 2011 11:25:03 -0300 Subject: Revert "KVM: Fix race between nmi injection and enabling nmi window" This reverts commit f86368493ec038218e8663cc1b6e5393cd8e008a. Simpler fix to follow. Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 4 +--- include/linux/kvm_host.h | 1 - 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a38fb9bb342b..b9402d5fa0e9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -361,8 +361,8 @@ void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) void kvm_inject_nmi(struct kvm_vcpu *vcpu) { - kvm_make_request(KVM_REQ_NMI, vcpu); kvm_make_request(KVM_REQ_EVENT, vcpu); + vcpu->arch.nmi_pending = 1; } EXPORT_SYMBOL_GPL(kvm_inject_nmi); @@ -5208,8 +5208,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) r = 1; goto out; } - if (kvm_check_request(KVM_REQ_NMI, vcpu)) - vcpu->arch.nmi_pending = true; } r = kvm_mmu_reload(vcpu); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 57d7092d7717..7ca831e55186 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -43,7 +43,6 @@ #define KVM_REQ_DEACTIVATE_FPU 10 #define KVM_REQ_EVENT 11 #define KVM_REQ_APF_HALT 12 -#define KVM_REQ_NMI 13 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 -- cgit v1.2.3-71-gd317 From cef4dea07f6720b36cc93e18a2e68be4bdb71a92 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 20 Jan 2010 12:01:20 +0200 Subject: KVM: 16-byte mmio support Since sse instructions can issue 16-byte mmios, we need to support them. We can't increase the kvm_run mmio buffer size to 16 bytes without breaking compatibility, so instead we break the large mmios into two smaller 8-byte ones. Since the bus is 64-bit we aren't breaking any atomicity guarantees. 
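Schematically (illustrative addresses, not taken from the patch), a 16-byte guest read at guest-physical 0x1000 now reaches userspace as two consecutive KVM_EXIT_MMIO exits:

    /* exit 1 */  run->mmio.phys_addr = 0x1000;  run->mmio.len = 8;  /* min(16 - 0, 8) */
    /* exit 2 */  run->mmio.phys_addr = 0x1008;  run->mmio.len = 8;  /* min(16 - 8, 8) */

Roughly, complete_mmio() advances mmio_index by 8 after each exit and only hands the fully assembled 16 bytes back to the emulator once mmio_index reaches mmio_size.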
Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/x86.c | 34 +++++++++++++++++++++++++--------- include/linux/kvm_host.h | 7 ++++++- 3 files changed, 32 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 35f81b110260..e820c6339b8b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -30,6 +30,7 @@ #define KVM_MEMORY_SLOTS 32 /* memory slots that does not exposed to userspace */ #define KVM_PRIVATE_MEM_SLOTS 4 +#define KVM_MMIO_SIZE 16 #define KVM_PIO_PAGE_OFFSET 1 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bb6b9d3f5e93..11d692c7018d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3833,8 +3833,10 @@ mmio: vcpu->mmio_needed = 1; vcpu->run->exit_reason = KVM_EXIT_MMIO; vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; - vcpu->run->mmio.len = vcpu->mmio_size = bytes; + vcpu->mmio_size = bytes; + vcpu->run->mmio.len = min(vcpu->mmio_size, 8); vcpu->run->mmio.is_write = vcpu->mmio_is_write = 0; + vcpu->mmio_index = 0; return X86EMUL_IO_NEEDED; } @@ -3886,11 +3888,14 @@ mmio: val += handled; vcpu->mmio_needed = 1; + memcpy(vcpu->mmio_data, val, bytes); vcpu->run->exit_reason = KVM_EXIT_MMIO; vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; - vcpu->run->mmio.len = vcpu->mmio_size = bytes; + vcpu->mmio_size = bytes; + vcpu->run->mmio.len = min(vcpu->mmio_size, 8); vcpu->run->mmio.is_write = vcpu->mmio_is_write = 1; - memcpy(vcpu->run->mmio.data, val, bytes); + memcpy(vcpu->run->mmio.data, vcpu->mmio_data, 8); + vcpu->mmio_index = 0; return X86EMUL_CONTINUE; } @@ -4498,11 +4503,9 @@ restart: if (!vcpu->arch.pio.in) vcpu->arch.pio.count = 0; r = EMULATE_DO_MMIO; - } else if (vcpu->mmio_needed) { - if (vcpu->mmio_is_write) - vcpu->mmio_needed = 0; + } else if (vcpu->mmio_needed) r = EMULATE_DO_MMIO; - } else if (r == EMULATION_RESTART) + else if (r == EMULATION_RESTART) goto restart; else r = EMULATE_DONE; @@ -5450,9 +5453,22 @@ static int complete_mmio(struct kvm_vcpu *vcpu) return 1; if (vcpu->mmio_needed) { - memcpy(vcpu->mmio_data, run->mmio.data, 8); - vcpu->mmio_read_completed = 1; vcpu->mmio_needed = 0; + if (!vcpu->mmio_is_write) + memcpy(vcpu->mmio_data, run->mmio.data, 8); + vcpu->mmio_index += 8; + if (vcpu->mmio_index < vcpu->mmio_size) { + run->exit_reason = KVM_EXIT_MMIO; + run->mmio.phys_addr = vcpu->mmio_phys_addr + vcpu->mmio_index; + memcpy(run->mmio.data, vcpu->mmio_data + vcpu->mmio_index, 8); + run->mmio.len = min(vcpu->mmio_size - vcpu->mmio_index, 8); + run->mmio.is_write = vcpu->mmio_is_write; + vcpu->mmio_needed = 1; + return 0; + } + if (vcpu->mmio_is_write) + return 1; + vcpu->mmio_read_completed = 1; } vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 7ca831e55186..d1f507567068 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -27,6 +27,10 @@ #include +#ifndef KVM_MMIO_SIZE +#define KVM_MMIO_SIZE 8 +#endif + /* * vcpu->requests bit members */ @@ -132,7 +136,8 @@ struct kvm_vcpu { int mmio_read_completed; int mmio_is_write; int mmio_size; - unsigned char mmio_data[8]; + int mmio_index; + unsigned char mmio_data[KVM_MMIO_SIZE]; gpa_t mmio_phys_addr; #endif -- cgit v1.2.3-71-gd317 From 92a1f12d2598f429bd8639e21d89305e787115c5 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 25 Mar 2011 
09:44:51 +0100 Subject: KVM: X86: Implement userspace interface to set virtual_tsc_khz This patch implements two new vm-ioctls to get and set the virtual_tsc_khz if the machine supports tsc-scaling. Setting the tsc-frequency is only possible before userspace creates any vcpu. Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- Documentation/kvm/api.txt | 23 +++++++++++++++++++++++ arch/x86/include/asm/kvm_host.h | 7 +++++++ arch/x86/kvm/svm.c | 20 ++++++++++++++++++++ arch/x86/kvm/x86.c | 35 +++++++++++++++++++++++++++++++++++ include/linux/kvm.h | 5 +++++ 5 files changed, 90 insertions(+) (limited to 'include/linux') diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt index 9bef4e4cec50..1b9eaa7e8856 100644 --- a/Documentation/kvm/api.txt +++ b/Documentation/kvm/api.txt @@ -1263,6 +1263,29 @@ struct kvm_assigned_msix_entry { __u16 padding[3]; }; +4.54 KVM_SET_TSC_KHZ + +Capability: KVM_CAP_TSC_CONTROL +Architectures: x86 +Type: vcpu ioctl +Parameters: virtual tsc_khz +Returns: 0 on success, -1 on error + +Specifies the tsc frequency for the virtual machine. The unit of the +frequency is KHz. + +4.55 KVM_GET_TSC_KHZ + +Capability: KVM_CAP_GET_TSC_KHZ +Architectures: x86 +Type: vcpu ioctl +Parameters: none +Returns: virtual tsc-khz on success, negative value on error + +Returns the tsc frequency of the guest. The unit of the return value is +KHz. If the host has unstable tsc this ioctl returns -EIO instead as an +error. + 5. The kvm_run structure Application code obtains a pointer to the kvm_run structure by diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index da0a8ce3a139..bd57639fd5db 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -655,6 +655,13 @@ u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn); extern bool tdp_enabled; +/* control of guest tsc rate supported? */ +extern bool kvm_has_tsc_control; +/* minimum supported tsc_khz for guests */ +extern u32 kvm_min_guest_tsc_khz; +/* maximum supported tsc_khz for guests */ +extern u32 kvm_max_guest_tsc_khz; + enum emulation_result { EMULATE_DONE, /* no further processing */ EMULATE_DO_MMIO, /* kvm_run filled with mmio request */ diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 8c4549bef4ed..a98873762433 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -64,6 +64,8 @@ MODULE_LICENSE("GPL"); #define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) #define TSC_RATIO_RSVD 0xffffff0000000000ULL +#define TSC_RATIO_MIN 0x0000000000000001ULL +#define TSC_RATIO_MAX 0x000000ffffffffffULL static bool erratum_383_found __read_mostly; @@ -189,6 +191,7 @@ static int nested_svm_intercept(struct vcpu_svm *svm); static int nested_svm_vmexit(struct vcpu_svm *svm); static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, bool has_error_code, u32 error_code); +static u64 __scale_tsc(u64 ratio, u64 tsc); enum { VMCB_INTERCEPTS, /* Intercept vectors, TSC offset, @@ -798,6 +801,23 @@ static __init int svm_hardware_setup(void) if (boot_cpu_has(X86_FEATURE_FXSR_OPT)) kvm_enable_efer_bits(EFER_FFXSR); + if (boot_cpu_has(X86_FEATURE_TSCRATEMSR)) { + u64 max; + + kvm_has_tsc_control = true; + + /* + * Make sure the user can only configure tsc_khz values that + * fit into a signed integer. + * A min value is not calculated needed because it will always + * be 1 on all machines and a value of 0 is used to disable + * tsc-scaling for the vcpu. 
+ */ + max = min(0x7fffffffULL, __scale_tsc(tsc_khz, TSC_RATIO_MAX)); + + kvm_max_guest_tsc_khz = max; + } + if (nested) { printk(KERN_INFO "kvm: Nested Virtualization enabled\n"); kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 579ce34e7904..1d5a7f418795 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -87,6 +87,11 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops); int ignore_msrs = 0; module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR); +bool kvm_has_tsc_control; +EXPORT_SYMBOL_GPL(kvm_has_tsc_control); +u32 kvm_max_guest_tsc_khz; +EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz); + #define KVM_NR_SHARED_MSRS 16 struct kvm_shared_msrs_global { @@ -1986,6 +1991,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_X86_ROBUST_SINGLESTEP: case KVM_CAP_XSAVE: case KVM_CAP_ASYNC_PF: + case KVM_CAP_GET_TSC_KHZ: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -2012,6 +2018,9 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_XCRS: r = cpu_has_xsave; break; + case KVM_CAP_TSC_CONTROL: + r = kvm_has_tsc_control; + break; default: r = 0; break; @@ -3045,6 +3054,32 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs); break; } + case KVM_SET_TSC_KHZ: { + u32 user_tsc_khz; + + r = -EINVAL; + if (!kvm_has_tsc_control) + break; + + user_tsc_khz = (u32)arg; + + if (user_tsc_khz >= kvm_max_guest_tsc_khz) + goto out; + + kvm_x86_ops->set_tsc_khz(vcpu, user_tsc_khz); + + r = 0; + goto out; + } + case KVM_GET_TSC_KHZ: { + r = -EIO; + if (check_tsc_unstable()) + goto out; + + r = vcpu_tsc_khz(vcpu); + + goto out; + } default: r = -EINVAL; } diff --git a/include/linux/kvm.h b/include/linux/kvm.h index ea2dc1a2e13d..2f63ebeac639 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -541,6 +541,8 @@ struct kvm_ppc_pvinfo { #define KVM_CAP_PPC_GET_PVINFO 57 #define KVM_CAP_PPC_IRQ_LEVEL 58 #define KVM_CAP_ASYNC_PF 59 +#define KVM_CAP_TSC_CONTROL 60 +#define KVM_CAP_GET_TSC_KHZ 61 #ifdef KVM_CAP_IRQ_ROUTING @@ -677,6 +679,9 @@ struct kvm_clock_data { #define KVM_SET_PIT2 _IOW(KVMIO, 0xa0, struct kvm_pit_state2) /* Available with KVM_CAP_PPC_GET_PVINFO */ #define KVM_PPC_GET_PVINFO _IOW(KVMIO, 0xa1, struct kvm_ppc_pvinfo) +/* Available with KVM_CAP_TSC_CONTROL */ +#define KVM_SET_TSC_KHZ _IO(KVMIO, 0xa2) +#define KVM_GET_TSC_KHZ _IO(KVMIO, 0xa3) /* * ioctls for vcpu fds -- cgit v1.2.3-71-gd317 From b42fc3cbc3d6e284463e93896679379443e19d56 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Tue, 12 Apr 2011 21:30:17 -0400 Subject: KVM: Fix off by one in kvm_for_each_vcpu iteration This patch avoids gcc issuing the following warning when KVM_MAX_VCPUS=1: warning: array subscript is above array bounds kvm_for_each_vcpu currently checks to see if the index for the vcpu is valid /after/ loading it. We don't run into problems because the address is still inside the enclosing struct kvm and we never deference or write to it, so this isn't a security issue. The warning occurs when KVM_MAX_VCPUS=1 because the increment portion of the loop will *always* cause the loop to load an invalid location since ++idx will always be > 0. This patch moves the load so that the check occurs before the load and we don't run into the compiler warning. 
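Concretely, with KVM_MAX_VCPUS == 1 the old macro expands to roughly the following (sketch; kvm_get_vcpu() indexes kvm->vcpus[] directly, as its definition just above the macro shows):

    for (idx = 0, vcpup = kvm_get_vcpu(kvm, idx);          /* vcpus[0]: fine */
         idx < atomic_read(&kvm->online_vcpus) && vcpup;
         vcpup = kvm_get_vcpu(kvm, ++idx))                  /* vcpus[1]: out of
                                                               bounds for a
                                                               one-element array */
        ...;

Moving the kvm_get_vcpu() call into the loop condition, after the bounds check on idx, keeps the load from ever being generated for an out-of-range index.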
Signed-off-by: Neil Brown Signed-off-by: Jeff Mahoney Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index d1f507567068..0bc3d372e3cb 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -296,9 +296,10 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) } #define kvm_for_each_vcpu(idx, vcpup, kvm) \ - for (idx = 0, vcpup = kvm_get_vcpu(kvm, idx); \ - idx < atomic_read(&kvm->online_vcpus) && vcpup; \ - vcpup = kvm_get_vcpu(kvm, ++idx)) + for (idx = 0; \ + idx < atomic_read(&kvm->online_vcpus) && \ + (vcpup = kvm_get_vcpu(kvm, idx)) != NULL; \ + idx++) int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id); void kvm_vcpu_uninit(struct kvm_vcpu *vcpu); -- cgit v1.2.3-71-gd317 From b130e5cec958bae3867cf6ab09a9b24ba8fada01 Mon Sep 17 00:00:00 2001 From: Javier Cardona Date: Tue, 3 May 2011 16:57:07 -0700 Subject: nl80211: Introduce NL80211_MESH_SETUP_USERSPACE_AMPE Introduce a new configuration option to support AMPE from userspace. Prior to this series we only supported authentication in userspace: an authentication daemon would authenticate peer candidates in userspace and hand them over to the kernel. From that point the mesh stack would take over and establish a peer link (Mesh Peering Management). These patches introduce support for Authenticated Mesh Peering Exchange in userspace. The userspace daemon implements the AMPE protocol and on successfull completion create mesh peers and install encryption keys. Signed-off-by: Javier Cardona Signed-off-by: John W. Linville --- include/linux/nl80211.h | 10 ++++++++++ include/net/cfg80211.h | 4 +++- net/mac80211/cfg.c | 6 +++++- net/mac80211/ieee80211_i.h | 6 +++++- net/mac80211/mesh.c | 2 +- net/mac80211/mesh_plink.c | 5 +++-- net/wireless/nl80211.c | 4 +++- 7 files changed, 30 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index a75dea9c416e..c53b916036c5 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1769,6 +1769,15 @@ enum nl80211_meshconf_params { * @NL80211_MESH_SETUP_USERSPACE_AUTH: Enable this option if an authentication * daemon will be authenticating mesh candidates. * + * @NL80211_MESH_SETUP_USERSPACE_AMPE: Enable this option if an authentication + * daemon will be securing peer link frames. AMPE is a secured version of Mesh + * Peering Management (MPM) and is implemented with the assistance of a + * userspace daemon. When this flag is set, the kernel will send peer + * management frames to a userspace daemon that will implement AMPE + * functionality (security capabilities selection, key confirmation, and key + * management). When the flag is unset (default), the kernel can autonomously + * complete (unsecured) mesh peering without the need of a userspace daemon. 
+ * * @NL80211_MESH_SETUP_ATTR_MAX: highest possible mesh setup attribute number * @__NL80211_MESH_SETUP_ATTR_AFTER_LAST: Internal use */ @@ -1778,6 +1787,7 @@ enum nl80211_mesh_setup_params { NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC, NL80211_MESH_SETUP_IE, NL80211_MESH_SETUP_USERSPACE_AUTH, + NL80211_MESH_SETUP_USERSPACE_AMPE, /* keep last */ __NL80211_MESH_SETUP_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 0920daf36807..10c17d68059f 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -695,7 +695,8 @@ struct mesh_config { * @path_metric: which metric to use * @ie: vendor information elements (optional) * @ie_len: length of vendor information elements - * @is_secure: or not + * @is_authenticated: this mesh requires authentication + * @is_secure: this mesh uses security * * These parameters are fixed when the mesh is created. */ @@ -706,6 +707,7 @@ struct mesh_setup { u8 path_metric; const u8 *ie; u8 ie_len; + bool is_authenticated; bool is_secure; }; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 1ebc13383ae7..18c2555e04e6 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1064,7 +1064,11 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh, memcpy(ifmsh->mesh_id, setup->mesh_id, ifmsh->mesh_id_len); ifmsh->mesh_pp_id = setup->path_sel_proto; ifmsh->mesh_pm_id = setup->path_metric; - ifmsh->is_secure = setup->is_secure; + ifmsh->security = IEEE80211_MESH_SEC_NONE; + if (setup->is_authenticated) + ifmsh->security |= IEEE80211_MESH_SEC_AUTHED; + if (setup->is_secure) + ifmsh->security |= IEEE80211_MESH_SEC_SECURED; return 0; } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index e89bc27f8dc3..7f4d0dc1d534 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -490,7 +490,11 @@ struct ieee80211_if_mesh { bool accepting_plinks; const u8 *ie; u8 ie_len; - bool is_secure; + enum { + IEEE80211_MESH_SEC_NONE = 0x0, + IEEE80211_MESH_SEC_AUTHED = 0x1, + IEEE80211_MESH_SEC_SECURED = 0x2, + } security; }; #ifdef CONFIG_MAC80211_MESH diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index c1299e249541..2a59eb345131 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -574,7 +574,7 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, &elems); /* ignore beacons from secure mesh peers if our security is off */ - if (elems.rsn_len && !sdata->u.mesh.is_secure) + if (elems.rsn_len && sdata->u.mesh.security == IEEE80211_MESH_SEC_NONE) return; if (elems.ds_params && elems.ds_params_len == 1) diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 84e5b056af02..87abf8deb369 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -251,7 +251,7 @@ void mesh_neighbour_update(u8 *hw_addr, u32 rates, rcu_read_unlock(); /* Userspace handles peer allocation when security is enabled * */ - if (sdata->u.mesh.is_secure) + if (sdata->u.mesh.security & IEEE80211_MESH_SEC_AUTHED) cfg80211_notify_new_peer_candidate(sdata->dev, hw_addr, elems->ie_start, elems->total_len, GFP_KERNEL); @@ -460,7 +460,8 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m mpl_dbg("Mesh plink: missing necessary peer link ie\n"); return; } - if (elems.rsn_len && !sdata->u.mesh.is_secure) { + if (elems.rsn_len && + sdata->u.mesh.security == IEEE80211_MESH_SEC_NONE) { mpl_dbg("Mesh plink: can't establish link with secure peer\n"); return; } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 
0a199a1ca9b6..64efc2d7a7ad 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2871,6 +2871,7 @@ static const struct nla_policy [NL80211_MESH_SETUP_USERSPACE_AUTH] = { .type = NLA_FLAG }, [NL80211_MESH_SETUP_IE] = { .type = NLA_BINARY, .len = IEEE80211_MAX_DATA_LEN }, + [NL80211_MESH_SETUP_USERSPACE_AMPE] = { .type = NLA_FLAG }, }; static int nl80211_parse_mesh_config(struct genl_info *info, @@ -2980,7 +2981,8 @@ static int nl80211_parse_mesh_setup(struct genl_info *info, setup->ie = nla_data(ieattr); setup->ie_len = nla_len(ieattr); } - setup->is_secure = nla_get_flag(tb[NL80211_MESH_SETUP_USERSPACE_AUTH]); + setup->is_authenticated = nla_get_flag(tb[NL80211_MESH_SETUP_USERSPACE_AUTH]); + setup->is_secure = nla_get_flag(tb[NL80211_MESH_SETUP_USERSPACE_AMPE]); return 0; } -- cgit v1.2.3-71-gd317 From d3aaec8ab76c2d604c2ba7332e1338674607597b Mon Sep 17 00:00:00 2001 From: Javier Cardona Date: Tue, 3 May 2011 16:57:09 -0700 Subject: mac80211: Drop MESH_PLINK category and use new ANA-approved MESH_ACTION Note: This breaks compatibility with previous mesh protocol instances. Signed-off-by: Javier Cardona Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 5 ++--- net/mac80211/mesh.c | 2 +- net/mac80211/mesh_plink.c | 2 +- net/mac80211/rx.c | 7 +++++-- 4 files changed, 9 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 79690b710665..ee1c96a866cc 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1261,9 +1261,8 @@ enum ieee80211_category { WLAN_CATEGORY_MULTIHOP_ACTION = 14, WLAN_CATEGORY_SELF_PROTECTED = 15, WLAN_CATEGORY_WMM = 17, - /* TODO: remove MESH_PLINK and MESH_PATH_SEL after */ - /* mesh is updated to current 802.11s draft */ - WLAN_CATEGORY_MESH_PLINK = 30, + /* TODO: remove MESH_PATH_SEL after mesh is updated + * to current 802.11s draft */ WLAN_CATEGORY_MESH_PATH_SEL = 32, WLAN_CATEGORY_VENDOR_SPECIFIC_PROTECTED = 126, WLAN_CATEGORY_VENDOR_SPECIFIC = 127, diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 2a59eb345131..75378e852f00 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -600,7 +600,7 @@ static void ieee80211_mesh_rx_mgmt_action(struct ieee80211_sub_if_data *sdata, struct ieee80211_rx_status *rx_status) { switch (mgmt->u.action.category) { - case WLAN_CATEGORY_MESH_PLINK: + case WLAN_CATEGORY_MESH_ACTION: mesh_rx_plink_frame(sdata, mgmt, len, rx_status); break; case WLAN_CATEGORY_MESH_PATH_SEL: diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 87abf8deb369..0120e9e36286 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -182,7 +182,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, memcpy(mgmt->da, da, ETH_ALEN); memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); /* BSSID is left zeroed, wildcard value */ - mgmt->u.action.category = WLAN_CATEGORY_MESH_PLINK; + mgmt->u.action.category = WLAN_CATEGORY_MESH_ACTION; mgmt->u.action.u.plink_action.action_code = action; if (action == PLINK_CLOSE) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 81241e18f3a4..634f3d97a279 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -495,8 +495,11 @@ ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx) return RX_DROP_MONITOR; if (ieee80211_is_action(hdr->frame_control)) { + u8 category; mgmt = (struct ieee80211_mgmt *)hdr; - if (mgmt->u.action.category != WLAN_CATEGORY_MESH_PLINK) + category = mgmt->u.action.category; + if (category != 
WLAN_CATEGORY_MESH_ACTION && + category != WLAN_CATEGORY_SELF_PROTECTED) return RX_DROP_MONITOR; return RX_CONTINUE; } @@ -2205,7 +2208,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) goto handled; } break; - case WLAN_CATEGORY_MESH_PLINK: + case WLAN_CATEGORY_MESH_ACTION: if (!ieee80211_vif_is_mesh(&sdata->vif)) break; goto queue; -- cgit v1.2.3-71-gd317 From 0a35d36d6f019bde6c98812456798275b02e5aee Mon Sep 17 00:00:00 2001 From: Javier Cardona Date: Wed, 4 May 2011 10:24:56 -0700 Subject: cfg80211: Use capability info to detect mesh beacons. Mesh beacons no longer use all-zeroes BSSID. Beacon frames for MBSS, infrastructure BSS, or IBSS are differentiated by the Capability Information field in the Beacon frame. A mesh STA sets the ESS and IBSS subfields to 0 in transmitted Beacon or Probe Response management frames. Signed-off-by: Javier Cardona Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 5 +++++ net/wireless/scan.c | 14 +++++++------- 2 files changed, 12 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index ee1c96a866cc..d527fb7bd67a 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1002,6 +1002,11 @@ struct ieee80211_ht_info { #define WLAN_CAPABILITY_ESS (1<<0) #define WLAN_CAPABILITY_IBSS (1<<1) + +/* A mesh STA sets the ESS and IBSS capability bits to zero */ +#define WLAN_CAPABILITY_IS_MBSS(cap) \ + (!((cap) & (WLAN_CAPABILITY_ESS | WLAN_CAPABILITY_IBSS))) + #define WLAN_CAPABILITY_CF_POLLABLE (1<<2) #define WLAN_CAPABILITY_CF_POLL_REQUEST (1<<3) #define WLAN_CAPABILITY_PRIVACY (1<<4) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index fbf6f33ae4d0..62e542a2b192 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -210,7 +210,7 @@ static bool is_mesh(struct cfg80211_bss *a, { const u8 *ie; - if (!is_zero_ether_addr(a->bssid)) + if (!WLAN_CAPABILITY_IS_MBSS(a->capability)) return false; ie = cfg80211_find_ie(WLAN_EID_MESH_ID, @@ -248,11 +248,7 @@ static int cmp_bss(struct cfg80211_bss *a, if (a->channel != b->channel) return b->channel->center_freq - a->channel->center_freq; - r = memcmp(a->bssid, b->bssid, ETH_ALEN); - if (r) - return r; - - if (is_zero_ether_addr(a->bssid)) { + if (WLAN_CAPABILITY_IS_MBSS(a->capability | b->capability)) { r = cmp_ies(WLAN_EID_MESH_ID, a->information_elements, a->len_information_elements, @@ -267,6 +263,10 @@ static int cmp_bss(struct cfg80211_bss *a, b->len_information_elements); } + r = memcmp(a->bssid, b->bssid, ETH_ALEN); + if (r) + return r; + return cmp_ies(WLAN_EID_SSID, a->information_elements, a->len_information_elements, @@ -407,7 +407,7 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, res->ts = jiffies; - if (is_zero_ether_addr(res->pub.bssid)) { + if (WLAN_CAPABILITY_IS_MBSS(res->pub.capability)) { /* must be mesh, verify */ meshid = cfg80211_find_ie(WLAN_EID_MESH_ID, res->pub.information_elements, -- cgit v1.2.3-71-gd317 From 9c3990aaec0ad9f686ef6480f6861f2df89b2a7a Mon Sep 17 00:00:00 2001 From: Javier Cardona Date: Tue, 3 May 2011 16:57:11 -0700 Subject: nl80211: Let userspace drive the peer link management states. Signed-off-by: Javier Cardona Signed-off-by: John W. 
Linville --- include/linux/nl80211.h | 4 ++++ include/net/cfg80211.h | 29 +++++++++++++++++++++++++++++ net/mac80211/cfg.c | 30 +++++++++++++++++++++--------- net/mac80211/sta_info.h | 23 ----------------------- net/wireless/nl80211.c | 6 ++++++ 5 files changed, 60 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index c53b916036c5..de96783954a1 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -913,6 +913,9 @@ enum nl80211_commands { * @NL80211_ATTR_SUPPORT_MESH_AUTH: Currently, this means the underlying driver * allows auth frames in a mesh to be passed to userspace for processing via * the @NL80211_MESH_SETUP_USERSPACE_AUTH flag. + * @NL80211_ATTR_STA_PLINK_STATE: The state of a mesh peer link. Used when + * userspace is driving the peer link management state machine. + * @NL80211_MESH_SETUP_USERSPACE_AMPE must be enabled. * * @NL80211_ATTR_WOWLAN_SUPPORTED: indicates, as part of the wiphy capabilities, * the supported WoWLAN triggers @@ -1109,6 +1112,7 @@ enum nl80211_attrs { NL80211_ATTR_WIPHY_ANTENNA_AVAIL_RX, NL80211_ATTR_SUPPORT_MESH_AUTH, + NL80211_ATTR_STA_PLINK_STATE, NL80211_ATTR_WOWLAN_TRIGGERS, NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 10c17d68059f..4b0d035be64f 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -371,6 +371,33 @@ enum plink_actions { PLINK_ACTION_BLOCK, }; +/** + * enum plink_states - state of a mesh peer link finite state machine + * + * @PLINK_LISTEN: initial state, considered the implicit state of non + * existant mesh peer links + * @PLINK_OPN_SNT: mesh plink open frame has been sent to this mesh + * peer @PLINK_OPN_RCVD: mesh plink open frame has been received from + * this mesh peer + * @PLINK_CNF_RCVD: mesh plink confirm frame has been received from + * this mesh peer + * @PLINK_ESTAB: mesh peer link is established + * @PLINK_HOLDING: mesh peer link is being closed or cancelled + * @PLINK_BLOCKED: all frames transmitted from this mesh plink are + * discarded + * @PLINK_INVALID: reserved + */ +enum plink_state { + PLINK_LISTEN, + PLINK_OPN_SNT, + PLINK_OPN_RCVD, + PLINK_CNF_RCVD, + PLINK_ESTAB, + PLINK_HOLDING, + PLINK_BLOCKED, + PLINK_INVALID, +}; + /** * struct station_parameters - station parameters * @@ -387,6 +414,7 @@ enum plink_actions { * @listen_interval: listen interval or -1 for no change * @aid: AID or zero for no change * @plink_action: plink action to take + * @plink_state: set the peer link state for a station * @ht_capa: HT capabilities of station */ struct station_parameters { @@ -397,6 +425,7 @@ struct station_parameters { u16 aid; u8 supported_rates_len; u8 plink_action; + u8 plink_state; struct ieee80211_ht_cap *ht_capa; }; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 18c2555e04e6..51f775772d9e 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -734,15 +734,27 @@ static void sta_apply_parameters(struct ieee80211_local *local, params->ht_capa, &sta->sta.ht_cap); - if (ieee80211_vif_is_mesh(&sdata->vif) && params->plink_action) { - switch (params->plink_action) { - case PLINK_ACTION_OPEN: - mesh_plink_open(sta); - break; - case PLINK_ACTION_BLOCK: - mesh_plink_block(sta); - break; - } + if (ieee80211_vif_is_mesh(&sdata->vif)) { + if (sdata->u.mesh.security & IEEE80211_MESH_SEC_SECURED) + switch (params->plink_state) { + case PLINK_LISTEN: + case PLINK_ESTAB: + case PLINK_BLOCKED: + sta->plink_state = params->plink_state; + break; + default: 
+ /* nothing */ + break; + } + else + switch (params->plink_action) { + case PLINK_ACTION_OPEN: + mesh_plink_open(sta); + break; + case PLINK_ACTION_BLOCK: + mesh_plink_block(sta); + break; + } } } diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index af1a7f8c8675..f00b4dcb49d7 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -173,29 +173,6 @@ struct sta_ampdu_mlme { }; -/** - * enum plink_state - state of a mesh peer link finite state machine - * - * @PLINK_LISTEN: initial state, considered the implicit state of non existant - * mesh peer links - * @PLINK_OPN_SNT: mesh plink open frame has been sent to this mesh peer - * @PLINK_OPN_RCVD: mesh plink open frame has been received from this mesh peer - * @PLINK_CNF_RCVD: mesh plink confirm frame has been received from this mesh - * peer - * @PLINK_ESTAB: mesh peer link is established - * @PLINK_HOLDING: mesh peer link is being closed or cancelled - * @PLINK_BLOCKED: all frames transmitted from this mesh plink are discarded - */ -enum plink_state { - PLINK_LISTEN, - PLINK_OPN_SNT, - PLINK_OPN_RCVD, - PLINK_CNF_RCVD, - PLINK_ESTAB, - PLINK_HOLDING, - PLINK_BLOCKED -}; - /** * struct sta_info - STA information * diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 64efc2d7a7ad..f698c1d116e4 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -174,6 +174,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_OFFCHANNEL_TX_OK] = { .type = NLA_FLAG }, [NL80211_ATTR_KEY_DEFAULT_TYPES] = { .type = NLA_NESTED }, [NL80211_ATTR_WOWLAN_TRIGGERS] = { .type = NLA_NESTED }, + [NL80211_ATTR_STA_PLINK_STATE] = { .type = NLA_U8 }, }; /* policy for the key attributes */ @@ -2247,6 +2248,7 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) memset(¶ms, 0, sizeof(params)); params.listen_interval = -1; + params.plink_state = PLINK_INVALID; if (info->attrs[NL80211_ATTR_STA_AID]) return -EINVAL; @@ -2278,6 +2280,10 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) params.plink_action = nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]); + if (info->attrs[NL80211_ATTR_STA_PLINK_STATE]) + params.plink_state = + nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_STATE]); + err = get_vlan(info, rdev, ¶ms.vlan); if (err) goto out; -- cgit v1.2.3-71-gd317 From 8f9cb77d6d213c153b0571f494df0c24456aaf47 Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Tue, 3 May 2011 16:57:14 -0700 Subject: mac80211: Self-protected management frames are not robust They may contain encrypted information elements (as AMPE frames do) but they are not encrypted. Signed-off-by: Thomas Pedersen Signed-off-by: Javier Cardona Signed-off-by: John W. 
Linville --- include/linux/ieee80211.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index d527fb7bd67a..b2eee5879883 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1520,6 +1520,7 @@ static inline bool ieee80211_is_robust_mgmt_frame(struct ieee80211_hdr *hdr) category = ((u8 *) hdr) + 24; return *category != WLAN_CATEGORY_PUBLIC && *category != WLAN_CATEGORY_HT && + *category != WLAN_CATEGORY_SELF_PROTECTED && *category != WLAN_CATEGORY_VENDOR_SPECIFIC; } -- cgit v1.2.3-71-gd317 From 1073e4ee595265086a592a056d903bf4fcc8885a Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Wed, 11 May 2011 02:08:09 +0200 Subject: bcma: add missing GPIO defines, use PULL register only when available MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Similar patch was committed to ssb. Signed-off-by: Rafał Miłecki Signed-off-by: John W. Linville --- drivers/bcma/driver_chipcommon.c | 6 ++++-- include/linux/bcma/bcma_driver_chipcommon.h | 5 +++++ 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/bcma/driver_chipcommon.c b/drivers/bcma/driver_chipcommon.c index caf596091d4d..606102256b44 100644 --- a/drivers/bcma/driver_chipcommon.c +++ b/drivers/bcma/driver_chipcommon.c @@ -29,8 +29,10 @@ void bcma_core_chipcommon_init(struct bcma_drv_cc *cc) if (cc->core->id.rev >= 35) cc->capabilities_ext = bcma_cc_read32(cc, BCMA_CC_CAP_EXT); - bcma_cc_write32(cc, 0x58, 0); - bcma_cc_write32(cc, 0x5C, 0); + if (cc->core->id.rev >= 20) { + bcma_cc_write32(cc, BCMA_CC_GPIOPULLUP, 0); + bcma_cc_write32(cc, BCMA_CC_GPIOPULLDOWN, 0); + } if (cc->capabilities & BCMA_CC_CAP_PMU) bcma_pmu_init(cc); diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h index 4f8fd6a4c1e6..083c3b6cd5ce 100644 --- a/include/linux/bcma/bcma_driver_chipcommon.h +++ b/include/linux/bcma/bcma_driver_chipcommon.h @@ -113,6 +113,8 @@ #define BCMA_CC_FLASHDATA 0x0048 #define BCMA_CC_BCAST_ADDR 0x0050 #define BCMA_CC_BCAST_DATA 0x0054 +#define BCMA_CC_GPIOPULLUP 0x0058 /* Rev >= 20 only */ +#define BCMA_CC_GPIOPULLDOWN 0x005C /* Rev >= 20 only */ #define BCMA_CC_GPIOIN 0x0060 #define BCMA_CC_GPIOOUT 0x0064 #define BCMA_CC_GPIOOUTEN 0x0068 @@ -121,6 +123,9 @@ #define BCMA_CC_GPIOIRQ 0x0074 #define BCMA_CC_WATCHDOG 0x0080 #define BCMA_CC_GPIOTIMER 0x0088 /* LED powersave (corerev >= 16) */ +#define BCMA_CC_GPIOTIMER_OFFTIME 0x0000FFFF +#define BCMA_CC_GPIOTIMER_OFFTIME_SHIFT 0 +#define BCMA_CC_GPIOTIMER_ONTIME 0xFFFF0000 #define BCMA_CC_GPIOTIMER_ONTIME_SHIFT 16 #define BCMA_CC_GPIOTOUTM 0x008C /* LED powersave (corerev >= 16) */ #define BCMA_CC_CLOCK_N 0x0090 -- cgit v1.2.3-71-gd317 From 8576f815d5c8beb8b10f96abe31831b90af3d352 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Wed, 11 May 2011 02:10:58 +0200 Subject: ssb: move ssb_commit_settings and export it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Committing settings is possible on devices without PCI core (but with CC core). Export it for usage in drivers supporting other cores. Signed-off-by: Rafał Miłecki Signed-off-by: John W.
Linville --- drivers/ssb/driver_pcicore.c | 26 -------------------------- drivers/ssb/main.c | 25 +++++++++++++++++++++++++ include/linux/ssb/ssb.h | 1 + 3 files changed, 26 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/drivers/ssb/driver_pcicore.c b/drivers/ssb/driver_pcicore.c index 8fde1220bc89..82feb348c8bb 100644 --- a/drivers/ssb/driver_pcicore.c +++ b/drivers/ssb/driver_pcicore.c @@ -21,8 +21,6 @@ static u16 ssb_pcie_mdio_read(struct ssb_pcicore *pc, u8 device, u8 address); static void ssb_pcie_mdio_write(struct ssb_pcicore *pc, u8 device, u8 address, u16 data); -static void ssb_commit_settings(struct ssb_bus *bus); - static inline u32 pcicore_read32(struct ssb_pcicore *pc, u16 offset) { @@ -659,30 +657,6 @@ static void ssb_pcie_mdio_write(struct ssb_pcicore *pc, u8 device, pcicore_write32(pc, mdio_control, 0); } -static void ssb_broadcast_value(struct ssb_device *dev, - u32 address, u32 data) -{ - /* This is used for both, PCI and ChipCommon core, so be careful. */ - BUILD_BUG_ON(SSB_PCICORE_BCAST_ADDR != SSB_CHIPCO_BCAST_ADDR); - BUILD_BUG_ON(SSB_PCICORE_BCAST_DATA != SSB_CHIPCO_BCAST_DATA); - - ssb_write32(dev, SSB_PCICORE_BCAST_ADDR, address); - ssb_read32(dev, SSB_PCICORE_BCAST_ADDR); /* flush */ - ssb_write32(dev, SSB_PCICORE_BCAST_DATA, data); - ssb_read32(dev, SSB_PCICORE_BCAST_DATA); /* flush */ -} - -static void ssb_commit_settings(struct ssb_bus *bus) -{ - struct ssb_device *dev; - - dev = bus->chipco.dev ? bus->chipco.dev : bus->pcicore.dev; - if (WARN_ON(!dev)) - return; - /* This forces an update of the cached registers. */ - ssb_broadcast_value(dev, 0xFD8, 0); -} - int ssb_pcicore_dev_irqvecs_enable(struct ssb_pcicore *pc, struct ssb_device *dev) { diff --git a/drivers/ssb/main.c b/drivers/ssb/main.c index ad3da93a428c..ee2937c41424 100644 --- a/drivers/ssb/main.c +++ b/drivers/ssb/main.c @@ -1329,6 +1329,31 @@ error: } EXPORT_SYMBOL(ssb_bus_powerup); +static void ssb_broadcast_value(struct ssb_device *dev, + u32 address, u32 data) +{ + /* This is used for both, PCI and ChipCommon core, so be careful. */ + BUILD_BUG_ON(SSB_PCICORE_BCAST_ADDR != SSB_CHIPCO_BCAST_ADDR); + BUILD_BUG_ON(SSB_PCICORE_BCAST_DATA != SSB_CHIPCO_BCAST_DATA); + + ssb_write32(dev, SSB_PCICORE_BCAST_ADDR, address); + ssb_read32(dev, SSB_PCICORE_BCAST_ADDR); /* flush */ + ssb_write32(dev, SSB_PCICORE_BCAST_DATA, data); + ssb_read32(dev, SSB_PCICORE_BCAST_DATA); /* flush */ +} + +void ssb_commit_settings(struct ssb_bus *bus) +{ + struct ssb_device *dev; + + dev = bus->chipco.dev ? bus->chipco.dev : bus->pcicore.dev; + if (WARN_ON(!dev)) + return; + /* This forces an update of the cached registers. */ + ssb_broadcast_value(dev, 0xFD8, 0); +} +EXPORT_SYMBOL(ssb_commit_settings); + u32 ssb_admatch_base(u32 adm) { u32 base = 0; diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h index 7e99b348834c..f017b8900f78 100644 --- a/include/linux/ssb/ssb.h +++ b/include/linux/ssb/ssb.h @@ -518,6 +518,7 @@ extern int ssb_bus_may_powerdown(struct ssb_bus *bus); * Otherwise static always-on powercontrol will be used. */ extern int ssb_bus_powerup(struct ssb_bus *bus, bool dynamic_pctl); +extern void ssb_commit_settings(struct ssb_bus *bus); /* Various helper functions */ extern u32 ssb_admatch_base(u32 adm); -- cgit v1.2.3-71-gd317 From 807f8a8c300435d5483e8d78df9dcdbc27333166 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Wed, 11 May 2011 17:09:35 +0300 Subject: cfg80211/nl80211: add support for scheduled scans Implement new functionality for scheduled scan offload. 
With this feature we can scan automatically at certain intervals. The idea is that the hardware can perform scan automatically and filter on desired results without waking up the host unnecessarily. Add NL80211_CMD_START_SCHED_SCAN and NL80211_CMD_STOP_SCHED_SCAN commands to the nl80211 interface. When results are available they are reported by NL80211_CMD_SCHED_SCAN_RESULTS events. The userspace is informed when the scheduled scan has stopped with a NL80211_CMD_SCHED_SCAN_STOPPED event, which can be triggered either by the driver or by a call to NL80211_CMD_STOP_SCHED_SCAN. Signed-off-by: Luciano Coelho Signed-off-by: John W. Linville --- include/linux/nl80211.h | 25 +++++ include/net/cfg80211.h | 57 +++++++++++ net/wireless/core.c | 12 ++- net/wireless/core.h | 7 ++ net/wireless/nl80211.c | 250 ++++++++++++++++++++++++++++++++++++++++++++++++ net/wireless/nl80211.h | 4 + net/wireless/scan.c | 70 ++++++++++++++ 7 files changed, 424 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index de96783954a1..f8b5595ba4af 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -203,6 +203,26 @@ * @NL80211_CMD_SCAN_ABORTED: scan was aborted, for unspecified reasons, * partial scan results may be available * + * @NL80211_CMD_START_SCHED_SCAN: start a scheduled scan. Like with normal + * scans, if SSIDs (%NL80211_ATTR_SCAN_SSIDS) are passed, they are used + * in the probe requests. For broadcast, a broadcast SSID must be + * passed (ie. an empty string). If no SSID is passed, no probe + * requests are sent and a passive scan is performed. + * %NL80211_ATTR_SCAN_FREQUENCIES, if passed, define which channels + * should be scanned; if not passed, all channels allowed for the + * current regulatory domain are used. Extra IEs can also be passed + * from the userspace by using the %NL80211_ATTR_IE attribute. + * @NL80211_CMD_STOP_SCHED_SCAN: stop a scheduled scan + * @NL80211_CMD_SCHED_SCAN_RESULTS: indicates that there are scheduled scan + * results available. + * @NL80211_CMD_SCHED_SCAN_STOPPED: indicates that the scheduled scan has + * stopped. The driver may issue this event at any time during a + * scheduled scan. One reason for stopping the scan is if the hardware + * does not support starting an association or a normal scan while running + * a scheduled scan. This event is also sent when the + * %NL80211_CMD_STOP_SCHED_SCAN command is received or when the interface + * is brought down while a scheduled scan was running. + * * @NL80211_CMD_GET_SURVEY: get survey resuls, e.g. 
channel occupation * or noise level * @NL80211_CMD_NEW_SURVEY_RESULTS: survey data notification (as a reply to @@ -545,6 +565,11 @@ enum nl80211_commands { NL80211_CMD_GET_WOWLAN, NL80211_CMD_SET_WOWLAN, + NL80211_CMD_START_SCHED_SCAN, + NL80211_CMD_STOP_SCHED_SCAN, + NL80211_CMD_SCHED_SCAN_RESULTS, + NL80211_CMD_SCHED_SCAN_STOPPED, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 4b0d035be64f..e214c85b74d2 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -823,6 +823,33 @@ struct cfg80211_scan_request { struct ieee80211_channel *channels[0]; }; +/** + * struct cfg80211_sched_scan_request - scheduled scan request description + * + * @ssids: SSIDs to scan for (passed in the probe_reqs in active scans) + * @n_ssids: number of SSIDs + * @n_channels: total number of channels to scan + * @ie: optional information element(s) to add into Probe Request or %NULL + * @ie_len: length of ie in octets + * @wiphy: the wiphy this was for + * @dev: the interface + * @channels: channels to scan + */ +struct cfg80211_sched_scan_request { + struct cfg80211_ssid *ssids; + int n_ssids; + u32 n_channels; + const u8 *ie; + size_t ie_len; + + /* internal */ + struct wiphy *wiphy; + struct net_device *dev; + + /* keep last */ + struct ieee80211_channel *channels[0]; +}; + /** * enum cfg80211_signal_type - signal type * @@ -1292,6 +1319,10 @@ struct cfg80211_wowlan { * @set_power_mgmt: Configure WLAN power management. A timeout value of -1 * allows the driver to adjust the dynamic ps timeout value. * @set_cqm_rssi_config: Configure connection quality monitor RSSI threshold. + * @sched_scan_start: Tell the driver to start a scheduled scan. + * @sched_scan_stop: Tell the driver to stop an ongoing scheduled + * scan. The driver_initiated flag specifies whether the driver + * itself has informed that the scan has stopped. * * @mgmt_frame_register: Notify driver that a management frame type was * registered. Note that this callback may not sleep, and cannot run @@ -1478,6 +1509,12 @@ struct cfg80211_ops { int (*set_ringparam)(struct wiphy *wiphy, u32 tx, u32 rx); void (*get_ringparam)(struct wiphy *wiphy, u32 *tx, u32 *tx_max, u32 *rx, u32 *rx_max); + + int (*sched_scan_start)(struct wiphy *wiphy, + struct net_device *dev, + struct cfg80211_sched_scan_request *request); + int (*sched_scan_stop)(struct wiphy *wiphy, struct net_device *dev, + bool driver_initiated); }; /* @@ -1522,6 +1559,7 @@ struct cfg80211_ops { * @WIPHY_FLAG_IBSS_RSN: The device supports IBSS RSN. * @WIPHY_FLAG_MESH_AUTH: The device supports mesh authentication by routing * auth frames to userspace. See @NL80211_MESH_SETUP_USERSPACE_AUTH. + * @WIPHY_FLAG_SCHED_SCAN: The device supports scheduled scans. 
*/ enum wiphy_flags { WIPHY_FLAG_CUSTOM_REGULATORY = BIT(0), @@ -1534,6 +1572,7 @@ enum wiphy_flags { WIPHY_FLAG_CONTROL_PORT_PROTOCOL = BIT(7), WIPHY_FLAG_IBSS_RSN = BIT(8), WIPHY_FLAG_MESH_AUTH = BIT(10), + WIPHY_FLAG_SUPPORTS_SCHED_SCAN = BIT(11), }; struct mac_address { @@ -2354,6 +2393,24 @@ int cfg80211_wext_siwpmksa(struct net_device *dev, */ void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted); +/** + * cfg80211_sched_scan_results - notify that new scan results are available + * + * @wiphy: the wiphy which got scheduled scan results + */ +void cfg80211_sched_scan_results(struct wiphy *wiphy); + +/** + * cfg80211_sched_scan_stopped - notify that the scheduled scan has stopped + * + * @wiphy: the wiphy on which the scheduled scan stopped + * + * The driver can call this function to inform cfg80211 that the + * scheduled scan had to be stopped, for whatever reason. The driver + * is then called back via the sched_scan_stop operation when done. + */ +void cfg80211_sched_scan_stopped(struct wiphy *wiphy); + /** * cfg80211_inform_bss_frame - inform cfg80211 of a received BSS frame * diff --git a/net/wireless/core.c b/net/wireless/core.c index bea0d80710c8..f924a49b2425 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -370,7 +370,8 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) spin_lock_init(&rdev->bss_lock); INIT_LIST_HEAD(&rdev->bss_list); INIT_WORK(&rdev->scan_done_wk, __cfg80211_scan_done); - + INIT_WORK(&rdev->sched_scan_results_wk, __cfg80211_sched_scan_results); + INIT_WORK(&rdev->sched_scan_stopped_wk, __cfg80211_sched_scan_stopped); #ifdef CONFIG_CFG80211_WEXT rdev->wiphy.wext = &cfg80211_wext_handler; #endif @@ -672,6 +673,11 @@ static void wdev_cleanup_work(struct work_struct *work) ___cfg80211_scan_done(rdev, true); } + if (WARN_ON(rdev->sched_scan_req && + rdev->sched_scan_req->dev == wdev->netdev)) { + __cfg80211_stop_sched_scan(rdev, false); + } + cfg80211_unlock_rdev(rdev); mutex_lock(&rdev->devlist_mtx); @@ -759,6 +765,10 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, break; case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_STATION: + cfg80211_lock_rdev(rdev); + __cfg80211_stop_sched_scan(rdev, false); + cfg80211_unlock_rdev(rdev); + wdev_lock(wdev); #ifdef CONFIG_CFG80211_WEXT kfree(wdev->wext.ie); diff --git a/net/wireless/core.h b/net/wireless/core.h index 7a18c10a7fb6..e3f7b1d995cd 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -60,8 +60,11 @@ struct cfg80211_registered_device { struct rb_root bss_tree; u32 bss_generation; struct cfg80211_scan_request *scan_req; /* protected by RTNL */ + struct cfg80211_sched_scan_request *sched_scan_req; unsigned long suspend_at; struct work_struct scan_done_wk; + struct work_struct sched_scan_results_wk; + struct work_struct sched_scan_stopped_wk; #ifdef CONFIG_NL80211_TESTMODE struct genl_info *testmode_info; @@ -411,6 +414,10 @@ void cfg80211_sme_rx_auth(struct net_device *dev, const u8 *buf, size_t len); void cfg80211_sme_disassoc(struct net_device *dev, int idx); void __cfg80211_scan_done(struct work_struct *wk); void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool leak); +void __cfg80211_sched_scan_results(struct work_struct *wk); +int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev, + bool driver_initiated); +void __cfg80211_sched_scan_stopped(struct work_struct *wk); void cfg80211_upload_connect_keys(struct wireless_dev *wdev); int cfg80211_change_iface(struct 
cfg80211_registered_device *rdev, struct net_device *dev, enum nl80211_iftype ntype, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 95dd5832e719..4fac370284c0 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -761,6 +761,8 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, } CMD(set_channel, SET_CHANNEL); CMD(set_wds_peer, SET_WDS_PEER); + if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) + CMD(sched_scan_start, START_SCHED_SCAN); #undef CMD @@ -3357,6 +3359,179 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) return err; } +static int nl80211_start_sched_scan(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_sched_scan_request *request; + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct cfg80211_ssid *ssid; + struct ieee80211_channel *channel; + struct nlattr *attr; + struct wiphy *wiphy; + int err, tmp, n_ssids = 0, n_channels, i; + enum ieee80211_band band; + size_t ie_len; + + if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) || + !rdev->ops->sched_scan_start) + return -EOPNOTSUPP; + + if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE])) + return -EINVAL; + + if (rdev->sched_scan_req) + return -EINPROGRESS; + + wiphy = &rdev->wiphy; + + if (info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]) { + n_channels = validate_scan_freqs( + info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]); + if (!n_channels) + return -EINVAL; + } else { + n_channels = 0; + + for (band = 0; band < IEEE80211_NUM_BANDS; band++) + if (wiphy->bands[band]) + n_channels += wiphy->bands[band]->n_channels; + } + + if (info->attrs[NL80211_ATTR_SCAN_SSIDS]) + nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SSIDS], + tmp) + n_ssids++; + + if (n_ssids > wiphy->max_scan_ssids) + return -EINVAL; + + if (info->attrs[NL80211_ATTR_IE]) + ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); + else + ie_len = 0; + + if (ie_len > wiphy->max_scan_ie_len) + return -EINVAL; + + request = kzalloc(sizeof(*request) + + sizeof(*ssid) * n_ssids + + sizeof(channel) * n_channels + + ie_len, GFP_KERNEL); + if (!request) + return -ENOMEM; + + if (n_ssids) + request->ssids = (void *)&request->channels[n_channels]; + request->n_ssids = n_ssids; + if (ie_len) { + if (request->ssids) + request->ie = (void *)(request->ssids + n_ssids); + else + request->ie = (void *)(request->channels + n_channels); + } + + i = 0; + if (info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]) { + /* user specified, bail out if channel not found */ + nla_for_each_nested(attr, + info->attrs[NL80211_ATTR_SCAN_FREQUENCIES], + tmp) { + struct ieee80211_channel *chan; + + chan = ieee80211_get_channel(wiphy, nla_get_u32(attr)); + + if (!chan) { + err = -EINVAL; + goto out_free; + } + + /* ignore disabled channels */ + if (chan->flags & IEEE80211_CHAN_DISABLED) + continue; + + request->channels[i] = chan; + i++; + } + } else { + /* all channels */ + for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + int j; + if (!wiphy->bands[band]) + continue; + for (j = 0; j < wiphy->bands[band]->n_channels; j++) { + struct ieee80211_channel *chan; + + chan = &wiphy->bands[band]->channels[j]; + + if (chan->flags & IEEE80211_CHAN_DISABLED) + continue; + + request->channels[i] = chan; + i++; + } + } + } + + if (!i) { + err = -EINVAL; + goto out_free; + } + + request->n_channels = i; + + i = 0; + if (info->attrs[NL80211_ATTR_SCAN_SSIDS]) { + nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SSIDS], + tmp) { + 
if (request->ssids[i].ssid_len > + IEEE80211_MAX_SSID_LEN) { + err = -EINVAL; + goto out_free; + } + memcpy(request->ssids[i].ssid, nla_data(attr), + nla_len(attr)); + request->ssids[i].ssid_len = nla_len(attr); + i++; + } + } + + if (info->attrs[NL80211_ATTR_IE]) { + request->ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); + memcpy((void *)request->ie, + nla_data(info->attrs[NL80211_ATTR_IE]), + request->ie_len); + } + + request->dev = dev; + request->wiphy = &rdev->wiphy; + + err = rdev->ops->sched_scan_start(&rdev->wiphy, dev, request); + if (!err) { + rdev->sched_scan_req = request; + nl80211_send_sched_scan(rdev, dev, + NL80211_CMD_START_SCHED_SCAN); + goto out; + } + +out_free: + kfree(request); +out: + return err; +} + +static int nl80211_stop_sched_scan(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + + if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) || + !rdev->ops->sched_scan_stop) + return -EOPNOTSUPP; + + return __cfg80211_stop_sched_scan(rdev, false); +} + static int nl80211_send_bss(struct sk_buff *msg, u32 pid, u32 seq, int flags, struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, @@ -5326,6 +5501,22 @@ static struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .dumpit = nl80211_dump_scan, }, + { + .cmd = NL80211_CMD_START_SCHED_SCAN, + .doit = nl80211_start_sched_scan, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, + { + .cmd = NL80211_CMD_STOP_SCHED_SCAN, + .doit = nl80211_stop_sched_scan, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, { .cmd = NL80211_CMD_AUTHENTICATE, .doit = nl80211_authenticate, @@ -5652,6 +5843,28 @@ static int nl80211_send_scan_msg(struct sk_buff *msg, return -EMSGSIZE; } +static int +nl80211_send_sched_scan_msg(struct sk_buff *msg, + struct cfg80211_registered_device *rdev, + struct net_device *netdev, + u32 pid, u32 seq, int flags, u32 cmd) +{ + void *hdr; + + hdr = nl80211hdr_put(msg, pid, seq, flags, cmd); + if (!hdr) + return -1; + + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx); + NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex); + + return genlmsg_end(msg, hdr); + + nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + void nl80211_send_scan_start(struct cfg80211_registered_device *rdev, struct net_device *netdev) { @@ -5709,6 +5922,43 @@ void nl80211_send_scan_aborted(struct cfg80211_registered_device *rdev, nl80211_scan_mcgrp.id, GFP_KERNEL); } +void nl80211_send_sched_scan_results(struct cfg80211_registered_device *rdev, + struct net_device *netdev) +{ + struct sk_buff *msg; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return; + + if (nl80211_send_sched_scan_msg(msg, rdev, netdev, 0, 0, 0, + NL80211_CMD_SCHED_SCAN_RESULTS) < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_scan_mcgrp.id, GFP_KERNEL); +} + +void nl80211_send_sched_scan(struct cfg80211_registered_device *rdev, + struct net_device *netdev, u32 cmd) +{ + struct sk_buff *msg; + + msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg) + return; + + if (nl80211_send_sched_scan_msg(msg, rdev, netdev, 0, 0, 0, cmd) < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_scan_mcgrp.id, GFP_KERNEL); +} + /* * This can happen on global 
regulatory changes or device specific settings * based on custom world regulatory domains. diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index f2af6955a665..2f1bfb87a651 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -12,6 +12,10 @@ void nl80211_send_scan_done(struct cfg80211_registered_device *rdev, struct net_device *netdev); void nl80211_send_scan_aborted(struct cfg80211_registered_device *rdev, struct net_device *netdev); +void nl80211_send_sched_scan(struct cfg80211_registered_device *rdev, + struct net_device *netdev, u32 cmd); +void nl80211_send_sched_scan_results(struct cfg80211_registered_device *rdev, + struct net_device *netdev); void nl80211_send_reg_change_event(struct regulatory_request *request); void nl80211_send_rx_auth(struct cfg80211_registered_device *rdev, struct net_device *netdev, diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 62e542a2b192..65dfae3b9d41 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -93,6 +93,76 @@ void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted) } EXPORT_SYMBOL(cfg80211_scan_done); +void __cfg80211_sched_scan_results(struct work_struct *wk) +{ + struct cfg80211_registered_device *rdev; + + rdev = container_of(wk, struct cfg80211_registered_device, + sched_scan_results_wk); + + cfg80211_lock_rdev(rdev); + + /* we don't have sched_scan_req anymore if the scan is stopping */ + if (rdev->sched_scan_req) + nl80211_send_sched_scan_results(rdev, + rdev->sched_scan_req->dev); + + cfg80211_unlock_rdev(rdev); +} + +void cfg80211_sched_scan_results(struct wiphy *wiphy) +{ + /* ignore if we're not scanning */ + if (wiphy_to_dev(wiphy)->sched_scan_req) + queue_work(cfg80211_wq, + &wiphy_to_dev(wiphy)->sched_scan_results_wk); +} +EXPORT_SYMBOL(cfg80211_sched_scan_results); + +void __cfg80211_sched_scan_stopped(struct work_struct *wk) +{ + struct cfg80211_registered_device *rdev; + + rdev = container_of(wk, struct cfg80211_registered_device, + sched_scan_stopped_wk); + + cfg80211_lock_rdev(rdev); + __cfg80211_stop_sched_scan(rdev, true); + cfg80211_unlock_rdev(rdev); +} + +void cfg80211_sched_scan_stopped(struct wiphy *wiphy) +{ + queue_work(cfg80211_wq, &wiphy_to_dev(wiphy)->sched_scan_stopped_wk); +} +EXPORT_SYMBOL(cfg80211_sched_scan_stopped); + +int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev, + bool driver_initiated) +{ + int err; + struct net_device *dev; + + ASSERT_RDEV_LOCK(rdev); + + if (!rdev->sched_scan_req) + return 0; + + dev = rdev->sched_scan_req->dev; + + err = rdev->ops->sched_scan_stop(&rdev->wiphy, dev, + driver_initiated); + if (err) + return err; + + nl80211_send_sched_scan(rdev, dev, NL80211_CMD_SCHED_SCAN_STOPPED); + + kfree(rdev->sched_scan_req); + rdev->sched_scan_req = NULL; + + return err; +} + static void bss_release(struct kref *ref) { struct cfg80211_internal_bss *bss; -- cgit v1.2.3-71-gd317 From bbe6ad6dcb1eb26bd12ec85320f402721c3383ae Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Wed, 11 May 2011 17:09:37 +0300 Subject: cfg80211/nl80211: add interval attribute for scheduled scans Introduce NL80211_ATTR_SCHED_SCAN_INTERVAL as a required attribute for NL80211_CMD_START_SCHED_SCAN. This value informs the driver at which intervals the scheduled scan cycles should be executed. Signed-off-by: Luciano Coelho Signed-off-by: John W. 
Linville --- include/linux/nl80211.h | 25 ++++++++++++++++--------- include/net/cfg80211.h | 2 ++ net/wireless/nl80211.c | 10 ++++++++++ 3 files changed, 28 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index f8b5595ba4af..281a2bb6a6ec 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -203,15 +203,17 @@ * @NL80211_CMD_SCAN_ABORTED: scan was aborted, for unspecified reasons, * partial scan results may be available * - * @NL80211_CMD_START_SCHED_SCAN: start a scheduled scan. Like with normal - * scans, if SSIDs (%NL80211_ATTR_SCAN_SSIDS) are passed, they are used - * in the probe requests. For broadcast, a broadcast SSID must be - * passed (ie. an empty string). If no SSID is passed, no probe - * requests are sent and a passive scan is performed. - * %NL80211_ATTR_SCAN_FREQUENCIES, if passed, define which channels - * should be scanned; if not passed, all channels allowed for the - * current regulatory domain are used. Extra IEs can also be passed - * from the userspace by using the %NL80211_ATTR_IE attribute. + * @NL80211_CMD_START_SCHED_SCAN: start a scheduled scan at certain + * intervals, as specified by %NL80211_ATTR_SCHED_SCAN_INTERVAL. + * Like with normal scans, if SSIDs (%NL80211_ATTR_SCAN_SSIDS) + * are passed, they are used in the probe requests. For + * broadcast, a broadcast SSID must be passed (ie. an empty + * string). If no SSID is passed, no probe requests are sent and + * a passive scan is performed. %NL80211_ATTR_SCAN_FREQUENCIES, + * if passed, define which channels should be scanned; if not + * passed, all channels allowed for the current regulatory domain + * are used. Extra IEs can also be passed from the userspace by + * using the %NL80211_ATTR_IE attribute. * @NL80211_CMD_STOP_SCHED_SCAN: stop a scheduled scan * @NL80211_CMD_SCHED_SCAN_RESULTS: indicates that there are scheduled scan * results available. @@ -948,6 +950,9 @@ enum nl80211_commands { * indicate which WoW triggers should be enabled. This is also * used by %NL80211_CMD_GET_WOWLAN to get the currently enabled WoWLAN * triggers. + + * @NL80211_ATTR_SCHED_SCAN_INTERVAL: Interval between scheduled scan + * cycles, in msecs. 
* * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -1142,6 +1147,8 @@ enum nl80211_attrs { NL80211_ATTR_WOWLAN_TRIGGERS, NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED, + NL80211_ATTR_SCHED_SCAN_INTERVAL, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index e214c85b74d2..1f1e221b6ce3 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -829,6 +829,7 @@ struct cfg80211_scan_request { * @ssids: SSIDs to scan for (passed in the probe_reqs in active scans) * @n_ssids: number of SSIDs * @n_channels: total number of channels to scan + * @interval: interval between each scheduled scan cycle * @ie: optional information element(s) to add into Probe Request or %NULL * @ie_len: length of ie in octets * @wiphy: the wiphy this was for @@ -839,6 +840,7 @@ struct cfg80211_sched_scan_request { struct cfg80211_ssid *ssids; int n_ssids; u32 n_channels; + u32 interval; const u8 *ie; size_t ie_len; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 4fac370284c0..b5b050b62f2a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -175,6 +175,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_KEY_DEFAULT_TYPES] = { .type = NLA_NESTED }, [NL80211_ATTR_WOWLAN_TRIGGERS] = { .type = NLA_NESTED }, [NL80211_ATTR_STA_PLINK_STATE] = { .type = NLA_U8 }, + [NL80211_ATTR_SCHED_SCAN_INTERVAL] = { .type = NLA_U32 }, }; /* policy for the key attributes */ @@ -3370,6 +3371,7 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, struct nlattr *attr; struct wiphy *wiphy; int err, tmp, n_ssids = 0, n_channels, i; + u32 interval; enum ieee80211_band band; size_t ie_len; @@ -3383,6 +3385,13 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, if (rdev->sched_scan_req) return -EINPROGRESS; + if (!info->attrs[NL80211_ATTR_SCHED_SCAN_INTERVAL]) + return -EINVAL; + + interval = nla_get_u32(info->attrs[NL80211_ATTR_SCHED_SCAN_INTERVAL]); + if (interval == 0) + return -EINVAL; + wiphy = &rdev->wiphy; if (info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]) { @@ -3505,6 +3514,7 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, request->dev = dev; request->wiphy = &rdev->wiphy; + request->interval = interval; err = rdev->ops->sched_scan_start(&rdev->wiphy, dev, request); if (!err) { -- cgit v1.2.3-71-gd317 From 2e711c04dbbf7a7732a3f7073b1fc285d12b369d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 26 Apr 2011 19:15:07 +0200 Subject: PM: Remove sysdev suspend, resume and shutdown operations Since suspend, resume and shutdown operations in struct sysdev_class and struct sysdev_driver are not used any more, remove them. Also drop sysdev_suspend(), sysdev_resume() and sysdev_shutdown() used for executing those operations and modify all of their users accordingly. This reduces kernel code size quite a bit and reduces its complexity. Signed-off-by: Rafael J. 
Wysocki Acked-by: Greg Kroah-Hartman --- arch/sh/Kconfig | 1 - arch/x86/Kconfig | 1 - arch/x86/kernel/apm_32.c | 4 - drivers/base/Kconfig | 7 -- drivers/base/base.h | 2 - drivers/base/sys.c | 202 +---------------------------------------------- drivers/xen/manage.c | 8 +- include/linux/device.h | 7 -- include/linux/pm.h | 8 -- include/linux/sysdev.h | 11 --- kernel/kexec.c | 9 +-- kernel/power/hibernate.c | 18 +---- kernel/power/suspend.c | 8 +- kernel/sys.c | 3 - 14 files changed, 7 insertions(+), 282 deletions(-) (limited to 'include/linux') diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 4b89da248d17..bc439de48cd1 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -24,7 +24,6 @@ config SUPERH select RTC_LIB select GENERIC_ATOMIC64 select GENERIC_IRQ_SHOW - select ARCH_NO_SYSDEV_OPS help The SuperH is a RISC processor targeted for use in embedded systems and consumer electronics; it was also used in the Sega Dreamcast diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index cc6c53a95bfd..140e254fe546 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -71,7 +71,6 @@ config X86 select GENERIC_IRQ_SHOW select IRQ_FORCED_THREADING select USE_GENERIC_SMP_HELPERS if SMP - select ARCH_NO_SYSDEV_OPS config INSTRUCTION_DECODER def_bool (KPROBES || PERF_EVENTS) diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index adee12e0da1f..3bfa02235965 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -1238,7 +1238,6 @@ static int suspend(int vetoable) dpm_suspend_noirq(PMSG_SUSPEND); local_irq_disable(); - sysdev_suspend(PMSG_SUSPEND); syscore_suspend(); local_irq_enable(); @@ -1258,7 +1257,6 @@ static int suspend(int vetoable) err = (err == APM_SUCCESS) ? 0 : -EIO; syscore_resume(); - sysdev_resume(); local_irq_enable(); dpm_resume_noirq(PMSG_RESUME); @@ -1282,7 +1280,6 @@ static void standby(void) dpm_suspend_noirq(PMSG_SUSPEND); local_irq_disable(); - sysdev_suspend(PMSG_SUSPEND); syscore_suspend(); local_irq_enable(); @@ -1292,7 +1289,6 @@ static void standby(void) local_irq_disable(); syscore_resume(); - sysdev_resume(); local_irq_enable(); dpm_resume_noirq(PMSG_RESUME); diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig index e9e5238f3106..d57e8d0fb823 100644 --- a/drivers/base/Kconfig +++ b/drivers/base/Kconfig @@ -168,11 +168,4 @@ config SYS_HYPERVISOR bool default n -config ARCH_NO_SYSDEV_OPS - bool - ---help--- - To be selected by architectures that don't use sysdev class or - sysdev driver power management (suspend/resume) and shutdown - operations. - endmenu diff --git a/drivers/base/base.h b/drivers/base/base.h index 19f49e41ce5d..a34dca0ad041 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -111,8 +111,6 @@ static inline int driver_match_device(struct device_driver *drv, return drv->bus->match ? drv->bus->match(dev, drv) : 1; } -extern void sysdev_shutdown(void); - extern char *make_class_name(const char *name, struct kobject *kobj); extern int devres_release_all(struct device *dev); diff --git a/drivers/base/sys.c b/drivers/base/sys.c index acde9b5ee131..9dff77bfe1e3 100644 --- a/drivers/base/sys.c +++ b/drivers/base/sys.c @@ -328,203 +328,8 @@ void sysdev_unregister(struct sys_device *sysdev) kobject_put(&sysdev->kobj); } - -#ifndef CONFIG_ARCH_NO_SYSDEV_OPS -/** - * sysdev_shutdown - Shut down all system devices. - * - * Loop over each class of system devices, and the devices in each - * of those classes. 
For each device, we call the shutdown method for - * each driver registered for the device - the auxiliaries, - * and the class driver. - * - * Note: The list is iterated in reverse order, so that we shut down - * child devices before we shut down their parents. The list ordering - * is guaranteed by virtue of the fact that child devices are registered - * after their parents. - */ -void sysdev_shutdown(void) -{ - struct sysdev_class *cls; - - pr_debug("Shutting Down System Devices\n"); - - mutex_lock(&sysdev_drivers_lock); - list_for_each_entry_reverse(cls, &system_kset->list, kset.kobj.entry) { - struct sys_device *sysdev; - - pr_debug("Shutting down type '%s':\n", - kobject_name(&cls->kset.kobj)); - - list_for_each_entry(sysdev, &cls->kset.list, kobj.entry) { - struct sysdev_driver *drv; - pr_debug(" %s\n", kobject_name(&sysdev->kobj)); - - /* Call auxiliary drivers first */ - list_for_each_entry(drv, &cls->drivers, entry) { - if (drv->shutdown) - drv->shutdown(sysdev); - } - - /* Now call the generic one */ - if (cls->shutdown) - cls->shutdown(sysdev); - } - } - mutex_unlock(&sysdev_drivers_lock); -} - -static void __sysdev_resume(struct sys_device *dev) -{ - struct sysdev_class *cls = dev->cls; - struct sysdev_driver *drv; - - /* First, call the class-specific one */ - if (cls->resume) - cls->resume(dev); - WARN_ONCE(!irqs_disabled(), - "Interrupts enabled after %pF\n", cls->resume); - - /* Call auxiliary drivers next. */ - list_for_each_entry(drv, &cls->drivers, entry) { - if (drv->resume) - drv->resume(dev); - WARN_ONCE(!irqs_disabled(), - "Interrupts enabled after %pF\n", drv->resume); - } -} - -/** - * sysdev_suspend - Suspend all system devices. - * @state: Power state to enter. - * - * We perform an almost identical operation as sysdev_shutdown() - * above, though calling ->suspend() instead. Interrupts are disabled - * when this called. Devices are responsible for both saving state and - * quiescing or powering down the device. - * - * This is only called by the device PM core, so we let them handle - * all synchronization. 
- */ -int sysdev_suspend(pm_message_t state) -{ - struct sysdev_class *cls; - struct sys_device *sysdev, *err_dev; - struct sysdev_driver *drv, *err_drv; - int ret; - - pr_debug("Checking wake-up interrupts\n"); - - /* Return error code if there are any wake-up interrupts pending */ - ret = check_wakeup_irqs(); - if (ret) - return ret; - - WARN_ONCE(!irqs_disabled(), - "Interrupts enabled while suspending system devices\n"); - - pr_debug("Suspending System Devices\n"); - - list_for_each_entry_reverse(cls, &system_kset->list, kset.kobj.entry) { - pr_debug("Suspending type '%s':\n", - kobject_name(&cls->kset.kobj)); - - list_for_each_entry(sysdev, &cls->kset.list, kobj.entry) { - pr_debug(" %s\n", kobject_name(&sysdev->kobj)); - - /* Call auxiliary drivers first */ - list_for_each_entry(drv, &cls->drivers, entry) { - if (drv->suspend) { - ret = drv->suspend(sysdev, state); - if (ret) - goto aux_driver; - } - WARN_ONCE(!irqs_disabled(), - "Interrupts enabled after %pF\n", - drv->suspend); - } - - /* Now call the generic one */ - if (cls->suspend) { - ret = cls->suspend(sysdev, state); - if (ret) - goto cls_driver; - WARN_ONCE(!irqs_disabled(), - "Interrupts enabled after %pF\n", - cls->suspend); - } - } - } - return 0; - /* resume current sysdev */ -cls_driver: - drv = NULL; - printk(KERN_ERR "Class suspend failed for %s: %d\n", - kobject_name(&sysdev->kobj), ret); - -aux_driver: - if (drv) - printk(KERN_ERR "Class driver suspend failed for %s: %d\n", - kobject_name(&sysdev->kobj), ret); - list_for_each_entry(err_drv, &cls->drivers, entry) { - if (err_drv == drv) - break; - if (err_drv->resume) - err_drv->resume(sysdev); - } - - /* resume other sysdevs in current class */ - list_for_each_entry(err_dev, &cls->kset.list, kobj.entry) { - if (err_dev == sysdev) - break; - pr_debug(" %s\n", kobject_name(&err_dev->kobj)); - __sysdev_resume(err_dev); - } - - /* resume other classes */ - list_for_each_entry_continue(cls, &system_kset->list, kset.kobj.entry) { - list_for_each_entry(err_dev, &cls->kset.list, kobj.entry) { - pr_debug(" %s\n", kobject_name(&err_dev->kobj)); - __sysdev_resume(err_dev); - } - } - return ret; -} -EXPORT_SYMBOL_GPL(sysdev_suspend); - -/** - * sysdev_resume - Bring system devices back to life. - * - * Similar to sysdev_suspend(), but we iterate the list forwards - * to guarantee that parent devices are resumed before their children. - * - * Note: Interrupts are disabled when called. 
- */ -int sysdev_resume(void) -{ - struct sysdev_class *cls; - - WARN_ONCE(!irqs_disabled(), - "Interrupts enabled while resuming system devices\n"); - - pr_debug("Resuming System Devices\n"); - - list_for_each_entry(cls, &system_kset->list, kset.kobj.entry) { - struct sys_device *sysdev; - - pr_debug("Resuming type '%s':\n", - kobject_name(&cls->kset.kobj)); - - list_for_each_entry(sysdev, &cls->kset.list, kobj.entry) { - pr_debug(" %s\n", kobject_name(&sysdev->kobj)); - - __sysdev_resume(sysdev); - } - } - return 0; -} -EXPORT_SYMBOL_GPL(sysdev_resume); -#endif /* CONFIG_ARCH_NO_SYSDEV_OPS */ +EXPORT_SYMBOL_GPL(sysdev_register); +EXPORT_SYMBOL_GPL(sysdev_unregister); int __init system_bus_init(void) { @@ -534,9 +339,6 @@ int __init system_bus_init(void) return 0; } -EXPORT_SYMBOL_GPL(sysdev_register); -EXPORT_SYMBOL_GPL(sysdev_unregister); - #define to_ext_attr(x) container_of(x, struct sysdev_ext_attribute, attr) ssize_t sysdev_store_ulong(struct sys_device *sysdev, diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index a2eee574784e..0b5366b5be20 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -70,12 +70,7 @@ static int xen_suspend(void *data) BUG_ON(!irqs_disabled()); - err = sysdev_suspend(PMSG_FREEZE); - if (!err) { - err = syscore_suspend(); - if (err) - sysdev_resume(); - } + err = syscore_suspend(); if (err) { printk(KERN_ERR "xen_suspend: system core suspend failed: %d\n", err); @@ -102,7 +97,6 @@ static int xen_suspend(void *data) } syscore_resume(); - sysdev_resume(); return 0; } diff --git a/include/linux/device.h b/include/linux/device.h index ab8dfc095709..ea9db9b0f248 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -633,13 +633,6 @@ static inline int devtmpfs_mount(const char *mountpoint) { return 0; } /* drivers/base/power/shutdown.c */ extern void device_shutdown(void); -#ifndef CONFIG_ARCH_NO_SYSDEV_OPS -/* drivers/base/sys.c */ -extern void sysdev_shutdown(void); -#else -static inline void sysdev_shutdown(void) { } -#endif - /* debugging and troubleshooting/diagnostic helpers. 
*/ extern const char *dev_driver_string(const struct device *dev); diff --git a/include/linux/pm.h b/include/linux/pm.h index 512e09177e57..3c053e2beb84 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -529,14 +529,6 @@ struct dev_power_domain { */ #ifdef CONFIG_PM_SLEEP -#ifndef CONFIG_ARCH_NO_SYSDEV_OPS -extern int sysdev_suspend(pm_message_t state); -extern int sysdev_resume(void); -#else -static inline int sysdev_suspend(pm_message_t state) { return 0; } -static inline int sysdev_resume(void) { return 0; } -#endif - extern void device_pm_lock(void); extern void dpm_resume_noirq(pm_message_t state); extern void dpm_resume_end(pm_message_t state); diff --git a/include/linux/sysdev.h b/include/linux/sysdev.h index dfb078db8ebb..d35e783a598c 100644 --- a/include/linux/sysdev.h +++ b/include/linux/sysdev.h @@ -34,12 +34,6 @@ struct sysdev_class { struct list_head drivers; struct sysdev_class_attribute **attrs; struct kset kset; -#ifndef CONFIG_ARCH_NO_SYSDEV_OPS - /* Default operations for these types of devices */ - int (*shutdown)(struct sys_device *); - int (*suspend)(struct sys_device *, pm_message_t state); - int (*resume)(struct sys_device *); -#endif }; struct sysdev_class_attribute { @@ -77,11 +71,6 @@ struct sysdev_driver { struct list_head entry; int (*add)(struct sys_device *); int (*remove)(struct sys_device *); -#ifndef CONFIG_ARCH_NO_SYSDEV_OPS - int (*shutdown)(struct sys_device *); - int (*suspend)(struct sys_device *, pm_message_t state); - int (*resume)(struct sys_device *); -#endif }; diff --git a/kernel/kexec.c b/kernel/kexec.c index 87b77de03dd3..8d814cbc8109 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -1531,13 +1531,7 @@ int kernel_kexec(void) if (error) goto Enable_cpus; local_irq_disable(); - /* Suspend system devices */ - error = sysdev_suspend(PMSG_FREEZE); - if (!error) { - error = syscore_suspend(); - if (error) - sysdev_resume(); - } + error = syscore_suspend(); if (error) goto Enable_irqs; } else @@ -1553,7 +1547,6 @@ int kernel_kexec(void) #ifdef CONFIG_KEXEC_JUMP if (kexec_image->preserve_context) { syscore_resume(); - sysdev_resume(); Enable_irqs: local_irq_enable(); Enable_cpus: diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 50aae660174d..554d3b049f35 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -272,12 +272,7 @@ static int create_image(int platform_mode) local_irq_disable(); - error = sysdev_suspend(PMSG_FREEZE); - if (!error) { - error = syscore_suspend(); - if (error) - sysdev_resume(); - } + error = syscore_suspend(); if (error) { printk(KERN_ERR "PM: Some system devices failed to power down, " "aborting hibernation\n"); @@ -302,7 +297,6 @@ static int create_image(int platform_mode) Power_up: syscore_resume(); - sysdev_resume(); /* NOTE: dpm_resume_noirq() is just a resume() for devices * that suspended with irqs off ... no overall powerup. 
*/ @@ -409,12 +403,7 @@ static int resume_target_kernel(bool platform_mode) local_irq_disable(); - error = sysdev_suspend(PMSG_QUIESCE); - if (!error) { - error = syscore_suspend(); - if (error) - sysdev_resume(); - } + error = syscore_suspend(); if (error) goto Enable_irqs; @@ -442,7 +431,6 @@ static int resume_target_kernel(bool platform_mode) touch_softlockup_watchdog(); syscore_resume(); - sysdev_resume(); Enable_irqs: local_irq_enable(); @@ -528,7 +516,6 @@ int hibernation_platform_enter(void) goto Platform_finish; local_irq_disable(); - sysdev_suspend(PMSG_HIBERNATE); syscore_suspend(); if (pm_wakeup_pending()) { error = -EAGAIN; @@ -541,7 +528,6 @@ int hibernation_platform_enter(void) Power_up: syscore_resume(); - sysdev_resume(); local_irq_enable(); enable_nonboot_cpus(); diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 8935369d503a..732d77a957e7 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -163,19 +163,13 @@ static int suspend_enter(suspend_state_t state) arch_suspend_disable_irqs(); BUG_ON(!irqs_disabled()); - error = sysdev_suspend(PMSG_SUSPEND); - if (!error) { - error = syscore_suspend(); - if (error) - sysdev_resume(); - } + error = syscore_suspend(); if (!error) { if (!(suspend_test(TEST_CORE) || pm_wakeup_pending())) { error = suspend_ops->enter(state); events_check_enabled = false; } syscore_resume(); - sysdev_resume(); } arch_suspend_enable_irqs(); diff --git a/kernel/sys.c b/kernel/sys.c index af468edf096a..f0c10385f30c 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -315,7 +315,6 @@ void kernel_restart_prepare(char *cmd) blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); system_state = SYSTEM_RESTART; device_shutdown(); - sysdev_shutdown(); syscore_shutdown(); } @@ -354,7 +353,6 @@ static void kernel_shutdown_prepare(enum system_states state) void kernel_halt(void) { kernel_shutdown_prepare(SYSTEM_HALT); - sysdev_shutdown(); syscore_shutdown(); printk(KERN_EMERG "System halted.\n"); kmsg_dump(KMSG_DUMP_HALT); @@ -374,7 +372,6 @@ void kernel_power_off(void) if (pm_power_off_prepare) pm_power_off_prepare(); disable_nonboot_cpus(); - sysdev_shutdown(); syscore_shutdown(); printk(KERN_EMERG "Power down.\n"); kmsg_dump(KMSG_DUMP_POWEROFF); -- cgit v1.2.3-71-gd317 From b48d4425b602f5f4978299474743dbea130d940d Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Tue, 19 Oct 2010 13:07:57 -0700 Subject: PCI: add ID-based ordering enable/disable support Add support to allow drivers to enable/disable ID-based ordering. Where supported, ID-based ordering can significantly improve the latency of individual requests by preventing them from queueing up behind unrelated traffic. Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/pci.h | 13 ++++++++++++ include/linux/pci_regs.h | 2 ++ 3 files changed, 68 insertions(+) (limited to 'include/linux') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 44d1c7c3876b..d0182bed7acc 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1834,6 +1834,59 @@ void pci_enable_ari(struct pci_dev *dev) bridge->ari_enabled = 1; } +/** + * pci_enable_ido - enable ID-based ordering on a device + * @dev: the PCI device + * @type: which types of IDO to enable + * + * Enable ID-based ordering on @dev. @type can contain the bits + * %PCI_EXP_IDO_REQUEST and/or %PCI_EXP_IDO_COMPLETION to indicate + * which types of transactions are allowed to be re-ordered. 
+ */ +void pci_enable_ido(struct pci_dev *dev, unsigned long type) +{ + int pos; + u16 ctrl; + + pos = pci_pcie_cap(dev); + if (!pos) + return; + + pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &ctrl); + if (type & PCI_EXP_IDO_REQUEST) + ctrl |= PCI_EXP_IDO_REQ_EN; + if (type & PCI_EXP_IDO_COMPLETION) + ctrl |= PCI_EXP_IDO_CMP_EN; + pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl); +} +EXPORT_SYMBOL(pci_enable_ido); + +/** + * pci_disable_ido - disable ID-based ordering on a device + * @dev: the PCI device + * @type: which types of IDO to disable + */ +void pci_disable_ido(struct pci_dev *dev, unsigned long type) +{ + int pos; + u16 ctrl; + + if (!pci_is_pcie(dev)) + return; + + pos = pci_pcie_cap(dev); + if (!pos) + return; + + pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &ctrl); + if (type & PCI_EXP_IDO_REQUEST) + ctrl &= ~PCI_EXP_IDO_REQ_EN; + if (type & PCI_EXP_IDO_COMPLETION) + ctrl &= ~PCI_EXP_IDO_CMP_EN; + pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl); +} +EXPORT_SYMBOL(pci_disable_ido); + static int pci_acs_enable; /** diff --git a/include/linux/pci.h b/include/linux/pci.h index 96f70d7e058d..551ddcb5f940 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -828,6 +828,11 @@ static inline int pci_enable_wake(struct pci_dev *dev, pci_power_t state, return __pci_enable_wake(dev, state, false, enable); } +#define PCI_EXP_IDO_REQUEST (1<<0) +#define PCI_EXP_IDO_COMPLETION (1<<1) +void pci_enable_ido(struct pci_dev *dev, unsigned long type); +void pci_disable_ido(struct pci_dev *dev, unsigned long type); + /* For use by arch with custom probe code */ void set_pcie_port_type(struct pci_dev *pdev); void set_pcie_hotplug_bridge(struct pci_dev *pdev); @@ -1207,6 +1212,14 @@ static inline int pci_enable_wake(struct pci_dev *dev, pci_power_t state, return 0; } +static inline void pci_enable_ido(struct pci_dev *dev, unsigned long type) +{ +} + +static inline void pci_disable_ido(struct pci_dev *dev, unsigned long type) +{ +} + static inline int pci_request_regions(struct pci_dev *dev, const char *res_name) { return -EIO; diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index be01380f798a..d9acf9b99814 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -510,6 +510,8 @@ #define PCI_EXP_DEVCAP2_ARI 0x20 /* Alternative Routing-ID */ #define PCI_EXP_DEVCTL2 40 /* Device Control 2 */ #define PCI_EXP_DEVCTL2_ARI 0x20 /* Alternative Routing-ID */ +#define PCI_EXP_IDO_REQ_EN 0x100 /* ID-based ordering request enable */ +#define PCI_EXP_IDO_CMP_EN 0x200 /* ID-based ordering completion enable */ #define PCI_EXP_LNKCTL2 48 /* Link Control 2 */ #define PCI_EXP_SLTCTL2 56 /* Slot Control 2 */ -- cgit v1.2.3-71-gd317 From 48a92a8179b3e677fac07db7bd109e68f020468c Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Mon, 10 Jan 2011 12:46:36 -0800 Subject: PCI: add OBFF enable/disable support OBFF (optimized buffer flush/fill), where supported, can help improve energy efficiency by giving devices information about when interrupts and other activity will have a reduced power impact. It requires support from both the device and system (i.e. not only does the device need to respond to OBFF messages, but the platform must be capable of generating and routing them to the end point). 
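
For illustration only (this snippet is not part of the patch, and foo_probe() is a hypothetical driver entry point), a device that only needs OBFF hints while the link is in L0 could enable it from its probe routine and simply continue without it on platforms that lack support:

	static int foo_probe(struct pci_dev *pdev, const struct pci_device_id *id)
	{
		int err;

		/* Ask for the low-power-friendly signalling type first. */
		err = pci_enable_obff(pdev, PCI_EXP_OBFF_SIGNAL_L0);
		if (err)
			dev_info(&pdev->dev,
				 "OBFF unavailable (%d), continuing without it\n", err);

		/* ... rest of normal device setup ... */
		return 0;
	}

On teardown the driver would call pci_disable_obff(pdev). Note that the new helper also attempts to enable OBFF on the upstream bridge, so the caller does not have to walk the topology itself.
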
Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/pci.h | 16 +++++++++ include/linux/pci_regs.h | 6 ++++ 3 files changed, 114 insertions(+) (limited to 'include/linux') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index d0182bed7acc..01e4cab2e5cb 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1887,6 +1887,98 @@ void pci_disable_ido(struct pci_dev *dev, unsigned long type) } EXPORT_SYMBOL(pci_disable_ido); +/** + * pci_enable_obff - enable optimized buffer flush/fill + * @dev: PCI device + * @type: type of signaling to use + * + * Try to enable @type OBFF signaling on @dev. It will try using WAKE# + * signaling if possible, falling back to message signaling only if + * WAKE# isn't supported. @type should indicate whether the PCIe link + * be brought out of L0s or L1 to send the message. It should be either + * %PCI_EXP_OBFF_SIGNAL_ALWAYS or %PCI_OBFF_SIGNAL_L0. + * + * If your device can benefit from receiving all messages, even at the + * power cost of bringing the link back up from a low power state, use + * %PCI_EXP_OBFF_SIGNAL_ALWAYS. Otherwise, use %PCI_OBFF_SIGNAL_L0 (the + * preferred type). + * + * RETURNS: + * Zero on success, appropriate error number on failure. + */ +int pci_enable_obff(struct pci_dev *dev, enum pci_obff_signal_type type) +{ + int pos; + u32 cap; + u16 ctrl; + int ret; + + if (!pci_is_pcie(dev)) + return -ENOTSUPP; + + pos = pci_pcie_cap(dev); + if (!pos) + return -ENOTSUPP; + + pci_read_config_dword(dev, pos + PCI_EXP_DEVCAP2, &cap); + if (!(cap & PCI_EXP_OBFF_MASK)) + return -ENOTSUPP; /* no OBFF support at all */ + + /* Make sure the topology supports OBFF as well */ + if (dev->bus) { + ret = pci_enable_obff(dev->bus->self, type); + if (ret) + return ret; + } + + pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &ctrl); + if (cap & PCI_EXP_OBFF_WAKE) + ctrl |= PCI_EXP_OBFF_WAKE_EN; + else { + switch (type) { + case PCI_EXP_OBFF_SIGNAL_L0: + if (!(ctrl & PCI_EXP_OBFF_WAKE_EN)) + ctrl |= PCI_EXP_OBFF_MSGA_EN; + break; + case PCI_EXP_OBFF_SIGNAL_ALWAYS: + ctrl &= ~PCI_EXP_OBFF_WAKE_EN; + ctrl |= PCI_EXP_OBFF_MSGB_EN; + break; + default: + WARN(1, "bad OBFF signal type\n"); + return -ENOTSUPP; + } + } + pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl); + + return 0; +} +EXPORT_SYMBOL(pci_enable_obff); + +/** + * pci_disable_obff - disable optimized buffer flush/fill + * @dev: PCI device + * + * Disable OBFF on @dev. 
+ */ +void pci_disable_obff(struct pci_dev *dev) +{ + int pos; + u16 ctrl; + + if (!pci_is_pcie(dev)) + return; + + pos = pci_pcie_cap(dev); + if (!pos) + return; + + pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &ctrl); + ctrl &= ~PCI_EXP_OBFF_WAKE_EN; + pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl); +} +EXPORT_SYMBOL(pci_disable_obff); + static int pci_acs_enable; /** diff --git a/include/linux/pci.h b/include/linux/pci.h index 551ddcb5f940..45a035cccd93 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -833,6 +833,13 @@ static inline int pci_enable_wake(struct pci_dev *dev, pci_power_t state, void pci_enable_ido(struct pci_dev *dev, unsigned long type); void pci_disable_ido(struct pci_dev *dev, unsigned long type); +enum pci_obff_signal_type { + PCI_EXP_OBFF_SIGNAL_L0, + PCI_EXP_OBFF_SIGNAL_ALWAYS, +}; +int pci_enable_obff(struct pci_dev *dev, enum pci_obff_signal_type); +void pci_disable_obff(struct pci_dev *dev); + /* For use by arch with custom probe code */ void set_pcie_port_type(struct pci_dev *pdev); void set_pcie_hotplug_bridge(struct pci_dev *pdev); @@ -1220,6 +1227,15 @@ static inline void pci_disable_ido(struct pci_dev *dev, unsigned long type) { } +static inline int pci_enable_obff(struct pci_dev *dev, unsigned long type) +{ + return 0; +} + +static inline void pci_disable_obff(struct pci_dev *dev) +{ +} + static inline int pci_request_regions(struct pci_dev *dev, const char *res_name) { return -EIO; diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index d9acf9b99814..aa420261843d 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -508,10 +508,16 @@ #define PCI_EXP_RTSTA_PENDING 0x20000 /* PME pending */ #define PCI_EXP_DEVCAP2 36 /* Device Capabilities 2 */ #define PCI_EXP_DEVCAP2_ARI 0x20 /* Alternative Routing-ID */ +#define PCI_EXP_OBFF_MASK 0xc0000 /* OBFF support mechanism */ +#define PCI_EXP_OBFF_MSG 0x40000 /* New message signaling */ +#define PCI_EXP_OBFF_WAKE 0x80000 /* Re-use WAKE# for OBFF */ #define PCI_EXP_DEVCTL2 40 /* Device Control 2 */ #define PCI_EXP_DEVCTL2_ARI 0x20 /* Alternative Routing-ID */ #define PCI_EXP_IDO_REQ_EN 0x100 /* ID-based ordering request enable */ #define PCI_EXP_IDO_CMP_EN 0x200 /* ID-based ordering completion enable */ +#define PCI_EXP_OBFF_MSGA_EN 0x2000 /* OBFF enable with Message type A */ +#define PCI_EXP_OBFF_MSGB_EN 0x4000 /* OBFF enable with Message type B */ +#define PCI_EXP_OBFF_WAKE_EN 0x6000 /* OBFF using WAKE# signaling */ #define PCI_EXP_LNKCTL2 48 /* Link Control 2 */ #define PCI_EXP_SLTCTL2 56 /* Slot Control 2 */ -- cgit v1.2.3-71-gd317 From 51c2e0a7e5bc7ed1384cc68cfb95e702571500c9 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Fri, 14 Jan 2011 08:53:04 -0800 Subject: PCI: add latency tolerance reporting enable/disable support Latency tolerance reporting allows devices to send messages to the root complex indicating their latency tolerance for snooped & unsnooped memory transactions. Add support for enabling & disabling this feature, along with a routine to set the max latencies a device should send upstream. 
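
As a usage illustration (not part of this patch; the device and latency numbers are invented), a driver for a latency-tolerant device might report its requirements once LTR has been enabled:

	/* Hypothetical consumer of the new interface. */
	if (pci_ltr_supported(pdev)) {
		int err = pci_enable_ltr(pdev);

		/* Tolerate ~3us for snooped and ~10us for unsnooped traffic. */
		if (!err)
			pci_set_ltr(pdev, 3000, 10000);
	}

pci_set_ltr() takes plain nanosecond values and works out the LTR value/scale encoding itself, so callers never touch the register format directly.
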
Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 149 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/pci.h | 5 ++ include/linux/pci_regs.h | 9 +++ 3 files changed, 163 insertions(+) (limited to 'include/linux') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 01e4cab2e5cb..53302cbdb94c 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1979,6 +1979,155 @@ void pci_disable_obff(struct pci_dev *dev) } EXPORT_SYMBOL(pci_disable_obff); +/** + * pci_ltr_supported - check whether a device supports LTR + * @dev: PCI device + * + * RETURNS: + * True if @dev supports latency tolerance reporting, false otherwise. + */ +bool pci_ltr_supported(struct pci_dev *dev) +{ + int pos; + u32 cap; + + if (!pci_is_pcie(dev)) + return false; + + pos = pci_pcie_cap(dev); + if (!pos) + return false; + + pci_read_config_dword(dev, pos + PCI_EXP_DEVCAP2, &cap); + + return cap & PCI_EXP_DEVCAP2_LTR; +} +EXPORT_SYMBOL(pci_ltr_supported); + +/** + * pci_enable_ltr - enable latency tolerance reporting + * @dev: PCI device + * + * Enable LTR on @dev if possible, which means enabling it first on + * upstream ports. + * + * RETURNS: + * Zero on success, errno on failure. + */ +int pci_enable_ltr(struct pci_dev *dev) +{ + int pos; + u16 ctrl; + int ret; + + if (!pci_ltr_supported(dev)) + return -ENOTSUPP; + + pos = pci_pcie_cap(dev); + if (!pos) + return -ENOTSUPP; + + /* Only primary function can enable/disable LTR */ + if (PCI_FUNC(dev->devfn) != 0) + return -EINVAL; + + /* Enable upstream ports first */ + if (dev->bus) { + ret = pci_enable_ltr(dev->bus->self); + if (ret) + return ret; + } + + pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &ctrl); + ctrl |= PCI_EXP_LTR_EN; + pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl); + + return 0; +} +EXPORT_SYMBOL(pci_enable_ltr); + +/** + * pci_disable_ltr - disable latency tolerance reporting + * @dev: PCI device + */ +void pci_disable_ltr(struct pci_dev *dev) +{ + int pos; + u16 ctrl; + + if (!pci_ltr_supported(dev)) + return; + + pos = pci_pcie_cap(dev); + if (!pos) + return; + + /* Only primary function can enable/disable LTR */ + if (PCI_FUNC(dev->devfn) != 0) + return; + + pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &ctrl); + ctrl &= ~PCI_EXP_LTR_EN; + pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl); +} +EXPORT_SYMBOL(pci_disable_ltr); + +static int __pci_ltr_scale(int *val) +{ + int scale = 0; + + while (*val > 1023) { + *val = (*val + 31) / 32; + scale++; + } + return scale; +} + +/** + * pci_set_ltr - set LTR latency values + * @dev: PCI device + * @snoop_lat_ns: snoop latency in nanoseconds + * @nosnoop_lat_ns: nosnoop latency in nanoseconds + * + * Figure out the scale and set the LTR values accordingly. 
+ */ +int pci_set_ltr(struct pci_dev *dev, int snoop_lat_ns, int nosnoop_lat_ns) +{ + int pos, ret, snoop_scale, nosnoop_scale; + u16 val; + + if (!pci_ltr_supported(dev)) + return -ENOTSUPP; + + snoop_scale = __pci_ltr_scale(&snoop_lat_ns); + nosnoop_scale = __pci_ltr_scale(&nosnoop_lat_ns); + + if (snoop_lat_ns > PCI_LTR_VALUE_MASK || + nosnoop_lat_ns > PCI_LTR_VALUE_MASK) + return -EINVAL; + + if ((snoop_scale > (PCI_LTR_SCALE_MASK >> PCI_LTR_SCALE_SHIFT)) || + (nosnoop_scale > (PCI_LTR_SCALE_MASK >> PCI_LTR_SCALE_SHIFT))) + return -EINVAL; + + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_LTR); + if (!pos) + return -ENOTSUPP; + + val = (snoop_scale << PCI_LTR_SCALE_SHIFT) | snoop_lat_ns; + ret = pci_write_config_word(dev, pos + PCI_LTR_MAX_SNOOP_LAT, val); + if (ret != 4) + return -EIO; + + val = (nosnoop_scale << PCI_LTR_SCALE_SHIFT) | nosnoop_lat_ns; + ret = pci_write_config_word(dev, pos + PCI_LTR_MAX_NOSNOOP_LAT, val); + if (ret != 4) + return -EIO; + + return 0; +} +EXPORT_SYMBOL(pci_set_ltr); + static int pci_acs_enable; /** diff --git a/include/linux/pci.h b/include/linux/pci.h index 45a035cccd93..df4d69b82144 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -840,6 +840,11 @@ enum pci_obff_signal_type { int pci_enable_obff(struct pci_dev *dev, enum pci_obff_signal_type); void pci_disable_obff(struct pci_dev *dev); +bool pci_ltr_supported(struct pci_dev *dev); +int pci_enable_ltr(struct pci_dev *dev); +void pci_disable_ltr(struct pci_dev *dev); +int pci_set_ltr(struct pci_dev *dev, int snoop_lat_ns, int nosnoop_lat_ns); + /* For use by arch with custom probe code */ void set_pcie_port_type(struct pci_dev *pdev); void set_pcie_hotplug_bridge(struct pci_dev *pdev); diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index aa420261843d..e8840964aca1 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -508,6 +508,7 @@ #define PCI_EXP_RTSTA_PENDING 0x20000 /* PME pending */ #define PCI_EXP_DEVCAP2 36 /* Device Capabilities 2 */ #define PCI_EXP_DEVCAP2_ARI 0x20 /* Alternative Routing-ID */ +#define PCI_EXP_DEVCAP2_LTR 0x800 /* Latency tolerance reporting */ #define PCI_EXP_OBFF_MASK 0xc0000 /* OBFF support mechanism */ #define PCI_EXP_OBFF_MSG 0x40000 /* New message signaling */ #define PCI_EXP_OBFF_WAKE 0x80000 /* Re-use WAKE# for OBFF */ @@ -515,6 +516,7 @@ #define PCI_EXP_DEVCTL2_ARI 0x20 /* Alternative Routing-ID */ #define PCI_EXP_IDO_REQ_EN 0x100 /* ID-based ordering request enable */ #define PCI_EXP_IDO_CMP_EN 0x200 /* ID-based ordering completion enable */ +#define PCI_EXP_LTR_EN 0x400 /* Latency tolerance reporting */ #define PCI_EXP_OBFF_MSGA_EN 0x2000 /* OBFF enable with Message type A */ #define PCI_EXP_OBFF_MSGB_EN 0x4000 /* OBFF enable with Message type B */ #define PCI_EXP_OBFF_WAKE_EN 0x6000 /* OBFF using WAKE# signaling */ @@ -535,6 +537,7 @@ #define PCI_EXT_CAP_ID_ARI 14 #define PCI_EXT_CAP_ID_ATS 15 #define PCI_EXT_CAP_ID_SRIOV 16 +#define PCI_EXT_CAP_ID_LTR 24 /* Advanced Error Reporting */ #define PCI_ERR_UNCOR_STATUS 4 /* Uncorrectable Error Status */ @@ -691,6 +694,12 @@ #define PCI_SRIOV_VFM_MO 0x2 /* Active.MigrateOut */ #define PCI_SRIOV_VFM_AV 0x3 /* Active.Available */ +#define PCI_LTR_MAX_SNOOP_LAT 0x4 +#define PCI_LTR_MAX_NOSNOOP_LAT 0x6 +#define PCI_LTR_VALUE_MASK 0x000003ff +#define PCI_LTR_SCALE_MASK 0x00001c00 +#define PCI_LTR_SCALE_SHIFT 10 + /* Access Control Service */ #define PCI_ACS_CAP 0x04 /* ACS Capability Register */ #define PCI_ACS_SV 0x01 /* Source Validation */ -- cgit v1.2.3-71-gd317 From 
8f389a99b652aab5b42297280bd94d95933ad12f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 11 May 2011 15:13:32 -0700 Subject: mm: use alloc_bootmem_node_nopanic() on really needed path Stefan found nobootmem does not work on his system that has only 8M of RAM. This causes an early panic: BIOS-provided physical RAM map: BIOS-88: 0000000000000000 - 000000000009f000 (usable) BIOS-88: 0000000000100000 - 0000000000840000 (usable) bootconsole [earlyser0] enabled Notice: NX (Execute Disable) protection missing in CPU or disabled in BIOS! DMI not present or invalid. last_pfn = 0x840 max_arch_pfn = 0x100000 init_memory_mapping: 0000000000000000-0000000000840000 8MB LOWMEM available. mapped low ram: 0 - 00840000 low ram: 0 - 00840000 Zone PFN ranges: DMA 0x00000001 -> 0x00001000 Normal empty Movable zone start PFN for each node early_node_map[2] active PFN ranges 0: 0x00000001 -> 0x0000009f 0: 0x00000100 -> 0x00000840 BUG: Int 6: CR2 (null) EDI c034663c ESI (null) EBP c0329f38 ESP c0329ef4 EBX c0346380 EDX 00000006 ECX ffffffff EAX fffffff4 err (null) EIP c0353191 CS c0320060 flg 00010082 Stack: (null) c030c533 000007cd (null) c030c533 00000001 (null) (null) 00000003 0000083f 00000018 00000002 00000002 c0329f6c c03534d6 (null) (null) 00000100 00000840 (null) c0329f64 00000001 00001000 (null) Pid: 0, comm: swapper Not tainted 2.6.36 #5 Call Trace: [] ? 0xc02e3707 [] 0xc035e6e5 [] ? 0xc0353191 [] 0xc03534d6 [] 0xc034f1cd [] 0xc034a824 [] ? 0xc03513cb [] 0xc0349432 [] 0xc0349066 It turns out that we should ignore the low limit of 16M. Use alloc_bootmem_node_nopanic() in this case. [akpm@linux-foundation.org: less mess] Signed-off-by: Yinghai LU Reported-by: Stefan Hellermann Tested-by: Stefan Hellermann Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Gleixner Cc: [2.6.34+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bootmem.h | 2 ++ mm/page_alloc.c | 7 ++++--- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index b8613e806aa9..01eca1794e14 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -111,6 +111,8 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, __alloc_bootmem_nopanic(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_node(pgdat, x) \ __alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) +#define alloc_bootmem_node_nopanic(pgdat, x) \ + __alloc_bootmem_node_nopanic(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_pages_node(pgdat, x) \ __alloc_bootmem_node(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_pages_node_nopanic(pgdat, x) \ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 9f8a97b9a350..454191a25173 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3564,7 +3564,7 @@ int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages) if (!slab_is_available()) { zone->wait_table = (wait_queue_head_t *) - alloc_bootmem_node(pgdat, alloc_size); + alloc_bootmem_node_nopanic(pgdat, alloc_size); } else { /* * This case means that a zone whose size was 0 gets new memory @@ -4141,7 +4141,8 @@ static void __init setup_usemap(struct pglist_data *pgdat, unsigned long usemapsize = usemap_size(zonesize); zone->pageblock_flags = NULL; if (usemapsize) - zone->pageblock_flags = alloc_bootmem_node(pgdat, usemapsize); + zone->pageblock_flags = alloc_bootmem_node_nopanic(pgdat, + usemapsize); } #else static inline void setup_usemap(struct pglist_data *pgdat, @@ -4307,7 
+4308,7 @@ static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat) size = (end - start) * sizeof(struct page); map = alloc_remap(pgdat->node_id, size); if (!map) - map = alloc_bootmem_node(pgdat, size); + map = alloc_bootmem_node_nopanic(pgdat, size); pgdat->node_mem_map = map + (pgdat->node_start_pfn - start); } #ifndef CONFIG_NEED_MULTIPLE_NODES -- cgit v1.2.3-71-gd317 From ee85c2e1454603ebb9f8d87223ac79dcdc87fa32 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 11 May 2011 15:13:34 -0700 Subject: mm: add alloc_pages_exact_nid() Add a alloc_pages_exact_nid() that allocates on a specific node. The naming is quite broken, but fixing that would need a larger renaming action. [akpm@linux-foundation.org: coding-style fixes] [akpm@linux-foundation.org: tweak comment] Signed-off-by: Andi Kleen Cc: Michal Hocko Cc: Balbir Singh Cc: KOSAKI Motohiro Cc: Dave Hansen Cc: David Rientjes Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 2 ++ mm/page_alloc.c | 49 +++++++++++++++++++++++++++++++++++++------------ 2 files changed, 39 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index bfb8f934521e..56d8fc87fbbc 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -353,6 +353,8 @@ extern unsigned long get_zeroed_page(gfp_t gfp_mask); void *alloc_pages_exact(size_t size, gfp_t gfp_mask); void free_pages_exact(void *virt, size_t size); +/* This is different from alloc_pages_exact_node !!! */ +void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask); #define __get_free_page(gfp_mask) \ __get_free_pages((gfp_mask), 0) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 454191a25173..570d944daeb5 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2317,6 +2317,21 @@ void free_pages(unsigned long addr, unsigned int order) EXPORT_SYMBOL(free_pages); +static void *make_alloc_exact(unsigned long addr, unsigned order, size_t size) +{ + if (addr) { + unsigned long alloc_end = addr + (PAGE_SIZE << order); + unsigned long used = addr + PAGE_ALIGN(size); + + split_page(virt_to_page((void *)addr), order); + while (used < alloc_end) { + free_page(used); + used += PAGE_SIZE; + } + } + return (void *)addr; +} + /** * alloc_pages_exact - allocate an exact number physically-contiguous pages. * @size: the number of bytes to allocate @@ -2336,21 +2351,31 @@ void *alloc_pages_exact(size_t size, gfp_t gfp_mask) unsigned long addr; addr = __get_free_pages(gfp_mask, order); - if (addr) { - unsigned long alloc_end = addr + (PAGE_SIZE << order); - unsigned long used = addr + PAGE_ALIGN(size); - - split_page(virt_to_page((void *)addr), order); - while (used < alloc_end) { - free_page(used); - used += PAGE_SIZE; - } - } - - return (void *)addr; + return make_alloc_exact(addr, order, size); } EXPORT_SYMBOL(alloc_pages_exact); +/** + * alloc_pages_exact_nid - allocate an exact number of physically-contiguous + * pages on a node. + * @size: the number of bytes to allocate + * @gfp_mask: GFP flags for the allocation + * + * Like alloc_pages_exact(), but try to allocate on node nid first before falling + * back. + * Note this is not alloc_pages_exact_node() which allocates on a specific node, + * but is not exact. 
+ */ +void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) +{ + unsigned order = get_order(size); + struct page *p = alloc_pages_node(nid, gfp_mask, order); + if (!p) + return NULL; + return make_alloc_exact((unsigned long)page_address(p), order, size); +} +EXPORT_SYMBOL(alloc_pages_exact_nid); + /** * free_pages_exact - release memory allocated via alloc_pages_exact() * @virt: the value returned by alloc_pages_exact. -- cgit v1.2.3-71-gd317 From a75b9df9d3bfc3cd1083974c045ae31ce5f3434f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 May 2011 18:00:51 -0400 Subject: NFSv4.1: Ensure that layoutget uses the correct gfp modes Currently, writebacks may end up recursing back into the filesystem due to GFP_KERNEL direct reclaims in the pnfs subsystem. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayout.c | 25 ++++++++++++++----------- fs/nfs/nfs4filelayout.h | 2 +- fs/nfs/nfs4filelayoutdev.c | 34 +++++++++++++++++----------------- fs/nfs/pnfs.c | 33 +++++++++++++++++++-------------- fs/nfs/pnfs.h | 6 +++--- fs/nfs/read.c | 4 ++-- fs/nfs/write.c | 4 ++-- include/linux/nfs_xdr.h | 1 + 8 files changed, 59 insertions(+), 50 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 7841ea603c91..be79dc9f386d 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -418,7 +418,8 @@ static int filelayout_check_layout(struct pnfs_layout_hdr *lo, struct nfs4_filelayout_segment *fl, struct nfs4_layoutget_res *lgr, - struct nfs4_deviceid *id) + struct nfs4_deviceid *id, + gfp_t gfp_flags) { struct nfs4_file_layout_dsaddr *dsaddr; int status = -EINVAL; @@ -441,7 +442,7 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, /* find and reference the deviceid */ dsaddr = nfs4_fl_find_get_deviceid(id); if (dsaddr == NULL) { - dsaddr = get_device_info(lo->plh_inode, id); + dsaddr = get_device_info(lo->plh_inode, id, gfp_flags); if (dsaddr == NULL) goto out; } @@ -502,7 +503,8 @@ static int filelayout_decode_layout(struct pnfs_layout_hdr *flo, struct nfs4_filelayout_segment *fl, struct nfs4_layoutget_res *lgr, - struct nfs4_deviceid *id) + struct nfs4_deviceid *id, + gfp_t gfp_flags) { struct xdr_stream stream; struct xdr_buf buf = { @@ -518,7 +520,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, dprintk("%s: set_layout_map Begin\n", __func__); - scratch = alloc_page(GFP_KERNEL); + scratch = alloc_page(gfp_flags); if (!scratch) return -ENOMEM; @@ -556,13 +558,13 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, goto out_err; fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *), - GFP_KERNEL); + gfp_flags); if (!fl->fh_array) goto out_err; for (i = 0; i < fl->num_fh; i++) { /* Do we want to use a mempool here? 
*/ - fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), GFP_KERNEL); + fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), gfp_flags); if (!fl->fh_array[i]) goto out_err_free; @@ -607,19 +609,20 @@ filelayout_free_lseg(struct pnfs_layout_segment *lseg) static struct pnfs_layout_segment * filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, - struct nfs4_layoutget_res *lgr) + struct nfs4_layoutget_res *lgr, + gfp_t gfp_flags) { struct nfs4_filelayout_segment *fl; int rc; struct nfs4_deviceid id; dprintk("--> %s\n", __func__); - fl = kzalloc(sizeof(*fl), GFP_KERNEL); + fl = kzalloc(sizeof(*fl), gfp_flags); if (!fl) return NULL; - rc = filelayout_decode_layout(layoutid, fl, lgr, &id); - if (rc != 0 || filelayout_check_layout(layoutid, fl, lgr, &id)) { + rc = filelayout_decode_layout(layoutid, fl, lgr, &id, gfp_flags); + if (rc != 0 || filelayout_check_layout(layoutid, fl, lgr, &id, gfp_flags)) { _filelayout_free_lseg(fl); return NULL; } @@ -635,7 +638,7 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, int size = (fl->stripe_type == STRIPE_SPARSE) ? fl->dsaddr->ds_num : fl->dsaddr->stripe_count; - fl->commit_buckets = kcalloc(size, sizeof(struct list_head), GFP_KERNEL); + fl->commit_buckets = kcalloc(size, sizeof(struct list_head), gfp_flags); if (!fl->commit_buckets) { filelayout_free_lseg(&fl->generic_hdr); return NULL; diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h index 7c44579f5832..2b461d77b43a 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/nfs4filelayout.h @@ -104,6 +104,6 @@ extern struct nfs4_file_layout_dsaddr * nfs4_fl_find_get_deviceid(struct nfs4_deviceid *dev_id); extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); struct nfs4_file_layout_dsaddr * -get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id); +get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags); #endif /* FS_NFS_NFS4FILELAYOUT_H */ diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index de5350f2b249..db07c7af1395 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -225,11 +225,11 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) } static struct nfs4_pnfs_ds * -nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port) +nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port, gfp_t gfp_flags) { struct nfs4_pnfs_ds *tmp_ds, *ds; - ds = kzalloc(sizeof(*tmp_ds), GFP_KERNEL); + ds = kzalloc(sizeof(*tmp_ds), gfp_flags); if (!ds) goto out; @@ -261,7 +261,7 @@ out: * Currently only support ipv4, and one multi-path address. 
*/ static struct nfs4_pnfs_ds * -decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode) +decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_flags) { struct nfs4_pnfs_ds *ds = NULL; char *buf; @@ -303,7 +303,7 @@ decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode) rlen); goto out_err; } - buf = kmalloc(rlen + 1, GFP_KERNEL); + buf = kmalloc(rlen + 1, gfp_flags); if (!buf) { dprintk("%s: Not enough memory\n", __func__); goto out_err; @@ -333,7 +333,7 @@ decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode) sscanf(pstr, "-%d-%d", &tmp[0], &tmp[1]); port = htons((tmp[0] << 8) | (tmp[1])); - ds = nfs4_pnfs_ds_add(inode, ip_addr, port); + ds = nfs4_pnfs_ds_add(inode, ip_addr, port, gfp_flags); dprintk("%s: Decoded address and port %s\n", __func__, buf); out_free: kfree(buf); @@ -343,7 +343,7 @@ out_err: /* Decode opaque device data and return the result */ static struct nfs4_file_layout_dsaddr* -decode_device(struct inode *ino, struct pnfs_device *pdev) +decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) { int i; u32 cnt, num; @@ -362,7 +362,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev) struct page *scratch; /* set up xdr stream */ - scratch = alloc_page(GFP_KERNEL); + scratch = alloc_page(gfp_flags); if (!scratch) goto out_err; @@ -384,7 +384,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev) } /* read stripe indices */ - stripe_indices = kcalloc(cnt, sizeof(u8), GFP_KERNEL); + stripe_indices = kcalloc(cnt, sizeof(u8), gfp_flags); if (!stripe_indices) goto out_err_free_scratch; @@ -423,7 +423,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev) dsaddr = kzalloc(sizeof(*dsaddr) + (sizeof(struct nfs4_pnfs_ds *) * (num - 1)), - GFP_KERNEL); + gfp_flags); if (!dsaddr) goto out_err_free_stripe_indices; @@ -452,7 +452,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev) for (j = 0; j < mp_count; j++) { if (j == 0) { dsaddr->ds_list[i] = decode_and_add_ds(&stream, - ino); + ino, gfp_flags); if (dsaddr->ds_list[i] == NULL) goto out_err_free_deviceid; } else { @@ -503,12 +503,12 @@ out_err: * available devices. */ static struct nfs4_file_layout_dsaddr * -decode_and_add_device(struct inode *inode, struct pnfs_device *dev) +decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_flags) { struct nfs4_file_layout_dsaddr *d, *new; long hash; - new = decode_device(inode, dev); + new = decode_device(inode, dev, gfp_flags); if (!new) { printk(KERN_WARNING "%s: Could not decode or add device\n", __func__); @@ -537,7 +537,7 @@ decode_and_add_device(struct inode *inode, struct pnfs_device *dev) * of available devices, and return it. 
*/ struct nfs4_file_layout_dsaddr * -get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id) +get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags) { struct pnfs_device *pdev = NULL; u32 max_resp_sz; @@ -556,17 +556,17 @@ get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id) dprintk("%s inode %p max_resp_sz %u max_pages %d\n", __func__, inode, max_resp_sz, max_pages); - pdev = kzalloc(sizeof(struct pnfs_device), GFP_KERNEL); + pdev = kzalloc(sizeof(struct pnfs_device), gfp_flags); if (pdev == NULL) return NULL; - pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL); + pages = kzalloc(max_pages * sizeof(struct page *), gfp_flags); if (pages == NULL) { kfree(pdev); return NULL; } for (i = 0; i < max_pages; i++) { - pages[i] = alloc_page(GFP_KERNEL); + pages[i] = alloc_page(gfp_flags); if (!pages[i]) goto out_free; } @@ -587,7 +587,7 @@ get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id) * Found new device, need to decode it and then add it to the * list of known devices for this mountpoint. */ - dsaddr = decode_and_add_device(inode, pdev); + dsaddr = decode_and_add_device(inode, pdev, gfp_flags); out_free: for (i = 0; i < max_pages; i++) __free_page(pages[i]); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 65455f58b109..f57f5281a520 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -467,7 +467,8 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, static struct pnfs_layout_segment * send_layoutget(struct pnfs_layout_hdr *lo, struct nfs_open_context *ctx, - u32 iomode) + u32 iomode, + gfp_t gfp_flags) { struct inode *ino = lo->plh_inode; struct nfs_server *server = NFS_SERVER(ino); @@ -480,7 +481,7 @@ send_layoutget(struct pnfs_layout_hdr *lo, dprintk("--> %s\n", __func__); BUG_ON(ctx == NULL); - lgp = kzalloc(sizeof(*lgp), GFP_KERNEL); + lgp = kzalloc(sizeof(*lgp), gfp_flags); if (lgp == NULL) return NULL; @@ -488,12 +489,12 @@ send_layoutget(struct pnfs_layout_hdr *lo, max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; max_pages = max_resp_sz >> PAGE_SHIFT; - pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL); + pages = kzalloc(max_pages * sizeof(struct page *), gfp_flags); if (!pages) goto out_err_free; for (i = 0; i < max_pages; i++) { - pages[i] = alloc_page(GFP_KERNEL); + pages[i] = alloc_page(gfp_flags); if (!pages[i]) goto out_err_free; } @@ -509,6 +510,7 @@ send_layoutget(struct pnfs_layout_hdr *lo, lgp->args.layout.pages = pages; lgp->args.layout.pglen = max_pages * PAGE_SIZE; lgp->lsegpp = &lseg; + lgp->gfp_flags = gfp_flags; /* Synchronously retrieve layout information from server and * store in lseg. 
@@ -666,11 +668,11 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo, } static struct pnfs_layout_hdr * -alloc_init_layout_hdr(struct inode *ino) +alloc_init_layout_hdr(struct inode *ino, gfp_t gfp_flags) { struct pnfs_layout_hdr *lo; - lo = kzalloc(sizeof(struct pnfs_layout_hdr), GFP_KERNEL); + lo = kzalloc(sizeof(struct pnfs_layout_hdr), gfp_flags); if (!lo) return NULL; atomic_set(&lo->plh_refcount, 1); @@ -682,7 +684,7 @@ alloc_init_layout_hdr(struct inode *ino) } static struct pnfs_layout_hdr * -pnfs_find_alloc_layout(struct inode *ino) +pnfs_find_alloc_layout(struct inode *ino, gfp_t gfp_flags) { struct nfs_inode *nfsi = NFS_I(ino); struct pnfs_layout_hdr *new = NULL; @@ -697,7 +699,7 @@ pnfs_find_alloc_layout(struct inode *ino) return nfsi->layout; } spin_unlock(&ino->i_lock); - new = alloc_init_layout_hdr(ino); + new = alloc_init_layout_hdr(ino, gfp_flags); spin_lock(&ino->i_lock); if (likely(nfsi->layout == NULL)) /* Won the race? */ @@ -757,7 +759,8 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode) struct pnfs_layout_segment * pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, - enum pnfs_iomode iomode) + enum pnfs_iomode iomode, + gfp_t gfp_flags) { struct nfs_inode *nfsi = NFS_I(ino); struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; @@ -768,7 +771,7 @@ pnfs_update_layout(struct inode *ino, if (!pnfs_enabled_sb(NFS_SERVER(ino))) return NULL; spin_lock(&ino->i_lock); - lo = pnfs_find_alloc_layout(ino); + lo = pnfs_find_alloc_layout(ino, gfp_flags); if (lo == NULL) { dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__); goto out_unlock; @@ -808,7 +811,7 @@ pnfs_update_layout(struct inode *ino, spin_unlock(&clp->cl_lock); } - lseg = send_layoutget(lo, ctx, iomode); + lseg = send_layoutget(lo, ctx, iomode, gfp_flags); if (!lseg && first) { spin_lock(&clp->cl_lock); list_del_init(&lo->plh_layouts); @@ -847,7 +850,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) goto out; } /* Inject layout blob into I/O device driver */ - lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res); + lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res, lgp->gfp_flags); if (!lseg || IS_ERR(lseg)) { if (!lseg) status = -ENOMEM; @@ -900,7 +903,8 @@ static int pnfs_read_pg_test(struct nfs_pageio_descriptor *pgio, /* This is first coelesce call for a series of nfs_pages */ pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, prev->wb_context, - IOMODE_READ); + IOMODE_READ, + GFP_KERNEL); } return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req); } @@ -922,7 +926,8 @@ static int pnfs_write_pg_test(struct nfs_pageio_descriptor *pgio, /* This is first coelesce call for a series of nfs_pages */ pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, prev->wb_context, - IOMODE_RW); + IOMODE_RW, + GFP_NOFS); } return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req); } diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index bc4827202e7a..0c015bad9e7a 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -70,7 +70,7 @@ struct pnfs_layoutdriver_type { const u32 id; const char *name; struct module *owner; - struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr); + struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr, gfp_t gfp_flags); void (*free_lseg) (struct pnfs_layout_segment *lseg); /* test for nfs page cache coalescing */ @@ -126,7 +126,7 @@ void get_layout_hdr(struct pnfs_layout_hdr *lo); void put_lseg(struct pnfs_layout_segment *lseg); 
struct pnfs_layout_segment * pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, - enum pnfs_iomode access_type); + enum pnfs_iomode access_type, gfp_t gfp_flags); void set_pnfs_layoutdriver(struct nfs_server *, u32 id); void unset_pnfs_layoutdriver(struct nfs_server *); enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *, @@ -245,7 +245,7 @@ static inline void put_lseg(struct pnfs_layout_segment *lseg) static inline struct pnfs_layout_segment * pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, - enum pnfs_iomode access_type) + enum pnfs_iomode access_type, gfp_t gfp_flags) { return NULL; } diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 7cded2b12a05..2bcf0dc306a1 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -288,7 +288,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc) atomic_set(&req->wb_complete, requests); BUG_ON(desc->pg_lseg != NULL); - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ); + lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ, GFP_KERNEL); ClearPageError(page); offset = 0; nbytes = desc->pg_count; @@ -351,7 +351,7 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc) } req = nfs_list_entry(data->pages.next); if ((!lseg) && list_is_singular(&data->pages)) - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ); + lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ, GFP_KERNEL); ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count, 0, lseg); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 3bd5d7e80f6c..49c715b4ac92 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -939,7 +939,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc) atomic_set(&req->wb_complete, requests); BUG_ON(desc->pg_lseg); - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW); + lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW, GFP_NOFS); ClearPageError(page); offset = 0; nbytes = desc->pg_count; @@ -1013,7 +1013,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc) } req = nfs_list_entry(data->pages.next); if ((!lseg) && list_is_singular(&data->pages)) - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW); + lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW, GFP_NOFS); if ((desc->pg_ioflags & FLUSH_COND_STABLE) && (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit)) diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 890dce242639..7e371f7df9c4 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -233,6 +233,7 @@ struct nfs4_layoutget { struct nfs4_layoutget_args args; struct nfs4_layoutget_res res; struct pnfs_layout_segment **lsegpp; + gfp_t gfp_flags; }; struct nfs4_getdeviceinfo_args { -- cgit v1.2.3-71-gd317 From 3e51e3edfd81bfd9853ad7de91167e4ce33d0fe7 Mon Sep 17 00:00:00 2001 From: Samir Bellabes Date: Wed, 11 May 2011 18:18:05 +0200 Subject: sched: Remove unused parameters from sched_fork() and wake_up_new_task() sched_fork() and wake_up_new_task() are defined with a parameter 'unsigned long clone_flags', which is unused. This patch removes the parameters. 
Signed-off-by: Samir Bellabes Acked-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1305130685-1047-1-git-send-email-sam@synack.fr Signed-off-by: Ingo Molnar --- include/linux/sched.h | 5 ++--- kernel/fork.c | 4 ++-- kernel/sched.c | 4 ++-- 3 files changed, 6 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 6b4280b23ee6..12211e1666e2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2051,14 +2051,13 @@ extern void xtime_update(unsigned long ticks); extern int wake_up_state(struct task_struct *tsk, unsigned int state); extern int wake_up_process(struct task_struct *tsk); -extern void wake_up_new_task(struct task_struct *tsk, - unsigned long clone_flags); +extern void wake_up_new_task(struct task_struct *tsk); #ifdef CONFIG_SMP extern void kick_process(struct task_struct *tsk); #else static inline void kick_process(struct task_struct *tsk) { } #endif -extern void sched_fork(struct task_struct *p, int clone_flags); +extern void sched_fork(struct task_struct *p); extern void sched_dead(struct task_struct *p); extern void proc_caches_init(void); diff --git a/kernel/fork.c b/kernel/fork.c index aca62871a4f9..2b44d82b8237 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1152,7 +1152,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, #endif /* Perform scheduler related setup. Assign this task to a CPU. */ - sched_fork(p, clone_flags); + sched_fork(p); retval = perf_event_init_task(p); if (retval) @@ -1463,7 +1463,7 @@ long do_fork(unsigned long clone_flags, */ p->flags &= ~PF_STARTING; - wake_up_new_task(p, clone_flags); + wake_up_new_task(p); tracehook_report_clone_complete(trace, regs, clone_flags, nr, p); diff --git a/kernel/sched.c b/kernel/sched.c index da9338150484..f9778c0d91e2 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2741,7 +2741,7 @@ static void __sched_fork(struct task_struct *p) /* * fork()/clone()-time setup: */ -void sched_fork(struct task_struct *p, int clone_flags) +void sched_fork(struct task_struct *p) { unsigned long flags; int cpu = get_cpu(); @@ -2823,7 +2823,7 @@ void sched_fork(struct task_struct *p, int clone_flags) * that must be done for every newly created context, then puts the task * on the runqueue and wakes it. */ -void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) +void wake_up_new_task(struct task_struct *p) { unsigned long flags; struct rq *rq; -- cgit v1.2.3-71-gd317 From 5db1256a5131d3b133946fa02ac9770a784e6eb2 Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Thu, 12 May 2011 04:13:54 -0500 Subject: seqlock: Don't smp_rmb in seqlock reader spin loop Move the smp_rmb after cpu_relax loop in read_seqlock and add ACCESS_ONCE to make sure the test and return are consistent. A multi-threaded core in the lab didn't like the update from 2.6.35 to 2.6.36, to the point it would hang during boot when multiple threads were active. Bisection showed af5ab277ded04bd9bc6b048c5a2f0e7d70ef0867 (clockevents: Remove the per cpu tick skew) as the culprit and it is supported with stack traces showing xtime_lock waits including tick_do_update_jiffies64 and/or update_vsyscall. Experimentation showed the combination of cpu_relax and smp_rmb was significantly slowing the progress of other threads sharing the core, and this patch is effective in avoiding the hang. 
A theory is the rmb is affecting the whole core while the cpu_relax is causing a resource rebalance flush, together they cause an interfernce cadance that is unbroken when the seqlock reader has interrupts disabled. At first I was confused why the refactor in 3c22cd5709e8143444a6d08682a87f4c57902df3 (kernel: optimise seqlock) didn't affect this patch application, but after some study that affected seqcount not seqlock. The new seqcount was not factored back into the seqlock. I defer that the future. While the removal of the timer interrupt offset created contention for the xtime lock while a cpu does the additonal work to update the system clock, the seqlock implementation with the tight rmb spin loop goes back much further, and is just waiting for the right trigger. Cc: Signed-off-by: Milton Miller Cc: Cc: Linus Torvalds Cc: Andi Kleen Cc: Nick Piggin Cc: Benjamin Herrenschmidt Cc: Anton Blanchard Cc: Paul McKenney Acked-by: Eric Dumazet Link: http://lkml.kernel.org/r/%3Cseqlock-rmb%40mdm.bga.com%3E Signed-off-by: Thomas Gleixner --- include/linux/seqlock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index e98cd2e57194..06d69648fc86 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -88,12 +88,12 @@ static __always_inline unsigned read_seqbegin(const seqlock_t *sl) unsigned ret; repeat: - ret = sl->sequence; - smp_rmb(); + ret = ACCESS_ONCE(sl->sequence); if (unlikely(ret & 1)) { cpu_relax(); goto repeat; } + smp_rmb(); return ret; } -- cgit v1.2.3-71-gd317 From 698b368275c3fa98261159253cfc79653f9dffc6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 11 May 2011 14:49:36 -0700 Subject: fbcon: add lifetime refcount to opened frame buffers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This just adds the refcount and the new registration lock logic. It does not (for example) actually change the read/write/ioctl routines to actually use the frame buffer that was opened: those function still end up alway susing whatever the current frame buffer is at the time of the call. Without this, if something holds the frame buffer open over a framebuffer switch, the close() operation after the switch will access a fb_info that has been free'd by the unregistering of the old frame buffer. (The read/write/ioctl operations will normally not cause problems, because they will - illogically - pick up the new fbcon instead. But a switch that happens just as one of those is going on might see problems too, the window is just much smaller: one individual op rather than the whole open-close sequence.) This use-after-free is apparently fairly easily triggered by the Ubuntu 11.04 boot sequence. 
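
In outline (a sketch of the intended lifetime rule, not the patch itself), the new count makes the following ordering safe:

	/*
	 * register_framebuffer():   count = 1
	 * open  -> get_fb_info():   count = 2   (the opener holds a reference)
	 * unregister_framebuffer(): registered_fb[i] = NULL; put_fb_info() -> count = 1
	 * close -> put_fb_info():   count = 0   -> ->fb_destroy() runs here
	 *
	 * so fb_info is only freed after the last opener has gone away, even
	 * if the framebuffer was unregistered while it was still open.
	 */

The registration_lock added below serializes registration, unregistration and the lookup in get_fb_info(), so the reference can be taken without racing against removal.
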
Acked-by: Tim Gardner Tested-by: Daniel J Blueman Tested-by: Anca Emanuel Cc: Bruno Prémont Cc: Alan Cox Cc: Paul Mundt Cc: Dave Airlie Cc: Andy Whitcroft Signed-off-by: Linus Torvalds --- drivers/video/fbmem.c | 56 ++++++++++++++++++++++++++++++++++++++++++--------- include/linux/fb.h | 1 + 2 files changed, 47 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c index e0c2284924b6..eec14d2ca1c7 100644 --- a/drivers/video/fbmem.c +++ b/drivers/video/fbmem.c @@ -42,9 +42,34 @@ #define FBPIXMAPSIZE (1024 * 8) +static DEFINE_MUTEX(registration_lock); struct fb_info *registered_fb[FB_MAX] __read_mostly; int num_registered_fb __read_mostly; +static struct fb_info *get_fb_info(unsigned int idx) +{ + struct fb_info *fb_info; + + if (idx >= FB_MAX) + return ERR_PTR(-ENODEV); + + mutex_lock(®istration_lock); + fb_info = registered_fb[idx]; + if (fb_info) + atomic_inc(&fb_info->count); + mutex_unlock(®istration_lock); + + return fb_info; +} + +static void put_fb_info(struct fb_info *fb_info) +{ + if (!atomic_dec_and_test(&fb_info->count)) + return; + if (fb_info->fbops->fb_destroy) + fb_info->fbops->fb_destroy(fb_info); +} + int lock_fb_info(struct fb_info *info) { mutex_lock(&info->lock); @@ -647,6 +672,7 @@ int fb_show_logo(struct fb_info *info, int rotate) { return 0; } static void *fb_seq_start(struct seq_file *m, loff_t *pos) { + mutex_lock(®istration_lock); return (*pos < FB_MAX) ? pos : NULL; } @@ -658,6 +684,7 @@ static void *fb_seq_next(struct seq_file *m, void *v, loff_t *pos) static void fb_seq_stop(struct seq_file *m, void *v) { + mutex_unlock(®istration_lock); } static int fb_seq_show(struct seq_file *m, void *v) @@ -1361,14 +1388,16 @@ __releases(&info->lock) struct fb_info *info; int res = 0; - if (fbidx >= FB_MAX) - return -ENODEV; - info = registered_fb[fbidx]; - if (!info) + info = get_fb_info(fbidx); + if (!info) { request_module("fb%d", fbidx); - info = registered_fb[fbidx]; - if (!info) - return -ENODEV; + info = get_fb_info(fbidx); + if (!info) + return -ENODEV; + } + if (IS_ERR(info)) + return PTR_ERR(info); + mutex_lock(&info->lock); if (!try_module_get(info->fbops->owner)) { res = -ENODEV; @@ -1386,6 +1415,8 @@ __releases(&info->lock) #endif out: mutex_unlock(&info->lock); + if (res) + put_fb_info(info); return res; } @@ -1401,6 +1432,7 @@ __releases(&info->lock) info->fbops->fb_release(info,1); module_put(info->fbops->owner); mutex_unlock(&info->lock); + put_fb_info(info); return 0; } @@ -1542,11 +1574,13 @@ register_framebuffer(struct fb_info *fb_info) remove_conflicting_framebuffers(fb_info->apertures, fb_info->fix.id, fb_is_primary_device(fb_info)); + mutex_lock(®istration_lock); num_registered_fb++; for (i = 0 ; i < FB_MAX; i++) if (!registered_fb[i]) break; fb_info->node = i; + atomic_set(&fb_info->count, 1); mutex_init(&fb_info->lock); mutex_init(&fb_info->mm_lock); @@ -1583,6 +1617,7 @@ register_framebuffer(struct fb_info *fb_info) fb_var_to_videomode(&mode, &fb_info->var); fb_add_videomode(&mode, &fb_info->modelist); registered_fb[i] = fb_info; + mutex_unlock(®istration_lock); event.info = fb_info; if (!lock_fb_info(fb_info)) @@ -1616,6 +1651,7 @@ unregister_framebuffer(struct fb_info *fb_info) struct fb_event event; int i, ret = 0; + mutex_lock(®istration_lock); i = fb_info->node; if (!registered_fb[i]) { ret = -EINVAL; @@ -1638,7 +1674,7 @@ unregister_framebuffer(struct fb_info *fb_info) (fb_info->pixmap.flags & FB_PIXMAP_DEFAULT)) kfree(fb_info->pixmap.addr); 
fb_destroy_modelist(&fb_info->modelist); - registered_fb[i]=NULL; + registered_fb[i] = NULL; num_registered_fb--; fb_cleanup_device(fb_info); device_destroy(fb_class, MKDEV(FB_MAJOR, i)); @@ -1646,9 +1682,9 @@ unregister_framebuffer(struct fb_info *fb_info) fb_notifier_call_chain(FB_EVENT_FB_UNREGISTERED, &event); /* this may free fb info */ - if (fb_info->fbops->fb_destroy) - fb_info->fbops->fb_destroy(fb_info); + put_fb_info(fb_info); done: + mutex_unlock(®istration_lock); return ret; } diff --git a/include/linux/fb.h b/include/linux/fb.h index df728c1c29ed..6a8274877171 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -832,6 +832,7 @@ struct fb_tile_ops { #define FBINFO_CAN_FORCE_OUTPUT 0x200000 struct fb_info { + atomic_t count; int node; int flags; struct mutex lock; /* Lock for open/release/ioctl funcs */ -- cgit v1.2.3-71-gd317 From 7f267051bd7a280265b1b5ead58e9c6e4e1ac3a4 Mon Sep 17 00:00:00 2001 From: Yi Zou Date: Mon, 9 May 2011 11:53:27 +0000 Subject: net: group FCoE related feature flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MichaÅ‚ MirosÅ‚aw's patch (http://patchwork.ozlabs.org/patch/94421/) fixes the issue (http://patchwork.ozlabs.org/patch/94188/) about not populating FCoE related flags correctly on vlan devices. However, only NETIF_F_FCOE_CRC is part of the NETIF_F_ALL_TX_OFFLOADS right now, where weed NETIF_F_FCOE_MTU and NETIF_F_FSO as well. Therefore, add NETIF_F_ALL_FCOE to indicate feature flags used by FCoE TX offloads. These include NETIF_F_FCOE_CRC, NETIF_F_FCOE_MTU, and NETIF_F_FSO and add them to be part of NETIF_F_ALL_TX_OFFLOADS. This would eventually make sure all FCoE needed flags are populated properly to vlan devices. Signed-off-by: Yi Zou Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e7244ed1f9a8..00d650c74800 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1097,10 +1097,14 @@ struct net_device { #define NETIF_F_ALL_TSO (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN) +#define NETIF_F_ALL_FCOE (NETIF_F_FCOE_CRC | NETIF_F_FCOE_MTU | \ + NETIF_F_FSO) + #define NETIF_F_ALL_TX_OFFLOADS (NETIF_F_ALL_CSUM | NETIF_F_SG | \ NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | \ NETIF_F_HIGHDMA | \ - NETIF_F_SCTP_CSUM | NETIF_F_FCOE_CRC) + NETIF_F_SCTP_CSUM | \ + NETIF_F_ALL_FCOE) /* * If one device supports one of these features, then enable them -- cgit v1.2.3-71-gd317 From afe12cc86b0ba545a01ad8716539ab07ab6e9e89 Mon Sep 17 00:00:00 2001 From: MichaÅ‚ MirosÅ‚aw Date: Sat, 7 May 2011 03:22:17 +0000 Subject: net: introduce netdev_change_features() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It will be needed by bonding and other drivers changing vlan_features after ndo_init callback. As a bonus, this includes kernel-doc for netdev_update_features(). Signed-off-by: MichaÅ‚ MirosÅ‚aw Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 1 + net/core/dev.c | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 00d650c74800..1d9696a9ee4d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2565,6 +2565,7 @@ u32 netdev_increment_features(u32 all, u32 one, u32 mask); u32 netdev_fix_features(struct net_device *dev, u32 features); int __netdev_update_features(struct net_device *dev); void netdev_update_features(struct net_device *dev); +void netdev_change_features(struct net_device *dev); void netif_stacked_transfer_operstate(const struct net_device *rootdev, struct net_device *dev); diff --git a/net/core/dev.c b/net/core/dev.c index 75898a32c038..ea23353e6251 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5289,6 +5289,14 @@ int __netdev_update_features(struct net_device *dev) return 1; } +/** + * netdev_update_features - recalculate device features + * @dev: the device to check + * + * Recalculate dev->features set and send notifications if it + * has changed. Should be called after driver or hardware dependent + * conditions might have changed that influence the features. + */ void netdev_update_features(struct net_device *dev) { if (__netdev_update_features(dev)) @@ -5296,6 +5304,23 @@ void netdev_update_features(struct net_device *dev) } EXPORT_SYMBOL(netdev_update_features); +/** + * netdev_change_features - recalculate device features + * @dev: the device to check + * + * Recalculate dev->features set and send notifications even + * if they have not changed. Should be called instead of + * netdev_update_features() if also dev->vlan_features might + * have changed to allow the changes to be propagated to stacked + * VLAN devices. + */ +void netdev_change_features(struct net_device *dev) +{ + __netdev_update_features(dev); + netdev_features_change(dev); +} +EXPORT_SYMBOL(netdev_change_features); + /** * netif_stacked_transfer_operstate - transfer operstate * @rootdev: the root or lower level device to transfer state from -- cgit v1.2.3-71-gd317 From bdc220da3209d50b8c295490dec94845c88670a2 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 9 May 2011 17:42:46 +0000 Subject: netdevice.h: Align struct net_device members Save a bit of space. Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- include/linux/netdevice.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1d9696a9ee4d..a134d809125b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1020,9 +1020,6 @@ struct net_device { * part of the usual set specified in Space.c. */ - unsigned char if_port; /* Selectable AUI, TP,..*/ - unsigned char dma; /* DMA channel */ - unsigned long state; struct list_head dev_list; @@ -1146,13 +1143,16 @@ struct net_device { const struct header_ops *header_ops; unsigned int flags; /* interface flags (a la BSD) */ + unsigned int priv_flags; /* Like 'flags' but invisible to userspace. */ unsigned short gflags; - unsigned int priv_flags; /* Like 'flags' but invisible to userspace. 
*/ unsigned short padded; /* How much padding added by alloc_netdev() */ unsigned char operstate; /* RFC2863 operstate */ unsigned char link_mode; /* mapping policy to operstate */ + unsigned char if_port; /* Selectable AUI, TP,..*/ + unsigned char dma; /* DMA channel */ + unsigned int mtu; /* interface MTU value */ unsigned short type; /* interface hardware type */ unsigned short hard_header_len; /* hardware hdr length */ -- cgit v1.2.3-71-gd317 From 29dd54b72ba8c5ad0dd6dd33584449b5953f700b Mon Sep 17 00:00:00 2001 From: Anirban Chakraborty Date: Thu, 12 May 2011 12:48:32 +0000 Subject: ethtool: Added support for FW dump Added code to take FW dump via ethtool. Dump level can be controlled via setting the dump flag. A get function is provided to query the current setting of the dump flag. Dump data is obtained from the driver via a separate get function. Changes from v3: Fixed buffer length issue in ethtool_get_dump_data function. Updated kernel doc for ethtool_dump struct and get_dump_flag function. Changes from v2: Provided separate commands for get flag and data. Check for minimum of the two buffer length obtained via ethtool and driver and use that for dump buffer Pass up the driver return error codes up to the caller. Added kernel doc comments. Signed-off-by: Anirban Chakraborty Reviewed-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/ethtool.h | 31 +++++++++++++++++ net/core/ethtool.c | 90 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 121 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index bd0b50b85f06..c6a850ab2ec5 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -601,6 +601,26 @@ struct ethtool_flash { char data[ETHTOOL_FLASH_MAX_FILENAME]; }; +/** + * struct ethtool_dump - used for retrieving, setting device dump + * @cmd: Command number - %ETHTOOL_GET_DUMP_FLAG, %ETHTOOL_GET_DUMP_DATA, or + * %ETHTOOL_SET_DUMP + * @version: FW version of the dump, filled in by driver + * @flag: driver dependent flag for dump setting, filled in by driver during + * get and filled in by ethtool for set operation + * @len: length of dump data, used as the length of the user buffer on entry to + * %ETHTOOL_GET_DUMP_DATA and this is returned as dump length by driver + * for %ETHTOOL_GET_DUMP_FLAG command + * @data: data collected for get dump data operation + */ +struct ethtool_dump { + __u32 cmd; + __u32 version; + __u32 flag; + __u32 len; + __u8 data[0]; +}; + /* for returning and changing feature sets */ /** @@ -853,6 +873,10 @@ bool ethtool_invalid_flags(struct net_device *dev, u32 data, u32 supported); * @get_channels: Get number of channels. * @set_channels: Set number of channels. Returns a negative error code or * zero. + * @get_dump_flag: Get dump flag indicating current dump length, version, + * and flag of the device. + * @get_dump_data: Get dump data. + * @set_dump: Set dump specific flags to the device. * * All operations are optional (i.e. the function pointer may be set * to %NULL) and callers must take this into account. 
Callers must @@ -927,6 +951,10 @@ struct ethtool_ops { const struct ethtool_rxfh_indir *); void (*get_channels)(struct net_device *, struct ethtool_channels *); int (*set_channels)(struct net_device *, struct ethtool_channels *); + int (*get_dump_flag)(struct net_device *, struct ethtool_dump *); + int (*get_dump_data)(struct net_device *, + struct ethtool_dump *, void *); + int (*set_dump)(struct net_device *, struct ethtool_dump *); }; #endif /* __KERNEL__ */ @@ -998,6 +1026,9 @@ struct ethtool_ops { #define ETHTOOL_SFEATURES 0x0000003b /* Change device offload settings */ #define ETHTOOL_GCHANNELS 0x0000003c /* Get no of channels */ #define ETHTOOL_SCHANNELS 0x0000003d /* Set no of channels */ +#define ETHTOOL_SET_DUMP 0x0000003e /* Set dump settings */ +#define ETHTOOL_GET_DUMP_FLAG 0x0000003f /* Get dump settings */ +#define ETHTOOL_GET_DUMP_DATA 0x00000040 /* Get dump data */ /* compatibility with older code */ #define SPARC_ETH_GSET ETHTOOL_GSET diff --git a/net/core/ethtool.c b/net/core/ethtool.c index b8c2b10f397a..b8c2bcfee6af 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1823,6 +1823,87 @@ static noinline_for_stack int ethtool_flash_device(struct net_device *dev, return dev->ethtool_ops->flash_device(dev, &efl); } +static int ethtool_set_dump(struct net_device *dev, + void __user *useraddr) +{ + struct ethtool_dump dump; + + if (!dev->ethtool_ops->set_dump) + return -EOPNOTSUPP; + + if (copy_from_user(&dump, useraddr, sizeof(dump))) + return -EFAULT; + + return dev->ethtool_ops->set_dump(dev, &dump); +} + +static int ethtool_get_dump_flag(struct net_device *dev, + void __user *useraddr) +{ + int ret; + struct ethtool_dump dump; + const struct ethtool_ops *ops = dev->ethtool_ops; + + if (!dev->ethtool_ops->get_dump_flag) + return -EOPNOTSUPP; + + if (copy_from_user(&dump, useraddr, sizeof(dump))) + return -EFAULT; + + ret = ops->get_dump_flag(dev, &dump); + if (ret) + return ret; + + if (copy_to_user(useraddr, &dump, sizeof(dump))) + return -EFAULT; + return 0; +} + +static int ethtool_get_dump_data(struct net_device *dev, + void __user *useraddr) +{ + int ret; + __u32 len; + struct ethtool_dump dump, tmp; + const struct ethtool_ops *ops = dev->ethtool_ops; + void *data = NULL; + + if (!dev->ethtool_ops->get_dump_data || + !dev->ethtool_ops->get_dump_flag) + return -EOPNOTSUPP; + + if (copy_from_user(&dump, useraddr, sizeof(dump))) + return -EFAULT; + + memset(&tmp, 0, sizeof(tmp)); + tmp.cmd = ETHTOOL_GET_DUMP_FLAG; + ret = ops->get_dump_flag(dev, &tmp); + if (ret) + return ret; + + len = (tmp.len > dump.len) ? dump.len : tmp.len; + if (!len) + return -EFAULT; + + data = vzalloc(tmp.len); + if (!data) + return -ENOMEM; + ret = ops->get_dump_data(dev, &dump, data); + if (ret) + goto out; + + if (copy_to_user(useraddr, &dump, sizeof(dump))) { + ret = -EFAULT; + goto out; + } + useraddr += offsetof(struct ethtool_dump, data); + if (copy_to_user(useraddr, data, len)) + ret = -EFAULT; +out: + vfree(data); + return ret; +} + /* The main entry point in this file. 
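The handlers above imply a two-step flow on the user side: query the length with ETHTOOL_GET_DUMP_FLAG, allocate a struct ethtool_dump followed by that many data bytes, then issue ETHTOOL_GET_DUMP_DATA. The sketch below uses the standard SIOCETHTOOL/ifreq plumbing; the interface name "eth0" and the lack of error handling are assumptions made to keep it short, and it needs headers that already contain these ioctl numbers.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

int main(void)
{
	struct ifreq ifr;
	struct ethtool_dump flag_req;
	struct ethtool_dump *data_req;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);

	/* step 1: learn the current flag/version and the required length */
	memset(&flag_req, 0, sizeof(flag_req));
	flag_req.cmd = ETHTOOL_GET_DUMP_FLAG;
	ifr.ifr_data = (void *)&flag_req;
	if (ioctl(fd, SIOCETHTOOL, &ifr) < 0)
		return 1;

	/* step 2: fetch the dump into the trailing data[] area */
	data_req = calloc(1, sizeof(*data_req) + flag_req.len);
	data_req->cmd = ETHTOOL_GET_DUMP_DATA;
	data_req->len = flag_req.len;
	ifr.ifr_data = (void *)data_req;
	if (ioctl(fd, SIOCETHTOOL, &ifr) < 0)
		return 1;

	printf("dump version %u, %u bytes\n", data_req->version, data_req->len);
	free(data_req);
	return 0;
}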
Called from net/core/dev.c */ int dev_ethtool(struct net *net, struct ifreq *ifr) @@ -2039,6 +2120,15 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_SCHANNELS: rc = ethtool_set_channels(dev, useraddr); break; + case ETHTOOL_SET_DUMP: + rc = ethtool_set_dump(dev, useraddr); + break; + case ETHTOOL_GET_DUMP_FLAG: + rc = ethtool_get_dump_flag(dev, useraddr); + break; + case ETHTOOL_GET_DUMP_DATA: + rc = ethtool_get_dump_data(dev, useraddr); + break; default: rc = -EOPNOTSUPP; } -- cgit v1.2.3-71-gd317 From 47a150edc2ae734c0f4bf50aa19499e23b9a46f8 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Fri, 13 May 2011 04:27:54 +0100 Subject: Cache user_ns in struct cred If !CONFIG_USERNS, have current_user_ns() defined to (&init_user_ns). Get rid of _current_user_ns. This requires nsown_capable() to be defined in capability.c rather than as static inline in capability.h, so do that. Request_key needs init_user_ns defined at current_user_ns if !CONFIG_USERNS, so forward-declare that in cred.h if !CONFIG_USERNS at current_user_ns() define. Compile-tested with and without CONFIG_USERNS. Signed-off-by: Serge E. Hallyn [ This makes a huge performance difference for acl_permission_check(), up to 30%. And that is one of the hottest kernel functions for loads that are pathname-lookup heavy. ] Signed-off-by: Linus Torvalds --- include/linux/capability.h | 13 +------------ include/linux/cred.h | 10 ++++++++-- kernel/capability.c | 12 ++++++++++++ kernel/cred.c | 12 ++++++------ 4 files changed, 27 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/capability.h b/include/linux/capability.h index 16ee8b49a200..d4675af963fa 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -546,18 +546,7 @@ extern bool has_capability_noaudit(struct task_struct *t, int cap); extern bool capable(int cap); extern bool ns_capable(struct user_namespace *ns, int cap); extern bool task_ns_capable(struct task_struct *t, int cap); - -/** - * nsown_capable - Check superior capability to one's own user_ns - * @cap: The capability in question - * - * Return true if the current task has the given superior capability - * targeted at its own user namespace. 
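For context, a typical caller of nsown_capable() looks like the hedged sketch below: a permission check that passes for a task holding the capability in its own user namespace, even if it is unprivileged in the initial namespace. The function name and the choice of CAP_SYS_ADMIN are illustrative assumptions.

#include <linux/capability.h>
#include <linux/errno.h>

static int foo_tune_parameter(unsigned long val)
{
	if (!nsown_capable(CAP_SYS_ADMIN))
		return -EPERM;
	/* ... apply val ... */
	return 0;
}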
- */ -static inline bool nsown_capable(int cap) -{ - return ns_capable(current_user_ns(), cap); -} +extern bool nsown_capable(int cap); /* audit system wants to get cap info from files as well */ extern int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps); diff --git a/include/linux/cred.h b/include/linux/cred.h index 9aeeb0ba2003..be16b61283cc 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -146,6 +146,7 @@ struct cred { void *security; /* subjective LSM security */ #endif struct user_struct *user; /* real user ID subscription */ + struct user_namespace *user_ns; /* cached user->user_ns */ struct group_info *group_info; /* supplementary groups for euid/fsgid */ struct rcu_head rcu; /* RCU deletion hook */ }; @@ -354,10 +355,15 @@ static inline void put_cred(const struct cred *_cred) #define current_fsgid() (current_cred_xxx(fsgid)) #define current_cap() (current_cred_xxx(cap_effective)) #define current_user() (current_cred_xxx(user)) -#define _current_user_ns() (current_cred_xxx(user)->user_ns) #define current_security() (current_cred_xxx(security)) -extern struct user_namespace *current_user_ns(void); +#ifdef CONFIG_USER_NS +#define current_user_ns() (current_cred_xxx(user_ns)) +#else +extern struct user_namespace init_user_ns; +#define current_user_ns() (&init_user_ns) +#endif + #define current_uid_gid(_uid, _gid) \ do { \ diff --git a/kernel/capability.c b/kernel/capability.c index bf0c734d0c12..32a80e08ff4b 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -399,3 +399,15 @@ bool task_ns_capable(struct task_struct *t, int cap) return ns_capable(task_cred_xxx(t, user)->user_ns, cap); } EXPORT_SYMBOL(task_ns_capable); + +/** + * nsown_capable - Check superior capability to one's own user_ns + * @cap: The capability in question + * + * Return true if the current task has the given superior capability + * targeted at its own user namespace. + */ +bool nsown_capable(int cap) +{ + return ns_capable(current_user_ns(), cap); +} diff --git a/kernel/cred.c b/kernel/cred.c index 5557b55048df..8093c16b84b1 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -54,6 +54,7 @@ struct cred init_cred = { .cap_effective = CAP_INIT_EFF_SET, .cap_bset = CAP_INIT_BSET, .user = INIT_USER, + .user_ns = &init_user_ns, .group_info = &init_groups, #ifdef CONFIG_KEYS .tgcred = &init_tgcred, @@ -410,6 +411,11 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) goto error_put; } + /* cache user_ns in cred. Doesn't need a refcount because it will + * stay pinned by cred->user + */ + new->user_ns = new->user->user_ns; + #ifdef CONFIG_KEYS /* new threads get their own thread keyrings if their parent already * had one */ @@ -741,12 +747,6 @@ int set_create_files_as(struct cred *new, struct inode *inode) } EXPORT_SYMBOL(set_create_files_as); -struct user_namespace *current_user_ns(void) -{ - return _current_user_ns(); -} -EXPORT_SYMBOL(current_user_ns); - #ifdef CONFIG_DEBUG_CREDENTIALS bool creds_are_invalid(const struct cred *cred) -- cgit v1.2.3-71-gd317 From 82a3242e11d9e63c8195be46c954efaefee35e22 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 12 May 2011 16:01:02 -0700 Subject: sysfs: remove "last sysfs file:" line from the oops messages On some arches (x86, sh, arm, unicore, powerpc) the oops message would print out the last sysfs file accessed. 
This was very useful in finding a number of sysfs and driver core bugs in the 2.5 and early 2.6 development days, but it has been a number of years since this file has actually helped in debugging anything that couldn't also be trivially determined from the stack traceback. So it's time to delete the line. This is good as we need all the space we can get for oops messages at times on consoles. Acked-by: Phil Carmody Acked-by: Ingo Molnar Cc: Andrew Morton Cc: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/arm/kernel/traps.c | 1 - arch/powerpc/kernel/traps.c | 1 - arch/sh/kernel/traps_32.c | 1 - arch/unicore32/kernel/traps.c | 1 - arch/x86/kernel/dumpstack.c | 1 - fs/sysfs/file.c | 12 ------------ include/linux/sysfs.h | 5 ----- 7 files changed, 22 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 3b54ad19d489..d52eec268b47 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -234,7 +234,6 @@ static int __die(const char *str, int err, struct thread_info *thread, struct pt printk(KERN_EMERG "Internal error: %s: %x [#%d]" S_PREEMPT S_SMP "\n", str, err, ++die_counter); - sysfs_printk_last_file(); /* trap and error numbers are mostly meaningless on ARM */ ret = notify_die(DIE_OOPS, str, regs, err, tsk->thread.trap_no, SIGSEGV); diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 5ddb801bc154..d782cd71c07c 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -143,7 +143,6 @@ int die(const char *str, struct pt_regs *regs, long err) #endif printk("%s\n", ppc_md.name ? ppc_md.name : ""); - sysfs_printk_last_file(); if (notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV) == NOTIFY_STOP) return 1; diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c index 3484c2f65aba..b51a17104b5f 100644 --- a/arch/sh/kernel/traps_32.c +++ b/arch/sh/kernel/traps_32.c @@ -87,7 +87,6 @@ void die(const char * str, struct pt_regs * regs, long err) bust_spinlocks(1); printk("%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter); - sysfs_printk_last_file(); print_modules(); show_regs(regs); diff --git a/arch/unicore32/kernel/traps.c b/arch/unicore32/kernel/traps.c index 254e36fa9513..b9a26465e728 100644 --- a/arch/unicore32/kernel/traps.c +++ b/arch/unicore32/kernel/traps.c @@ -192,7 +192,6 @@ static int __die(const char *str, int err, struct thread_info *thread, printk(KERN_EMERG "Internal error: %s: %x [#%d]\n", str, err, ++die_counter); - sysfs_printk_last_file(); /* trap and error numbers are mostly meaningless on UniCore */ ret = notify_die(DIE_OOPS, str, regs, err, tsk->thread.trap_no, \ diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index e2a3f0606da4..f72e7193acc5 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -279,7 +279,6 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err) printk("DEBUG_PAGEALLOC"); #endif printk("\n"); - sysfs_printk_last_file(); if (notify_die(DIE_OOPS, str, regs, err, current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) return 1; diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index da3fefe91a8f..1ad8c93c1b85 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -24,13 +24,6 @@ #include "sysfs.h" -/* used in crash dumps to help with debugging */ -static char last_sysfs_file[PATH_MAX]; -void sysfs_printk_last_file(void) -{ - printk(KERN_EMERG "last sysfs file: %s\n", last_sysfs_file); -} - /* * There's one sysfs_buffer for each open file and one * sysfs_open_dirent for 
each sysfs_dirent with one or more open @@ -337,11 +330,6 @@ static int sysfs_open_file(struct inode *inode, struct file *file) struct sysfs_buffer *buffer; const struct sysfs_ops *ops; int error = -EACCES; - char *p; - - p = d_path(&file->f_path, last_sysfs_file, sizeof(last_sysfs_file)); - if (!IS_ERR(p)) - memmove(last_sysfs_file, p, strlen(p) + 1); /* need attr_sd for attr and ops, its parent for kobj */ if (!sysfs_get_active(attr_sd)) diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 30b881555fa5..c3acda60eee0 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -176,7 +176,6 @@ struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd, const unsigned char *name); struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd); void sysfs_put(struct sysfs_dirent *sd); -void sysfs_printk_last_file(void); /* Called to clear a ns tag when it is no longer valid */ void sysfs_exit_ns(enum kobj_ns_type type, const void *tag); @@ -348,10 +347,6 @@ static inline int __must_check sysfs_init(void) return 0; } -static inline void sysfs_printk_last_file(void) -{ -} - #endif /* CONFIG_SYSFS */ #endif /* _SYSFS_H_ */ -- cgit v1.2.3-71-gd317 From 0b61d2acb1ea48d8eba798ed92759b7f1b0f4209 Mon Sep 17 00:00:00 2001 From: J Freyensee Date: Fri, 6 May 2011 16:56:49 -0700 Subject: Intel PTI implementaiton of MIPI 1149.7. The PTI (Parallel Trace Interface) driver directs trace data routed from various parts in the system out through an Intel Penwell PTI port and out of the mobile device for analysis with a debugging tool (Lauterbach or Fido). Though n_tracesink and n_tracerouter line discipline drivers are used to extract modem tracing data to the PTI driver and other parts of an Intel mobile solution, the PTI driver can be used independent of n_tracesink and n_tracerouter. You should select this driver if the target kernel is meant for an Intel Atom (non-netbook) mobile device containing a MIPI P1149.7 standard implementation. Signed-off-by: J Freyensee Signed-off-by: Greg Kroah-Hartman --- drivers/misc/Kconfig | 13 + drivers/misc/Makefile | 1 + drivers/misc/pti.c | 980 ++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/pti.h | 42 +++ 4 files changed, 1036 insertions(+) create mode 100644 drivers/misc/pti.c create mode 100644 include/linux/pti.h (limited to 'include/linux') diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 915c31ef84cd..ec33d939548c 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -144,6 +144,19 @@ config PHANTOM If you choose to build module, its name will be phantom. If unsure, say N here. +config INTEL_MID_PTI + tristate "Parallel Trace Interface for MIPI P1149.7 cJTAG standard" + default n + help + The PTI (Parallel Trace Interface) driver directs + trace data routed from various parts in the system out + through an Intel Penwell PTI port and out of the mobile + device for analysis with a debugging tool (Lauterbach or Fido). + + You should select this driver if the target kernel is meant for + an Intel Atom (non-netbook) mobile device containing a MIPI + P1149.7 standard implementation. 
+ config SGI_IOC4 tristate "SGI IOC4 Base IO support" depends on PCI diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index f5468602961f..662aa3c71d05 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -6,6 +6,7 @@ obj-$(CONFIG_IBM_ASM) += ibmasm/ obj-$(CONFIG_AD525X_DPOT) += ad525x_dpot.o obj-$(CONFIG_AD525X_DPOT_I2C) += ad525x_dpot-i2c.o obj-$(CONFIG_AD525X_DPOT_SPI) += ad525x_dpot-spi.o +0bj-$(CONFIG_INTEL_MID_PTI) += pti.o obj-$(CONFIG_ATMEL_PWM) += atmel_pwm.o obj-$(CONFIG_ATMEL_SSC) += atmel-ssc.o obj-$(CONFIG_ATMEL_TCLIB) += atmel_tclib.o diff --git a/drivers/misc/pti.c b/drivers/misc/pti.c new file mode 100644 index 000000000000..bb6f9255c17c --- /dev/null +++ b/drivers/misc/pti.c @@ -0,0 +1,980 @@ +/* + * pti.c - PTI driver for cJTAG data extration + * + * Copyright (C) Intel 2010 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * The PTI (Parallel Trace Interface) driver directs trace data routed from + * various parts in the system out through the Intel Penwell PTI port and + * out of the mobile device for analysis with a debugging tool + * (Lauterbach, Fido). This is part of a solution for the MIPI P1149.7, + * compact JTAG, standard. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRIVERNAME "pti" +#define PCINAME "pciPTI" +#define TTYNAME "ttyPTI" +#define CHARNAME "pti" +#define PTITTY_MINOR_START 0 +#define PTITTY_MINOR_NUM 2 +#define MAX_APP_IDS 16 /* 128 channel ids / u8 bit size */ +#define MAX_OS_IDS 16 /* 128 channel ids / u8 bit size */ +#define MAX_MODEM_IDS 16 /* 128 channel ids / u8 bit size */ +#define MODEM_BASE_ID 71 /* modem master ID address */ +#define CONTROL_ID 72 /* control master ID address */ +#define CONSOLE_ID 73 /* console master ID address */ +#define OS_BASE_ID 74 /* base OS master ID address */ +#define APP_BASE_ID 80 /* base App master ID address */ +#define CONTROL_FRAME_LEN 32 /* PTI control frame maximum size */ +#define USER_COPY_SIZE 8192 /* 8Kb buffer for user space copy */ +#define APERTURE_14 0x3800000 /* offset to first OS write addr */ +#define APERTURE_LEN 0x400000 /* address length */ + +struct pti_tty { + struct pti_masterchannel *mc; +}; + +struct pti_dev { + struct tty_port port; + unsigned long pti_addr; + unsigned long aperture_base; + void __iomem *pti_ioaddr; + u8 ia_app[MAX_APP_IDS]; + u8 ia_os[MAX_OS_IDS]; + u8 ia_modem[MAX_MODEM_IDS]; +}; + +/* + * This protects access to ia_app, ia_os, and ia_modem, + * which keeps track of channels allocated in + * an aperture write id. + */ +static DEFINE_MUTEX(alloclock); + +static struct pci_device_id pci_ids[] __devinitconst = { + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x82B)}, + {0} +}; + +static struct tty_driver *pti_tty_driver; +static struct pti_dev *drv_data; + +static unsigned int pti_console_channel; +static unsigned int pti_control_channel; + +/** + * pti_write_to_aperture()- The private write function to PTI HW. + * + * @mc: The 'aperture'. 
It's part of a write address that holds + * a master and channel ID. + * @buf: Data being written to the HW that will ultimately be seen + * in a debugging tool (Fido, Lauterbach). + * @len: Size of buffer. + * + * Since each aperture is specified by a unique + * master/channel ID, no two processes will be writing + * to the same aperture at the same time so no lock is required. The + * PTI-Output agent will send these out in the order that they arrived, and + * thus, it will intermix these messages. The debug tool can then later + * regroup the appropriate message segments together reconstituting each + * message. + */ +static void pti_write_to_aperture(struct pti_masterchannel *mc, + u8 *buf, + int len) +{ + int dwordcnt; + int final; + int i; + u32 ptiword; + u32 __iomem *aperture; + u8 *p = buf; + + /* + * calculate the aperture offset from the base using the master and + * channel id's. + */ + aperture = drv_data->pti_ioaddr + (mc->master << 15) + + (mc->channel << 8); + + dwordcnt = len >> 2; + final = len - (dwordcnt << 2); /* final = trailing bytes */ + if (final == 0 && dwordcnt != 0) { /* always need a final dword */ + final += 4; + dwordcnt--; + } + + for (i = 0; i < dwordcnt; i++) { + ptiword = be32_to_cpu(*(u32 *)p); + p += 4; + iowrite32(ptiword, aperture); + } + + aperture += PTI_LASTDWORD_DTS; /* adding DTS signals that is EOM */ + + ptiword = 0; + for (i = 0; i < final; i++) + ptiword |= *p++ << (24-(8*i)); + + iowrite32(ptiword, aperture); + return; +} + +/** + * pti_control_frame_built_and_sent()- control frame build and send function. + * + * @mc: The master / channel structure on which the function + * built a control frame. + * + * To be able to post process the PTI contents on host side, a control frame + * is added before sending any PTI content. So the host side knows on + * each PTI frame the name of the thread using a dedicated master / channel. + * The thread name is retrieved from the 'current' global variable. + * This function builds this frame and sends it to a master ID CONTROL_ID. + * The overhead is only 32 bytes since the driver only writes to HW + * in 32 byte chunks. + */ + +static void pti_control_frame_built_and_sent(struct pti_masterchannel *mc) +{ + struct pti_masterchannel mccontrol = {.master = CONTROL_ID, + .channel = 0}; + const char *control_format = "%3d %3d %s"; + u8 control_frame[CONTROL_FRAME_LEN]; + + /* + * Since we access the comm member in current's task_struct, + * we only need to be as large as what 'comm' in that + * structure is. + */ + char comm[TASK_COMM_LEN]; + + if (!in_interrupt()) + get_task_comm(comm, current); + else + strncpy(comm, "Interrupt", TASK_COMM_LEN); + + /* Absolutely ensure our buffer is zero terminated. */ + comm[TASK_COMM_LEN-1] = 0; + + mccontrol.channel = pti_control_channel; + pti_control_channel = (pti_control_channel + 1) & 0x7f; + + snprintf(control_frame, CONTROL_FRAME_LEN, control_format, mc->master, + mc->channel, comm); + pti_write_to_aperture(&mccontrol, control_frame, strlen(control_frame)); +} + +/** + * pti_write_full_frame_to_aperture()- high level function to + * write to PTI. + * + * @mc: The 'aperture'. It's part of a write address that holds + * a master and channel ID. + * @buf: Data being written to the HW that will ultimately be seen + * in a debugging tool (Fido, Lauterbach). + * @len: Size of buffer. + * + * All threads sending data (either console, user space application, ...) 
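The address computation in pti_write_to_aperture() above gives every master ID a 32 KiB window and every one of its 128 channels a 256-byte slot within it, as byte offsets from the 4 MiB aperture mapped at APERTURE_14. The standalone sketch below just reproduces that arithmetic so the layout can be checked by hand; it is an illustration, not driver code.

#include <stdio.h>

static unsigned long pti_slot_offset(unsigned int master, unsigned int channel)
{
	return ((unsigned long)master << 15) + ((unsigned long)channel << 8);
}

int main(void)
{
	/* OS_BASE_ID = 74, channel 0: offset 0x250000 into the aperture */
	printf("0x%lx\n", pti_slot_offset(74, 0));
	/* APP_BASE_ID = 80, channel 5: 0x280000 + 0x500 = 0x280500 */
	printf("0x%lx\n", pti_slot_offset(80, 5));
	return 0;
}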
+ * are calling the high level function to write to PTI meaning that it is + * possible to add a control frame before sending the content. + */ +static void pti_write_full_frame_to_aperture(struct pti_masterchannel *mc, + const unsigned char *buf, + int len) +{ + pti_control_frame_built_and_sent(mc); + pti_write_to_aperture(mc, (u8 *)buf, len); +} + +/** + * get_id()- Allocate a master and channel ID. + * + * @id_array: an array of bits representing what channel + * id's are allocated for writing. + * @max_ids: The max amount of available write IDs to use. + * @base_id: The starting SW channel ID, based on the Intel + * PTI arch. + * + * Returns: + * pti_masterchannel struct with master, channel ID address + * 0 for error + * + * Each bit in the arrays ia_app and ia_os correspond to a master and + * channel id. The bit is one if the id is taken and 0 if free. For + * every master there are 128 channel id's. + */ +static struct pti_masterchannel *get_id(u8 *id_array, int max_ids, int base_id) +{ + struct pti_masterchannel *mc; + int i, j, mask; + + mc = kmalloc(sizeof(struct pti_masterchannel), GFP_KERNEL); + if (mc == NULL) + return NULL; + + /* look for a byte with a free bit */ + for (i = 0; i < max_ids; i++) + if (id_array[i] != 0xff) + break; + if (i == max_ids) { + kfree(mc); + return NULL; + } + /* find the bit in the 128 possible channel opportunities */ + mask = 0x80; + for (j = 0; j < 8; j++) { + if ((id_array[i] & mask) == 0) + break; + mask >>= 1; + } + + /* grab it */ + id_array[i] |= mask; + mc->master = base_id; + mc->channel = ((i & 0xf)<<3) + j; + /* write new master Id / channel Id allocation to channel control */ + pti_control_frame_built_and_sent(mc); + return mc; +} + +/* + * The following three functions: + * pti_request_mastercahannel(), mipi_release_masterchannel() + * and pti_writedata() are an API for other kernel drivers to + * access PTI. + */ + +/** + * pti_request_masterchannel()- Kernel API function used to allocate + * a master, channel ID address + * to write to PTI HW. + * + * @type: 0- request Application master, channel aperture ID write address. + * 1- request OS master, channel aperture ID write + * address. + * 2- request Modem master, channel aperture ID + * write address. + * Other values, error. + * + * Returns: + * pti_masterchannel struct + * 0 for error + */ +struct pti_masterchannel *pti_request_masterchannel(u8 type) +{ + struct pti_masterchannel *mc; + + mutex_lock(&alloclock); + + switch (type) { + + case 0: + mc = get_id(drv_data->ia_app, MAX_APP_IDS, APP_BASE_ID); + break; + + case 1: + mc = get_id(drv_data->ia_os, MAX_OS_IDS, OS_BASE_ID); + break; + + case 2: + mc = get_id(drv_data->ia_modem, MAX_MODEM_IDS, MODEM_BASE_ID); + break; + default: + mc = NULL; + } + + mutex_unlock(&alloclock); + return mc; +} +EXPORT_SYMBOL_GPL(pti_request_masterchannel); + +/** + * pti_release_masterchannel()- Kernel API function used to release + * a master, channel ID address + * used to write to PTI HW. + * + * @mc: master, channel apeture ID address to be released. 
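The bitmap bookkeeping used by get_id() and undone by pti_release_masterchannel() can be seen in isolation in the small userland model below: bit (0x80 >> (ch & 7)) of byte (ch >> 3) marks channel ch busy, so 16 bytes cover the 128 channels of one master. This is a simplified illustration of the same scheme, not an extract from the driver.

#include <assert.h>
#include <stdio.h>

#define MAX_IDS 16

static int alloc_channel(unsigned char *map)
{
	int i, j;

	for (i = 0; i < MAX_IDS; i++)
		if (map[i] != 0xff)
			break;
	if (i == MAX_IDS)
		return -1;			/* all 128 channels busy */

	for (j = 0; j < 8; j++)
		if (!(map[i] & (0x80 >> j)))
			break;

	map[i] |= 0x80 >> j;
	return (i << 3) + j;			/* same as ((i & 0xf) << 3) + j for i < 16 */
}

static void free_channel(unsigned char *map, int ch)
{
	map[ch >> 3] &= ~(0x80 >> (ch & 0x7));	/* mirrors the driver's release path */
}

int main(void)
{
	unsigned char map[MAX_IDS] = { 0 };
	int ch = alloc_channel(map);

	printf("got channel %d\n", ch);		/* 0 on an empty map */
	free_channel(map, ch);
	assert(map[0] == 0);
	return 0;
}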
+ */ +void pti_release_masterchannel(struct pti_masterchannel *mc) +{ + u8 master, channel, i; + + mutex_lock(&alloclock); + + if (mc) { + master = mc->master; + channel = mc->channel; + + if (master == APP_BASE_ID) { + i = channel >> 3; + drv_data->ia_app[i] &= ~(0x80>>(channel & 0x7)); + } else if (master == OS_BASE_ID) { + i = channel >> 3; + drv_data->ia_os[i] &= ~(0x80>>(channel & 0x7)); + } else { + i = channel >> 3; + drv_data->ia_modem[i] &= ~(0x80>>(channel & 0x7)); + } + + kfree(mc); + } + + mutex_unlock(&alloclock); +} +EXPORT_SYMBOL_GPL(pti_release_masterchannel); + +/** + * pti_writedata()- Kernel API function used to write trace + * debugging data to PTI HW. + * + * @mc: Master, channel aperture ID address to write to. + * Null value will return with no write occurring. + * @buf: Trace debuging data to write to the PTI HW. + * Null value will return with no write occurring. + * @count: Size of buf. Value of 0 or a negative number will + * return with no write occuring. + */ +void pti_writedata(struct pti_masterchannel *mc, u8 *buf, int count) +{ + /* + * since this function is exported, this is treated like an + * API function, thus, all parameters should + * be checked for validity. + */ + if ((mc != NULL) && (buf != NULL) && (count > 0)) + pti_write_to_aperture(mc, buf, count); + return; +} +EXPORT_SYMBOL_GPL(pti_writedata); + +/** + * pti_pci_remove()- Driver exit method to remove PTI from + * PCI bus. + * @pdev: variable containing pci info of PTI. + */ +static void __devexit pti_pci_remove(struct pci_dev *pdev) +{ + struct pti_dev *drv_data; + + drv_data = pci_get_drvdata(pdev); + if (drv_data != NULL) { + pci_iounmap(pdev, drv_data->pti_ioaddr); + pci_set_drvdata(pdev, NULL); + kfree(drv_data); + pci_release_region(pdev, 1); + pci_disable_device(pdev); + } +} + +/* + * for the tty_driver_*() basic function descriptions, see tty_driver.h. + * Specific header comments made for PTI-related specifics. + */ + +/** + * pti_tty_driver_open()- Open an Application master, channel aperture + * ID to the PTI device via tty device. + * + * @tty: tty interface. + * @filp: filp interface pased to tty_port_open() call. + * + * Returns: + * int, 0 for success + * otherwise, fail value + * + * The main purpose of using the tty device interface is for + * each tty port to have a unique PTI write aperture. In an + * example use case, ttyPTI0 gets syslogd and an APP aperture + * ID and ttyPTI1 is where the n_tracesink ldisc hooks to route + * modem messages into PTI. Modem trace data does not have to + * go to ttyPTI1, but ttyPTI0 and ttyPTI1 do need to be distinct + * master IDs. These messages go through the PTI HW and out of + * the handheld platform and to the Fido/Lauterbach device. + */ +static int pti_tty_driver_open(struct tty_struct *tty, struct file *filp) +{ + /* + * we actually want to allocate a new channel per open, per + * system arch. HW gives more than plenty channels for a single + * system task to have its own channel to write trace data. This + * also removes a locking requirement for the actual write + * procedure. + */ + return tty_port_open(&drv_data->port, tty, filp); +} + +/** + * pti_tty_driver_close()- close tty device and release Application + * master, channel aperture ID to the PTI device via tty device. + * + * @tty: tty interface. + * @filp: filp interface pased to tty_port_close() call. 
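Taken together, pti_request_masterchannel(), pti_writedata() and pti_release_masterchannel() form the in-kernel API other drivers are meant to use. The fragment below sketches one possible consumer; the foo_* names are invented and the error handling is minimal, so treat it as a usage outline under those assumptions rather than real code.

#include <linux/errno.h>
#include <linux/pti.h>
#include <linux/string.h>
#include <linux/types.h>

static struct pti_masterchannel *foo_mc;

static int foo_trace_init(void)
{
	foo_mc = pti_request_masterchannel(1);	/* 1 = OS master/channel */
	if (!foo_mc)
		return -ENOMEM;
	return 0;
}

static void foo_trace_event(const char *msg)
{
	if (foo_mc)
		pti_writedata(foo_mc, (u8 *)msg, strlen(msg));
}

static void foo_trace_exit(void)
{
	pti_release_masterchannel(foo_mc);
	foo_mc = NULL;
}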
+ * + * The main purpose of using the tty device interface is to route + * syslog daemon messages to the PTI HW and out of the handheld platform + * and to the Fido/Lauterbach device. + */ +static void pti_tty_driver_close(struct tty_struct *tty, struct file *filp) +{ + tty_port_close(&drv_data->port, tty, filp); +} + +/** + * pti_tty_intstall()- Used to set up specific master-channels + * to tty ports for organizational purposes when + * tracing viewed from debuging tools. + * + * @driver: tty driver information. + * @tty: tty struct containing pti information. + * + * Returns: + * 0 for success + * otherwise, error + */ +static int pti_tty_install(struct tty_driver *driver, struct tty_struct *tty) +{ + int idx = tty->index; + struct pti_tty *pti_tty_data; + int ret = tty_init_termios(tty); + + if (ret == 0) { + tty_driver_kref_get(driver); + tty->count++; + driver->ttys[idx] = tty; + + pti_tty_data = kmalloc(sizeof(struct pti_tty), GFP_KERNEL); + if (pti_tty_data == NULL) + return -ENOMEM; + + if (idx == PTITTY_MINOR_START) + pti_tty_data->mc = pti_request_masterchannel(0); + else + pti_tty_data->mc = pti_request_masterchannel(2); + + if (pti_tty_data->mc == NULL) + return -ENXIO; + tty->driver_data = pti_tty_data; + } + + return ret; +} + +/** + * pti_tty_cleanup()- Used to de-allocate master-channel resources + * tied to tty's of this driver. + * + * @tty: tty struct containing pti information. + */ +static void pti_tty_cleanup(struct tty_struct *tty) +{ + struct pti_tty *pti_tty_data = tty->driver_data; + if (pti_tty_data == NULL) + return; + pti_release_masterchannel(pti_tty_data->mc); + kfree(tty->driver_data); + tty->driver_data = NULL; +} + +/** + * pti_tty_driver_write()- Write trace debugging data through the char + * interface to the PTI HW. Part of the misc device implementation. + * + * @filp: Contains private data which is used to obtain + * master, channel write ID. + * @data: trace data to be written. + * @len: # of byte to write. + * + * Returns: + * int, # of bytes written + * otherwise, error + */ +static int pti_tty_driver_write(struct tty_struct *tty, + const unsigned char *buf, int len) +{ + struct pti_tty *pti_tty_data = tty->driver_data; + if ((pti_tty_data != NULL) && (pti_tty_data->mc != NULL)) { + pti_write_to_aperture(pti_tty_data->mc, (u8 *)buf, len); + return len; + } + /* + * we can't write to the pti hardware if the private driver_data + * and the mc address is not there. + */ + else + return -EFAULT; +} + +/** + * pti_tty_write_room()- Always returns 2048. + * + * @tty: contains tty info of the pti driver. + */ +static int pti_tty_write_room(struct tty_struct *tty) +{ + return 2048; +} + +/** + * pti_char_open()- Open an Application master, channel aperture + * ID to the PTI device. Part of the misc device implementation. + * + * @inode: not used. + * @filp: Output- will have a masterchannel struct set containing + * the allocated application PTI aperture write address. + * + * Returns: + * int, 0 for success + * otherwise, a fail value + */ +static int pti_char_open(struct inode *inode, struct file *filp) +{ + struct pti_masterchannel *mc; + + /* + * We really do want to fail immediately if + * pti_request_masterchannel() fails, + * before assigning the value to filp->private_data. + * Slightly easier to debug if this driver needs debugging. + */ + mc = pti_request_masterchannel(0); + if (mc == NULL) + return -ENOMEM; + filp->private_data = mc; + return 0; +} + +/** + * pti_char_release()- Close a char channel to the PTI device. 
Part + * of the misc device implementation. + * + * @inode: Not used in this implementaiton. + * @filp: Contains private_data that contains the master, channel + * ID to be released by the PTI device. + * + * Returns: + * always 0 + */ +static int pti_char_release(struct inode *inode, struct file *filp) +{ + pti_release_masterchannel(filp->private_data); + kfree(filp->private_data); + return 0; +} + +/** + * pti_char_write()- Write trace debugging data through the char + * interface to the PTI HW. Part of the misc device implementation. + * + * @filp: Contains private data which is used to obtain + * master, channel write ID. + * @data: trace data to be written. + * @len: # of byte to write. + * @ppose: Not used in this function implementation. + * + * Returns: + * int, # of bytes written + * otherwise, error value + * + * Notes: From side discussions with Alan Cox and experimenting + * with PTI debug HW like Nokia's Fido box and Lauterbach + * devices, 8192 byte write buffer used by USER_COPY_SIZE was + * deemed an appropriate size for this type of usage with + * debugging HW. + */ +static ssize_t pti_char_write(struct file *filp, const char __user *data, + size_t len, loff_t *ppose) +{ + struct pti_masterchannel *mc; + void *kbuf; + const char __user *tmp; + size_t size = USER_COPY_SIZE; + size_t n = 0; + + tmp = data; + mc = filp->private_data; + + kbuf = kmalloc(size, GFP_KERNEL); + if (kbuf == NULL) { + pr_err("%s(%d): buf allocation failed\n", + __func__, __LINE__); + return -ENOMEM; + } + + do { + if (len - n > USER_COPY_SIZE) + size = USER_COPY_SIZE; + else + size = len - n; + + if (copy_from_user(kbuf, tmp, size)) { + kfree(kbuf); + return n ? n : -EFAULT; + } + + pti_write_to_aperture(mc, kbuf, size); + n += size; + tmp += size; + + } while (len > n); + + kfree(kbuf); + return len; +} + +static const struct tty_operations pti_tty_driver_ops = { + .open = pti_tty_driver_open, + .close = pti_tty_driver_close, + .write = pti_tty_driver_write, + .write_room = pti_tty_write_room, + .install = pti_tty_install, + .cleanup = pti_tty_cleanup +}; + +static const struct file_operations pti_char_driver_ops = { + .owner = THIS_MODULE, + .write = pti_char_write, + .open = pti_char_open, + .release = pti_char_release, +}; + +static struct miscdevice pti_char_driver = { + .minor = MISC_DYNAMIC_MINOR, + .name = CHARNAME, + .fops = &pti_char_driver_ops +}; + +/** + * pti_console_write()- Write to the console that has been acquired. + * + * @c: Not used in this implementaiton. + * @buf: Data to be written. + * @len: Length of buf. + */ +static void pti_console_write(struct console *c, const char *buf, unsigned len) +{ + static struct pti_masterchannel mc = {.master = CONSOLE_ID, + .channel = 0}; + + mc.channel = pti_console_channel; + pti_console_channel = (pti_console_channel + 1) & 0x7f; + + pti_write_full_frame_to_aperture(&mc, buf, len); +} + +/** + * pti_console_device()- Return the driver tty structure and set the + * associated index implementation. + * + * @c: Console device of the driver. + * @index: index associated with c. + * + * Returns: + * always value of pti_tty_driver structure when this function + * is called. + */ +static struct tty_driver *pti_console_device(struct console *c, int *index) +{ + *index = c->index; + return pti_tty_driver; +} + +/** + * pti_console_setup()- Initialize console variables used by the driver. + * + * @c: Not used. + * @opts: Not used. + * + * Returns: + * always 0. 
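From userspace, the misc device gives the simplest path into PTI: every open() allocates its own application master/channel and plain write() pushes trace bytes out, copied in 8 KiB chunks per USER_COPY_SIZE. The sketch below assumes the usual udev naming for a misc device called "pti" (i.e. /dev/pti); the node path is an assumption, not something the patch guarantees.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char msg[] = "hello from userspace\n";
	int fd = open("/dev/pti", O_WRONLY);	/* path assumed from misc device name */

	if (fd < 0) {
		perror("open /dev/pti");
		return 1;
	}
	if (write(fd, msg, strlen(msg)) < 0)
		perror("write");
	close(fd);
	return 0;
}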
+ */ +static int pti_console_setup(struct console *c, char *opts) +{ + pti_console_channel = 0; + pti_control_channel = 0; + return 0; +} + +/* + * pti_console struct, used to capture OS printk()'s and shift + * out to the PTI device for debugging. This cannot be + * enabled upon boot because of the possibility of eating + * any serial console printk's (race condition discovered). + * The console should be enabled upon when the tty port is + * used for the first time. Since the primary purpose for + * the tty port is to hook up syslog to it, the tty port + * will be open for a really long time. + */ +static struct console pti_console = { + .name = TTYNAME, + .write = pti_console_write, + .device = pti_console_device, + .setup = pti_console_setup, + .flags = CON_PRINTBUFFER, + .index = 0, +}; + +/** + * pti_port_activate()- Used to start/initialize any items upon + * first opening of tty_port(). + * + * @port- The tty port number of the PTI device. + * @tty- The tty struct associated with this device. + * + * Returns: + * always returns 0 + * + * Notes: The primary purpose of the PTI tty port 0 is to hook + * the syslog daemon to it; thus this port will be open for a + * very long time. + */ +static int pti_port_activate(struct tty_port *port, struct tty_struct *tty) +{ + if (port->tty->index == PTITTY_MINOR_START) + console_start(&pti_console); + return 0; +} + +/** + * pti_port_shutdown()- Used to stop/shutdown any items upon the + * last tty port close. + * + * @port- The tty port number of the PTI device. + * + * Notes: The primary purpose of the PTI tty port 0 is to hook + * the syslog daemon to it; thus this port will be open for a + * very long time. + */ +static void pti_port_shutdown(struct tty_port *port) +{ + if (port->tty->index == PTITTY_MINOR_START) + console_stop(&pti_console); +} + +static const struct tty_port_operations tty_port_ops = { + .activate = pti_port_activate, + .shutdown = pti_port_shutdown, +}; + +/* + * Note the _probe() call sets everything up and ties the char and tty + * to successfully detecting the PTI device on the pci bus. + */ + +/** + * pti_pci_probe()- Used to detect pti on the pci bus and set + * things up in the driver. + * + * @pdev- pci_dev struct values for pti. + * @ent- pci_device_id struct for pti driver. 
+ * + * Returns: + * 0 for success + * otherwise, error + */ +static int __devinit pti_pci_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + int retval = -EINVAL; + int pci_bar = 1; + + dev_dbg(&pdev->dev, "%s %s(%d): PTI PCI ID %04x:%04x\n", __FILE__, + __func__, __LINE__, pdev->vendor, pdev->device); + + retval = misc_register(&pti_char_driver); + if (retval) { + pr_err("%s(%d): CHAR registration failed of pti driver\n", + __func__, __LINE__); + pr_err("%s(%d): Error value returned: %d\n", + __func__, __LINE__, retval); + return retval; + } + + retval = pci_enable_device(pdev); + if (retval != 0) { + dev_err(&pdev->dev, + "%s: pci_enable_device() returned error %d\n", + __func__, retval); + return retval; + } + + drv_data = kzalloc(sizeof(*drv_data), GFP_KERNEL); + + if (drv_data == NULL) { + retval = -ENOMEM; + dev_err(&pdev->dev, + "%s(%d): kmalloc() returned NULL memory.\n", + __func__, __LINE__); + return retval; + } + drv_data->pti_addr = pci_resource_start(pdev, pci_bar); + + retval = pci_request_region(pdev, pci_bar, dev_name(&pdev->dev)); + if (retval != 0) { + dev_err(&pdev->dev, + "%s(%d): pci_request_region() returned error %d\n", + __func__, __LINE__, retval); + kfree(drv_data); + return retval; + } + drv_data->aperture_base = drv_data->pti_addr+APERTURE_14; + drv_data->pti_ioaddr = + ioremap_nocache((u32)drv_data->aperture_base, + APERTURE_LEN); + if (!drv_data->pti_ioaddr) { + pci_release_region(pdev, pci_bar); + retval = -ENOMEM; + kfree(drv_data); + return retval; + } + + pci_set_drvdata(pdev, drv_data); + + tty_port_init(&drv_data->port); + drv_data->port.ops = &tty_port_ops; + + tty_register_device(pti_tty_driver, 0, &pdev->dev); + tty_register_device(pti_tty_driver, 1, &pdev->dev); + + register_console(&pti_console); + + return retval; +} + +static struct pci_driver pti_pci_driver = { + .name = PCINAME, + .id_table = pci_ids, + .probe = pti_pci_probe, + .remove = pti_pci_remove, +}; + +/** + * + * pti_init()- Overall entry/init call to the pti driver. + * It starts the registration process with the kernel. 
+ * + * Returns: + * int __init, 0 for success + * otherwise value is an error + * + */ +static int __init pti_init(void) +{ + int retval = -EINVAL; + + /* First register module as tty device */ + + pti_tty_driver = alloc_tty_driver(1); + if (pti_tty_driver == NULL) { + pr_err("%s(%d): Memory allocation failed for ptiTTY driver\n", + __func__, __LINE__); + return -ENOMEM; + } + + pti_tty_driver->owner = THIS_MODULE; + pti_tty_driver->magic = TTY_DRIVER_MAGIC; + pti_tty_driver->driver_name = DRIVERNAME; + pti_tty_driver->name = TTYNAME; + pti_tty_driver->major = 0; + pti_tty_driver->minor_start = PTITTY_MINOR_START; + pti_tty_driver->minor_num = PTITTY_MINOR_NUM; + pti_tty_driver->num = PTITTY_MINOR_NUM; + pti_tty_driver->type = TTY_DRIVER_TYPE_SYSTEM; + pti_tty_driver->subtype = SYSTEM_TYPE_SYSCONS; + pti_tty_driver->flags = TTY_DRIVER_REAL_RAW | + TTY_DRIVER_DYNAMIC_DEV; + pti_tty_driver->init_termios = tty_std_termios; + + tty_set_operations(pti_tty_driver, &pti_tty_driver_ops); + + retval = tty_register_driver(pti_tty_driver); + if (retval) { + pr_err("%s(%d): TTY registration failed of pti driver\n", + __func__, __LINE__); + pr_err("%s(%d): Error value returned: %d\n", + __func__, __LINE__, retval); + + pti_tty_driver = NULL; + return retval; + } + + retval = pci_register_driver(&pti_pci_driver); + + if (retval) { + pr_err("%s(%d): PCI registration failed of pti driver\n", + __func__, __LINE__); + pr_err("%s(%d): Error value returned: %d\n", + __func__, __LINE__, retval); + + tty_unregister_driver(pti_tty_driver); + pr_err("%s(%d): Unregistering TTY part of pti driver\n", + __func__, __LINE__); + pti_tty_driver = NULL; + return retval; + } + + return retval; +} + +/** + * pti_exit()- Unregisters this module as a tty and pci driver. + */ +static void __exit pti_exit(void) +{ + int retval; + + tty_unregister_device(pti_tty_driver, 0); + tty_unregister_device(pti_tty_driver, 1); + + retval = tty_unregister_driver(pti_tty_driver); + if (retval) { + pr_err("%s(%d): TTY unregistration failed of pti driver\n", + __func__, __LINE__); + pr_err("%s(%d): Error value returned: %d\n", + __func__, __LINE__, retval); + } + + pci_unregister_driver(&pti_pci_driver); + + retval = misc_deregister(&pti_char_driver); + if (retval) { + pr_err("%s(%d): CHAR unregistration failed of pti driver\n", + __func__, __LINE__); + pr_err("%s(%d): Error value returned: %d\n", + __func__, __LINE__, retval); + } + + unregister_console(&pti_console); + return; +} + +module_init(pti_init); +module_exit(pti_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Ken Mills, Jay Freyensee"); +MODULE_DESCRIPTION("PTI Driver"); + diff --git a/include/linux/pti.h b/include/linux/pti.h new file mode 100644 index 000000000000..81af667bb2d5 --- /dev/null +++ b/include/linux/pti.h @@ -0,0 +1,42 @@ +/* + * Copyright (C) Intel 2011 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * The PTI (Parallel Trace Interface) driver directs trace data routed from + * various parts in the system out through the Intel Penwell PTI port and + * out of the mobile device for analysis with a debugging tool + * (Lauterbach, Fido). This is part of a solution for the MIPI P1149.7, + * compact JTAG, standard. + * + * This header file will allow other parts of the OS to use the + * interface to write out it's contents for debugging a mobile system. + */ + +#ifndef PTI_H_ +#define PTI_H_ + +/* offset for last dword of any PTI message. Part of MIPI P1149.7 */ +#define PTI_LASTDWORD_DTS 0x30 + +/* basic structure used as a write address to the PTI HW */ +struct pti_masterchannel { + u8 master; + u8 channel; +}; + +/* the following functions are defined in misc/pti.c */ +void pti_writedata(struct pti_masterchannel *mc, u8 *buf, int count); +struct pti_masterchannel *pti_request_masterchannel(u8 type); +void pti_release_masterchannel(struct pti_masterchannel *mc); + +#endif /*PTI_H_*/ -- cgit v1.2.3-71-gd317 From ee4f6b4b89665b92ead67deaa2e5d2ffa1af2b5f Mon Sep 17 00:00:00 2001 From: J Freyensee Date: Fri, 6 May 2011 16:56:50 -0700 Subject: n_tracerouter and n_tracesink ldisc additions. The n_tracerouter and n_tracesink line discpline drivers use the Linux tty line discpline framework to route trace data coming from a tty port (say UART for example) to the trace sink line discipline driver and to another tty port(say USB). Those these two line discipline drivers can be used together, independently from pti.c, they are part of the original implementation solution of the MIPI P1149.7, compact JTAG, PTI solution for Intel mobile platforms starting with the Medfield platform. Signed-off-by: J Freyensee Signed-off-by: Greg Kroah-Hartman --- drivers/tty/Kconfig | 31 ++++++ drivers/tty/Makefile | 2 + drivers/tty/n_tracerouter.c | 243 ++++++++++++++++++++++++++++++++++++++++++++ drivers/tty/n_tracesink.c | 238 +++++++++++++++++++++++++++++++++++++++++++ drivers/tty/n_tracesink.h | 36 +++++++ include/linux/tty.h | 2 + 6 files changed, 552 insertions(+) create mode 100644 drivers/tty/n_tracerouter.c create mode 100644 drivers/tty/n_tracesink.c create mode 100644 drivers/tty/n_tracesink.h (limited to 'include/linux') diff --git a/drivers/tty/Kconfig b/drivers/tty/Kconfig index 3fd7199301b6..bd7cc0527999 100644 --- a/drivers/tty/Kconfig +++ b/drivers/tty/Kconfig @@ -319,3 +319,34 @@ config N_GSM This line discipline provides support for the GSM MUX protocol and presents the mux as a set of 61 individual tty devices. +config TRACE_ROUTER + tristate "Trace data router for MIPI P1149.7 cJTAG standard" + depends on TRACE_SINK + default n + help + The trace router uses the Linux tty line discipline framework to + route trace data coming from a tty port (say UART for example) to + the trace sink line discipline driver and to another tty port (say + USB). This is part of a solution for the MIPI P1149.7, compact JTAG, + standard, which is for debugging mobile devices. The PTI driver in + drivers/misc/pti.c defines the majority of this MIPI solution. + + You should select this driver if the target kernel is meant for + a mobile device containing a modem. Then you will need to select + "Trace data sink for MIPI P1149.7 cJTAG standard" line discipline + driver. 
+ +config TRACE_SINK + tristate "Trace data sink for MIPI P1149.7 cJTAG standard" + default n + help + The trace sink uses the Linux line discipline framework to receive + trace data coming from the trace router line discipline driver + to a user-defined tty port target, like USB. + This is to provide a way to extract modem trace data on + devices that do not have a PTI HW module, or just need modem + trace data to come out of a different HW output port. + This is part of a solution for the P1149.7, compact JTAG, standard. + + If you select this option, you need to select + "Trace data router for MIPI P1149.7 cJTAG standard". diff --git a/drivers/tty/Makefile b/drivers/tty/Makefile index 690522fcb338..ea89b0bd15fe 100644 --- a/drivers/tty/Makefile +++ b/drivers/tty/Makefile @@ -6,6 +6,8 @@ obj-$(CONFIG_AUDIT) += tty_audit.o obj-$(CONFIG_MAGIC_SYSRQ) += sysrq.o obj-$(CONFIG_N_HDLC) += n_hdlc.o obj-$(CONFIG_N_GSM) += n_gsm.o +obj-$(CONFIG_TRACE_ROUTER) += n_tracerouter.o +obj-$(CONFIG_TRACE_SINK) += n_tracesink.o obj-$(CONFIG_R3964) += n_r3964.o obj-y += vt/ diff --git a/drivers/tty/n_tracerouter.c b/drivers/tty/n_tracerouter.c new file mode 100644 index 000000000000..1f063d3aa32f --- /dev/null +++ b/drivers/tty/n_tracerouter.c @@ -0,0 +1,243 @@ +/* + * n_tracerouter.c - Trace data router through tty space + * + * Copyright (C) Intel 2011 + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This trace router uses the Linux line discipline framework to route + * trace data coming from a HW Modem to a PTI (Parallel Trace Module) port. + * The solution is not specific to a HW modem and this line disciple can + * be used to route any stream of data in kernel space. + * This is part of a solution for the P1149.7, compact JTAG, standard. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "n_tracesink.h" + +/* + * Other ldisc drivers use 65536 which basically means, + * 'I can always accept 64k' and flow control is off. + * This number is deemed appropriate for this driver. + */ +#define RECEIVE_ROOM 65536 +#define DRIVERNAME "n_tracerouter" + +/* + * struct to hold private configuration data for this ldisc. + * opencalled is used to hold if this ldisc has been opened. + * kref_tty holds the tty reference the ldisc sits on top of. + */ +struct tracerouter_data { + u8 opencalled; + struct tty_struct *kref_tty; +}; +static struct tracerouter_data *tr_data; + +/* lock for when tty reference is being used */ +static DEFINE_MUTEX(routelock); + +/** + * n_tracerouter_open() - Called when a tty is opened by a SW entity. + * @tty: terminal device to the ldisc. + * + * Return: + * 0 for success. + * + * Caveats: This should only be opened one time per SW entity. 
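The ldisc pair is wired up from userspace with the standard TIOCSETD ioctl: attach N_TRACESINK to the tty that should carry the trace out of the box, then N_TRACEROUTER to the tty the modem trace arrives on. The sketch below assumes a USB ACM port as the sink and a UART as the source; both device paths are examples only, and the sink is attached first so routed data has somewhere to go. The ldisc numbers come from the linux/tty.h hunk later in this series.

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

#define N_TRACESINK	23	/* from include/linux/tty.h in this series */
#define N_TRACEROUTER	24

static int set_ldisc(const char *path, int ldisc)
{
	int fd = open(path, O_RDWR | O_NOCTTY);

	if (fd < 0 || ioctl(fd, TIOCSETD, &ldisc) < 0) {
		perror(path);
		return -1;
	}
	return fd;	/* keep the fd open so the ldisc stays attached */
}

int main(void)
{
	int sink = set_ldisc("/dev/ttyACM0", N_TRACESINK);
	int router = set_ldisc("/dev/ttyS1", N_TRACEROUTER);

	if (sink < 0 || router < 0)
		return 1;
	pause();	/* data now flows ttyS1 -> n_tracerouter -> n_tracesink -> ttyACM0 */
	return 0;
}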
+ */ +static int n_tracerouter_open(struct tty_struct *tty) +{ + int retval = -EEXIST; + + mutex_lock(&routelock); + if (tr_data->opencalled == 0) { + + tr_data->kref_tty = tty_kref_get(tty); + if (tr_data->kref_tty == NULL) { + retval = -EFAULT; + } else { + tr_data->opencalled = 1; + tty->disc_data = tr_data; + tty->receive_room = RECEIVE_ROOM; + tty_driver_flush_buffer(tty); + retval = 0; + } + } + mutex_unlock(&routelock); + return retval; +} + +/** + * n_tracerouter_close() - close connection + * @tty: terminal device to the ldisc. + * + * Called when a software entity wants to close a connection. + */ +static void n_tracerouter_close(struct tty_struct *tty) +{ + struct tracerouter_data *tptr = tty->disc_data; + + mutex_lock(&routelock); + WARN_ON(tptr->kref_tty != tr_data->kref_tty); + tty_driver_flush_buffer(tty); + tty_kref_put(tr_data->kref_tty); + tr_data->kref_tty = NULL; + tr_data->opencalled = 0; + tty->disc_data = NULL; + mutex_unlock(&routelock); +} + +/** + * n_tracerouter_read() - read request from user space + * @tty: terminal device passed into the ldisc. + * @file: pointer to open file object. + * @buf: pointer to the data buffer that gets eventually returned. + * @nr: number of bytes of the data buffer that is returned. + * + * function that allows read() functionality in userspace. By default if this + * is not implemented it returns -EIO. This module is functioning like a + * router via n_tracerouter_receivebuf(), and there is no real requirement + * to implement this function. However, an error return value other than + * -EIO should be used just to show that there was an intent not to have + * this function implemented. Return value based on read() man pages. + * + * Return: + * -EINVAL + */ +static ssize_t n_tracerouter_read(struct tty_struct *tty, struct file *file, + unsigned char __user *buf, size_t nr) { + return -EINVAL; +} + +/** + * n_tracerouter_write() - Function that allows write() in userspace. + * @tty: terminal device passed into the ldisc. + * @file: pointer to open file object. + * @buf: pointer to the data buffer that gets eventually returned. + * @nr: number of bytes of the data buffer that is returned. + * + * By default if this is not implemented, it returns -EIO. + * This should not be implemented, ever, because + * 1. this driver is functioning like a router via + * n_tracerouter_receivebuf() + * 2. No writes to HW will ever go through this line discpline driver. + * However, an error return value other than -EIO should be used + * just to show that there was an intent not to have this function + * implemented. Return value based on write() man pages. + * + * Return: + * -EINVAL + */ +static ssize_t n_tracerouter_write(struct tty_struct *tty, struct file *file, + const unsigned char *buf, size_t nr) { + return -EINVAL; +} + +/** + * n_tracerouter_receivebuf() - Routing function for driver. + * @tty: terminal device passed into the ldisc. It's assumed + * tty will never be NULL. + * @cp: buffer, block of characters to be eventually read by + * someone, somewhere (user read() call or some kernel function). + * @fp: flag buffer. + * @count: number of characters (aka, bytes) in cp. + * + * This function takes the input buffer, cp, and passes it to + * an external API function for processing. 
+ */ +static void n_tracerouter_receivebuf(struct tty_struct *tty, + const unsigned char *cp, + char *fp, int count) +{ + mutex_lock(&routelock); + n_tracesink_datadrain((u8 *) cp, count); + mutex_unlock(&routelock); +} + +/* + * Flush buffer is not impelemented as the ldisc has no internal buffering + * so the tty_driver_flush_buffer() is sufficient for this driver's needs. + */ + +static struct tty_ldisc_ops tty_ptirouter_ldisc = { + .owner = THIS_MODULE, + .magic = TTY_LDISC_MAGIC, + .name = DRIVERNAME, + .open = n_tracerouter_open, + .close = n_tracerouter_close, + .read = n_tracerouter_read, + .write = n_tracerouter_write, + .receive_buf = n_tracerouter_receivebuf +}; + +/** + * n_tracerouter_init - module initialisation + * + * Registers this module as a line discipline driver. + * + * Return: + * 0 for success, any other value error. + */ +static int __init n_tracerouter_init(void) +{ + int retval; + + tr_data = kzalloc(sizeof(struct tracerouter_data), GFP_KERNEL); + if (tr_data == NULL) + return -ENOMEM; + + + /* Note N_TRACEROUTER is defined in linux/tty.h */ + retval = tty_register_ldisc(N_TRACEROUTER, &tty_ptirouter_ldisc); + if (retval < 0) { + pr_err("%s: Registration failed: %d\n", __func__, retval); + kfree(tr_data); + } + return retval; +} + +/** + * n_tracerouter_exit - module unload + * + * Removes this module as a line discipline driver. + */ +static void __exit n_tracerouter_exit(void) +{ + int retval = tty_unregister_ldisc(N_TRACEROUTER); + + if (retval < 0) + pr_err("%s: Unregistration failed: %d\n", __func__, retval); + else + kfree(tr_data); +} + +module_init(n_tracerouter_init); +module_exit(n_tracerouter_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jay Freyensee"); +MODULE_ALIAS_LDISC(N_TRACEROUTER); +MODULE_DESCRIPTION("Trace router ldisc driver"); diff --git a/drivers/tty/n_tracesink.c b/drivers/tty/n_tracesink.c new file mode 100644 index 000000000000..ddce58b973d2 --- /dev/null +++ b/drivers/tty/n_tracesink.c @@ -0,0 +1,238 @@ +/* + * n_tracesink.c - Trace data router and sink path through tty space. + * + * Copyright (C) Intel 2011 + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * The trace sink uses the Linux line discipline framework to receive + * trace data coming from the PTI source line discipline driver + * to a user-desired tty port, like USB. + * This is to provide a way to extract modem trace data on + * devices that do not have a PTI HW module, or just need modem + * trace data to come out of a different HW output port. + * This is part of a solution for the P1149.7, compact JTAG, standard. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "n_tracesink.h" + +/* + * Other ldisc drivers use 65536 which basically means, + * 'I can always accept 64k' and flow control is off. + * This number is deemed appropriate for this driver. 
+ */ +#define RECEIVE_ROOM 65536 +#define DRIVERNAME "n_tracesink" + +/* + * there is a quirk with this ldisc is he can write data + * to a tty from anyone calling his kernel API, which + * meets customer requirements in the drivers/misc/pti.c + * project. So he needs to know when he can and cannot write when + * the API is called. In theory, the API can be called + * after an init() but before a successful open() which + * would crash the system if tty is not checked. + */ +static struct tty_struct *this_tty; +static DEFINE_MUTEX(writelock); + +/** + * n_tracesink_open() - Called when a tty is opened by a SW entity. + * @tty: terminal device to the ldisc. + * + * Return: + * 0 for success, + * -EFAULT = couldn't get a tty kref n_tracesink will sit + * on top of + * -EEXIST = open() called successfully once and it cannot + * be called again. + * + * Caveats: open() should only be successful the first time a + * SW entity calls it. + */ +static int n_tracesink_open(struct tty_struct *tty) +{ + int retval = -EEXIST; + + mutex_lock(&writelock); + if (this_tty == NULL) { + this_tty = tty_kref_get(tty); + if (this_tty == NULL) { + retval = -EFAULT; + } else { + tty->disc_data = this_tty; + tty_driver_flush_buffer(tty); + retval = 0; + } + } + mutex_unlock(&writelock); + + return retval; +} + +/** + * n_tracesink_close() - close connection + * @tty: terminal device to the ldisc. + * + * Called when a software entity wants to close a connection. + */ +static void n_tracesink_close(struct tty_struct *tty) +{ + mutex_lock(&writelock); + tty_driver_flush_buffer(tty); + tty_kref_put(this_tty); + this_tty = NULL; + tty->disc_data = NULL; + mutex_unlock(&writelock); +} + +/** + * n_tracesink_read() - read request from user space + * @tty: terminal device passed into the ldisc. + * @file: pointer to open file object. + * @buf: pointer to the data buffer that gets eventually returned. + * @nr: number of bytes of the data buffer that is returned. + * + * function that allows read() functionality in userspace. By default if this + * is not implemented it returns -EIO. This module is functioning like a + * router via n_tracesink_receivebuf(), and there is no real requirement + * to implement this function. However, an error return value other than + * -EIO should be used just to show that there was an intent not to have + * this function implemented. Return value based on read() man pages. + * + * Return: + * -EINVAL + */ +static ssize_t n_tracesink_read(struct tty_struct *tty, struct file *file, + unsigned char __user *buf, size_t nr) { + return -EINVAL; +} + +/** + * n_tracesink_write() - Function that allows write() in userspace. + * @tty: terminal device passed into the ldisc. + * @file: pointer to open file object. + * @buf: pointer to the data buffer that gets eventually returned. + * @nr: number of bytes of the data buffer that is returned. + * + * By default if this is not implemented, it returns -EIO. + * This should not be implemented, ever, because + * 1. this driver is functioning like a router via + * n_tracesink_receivebuf() + * 2. No writes to HW will ever go through this line discpline driver. + * However, an error return value other than -EIO should be used + * just to show that there was an intent not to have this function + * implemented. Return value based on write() man pages. 
+ * + * Return: + * -EINVAL + */ +static ssize_t n_tracesink_write(struct tty_struct *tty, struct file *file, + const unsigned char *buf, size_t nr) { + return -EINVAL; +} + +/** + * n_tracesink_datadrain() - Kernel API function used to route + * trace debugging data to user-defined + * port like USB. + * + * @buf: Trace debuging data buffer to write to tty target + * port. Null value will return with no write occurring. + * @count: Size of buf. Value of 0 or a negative number will + * return with no write occuring. + * + * Caveat: If this line discipline does not set the tty it sits + * on top of via an open() call, this API function will not + * call the tty's write() call because it will have no pointer + * to call the write(). + */ +void n_tracesink_datadrain(u8 *buf, int count) +{ + mutex_lock(&writelock); + + if ((buf != NULL) && (count > 0) && (this_tty != NULL)) + this_tty->ops->write(this_tty, buf, count); + + mutex_unlock(&writelock); +} +EXPORT_SYMBOL_GPL(n_tracesink_datadrain); + +/* + * Flush buffer is not impelemented as the ldisc has no internal buffering + * so the tty_driver_flush_buffer() is sufficient for this driver's needs. + */ + +/* + * tty_ldisc function operations for this driver. + */ +static struct tty_ldisc_ops tty_n_tracesink = { + .owner = THIS_MODULE, + .magic = TTY_LDISC_MAGIC, + .name = DRIVERNAME, + .open = n_tracesink_open, + .close = n_tracesink_close, + .read = n_tracesink_read, + .write = n_tracesink_write +}; + +/** + * n_tracesink_init- module initialisation + * + * Registers this module as a line discipline driver. + * + * Return: + * 0 for success, any other value error. + */ +static int __init n_tracesink_init(void) +{ + /* Note N_TRACESINK is defined in linux/tty.h */ + int retval = tty_register_ldisc(N_TRACESINK, &tty_n_tracesink); + + if (retval < 0) + pr_err("%s: Registration failed: %d\n", __func__, retval); + + return retval; +} + +/** + * n_tracesink_exit - module unload + * + * Removes this module as a line discipline driver. + */ +static void __exit n_tracesink_exit(void) +{ + int retval = tty_unregister_ldisc(N_TRACESINK); + + if (retval < 0) + pr_err("%s: Unregistration failed: %d\n", __func__, retval); +} + +module_init(n_tracesink_init); +module_exit(n_tracesink_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jay Freyensee"); +MODULE_ALIAS_LDISC(N_TRACESINK); +MODULE_DESCRIPTION("Trace sink ldisc driver"); diff --git a/drivers/tty/n_tracesink.h b/drivers/tty/n_tracesink.h new file mode 100644 index 000000000000..a68bb44f1ef5 --- /dev/null +++ b/drivers/tty/n_tracesink.h @@ -0,0 +1,36 @@ +/* + * n_tracesink.h - Kernel driver API to route trace data in kernel space. + * + * Copyright (C) Intel 2011 + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * The PTI (Parallel Trace Interface) driver directs trace data routed from + * various parts in the system out through the Intel Penwell PTI port and + * out of the mobile device for analysis with a debugging tool + * (Lauterbach, Fido). This is part of a solution for the MIPI P1149.7, + * compact JTAG, standard. + * + * This header file is used by n_tracerouter to be able to send the + * data of it's tty port to the tty port this module sits. This + * mechanism can also be used independent of the PTI module. + * + */ + +#ifndef N_TRACESINK_H_ +#define N_TRACESINK_H_ + +void n_tracesink_datadrain(u8 *buf, int count); + +#endif diff --git a/include/linux/tty.h b/include/linux/tty.h index 4db4ca79f895..d6f05292e456 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -50,6 +50,8 @@ #define N_CAIF 20 /* CAIF protocol for talking to modems */ #define N_GSM0710 21 /* GSM 0710 Mux */ #define N_TI_WL 22 /* for TI's WL BT, FM, GPS combo chips */ +#define N_TRACESINK 23 /* Trace data routing for MIPI P1149.7 */ +#define N_TRACEROUTER 24 /* Trace data routing for MIPI P1149.7 */ /* * This character is the same as _POSIX_VDISABLE: it cannot be used as -- cgit v1.2.3-71-gd317 From 8c414ff3f4dcdde228c6a668385218290d73a265 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 8 May 2011 18:50:20 +0100 Subject: clocksource: convert footbridge to generic i8253 clocksource Convert the footbridge isa-timer code to use generic i8253 clocksource. Acked-by: John Stultz Acked-by: Thomas Gleixner Signed-off-by: Russell King --- arch/arm/include/asm/i8253.h | 15 ++++++++++++ arch/arm/mach-footbridge/Kconfig | 2 ++ arch/arm/mach-footbridge/isa-timer.c | 45 ++++-------------------------------- include/linux/clocksource.h | 2 ++ 4 files changed, 23 insertions(+), 41 deletions(-) create mode 100644 arch/arm/include/asm/i8253.h (limited to 'include/linux') diff --git a/arch/arm/include/asm/i8253.h b/arch/arm/include/asm/i8253.h new file mode 100644 index 000000000000..70656b69d5ce --- /dev/null +++ b/arch/arm/include/asm/i8253.h @@ -0,0 +1,15 @@ +#ifndef __ASMARM_I8253_H +#define __ASMARM_I8253_H + +/* i8253A PIT registers */ +#define PIT_MODE 0x43 +#define PIT_CH0 0x40 + +#define PIT_LATCH ((PIT_TICK_RATE + HZ / 2) / HZ) + +extern raw_spinlock_t i8253_lock; + +#define outb_pit outb_p +#define inb_pit inb_p + +#endif diff --git a/arch/arm/mach-footbridge/Kconfig b/arch/arm/mach-footbridge/Kconfig index bdd257921cfb..46adca068f2c 100644 --- a/arch/arm/mach-footbridge/Kconfig +++ b/arch/arm/mach-footbridge/Kconfig @@ -4,6 +4,7 @@ menu "Footbridge Implementations" config ARCH_CATS bool "CATS" + select CLKSRC_I8253 select FOOTBRIDGE_HOST select ISA select ISA_DMA @@ -59,6 +60,7 @@ config ARCH_EBSA285_HOST config ARCH_NETWINDER bool "NetWinder" + select CLKSRC_I8253 select FOOTBRIDGE_HOST select ISA select ISA_DMA diff --git a/arch/arm/mach-footbridge/isa-timer.c b/arch/arm/mach-footbridge/isa-timer.c index 441c6ce0d555..7020f1a3feca 100644 --- a/arch/arm/mach-footbridge/isa-timer.c +++ b/arch/arm/mach-footbridge/isa-timer.c @@ -10,53 +10,16 @@ #include #include #include +#include #include #include - +#include #include #include "common.h" -#define PIT_MODE 0x43 -#define PIT_CH0 0x40 - -#define PIT_LATCH ((PIT_TICK_RATE + HZ / 2) / HZ) - -static cycle_t pit_read(struct clocksource *cs) -{ - unsigned long flags; - static int old_count; - static u32 old_jifs; - int count; - u32 jifs; - - raw_local_irq_save(flags); - - jifs = 
jiffies; - outb_p(0x00, PIT_MODE); /* latch the count */ - count = inb_p(PIT_CH0); /* read the latched count */ - count |= inb_p(PIT_CH0) << 8; - - if (count > old_count && jifs == old_jifs) - count = old_count; - - old_count = count; - old_jifs = jifs; - - raw_local_irq_restore(flags); - - count = (PIT_LATCH - 1) - count; - - return (cycle_t)(jifs * PIT_LATCH) + count; -} - -static struct clocksource pit_cs = { - .name = "pit", - .rating = 110, - .read = pit_read, - .mask = CLOCKSOURCE_MASK(32), -}; +DEFINE_RAW_SPINLOCK(i8253_lock); static void pit_set_mode(enum clock_event_mode mode, struct clock_event_device *evt) @@ -121,7 +84,7 @@ static void __init isa_timer_init(void) pit_ce.max_delta_ns = clockevent_delta2ns(0x7fff, &pit_ce); pit_ce.min_delta_ns = clockevent_delta2ns(0x000f, &pit_ce); - clocksource_register_hz(&pit_cs, PIT_TICK_RATE); + clocksource_i8253_init(); setup_irq(pit_ce.irq, &pit_timer_irq); clockevents_register_device(&pit_ce); diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index c37b21ad5a3b..f13469b3df86 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -341,4 +341,6 @@ static inline void update_vsyscall_tz(void) extern void timekeeping_notify(struct clocksource *clock); +extern int clocksource_i8253_init(void); + #endif /* _LINUX_CLOCKSOURCE_H */ -- cgit v1.2.3-71-gd317 From 9281b16caac1276817b77033c5b8a1f5ca30102c Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Sun, 24 Apr 2011 14:30:14 -0500 Subject: pata_cm64x: fix boot crash on parisc The old IDE cmd64x checks the status of the CNTRL register to see if the ports are enabled before probing them. pata_cmd64x doesn't do this, which causes a HPMC on parisc when it tries to poke at the secondary port because apparently the BAR isn't wired up (and a non-responding piece of memory causes a HPMC). Fix this by porting the CNTRL register port detection logic from IDE cmd64x. In addition, following converns from Alan Cox, add a check to see if a mobility electronics bridge is the immediate parent and forgo the check if it is (prevents problems on hotplug controllers). 
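For readers skimming the diff, the detection itself is compact: one config-space read of CNTRL followed by per-channel bit tests. A minimal sketch as a hypothetical helper; CNTRL, CNTRL_CH0/CNTRL_CH1, ata_dummy_port_info and the port_ok/cntrl_ch0_ok conditions are the names the patch below introduces:

/* Sketch of the check described above: read the CNTRL byte once and
 * substitute a dummy port_info for any channel whose enable bit is clear. */
static void cmd64x_mask_disabled_ports(struct pci_dev *pdev,
					const struct ata_port_info **ppi,
					int port_ok, int cntrl_ch0_ok)
{
	u8 reg;

	/* CNTRL holds one enable bit per channel */
	pci_read_config_byte(pdev, CNTRL, &reg);
	if (port_ok && cntrl_ch0_ok && !(reg & CNTRL_CH0))
		ppi[0] = &ata_dummy_port_info;	/* primary channel disabled */
	if (port_ok && !(reg & CNTRL_CH1))
		ppi[1] = &ata_dummy_port_info;	/* secondary channel disabled */
}
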
Signed-off-by: James Bottomley Signed-off-by: Jeff Garzik --- drivers/ata/pata_cmd64x.c | 42 ++++++++++++++++++++++++++++++++++++++---- include/linux/pci_ids.h | 2 ++ 2 files changed, 40 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/pata_cmd64x.c b/drivers/ata/pata_cmd64x.c index 905ff76d3cbb..7bafc16cf5e0 100644 --- a/drivers/ata/pata_cmd64x.c +++ b/drivers/ata/pata_cmd64x.c @@ -41,6 +41,9 @@ enum { CFR = 0x50, CFR_INTR_CH0 = 0x04, + CNTRL = 0x51, + CNTRL_CH0 = 0x04, + CNTRL_CH1 = 0x08, CMDTIM = 0x52, ARTTIM0 = 0x53, DRWTIM0 = 0x54, @@ -328,9 +331,19 @@ static int cmd64x_init_one(struct pci_dev *pdev, const struct pci_device_id *id) .port_ops = &cmd648_port_ops } }; - const struct ata_port_info *ppi[] = { &cmd_info[id->driver_data], NULL }; - u8 mrdmode; + const struct ata_port_info *ppi[] = { + &cmd_info[id->driver_data], + &cmd_info[id->driver_data], + NULL + }; + u8 mrdmode, reg; int rc; + struct pci_dev *bridge = pdev->bus->self; + /* mobility split bridges don't report enabled ports correctly */ + int port_ok = !(bridge && bridge->vendor == + PCI_VENDOR_ID_MOBILITY_ELECTRONICS); + /* all (with exceptions below) apart from 643 have CNTRL_CH0 bit */ + int cntrl_ch0_ok = (id->driver_data != 0); rc = pcim_enable_device(pdev); if (rc) @@ -341,11 +354,18 @@ static int cmd64x_init_one(struct pci_dev *pdev, const struct pci_device_id *id) if (pdev->device == PCI_DEVICE_ID_CMD_646) { /* Does UDMA work ? */ - if (pdev->revision > 4) + if (pdev->revision > 4) { ppi[0] = &cmd_info[2]; + ppi[1] = &cmd_info[2]; + } /* Early rev with other problems ? */ - else if (pdev->revision == 1) + else if (pdev->revision == 1) { ppi[0] = &cmd_info[3]; + ppi[1] = &cmd_info[3]; + } + /* revs 1,2 have no CNTRL_CH0 */ + if (pdev->revision < 3) + cntrl_ch0_ok = 0; } pci_write_config_byte(pdev, PCI_LATENCY_TIMER, 64); @@ -354,6 +374,20 @@ static int cmd64x_init_one(struct pci_dev *pdev, const struct pci_device_id *id) mrdmode |= 0x02; /* Memory read line enable */ pci_write_config_byte(pdev, MRDMODE, mrdmode); + /* check for enabled ports */ + pci_read_config_byte(pdev, CNTRL, ®); + if (!port_ok) + dev_printk(KERN_NOTICE, &pdev->dev, "Mobility Bridge detected, ignoring CNTRL port enable/disable\n"); + if (port_ok && cntrl_ch0_ok && !(reg & CNTRL_CH0)) { + dev_printk(KERN_NOTICE, &pdev->dev, "Primary port is disabled\n"); + ppi[0] = &ata_dummy_port_info; + + } + if (port_ok && !(reg & CNTRL_CH1)) { + dev_printk(KERN_NOTICE, &pdev->dev, "Secondary port is disabled\n"); + ppi[1] = &ata_dummy_port_info; + } + /* Force PIO 0 here.. */ /* PPC specific fixup copied from old driver */ diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 8abe8d78c4bf..8652a4fa3fe2 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -608,6 +608,8 @@ #define PCI_DEVICE_ID_MATROX_G550 0x2527 #define PCI_DEVICE_ID_MATROX_VIA 0x4536 +#define PCI_VENDOR_ID_MOBILITY_ELECTRONICS 0x14f2 + #define PCI_VENDOR_ID_CT 0x102c #define PCI_DEVICE_ID_CT_69000 0x00c0 #define PCI_DEVICE_ID_CT_65545 0x00d8 -- cgit v1.2.3-71-gd317 From e1e8fb6a1ff3f9487e03a4cbf85b81d1316068ce Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 15 Apr 2011 03:02:49 +0000 Subject: fs: remove FS_COW_FL FS_COW_FL and FS_NOCOW_FL were newly introduced to control per file COW in btrfs, but FS_NOCOW_FL is sufficient. The fact is we don't have corresponding BTRFS_INODE_COW flag. COW is default, and FS_NOCOW_FL can be used to switch off COW for a single file. 
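The flag is driven through the ordinary file-flags ioctls; as a hedged userspace sketch (not part of this patch, assuming the FS_IOC_GETFLAGS/FS_IOC_SETFLAGS interface and FS_NOCOW_FL from <linux/fs.h>), per-file COW can be switched off and back on like this:

#include <sys/ioctl.h>
#include <linux/fs.h>

/* Sketch: set or clear FS_NOCOW_FL on an already-open fd. */
static int set_nocow(int fd, int nocow)
{
	int flags;

	if (ioctl(fd, FS_IOC_GETFLAGS, &flags) < 0)
		return -1;
	if (nocow)
		flags |= FS_NOCOW_FL;	/* switch COW off for this file */
	else
		flags &= ~FS_NOCOW_FL;	/* back to the default: COW on */
	return ioctl(fd, FS_IOC_SETFLAGS, &flags);
}
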
If we mount btrfs with nodatacow, a newly created file will be set with the FS_NOCOW_FL flag. So to turn on COW for it, we can just clear the FS_NOCOW_FL flag. Signed-off-by: Li Zefan Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 15 ++++++--------- include/linux/fs.h | 1 - 2 files changed, 6 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index f580a3a5d2fc..3240dd90da42 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -144,16 +144,13 @@ static int check_flags(unsigned int flags) if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \ FS_NOATIME_FL | FS_NODUMP_FL | \ FS_SYNC_FL | FS_DIRSYNC_FL | \ - FS_NOCOMP_FL | FS_COMPR_FL | \ - FS_NOCOW_FL | FS_COW_FL)) + FS_NOCOMP_FL | FS_COMPR_FL | + FS_NOCOW_FL)) return -EOPNOTSUPP; if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL)) return -EINVAL; - if ((flags & FS_NOCOW_FL) && (flags & FS_COW_FL)) - return -EINVAL; - return 0; } @@ -218,6 +215,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) ip->flags |= BTRFS_INODE_DIRSYNC; else ip->flags &= ~BTRFS_INODE_DIRSYNC; + if (flags & FS_NOCOW_FL) + ip->flags |= BTRFS_INODE_NODATACOW; + else + ip->flags &= ~BTRFS_INODE_NODATACOW; /* * The COMPRESS flag can only be changed by users, while the NOCOMPRESS @@ -231,10 +232,6 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) ip->flags |= BTRFS_INODE_COMPRESS; ip->flags &= ~BTRFS_INODE_NOCOMPRESS; } - if (flags & FS_NOCOW_FL) - ip->flags |= BTRFS_INODE_NODATACOW; - else if (flags & FS_COW_FL) - ip->flags &= ~BTRFS_INODE_NODATACOW; trans = btrfs_join_transaction(root, 1); BUG_ON(IS_ERR(trans)); diff --git a/include/linux/fs.h b/include/linux/fs.h index de9dd8119b71..56a41412903d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -365,7 +365,6 @@ struct inodes_stat_t { #define FS_EXTENT_FL 0x00080000 /* Extents */ #define FS_DIRECTIO_FL 0x00100000 /* Use direct i/o */ #define FS_NOCOW_FL 0x00800000 /* Do not cow file */ -#define FS_COW_FL 0x02000000 /* Cow file */ #define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */ #define FS_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ -- cgit v1.2.3-71-gd317 From 4c3b512c6b054e8c0bf14b1d61b20d4569de0a21 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Mon, 18 Apr 2011 14:48:22 +0100 Subject: ARM: 6882/1: ELF: Define new core note type for VFP registers The VFP registers are not currently included in coredumps, and there's no existing note type where they can sensibly be included, so this patch defines a dedicated note type for them. 
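As a rough illustration of the consumer side (a hypothetical snippet, not part of this patch), a debugger or post-mortem tool walking the PT_NOTE segment of a core file would pick out the new register set by its note type; Linux emits such extended register-set notes under the owner name "LINUX":

#include <elf.h>
#include <string.h>

#ifndef NT_ARM_VFP
#define NT_ARM_VFP 0x400	/* the value this patch adds to elf.h */
#endif

/* Sketch: nhdr and name are assumed to have been parsed out of PT_NOTE. */
static int is_arm_vfp_note(const Elf32_Nhdr *nhdr, const char *name)
{
	return nhdr->n_type == NT_ARM_VFP && strcmp(name, "LINUX") == 0;
}
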
Signed-off-by: Dave Martin Acked-by: Will Deacon Signed-off-by: Russell King --- include/linux/elf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/elf.h b/include/linux/elf.h index 4d608014753a..110821cb6ea5 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -395,6 +395,7 @@ typedef struct elf64_shdr { #define NT_S390_CTRS 0x304 /* s390 control registers */ #define NT_S390_PREFIX 0x305 /* s390 prefix register */ #define NT_S390_LAST_BREAK 0x306 /* s390 breaking event address */ +#define NT_ARM_VFP 0x400 /* ARM VFP/NEON registers */ /* Note header in a PT_NOTE section */ -- cgit v1.2.3-71-gd317 From 86f315bbb2374f1f077500ad131dd9b71856e697 Mon Sep 17 00:00:00 2001 From: Chris Ball Date: Mon, 16 May 2011 11:32:26 -0400 Subject: Revert "mmc: fix a race between card-detect rescan and clock-gate work instances" This reverts commit 26fc8775b51484d8c0a671198639c6d5ae60533e, which has been reported to cause boot/resume-time crashes for some users: https://bbs.archlinux.org/viewtopic.php?id=118751. Signed-off-by: Chris Ball Cc: --- drivers/mmc/core/host.c | 9 +++++---- include/linux/mmc/host.h | 1 + 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index 2b200c1cfbba..461e6a17fb90 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -94,7 +94,7 @@ static void mmc_host_clk_gate_delayed(struct mmc_host *host) spin_unlock_irqrestore(&host->clk_lock, flags); return; } - mmc_claim_host(host); + mutex_lock(&host->clk_gate_mutex); spin_lock_irqsave(&host->clk_lock, flags); if (!host->clk_requests) { spin_unlock_irqrestore(&host->clk_lock, flags); @@ -104,7 +104,7 @@ static void mmc_host_clk_gate_delayed(struct mmc_host *host) pr_debug("%s: gated MCI clock\n", mmc_hostname(host)); } spin_unlock_irqrestore(&host->clk_lock, flags); - mmc_release_host(host); + mutex_unlock(&host->clk_gate_mutex); } /* @@ -130,7 +130,7 @@ void mmc_host_clk_ungate(struct mmc_host *host) { unsigned long flags; - mmc_claim_host(host); + mutex_lock(&host->clk_gate_mutex); spin_lock_irqsave(&host->clk_lock, flags); if (host->clk_gated) { spin_unlock_irqrestore(&host->clk_lock, flags); @@ -140,7 +140,7 @@ void mmc_host_clk_ungate(struct mmc_host *host) } host->clk_requests++; spin_unlock_irqrestore(&host->clk_lock, flags); - mmc_release_host(host); + mutex_unlock(&host->clk_gate_mutex); } /** @@ -215,6 +215,7 @@ static inline void mmc_host_clk_init(struct mmc_host *host) host->clk_gated = false; INIT_WORK(&host->clk_gate_work, mmc_host_clk_gate_work); spin_lock_init(&host->clk_lock); + mutex_init(&host->clk_gate_mutex); } /** diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index eb792cb6d745..bcb793ec7374 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -183,6 +183,7 @@ struct mmc_host { struct work_struct clk_gate_work; /* delayed clock gate */ unsigned int clk_old; /* old clock value cache */ spinlock_t clk_lock; /* lock for clk fields */ + struct mutex clk_gate_mutex; /* mutex for clock gating */ #endif /* host specific block data */ -- cgit v1.2.3-71-gd317 From 8d38d74b648513dd8ed8bd2b67d899208ef4e09e Mon Sep 17 00:00:00 2001 From: Chen Gong Date: Mon, 16 May 2011 10:58:57 -0700 Subject: pstore: fix one type of return value in pstore the return type of function _read_ in pstore is size_t, but in the callback function of _read_, the logic doesn't consider it too much, which means if negative value (assuming error here) is 
returned, it will be converted to positive because of type casting. ssize_t is enough for this function. Signed-off-by: Chen Gong Signed-off-by: Tony Luck --- drivers/acpi/apei/erst.c | 4 ++-- fs/pstore/platform.c | 4 ++-- include/linux/pstore.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c index d6cb0ff6988e..d5a89d067f98 100644 --- a/drivers/acpi/apei/erst.c +++ b/drivers/acpi/apei/erst.c @@ -929,7 +929,7 @@ static int erst_check_table(struct acpi_table_erst *erst_tab) return 0; } -static size_t erst_reader(u64 *id, enum pstore_type_id *type, +static ssize_t erst_reader(u64 *id, enum pstore_type_id *type, struct timespec *time); static u64 erst_writer(enum pstore_type_id type, size_t size); @@ -957,7 +957,7 @@ struct cper_pstore_record { char data[]; } __packed; -static size_t erst_reader(u64 *id, enum pstore_type_id *type, +static ssize_t erst_reader(u64 *id, enum pstore_type_id *type, struct timespec *time) { int rc; diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index f835a25625ff..912403c2a93d 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -152,7 +152,7 @@ EXPORT_SYMBOL_GPL(pstore_register); void pstore_get_records(void) { struct pstore_info *psi = psinfo; - size_t size; + ssize_t size; u64 id; enum pstore_type_id type; struct timespec time; @@ -163,7 +163,7 @@ void pstore_get_records(void) mutex_lock(&psinfo->buf_mutex); while ((size = psi->read(&id, &type, &time)) > 0) { - if (pstore_mkfile(type, psi->name, id, psi->buf, size, + if (pstore_mkfile(type, psi->name, id, psi->buf, (size_t)size, time, psi->erase)) failed++; } diff --git a/include/linux/pstore.h b/include/linux/pstore.h index 41977737bb7d..14ce2f5d08af 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -35,7 +35,7 @@ struct pstore_info { struct mutex buf_mutex; /* serialize access to 'buf' */ char *buf; size_t bufsize; - size_t (*read)(u64 *id, enum pstore_type_id *type, + ssize_t (*read)(u64 *id, enum pstore_type_id *type, struct timespec *time); u64 (*write)(enum pstore_type_id type, size_t size); int (*erase)(u64 id); -- cgit v1.2.3-71-gd317 From 06cf91b4b4aafa50ee0a94c81d2c6922a18af242 Mon Sep 17 00:00:00 2001 From: Chen Gong Date: Mon, 16 May 2011 11:00:27 -0700 Subject: pstore: fix pstore filesystem mount/remount issue Currently after mount/remount operation on pstore filesystem, the content on pstore will be lost. It is because current ERST implementation doesn't support multi-user usage, which moves internal pointer to the end after accessing it. Adding multi-user support for pstore usage. 
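The resulting reader contract is a plain open/iterate/close bracket around the existing read loop; a minimal sketch of the consumer side, mirroring the pstore_get_records() change in the diff below with locking and error accounting omitted:

/* psi is a struct pstore_info *; open()/close() let a backend such as
 * ERST keep a private cursor for the duration of one walk. */
static void pstore_read_all_records(struct pstore_info *psi)
{
	enum pstore_type_id type;
	struct timespec time;
	ssize_t size;
	u64 id;

	if (psi->open(psi))
		return;
	while ((size = psi->read(&id, &type, &time)) > 0)
		pstore_mkfile(type, psi->name, id, psi->buf, (size_t)size,
			      time, psi->erase);
	psi->close(psi);
}
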
Signed-off-by: Chen Gong Signed-off-by: Tony Luck --- drivers/acpi/apei/erst.c | 48 +++++++++++++++++++++++++++++++++++------------- fs/pstore/platform.c | 8 +++++++- include/linux/pstore.h | 2 ++ 3 files changed, 44 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c index d5a89d067f98..ddb68c4f8d3e 100644 --- a/drivers/acpi/apei/erst.c +++ b/drivers/acpi/apei/erst.c @@ -929,6 +929,8 @@ static int erst_check_table(struct acpi_table_erst *erst_tab) return 0; } +static int erst_open_pstore(struct pstore_info *psi); +static int erst_close_pstore(struct pstore_info *psi); static ssize_t erst_reader(u64 *id, enum pstore_type_id *type, struct timespec *time); static u64 erst_writer(enum pstore_type_id type, size_t size); @@ -936,6 +938,8 @@ static u64 erst_writer(enum pstore_type_id type, size_t size); static struct pstore_info erst_info = { .owner = THIS_MODULE, .name = "erst", + .open = erst_open_pstore, + .close = erst_close_pstore, .read = erst_reader, .write = erst_writer, .erase = erst_clear @@ -957,12 +961,32 @@ struct cper_pstore_record { char data[]; } __packed; +static int reader_pos; + +static int erst_open_pstore(struct pstore_info *psi) +{ + int rc; + + if (erst_disable) + return -ENODEV; + + rc = erst_get_record_id_begin(&reader_pos); + + return rc; +} + +static int erst_close_pstore(struct pstore_info *psi) +{ + erst_get_record_id_end(); + + return 0; +} + static ssize_t erst_reader(u64 *id, enum pstore_type_id *type, struct timespec *time) { int rc; - ssize_t len; - unsigned long flags; + ssize_t len = 0; u64 record_id; struct cper_pstore_record *rcd = (struct cper_pstore_record *) (erst_info.buf - sizeof(*rcd)); @@ -970,24 +994,21 @@ static ssize_t erst_reader(u64 *id, enum pstore_type_id *type, if (erst_disable) return -ENODEV; - raw_spin_lock_irqsave(&erst_lock, flags); skip: - rc = __erst_get_next_record_id(&record_id); - if (rc) { - raw_spin_unlock_irqrestore(&erst_lock, flags); - return rc; - } + rc = erst_get_record_id_next(&reader_pos, &record_id); + if (rc) + goto out; + /* no more record */ if (record_id == APEI_ERST_INVALID_RECORD_ID) { - raw_spin_unlock_irqrestore(&erst_lock, flags); - return 0; + rc = -1; + goto out; } - len = __erst_read(record_id, &rcd->hdr, sizeof(*rcd) + + len = erst_read(record_id, &rcd->hdr, sizeof(*rcd) + erst_erange.size); if (uuid_le_cmp(rcd->hdr.creator_id, CPER_CREATOR_PSTORE) != 0) goto skip; - raw_spin_unlock_irqrestore(&erst_lock, flags); *id = record_id; if (uuid_le_cmp(rcd->sec_hdr.section_type, @@ -1005,7 +1026,8 @@ skip: time->tv_sec = 0; time->tv_nsec = 0; - return len - sizeof(*rcd); +out: + return (rc < 0) ? 
rc : (len - sizeof(*rcd)); } static u64 erst_writer(enum pstore_type_id type, size_t size) diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 912403c2a93d..f2c3ff20ea68 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -156,17 +156,23 @@ void pstore_get_records(void) u64 id; enum pstore_type_id type; struct timespec time; - int failed = 0; + int failed = 0, rc; if (!psi) return; mutex_lock(&psinfo->buf_mutex); + rc = psi->open(psi); + if (rc) + goto out; + while ((size = psi->read(&id, &type, &time)) > 0) { if (pstore_mkfile(type, psi->name, id, psi->buf, (size_t)size, time, psi->erase)) failed++; } + psi->close(psi); +out: mutex_unlock(&psinfo->buf_mutex); if (failed) diff --git a/include/linux/pstore.h b/include/linux/pstore.h index 14ce2f5d08af..2455ef2683f0 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -35,6 +35,8 @@ struct pstore_info { struct mutex buf_mutex; /* serialize access to 'buf' */ char *buf; size_t bufsize; + int (*open)(struct pstore_info *psi); + int (*close)(struct pstore_info *psi); ssize_t (*read)(u64 *id, enum pstore_type_id *type, struct timespec *time); u64 (*write)(enum pstore_type_id type, size_t size); -- cgit v1.2.3-71-gd317 From 7527a782e187d1214a5b3dc2897ce441033bb4ef Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 13 May 2011 10:58:57 +0200 Subject: cfg80211: advertise possible interface combinations Add the ability to advertise interface combinations in nl80211. This allows the driver to indicate what the combinations are that it supports. "Combinations" of just a single interface are implicit, as previously. Note that cfg80211 will enforce that the restrictions are met, but not for all drivers yet (once all drivers are updated, we can remove the flag and enforce for all). When no combinations are actually supported, an empty list will be exported so that userspace can know if the kernel exported this info or not (although it isn't clear to me what tools using the info should do if the kernel didn't export it). Since some interface types are purely virtual/software and don't fit the restrictions, those are exposed in a new list of pure SW types, not subject to restrictions. This mainly exists to handle AP-VLAN and monitor interfaces in mac80211. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 122 +++++++++++++++++++++++++++++++++++++++++++++++- include/net/cfg80211.h | 88 ++++++++++++++++++++++++++++++++++ net/mac80211/main.c | 16 +++++-- net/wireless/core.c | 69 +++++++++++++++++++++++++++ net/wireless/core.h | 11 +++++ net/wireless/nl80211.c | 105 +++++++++++++++++++++++++++++++++++------ net/wireless/util.c | 80 +++++++++++++++++++++++++++++++ 7 files changed, 472 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 281a2bb6a6ec..0ea497cb607c 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -76,6 +76,39 @@ * below. */ +/** + * DOC: Virtual interface / concurrency capabilities + * + * Some devices are able to operate with virtual MACs, they can have + * more than one virtual interface. The capability handling for this + * is a bit complex though, as there may be a number of restrictions + * on the types of concurrency that are supported. + * + * To start with, each device supports the interface types listed in + * the %NL80211_ATTR_SUPPORTED_IFTYPES attribute, but by listing the + * types there no concurrency is implied. 
+ * + * Once concurrency is desired, more attributes must be observed: + * To start with, since some interface types are purely managed in + * software, like the AP-VLAN type in mac80211 for example, there's + * an additional list of these, they can be added at any time and + * are only restricted by some semantic restrictions (e.g. AP-VLAN + * cannot be added without a corresponding AP interface). This list + * is exported in the %NL80211_ATTR_SOFTWARE_IFTYPES attribute. + * + * Further, the list of supported combinations is exported. This is + * in the %NL80211_ATTR_INTERFACE_COMBINATIONS attribute. Basically, + * it exports a list of "groups", and at any point in time the + * interfaces that are currently active must fall into any one of + * the advertised groups. Within each group, there are restrictions + * on the number of interfaces of different types that are supported + * and also the number of different channels, along with potentially + * some other restrictions. See &enum nl80211_if_combination_attrs. + * + * All together, these attributes define the concurrency of virtual + * interfaces that a given device supports. + */ + /** * enum nl80211_commands - supported nl80211 commands * @@ -954,6 +987,14 @@ enum nl80211_commands { * @NL80211_ATTR_SCHED_SCAN_INTERVAL: Interval between scheduled scan * cycles, in msecs. * + * @NL80211_ATTR_INTERFACE_COMBINATIONS: Nested attribute listing the supported + * interface combinations. In each nested item, it contains attributes + * defined in &enum nl80211_if_combination_attrs. + * @NL80211_ATTR_SOFTWARE_IFTYPES: Nested attribute (just like + * %NL80211_ATTR_SUPPORTED_IFTYPES) containing the interface types that + * are managed in software: interfaces of these types aren't subject to + * any restrictions in their number or combinations. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1149,6 +1190,9 @@ enum nl80211_attrs { NL80211_ATTR_SCHED_SCAN_INTERVAL, + NL80211_ATTR_INTERFACE_COMBINATIONS, + NL80211_ATTR_SOFTWARE_IFTYPES, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -1201,7 +1245,9 @@ enum nl80211_attrs { * @NL80211_IFTYPE_ADHOC: independent BSS member * @NL80211_IFTYPE_STATION: managed BSS member * @NL80211_IFTYPE_AP: access point - * @NL80211_IFTYPE_AP_VLAN: VLAN interface for access points + * @NL80211_IFTYPE_AP_VLAN: VLAN interface for access points; VLAN interfaces + * are a bit special in that they must always be tied to a pre-existing + * AP type interface. 
* @NL80211_IFTYPE_WDS: wireless distribution interface * @NL80211_IFTYPE_MONITOR: monitor interface receiving all frames * @NL80211_IFTYPE_MESH_POINT: mesh point @@ -2206,4 +2252,78 @@ enum nl80211_wowlan_triggers { MAX_NL80211_WOWLAN_TRIG = NUM_NL80211_WOWLAN_TRIG - 1 }; +/** + * enum nl80211_iface_limit_attrs - limit attributes + * @NL80211_IFACE_LIMIT_UNSPEC: (reserved) + * @NL80211_IFACE_LIMIT_MAX: maximum number of interfaces that + * can be chosen from this set of interface types (u32) + * @NL80211_IFACE_LIMIT_TYPES: nested attribute containing a + * flag attribute for each interface type in this set + * @NUM_NL80211_IFACE_LIMIT: number of attributes + * @MAX_NL80211_IFACE_LIMIT: highest attribute number + */ +enum nl80211_iface_limit_attrs { + NL80211_IFACE_LIMIT_UNSPEC, + NL80211_IFACE_LIMIT_MAX, + NL80211_IFACE_LIMIT_TYPES, + + /* keep last */ + NUM_NL80211_IFACE_LIMIT, + MAX_NL80211_IFACE_LIMIT = NUM_NL80211_IFACE_LIMIT - 1 +}; + +/** + * enum nl80211_if_combination_attrs -- interface combination attributes + * + * @NL80211_IFACE_COMB_UNSPEC: (reserved) + * @NL80211_IFACE_COMB_LIMITS: Nested attributes containing the limits + * for given interface types, see &enum nl80211_iface_limit_attrs. + * @NL80211_IFACE_COMB_MAXNUM: u32 attribute giving the total number of + * interfaces that can be created in this group. This number doesn't + * apply to interfaces purely managed in software, which are listed + * in a separate attribute %NL80211_ATTR_INTERFACES_SOFTWARE. + * @NL80211_IFACE_COMB_STA_AP_BI_MATCH: flag attribute specifying that + * beacon intervals within this group must be all the same even for + * infrastructure and AP/GO combinations, i.e. the GO(s) must adopt + * the infrastructure network's beacon interval. + * @NL80211_IFACE_COMB_NUM_CHANNELS: u32 attribute specifying how many + * different channels may be used within this group. + * @NUM_NL80211_IFACE_COMB: number of attributes + * @MAX_NL80211_IFACE_COMB: highest attribute number + * + * Examples: + * limits = [ #{STA} <= 1, #{AP} <= 1 ], matching BI, channels = 1, max = 2 + * => allows an AP and a STA that must match BIs + * + * numbers = [ #{AP, P2P-GO} <= 8 ], channels = 1, max = 8 + * => allows 8 of AP/GO + * + * numbers = [ #{STA} <= 2 ], channels = 2, max = 2 + * => allows two STAs on different channels + * + * numbers = [ #{STA} <= 1, #{P2P-client,P2P-GO} <= 3 ], max = 4 + * => allows a STA plus three P2P interfaces + * + * The list of these four possiblities could completely be contained + * within the %NL80211_ATTR_INTERFACE_COMBINATIONS attribute to indicate + * that any of these groups must match. + * + * "Combinations" of just a single interface will not be listed here, + * a single interface of any valid interface type is assumed to always + * be possible by itself. 
This means that implicitly, for each valid + * interface type, the following group always exists: + * numbers = [ #{} <= 1 ], channels = 1, max = 1 + */ +enum nl80211_if_combination_attrs { + NL80211_IFACE_COMB_UNSPEC, + NL80211_IFACE_COMB_LIMITS, + NL80211_IFACE_COMB_MAXNUM, + NL80211_IFACE_COMB_STA_AP_BI_MATCH, + NL80211_IFACE_COMB_NUM_CHANNELS, + + /* keep last */ + NUM_NL80211_IFACE_COMB, + MAX_NL80211_IFACE_COMB = NUM_NL80211_IFACE_COMB - 1 +}; + #endif /* __LINUX_NL80211_H */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index e1f1b41f7b13..04afcfb9eaf4 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1547,6 +1547,10 @@ struct cfg80211_ops { * hints read the documenation for regulatory_hint_found_beacon() * @WIPHY_FLAG_NETNS_OK: if not set, do not allow changing the netns of this * wiphy at all + * @WIPHY_FLAG_ENFORCE_COMBINATIONS: Set this flag to enforce interface + * combinations for this device. This flag is used for backward + * compatibility only until all drivers advertise combinations and + * they will always be enforced. * @WIPHY_FLAG_PS_ON_BY_DEFAULT: if set to true, powersave will be enabled * by default -- this flag will be set depending on the kernel's default * on wiphy_new(), but can be changed by the driver if it has a good @@ -1574,6 +1578,81 @@ enum wiphy_flags { WIPHY_FLAG_IBSS_RSN = BIT(8), WIPHY_FLAG_MESH_AUTH = BIT(10), WIPHY_FLAG_SUPPORTS_SCHED_SCAN = BIT(11), + WIPHY_FLAG_ENFORCE_COMBINATIONS = BIT(12), +}; + +/** + * struct ieee80211_iface_limit - limit on certain interface types + * @max: maximum number of interfaces of these types + * @types: interface types (bits) + */ +struct ieee80211_iface_limit { + u16 max; + u16 types; +}; + +/** + * struct ieee80211_iface_combination - possible interface combination + * @limits: limits for the given interface types + * @n_limits: number of limitations + * @num_different_channels: can use up to this many different channels + * @max_interfaces: maximum number of interfaces in total allowed in this + * group + * @beacon_int_infra_match: In this combination, the beacon intervals + * between infrastructure and AP types must match. This is required + * only in special cases. + * + * These examples can be expressed as follows: + * + * Allow #STA <= 1, #AP <= 1, matching BI, channels = 1, 2 total: + * + * struct ieee80211_iface_limit limits1[] = { + * { .max = 1, .types = BIT(NL80211_IFTYPE_STATION), }, + * { .max = 1, .types = BIT(NL80211_IFTYPE_AP}, }, + * }; + * struct ieee80211_iface_combination combination1 = { + * .limits = limits1, + * .n_limits = ARRAY_SIZE(limits1), + * .max_interfaces = 2, + * .beacon_int_infra_match = true, + * }; + * + * + * Allow #{AP, P2P-GO} <= 8, channels = 1, 8 total: + * + * struct ieee80211_iface_limit limits2[] = { + * { .max = 8, .types = BIT(NL80211_IFTYPE_AP) | + * BIT(NL80211_IFTYPE_P2P_GO), }, + * }; + * struct ieee80211_iface_combination combination2 = { + * .limits = limits2, + * .n_limits = ARRAY_SIZE(limits2), + * .max_interfaces = 8, + * .num_different_channels = 1, + * }; + * + * + * Allow #STA <= 1, #{P2P-client,P2P-GO} <= 3 on two channels, 4 total. + * This allows for an infrastructure connection and three P2P connections. 
+ * + * struct ieee80211_iface_limit limits3[] = { + * { .max = 1, .types = BIT(NL80211_IFTYPE_STATION), }, + * { .max = 3, .types = BIT(NL80211_IFTYPE_P2P_GO) | + * BIT(NL80211_IFTYPE_P2P_CLIENT), }, + * }; + * struct ieee80211_iface_combination combination3 = { + * .limits = limits3, + * .n_limits = ARRAY_SIZE(limits3), + * .max_interfaces = 4, + * .num_different_channels = 2, + * }; + */ +struct ieee80211_iface_combination { + const struct ieee80211_iface_limit *limits; + u32 num_different_channels; + u16 max_interfaces; + u8 n_limits; + bool beacon_int_infra_match; }; struct mac_address { @@ -1653,6 +1732,11 @@ struct wiphy_wowlan_support { * @priv: driver private data (sized according to wiphy_new() parameter) * @interface_modes: bitmask of interfaces types valid for this wiphy, * must be set by driver + * @iface_combinations: Valid interface combinations array, should not + * list single interface types. + * @n_iface_combinations: number of entries in @iface_combinations array. + * @software_iftypes: bitmask of software interface types, these are not + * subject to any restrictions since they are purely managed in SW. * @flags: wiphy flags, see &enum wiphy_flags * @bss_priv_size: each BSS struct has private data allocated with it, * this variable determines its size @@ -1697,6 +1781,10 @@ struct wiphy { const struct ieee80211_txrx_stypes *mgmt_stypes; + const struct ieee80211_iface_combination *iface_combinations; + int n_iface_combinations; + u16 software_iftypes; + u16 n_addresses; /* Supported interface modes, OR together BIT(NL80211_IFTYPE_...) */ diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 7f89011fa22d..79a2281678bf 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -685,7 +685,7 @@ EXPORT_SYMBOL(ieee80211_alloc_hw); int ieee80211_register_hw(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); - int result; + int result, i; enum ieee80211_band band; int channels, max_bitrates; bool supp_ht; @@ -743,11 +743,19 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) return -ENOMEM; /* if low-level driver supports AP, we also support VLAN */ - if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_AP)) - local->hw.wiphy->interface_modes |= BIT(NL80211_IFTYPE_AP_VLAN); + if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_AP)) { + hw->wiphy->interface_modes |= BIT(NL80211_IFTYPE_AP_VLAN); + hw->wiphy->software_iftypes |= BIT(NL80211_IFTYPE_AP_VLAN); + } /* mac80211 always supports monitor */ - local->hw.wiphy->interface_modes |= BIT(NL80211_IFTYPE_MONITOR); + hw->wiphy->interface_modes |= BIT(NL80211_IFTYPE_MONITOR); + hw->wiphy->software_iftypes |= BIT(NL80211_IFTYPE_MONITOR); + + /* mac80211 doesn't support more than 1 channel */ + for (i = 0; i < hw->wiphy->n_iface_combinations; i++) + if (hw->wiphy->iface_combinations[i].num_different_channels > 1) + return -EINVAL; #ifndef CONFIG_MAC80211_MESH /* mesh depends on Kconfig, but drivers should set it if they want */ diff --git a/net/wireless/core.c b/net/wireless/core.c index 18b002f16860..c22ef3492ee6 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -416,6 +416,67 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) } EXPORT_SYMBOL(wiphy_new); +static int wiphy_verify_combinations(struct wiphy *wiphy) +{ + const struct ieee80211_iface_combination *c; + int i, j; + + /* If we have combinations enforce them */ + if (wiphy->n_iface_combinations) + wiphy->flags |= WIPHY_FLAG_ENFORCE_COMBINATIONS; + + for (i = 0; i < 
wiphy->n_iface_combinations; i++) { + u32 cnt = 0; + u16 all_iftypes = 0; + + c = &wiphy->iface_combinations[i]; + + /* Combinations with just one interface aren't real */ + if (WARN_ON(c->max_interfaces < 2)) + return -EINVAL; + + /* Need at least one channel */ + if (WARN_ON(!c->num_different_channels)) + return -EINVAL; + + if (WARN_ON(!c->n_limits)) + return -EINVAL; + + for (j = 0; j < c->n_limits; j++) { + u16 types = c->limits[j].types; + + /* + * interface types shouldn't overlap, this is + * used in cfg80211_can_change_interface() + */ + if (WARN_ON(types & all_iftypes)) + return -EINVAL; + all_iftypes |= types; + + if (WARN_ON(!c->limits[j].max)) + return -EINVAL; + + /* Shouldn't list software iftypes in combinations! */ + if (WARN_ON(wiphy->software_iftypes & types)) + return -EINVAL; + + cnt += c->limits[j].max; + /* + * Don't advertise an unsupported type + * in a combination. + */ + if (WARN_ON((wiphy->interface_modes & types) != types)) + return -EINVAL; + } + + /* You can't even choose that many! */ + if (WARN_ON(cnt < c->max_interfaces)) + return -EINVAL; + } + + return 0; +} + int wiphy_register(struct wiphy *wiphy) { struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); @@ -444,6 +505,10 @@ int wiphy_register(struct wiphy *wiphy) if (WARN_ON(ifmodes != wiphy->interface_modes)) wiphy->interface_modes = ifmodes; + res = wiphy_verify_combinations(wiphy); + if (res) + return res; + /* sanity check supported bands/channels */ for (band = 0; band < IEEE80211_NUM_BANDS; band++) { sband = wiphy->bands[band]; @@ -698,6 +763,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, struct net_device *dev = ndev; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev; + int ret; if (!wdev) return NOTIFY_DONE; @@ -893,6 +959,9 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, return notifier_from_errno(-EOPNOTSUPP); if (rfkill_blocked(rdev->rfkill)) return notifier_from_errno(-ERFKILL); + ret = cfg80211_can_add_interface(rdev, wdev->iftype); + if (ret) + return notifier_from_errno(ret); break; } diff --git a/net/wireless/core.h b/net/wireless/core.h index d4b8f4c0bbbb..bf0fb40e3c8b 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -422,6 +422,17 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, u32 *flags, struct vif_params *params); void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev); +int cfg80211_can_change_interface(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, + enum nl80211_iftype iftype); + +static inline int +cfg80211_can_add_interface(struct cfg80211_registered_device *rdev, + enum nl80211_iftype iftype) +{ + return cfg80211_can_change_interface(rdev, NULL, iftype); +} + struct ieee80211_channel * rdev_freq_to_chan(struct cfg80211_registered_device *rdev, int freq, enum nl80211_channel_type channel_type); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 9ef8e287d61b..beac296b1fde 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -564,6 +564,88 @@ static int nl80211_key_allowed(struct wireless_dev *wdev) return 0; } +static int nl80211_put_iftypes(struct sk_buff *msg, u32 attr, u16 ifmodes) +{ + struct nlattr *nl_modes = nla_nest_start(msg, attr); + int i; + + if (!nl_modes) + goto nla_put_failure; + + i = 0; + while (ifmodes) { + if (ifmodes & 1) + NLA_PUT_FLAG(msg, i); + ifmodes >>= 1; + i++; + } + + nla_nest_end(msg, nl_modes); + return 0; + +nla_put_failure: + return -ENOBUFS; 
+} + +static int nl80211_put_iface_combinations(struct wiphy *wiphy, + struct sk_buff *msg) +{ + struct nlattr *nl_combis; + int i, j; + + nl_combis = nla_nest_start(msg, + NL80211_ATTR_INTERFACE_COMBINATIONS); + if (!nl_combis) + goto nla_put_failure; + + for (i = 0; i < wiphy->n_iface_combinations; i++) { + const struct ieee80211_iface_combination *c; + struct nlattr *nl_combi, *nl_limits; + + c = &wiphy->iface_combinations[i]; + + nl_combi = nla_nest_start(msg, i + 1); + if (!nl_combi) + goto nla_put_failure; + + nl_limits = nla_nest_start(msg, NL80211_IFACE_COMB_LIMITS); + if (!nl_limits) + goto nla_put_failure; + + for (j = 0; j < c->n_limits; j++) { + struct nlattr *nl_limit; + + nl_limit = nla_nest_start(msg, j + 1); + if (!nl_limit) + goto nla_put_failure; + NLA_PUT_U32(msg, NL80211_IFACE_LIMIT_MAX, + c->limits[j].max); + if (nl80211_put_iftypes(msg, NL80211_IFACE_LIMIT_TYPES, + c->limits[j].types)) + goto nla_put_failure; + nla_nest_end(msg, nl_limit); + } + + nla_nest_end(msg, nl_limits); + + if (c->beacon_int_infra_match) + NLA_PUT_FLAG(msg, + NL80211_IFACE_COMB_STA_AP_BI_MATCH); + NLA_PUT_U32(msg, NL80211_IFACE_COMB_NUM_CHANNELS, + c->num_different_channels); + NLA_PUT_U32(msg, NL80211_IFACE_COMB_MAXNUM, + c->max_interfaces); + + nla_nest_end(msg, nl_combi); + } + + nla_nest_end(msg, nl_combis); + + return 0; +nla_put_failure: + return -ENOBUFS; +} + static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, struct cfg80211_registered_device *dev) { @@ -571,13 +653,11 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, struct nlattr *nl_bands, *nl_band; struct nlattr *nl_freqs, *nl_freq; struct nlattr *nl_rates, *nl_rate; - struct nlattr *nl_modes; struct nlattr *nl_cmds; enum ieee80211_band band; struct ieee80211_channel *chan; struct ieee80211_rate *rate; int i; - u16 ifmodes = dev->wiphy.interface_modes; const struct ieee80211_txrx_stypes *mgmt_stypes = dev->wiphy.mgmt_stypes; @@ -637,20 +717,10 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, } } - nl_modes = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_IFTYPES); - if (!nl_modes) + if (nl80211_put_iftypes(msg, NL80211_ATTR_SUPPORTED_IFTYPES, + dev->wiphy.interface_modes)) goto nla_put_failure; - i = 0; - while (ifmodes) { - if (ifmodes & 1) - NLA_PUT_FLAG(msg, i); - ifmodes >>= 1; - i++; - } - - nla_nest_end(msg, nl_modes); - nl_bands = nla_nest_start(msg, NL80211_ATTR_WIPHY_BANDS); if (!nl_bands) goto nla_put_failure; @@ -865,6 +935,13 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, nla_nest_end(msg, nl_wowlan); } + if (nl80211_put_iftypes(msg, NL80211_ATTR_SOFTWARE_IFTYPES, + dev->wiphy.software_iftypes)) + goto nla_put_failure; + + if (nl80211_put_iface_combinations(&dev->wiphy, msg)) + goto nla_put_failure; + return genlmsg_end(msg, hdr); nla_put_failure: diff --git a/net/wireless/util.c b/net/wireless/util.c index 414c9f604df6..95e4e254da0a 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -803,6 +803,11 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, return -EBUSY; if (ntype != otype) { + err = cfg80211_can_change_interface(rdev, dev->ieee80211_ptr, + ntype); + if (err) + return err; + dev->ieee80211_ptr->use_4addr = false; dev->ieee80211_ptr->mesh_id_up_len = 0; @@ -921,3 +926,78 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, return res; } + +int cfg80211_can_change_interface(struct cfg80211_registered_device *rdev, + struct wireless_dev 
*wdev, + enum nl80211_iftype iftype) +{ + struct wireless_dev *wdev_iter; + int num[NUM_NL80211_IFTYPES]; + int total = 1; + int i, j; + + ASSERT_RTNL(); + + /* Always allow software iftypes */ + if (rdev->wiphy.software_iftypes & BIT(iftype)) + return 0; + + /* + * Drivers will gradually all set this flag, until all + * have it we only enforce for those that set it. + */ + if (!(rdev->wiphy.flags & WIPHY_FLAG_ENFORCE_COMBINATIONS)) + return 0; + + memset(num, 0, sizeof(num)); + + num[iftype] = 1; + + mutex_lock(&rdev->devlist_mtx); + list_for_each_entry(wdev_iter, &rdev->netdev_list, list) { + if (wdev_iter == wdev) + continue; + if (!netif_running(wdev_iter->netdev)) + continue; + + if (rdev->wiphy.software_iftypes & BIT(wdev_iter->iftype)) + continue; + + num[wdev_iter->iftype]++; + total++; + } + mutex_unlock(&rdev->devlist_mtx); + + for (i = 0; i < rdev->wiphy.n_iface_combinations; i++) { + const struct ieee80211_iface_combination *c; + struct ieee80211_iface_limit *limits; + + c = &rdev->wiphy.iface_combinations[i]; + + limits = kmemdup(c->limits, sizeof(limits[0]) * c->n_limits, + GFP_KERNEL); + if (!limits) + return -ENOMEM; + if (total > c->max_interfaces) + goto cont; + + for (iftype = 0; iftype < NUM_NL80211_IFTYPES; iftype++) { + if (rdev->wiphy.software_iftypes & BIT(iftype)) + continue; + for (j = 0; j < c->n_limits; j++) { + if (!(limits[j].types & iftype)) + continue; + if (limits[j].max < num[iftype]) + goto cont; + limits[j].max -= num[iftype]; + } + } + /* yay, it fits */ + kfree(limits); + return 0; + cont: + kfree(limits); + } + + return -EBUSY; +} -- cgit v1.2.3-71-gd317 From 57cf8043a64b56a10b9f194572548a3dfb62e596 Mon Sep 17 00:00:00 2001 From: Javier Cardona Date: Fri, 13 May 2011 10:45:43 -0700 Subject: nl80211: Move peer link state definition to nl80211 These definitions need to be exposed now that we can set the peer link states via NL80211_ATTR_STA_PLINK_STATE. They were already being (opaquely) reported by NL80211_STA_INFO_PLINK_STATE. Signed-off-by: Javier Cardona Signed-off-by: John W. Linville --- include/linux/nl80211.h | 41 +++++++++++++++++++++++++-- include/net/cfg80211.h | 27 ------------------ net/mac80211/cfg.c | 6 ++-- net/mac80211/mesh_plink.c | 72 +++++++++++++++++++++++------------------------ net/mac80211/rx.c | 2 +- net/mac80211/sta_info.c | 2 +- net/mac80211/sta_info.h | 6 ++-- net/wireless/nl80211.c | 2 +- 8 files changed, 83 insertions(+), 75 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 0ea497cb607c..fc3a94821b1e 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -973,9 +973,10 @@ enum nl80211_commands { * @NL80211_ATTR_SUPPORT_MESH_AUTH: Currently, this means the underlying driver * allows auth frames in a mesh to be passed to userspace for processing via * the @NL80211_MESH_SETUP_USERSPACE_AUTH flag. - * @NL80211_ATTR_STA_PLINK_STATE: The state of a mesh peer link. Used when - * userspace is driving the peer link management state machine. - * @NL80211_MESH_SETUP_USERSPACE_AMPE must be enabled. + * @NL80211_ATTR_STA_PLINK_STATE: The state of a mesh peer link as + * defined in &enum nl80211_plink_state. Used when userspace is + * driving the peer link management state machine. + * @NL80211_MESH_SETUP_USERSPACE_AMPE must be enabled. 
* * @NL80211_ATTR_WOWLAN_SUPPORTED: indicates, as part of the wiphy capabilities, * the supported WoWLAN triggers @@ -1396,6 +1397,7 @@ enum nl80211_sta_bss_param { * @NL80211_STA_INFO_LLID: the station's mesh LLID * @NL80211_STA_INFO_PLID: the station's mesh PLID * @NL80211_STA_INFO_PLINK_STATE: peer link state for the station + * (see %enum nl80211_plink_state) * @NL80211_STA_INFO_RX_BITRATE: last unicast data frame rx rate, nested * attribute, like NL80211_STA_INFO_TX_BITRATE. * @NL80211_STA_INFO_BSS_PARAM: current station's view of BSS, nested attribute @@ -2326,4 +2328,37 @@ enum nl80211_if_combination_attrs { MAX_NL80211_IFACE_COMB = NUM_NL80211_IFACE_COMB - 1 }; + +/** + * enum nl80211_plink_state - state of a mesh peer link finite state machine + * + * @NL80211_PLINK_LISTEN: initial state, considered the implicit + * state of non existant mesh peer links + * @NL80211_PLINK_OPN_SNT: mesh plink open frame has been sent to + * this mesh peer + * @NL80211_PLINK_OPN_RCVD: mesh plink open frame has been received + * from this mesh peer + * @NL80211_PLINK_CNF_RCVD: mesh plink confirm frame has been + * received from this mesh peer + * @NL80211_PLINK_ESTAB: mesh peer link is established + * @NL80211_PLINK_HOLDING: mesh peer link is being closed or cancelled + * @NL80211_PLINK_BLOCKED: all frames transmitted from this mesh + * plink are discarded + * @NUM_NL80211_PLINK_STATES: number of peer link states + * @MAX_NL80211_PLINK_STATES: highest numerical value of plink states + */ +enum nl80211_plink_state { + NL80211_PLINK_LISTEN, + NL80211_PLINK_OPN_SNT, + NL80211_PLINK_OPN_RCVD, + NL80211_PLINK_CNF_RCVD, + NL80211_PLINK_ESTAB, + NL80211_PLINK_HOLDING, + NL80211_PLINK_BLOCKED, + + /* keep last */ + NUM_NL80211_PLINK_STATES, + MAX_NL80211_PLINK_STATES = NUM_NL80211_PLINK_STATES - 1 +}; + #endif /* __LINUX_NL80211_H */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 04afcfb9eaf4..0a2d795d35de 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -371,33 +371,6 @@ enum plink_actions { PLINK_ACTION_BLOCK, }; -/** - * enum plink_states - state of a mesh peer link finite state machine - * - * @PLINK_LISTEN: initial state, considered the implicit state of non - * existant mesh peer links - * @PLINK_OPN_SNT: mesh plink open frame has been sent to this mesh - * peer @PLINK_OPN_RCVD: mesh plink open frame has been received from - * this mesh peer - * @PLINK_CNF_RCVD: mesh plink confirm frame has been received from - * this mesh peer - * @PLINK_ESTAB: mesh peer link is established - * @PLINK_HOLDING: mesh peer link is being closed or cancelled - * @PLINK_BLOCKED: all frames transmitted from this mesh plink are - * discarded - * @PLINK_INVALID: reserved - */ -enum plink_state { - PLINK_LISTEN, - PLINK_OPN_SNT, - PLINK_OPN_RCVD, - PLINK_CNF_RCVD, - PLINK_ESTAB, - PLINK_HOLDING, - PLINK_BLOCKED, - PLINK_INVALID, -}; - /** * struct station_parameters - station parameters * diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 6ecd5862735d..be70c70d3f5b 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -730,9 +730,9 @@ static void sta_apply_parameters(struct ieee80211_local *local, #ifdef CONFIG_MAC80211_MESH if (sdata->u.mesh.security & IEEE80211_MESH_SEC_SECURED) switch (params->plink_state) { - case PLINK_LISTEN: - case PLINK_ESTAB: - case PLINK_BLOCKED: + case NL80211_PLINK_LISTEN: + case NL80211_PLINK_ESTAB: + case NL80211_PLINK_BLOCKED: sta->plink_state = params->plink_state; break; default: diff --git a/net/mac80211/mesh_plink.c 
b/net/mac80211/mesh_plink.c index 2c37bee3095a..f4adc0917888 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -83,7 +83,7 @@ void mesh_plink_dec_estab_count(struct ieee80211_sub_if_data *sdata) */ static inline void mesh_plink_fsm_restart(struct sta_info *sta) { - sta->plink_state = PLINK_LISTEN; + sta->plink_state = NL80211_PLINK_LISTEN; sta->llid = sta->plid = sta->reason = 0; sta->plink_retries = 0; } @@ -126,11 +126,11 @@ static bool __mesh_plink_deactivate(struct sta_info *sta) struct ieee80211_sub_if_data *sdata = sta->sdata; bool deactivated = false; - if (sta->plink_state == PLINK_ESTAB) { + if (sta->plink_state == NL80211_PLINK_ESTAB) { mesh_plink_dec_estab_count(sdata); deactivated = true; } - sta->plink_state = PLINK_BLOCKED; + sta->plink_state = NL80211_PLINK_BLOCKED; mesh_path_flush_by_nexthop(sta); return deactivated; @@ -268,7 +268,7 @@ void mesh_neighbour_update(u8 *hw_addr, u32 rates, sta->last_rx = jiffies; sta->sta.supp_rates[local->hw.conf.channel->band] = rates; if (mesh_peer_accepts_plinks(elems) && - sta->plink_state == PLINK_LISTEN && + sta->plink_state == NL80211_PLINK_LISTEN && sdata->u.mesh.accepting_plinks && sdata->u.mesh.mshcfg.auto_open_plinks) mesh_plink_open(sta); @@ -308,8 +308,8 @@ static void mesh_plink_timer(unsigned long data) sdata = sta->sdata; switch (sta->plink_state) { - case PLINK_OPN_RCVD: - case PLINK_OPN_SNT: + case NL80211_PLINK_OPN_RCVD: + case NL80211_PLINK_OPN_SNT: /* retry timer */ if (sta->plink_retries < dot11MeshMaxRetries(sdata)) { u32 rand; @@ -328,17 +328,17 @@ static void mesh_plink_timer(unsigned long data) } reason = cpu_to_le16(MESH_MAX_RETRIES); /* fall through on else */ - case PLINK_CNF_RCVD: + case NL80211_PLINK_CNF_RCVD: /* confirm timer */ if (!reason) reason = cpu_to_le16(MESH_CONFIRM_TIMEOUT); - sta->plink_state = PLINK_HOLDING; + sta->plink_state = NL80211_PLINK_HOLDING; mod_plink_timer(sta, dot11MeshHoldingTimeout(sdata)); spin_unlock_bh(&sta->lock); mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->sta.addr, llid, plid, reason); break; - case PLINK_HOLDING: + case NL80211_PLINK_HOLDING: /* holding timer */ del_timer(&sta->plink_timer); mesh_plink_fsm_restart(sta); @@ -386,11 +386,11 @@ int mesh_plink_open(struct sta_info *sta) spin_lock_bh(&sta->lock); get_random_bytes(&llid, 2); sta->llid = llid; - if (sta->plink_state != PLINK_LISTEN) { + if (sta->plink_state != NL80211_PLINK_LISTEN) { spin_unlock_bh(&sta->lock); return -EBUSY; } - sta->plink_state = PLINK_OPN_SNT; + sta->plink_state = NL80211_PLINK_OPN_SNT; mesh_plink_timer_set(sta, dot11MeshRetryTimeout(sdata)); spin_unlock_bh(&sta->lock); mpl_dbg("Mesh plink: starting establishment with %pM\n", @@ -407,7 +407,7 @@ void mesh_plink_block(struct sta_info *sta) spin_lock_bh(&sta->lock); deactivated = __mesh_plink_deactivate(sta); - sta->plink_state = PLINK_BLOCKED; + sta->plink_state = NL80211_PLINK_BLOCKED; spin_unlock_bh(&sta->lock); if (deactivated) @@ -430,13 +430,13 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m __le16 plid, llid, reason; #ifdef CONFIG_MAC80211_VERBOSE_MPL_DEBUG static const char *mplstates[] = { - [PLINK_LISTEN] = "LISTEN", - [PLINK_OPN_SNT] = "OPN-SNT", - [PLINK_OPN_RCVD] = "OPN-RCVD", - [PLINK_CNF_RCVD] = "CNF_RCVD", - [PLINK_ESTAB] = "ESTAB", - [PLINK_HOLDING] = "HOLDING", - [PLINK_BLOCKED] = "BLOCKED" + [NL80211_PLINK_LISTEN] = "LISTEN", + [NL80211_PLINK_OPN_SNT] = "OPN-SNT", + [NL80211_PLINK_OPN_RCVD] = "OPN-RCVD", + [NL80211_PLINK_CNF_RCVD] = "CNF_RCVD", + [NL80211_PLINK_ESTAB] = 
"ESTAB", + [NL80211_PLINK_HOLDING] = "HOLDING", + [NL80211_PLINK_BLOCKED] = "BLOCKED" }; #endif @@ -502,7 +502,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m return; } - if (sta && sta->plink_state == PLINK_BLOCKED) { + if (sta && sta->plink_state == NL80211_PLINK_BLOCKED) { rcu_read_unlock(); return; } @@ -572,7 +572,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m event = CNF_ACPT; break; case PLINK_CLOSE: - if (sta->plink_state == PLINK_ESTAB) + if (sta->plink_state == NL80211_PLINK_ESTAB) /* Do not check for llid or plid. This does not * follow the standard but since multiple plinks * per sta are not supported, it is necessary in @@ -607,14 +607,14 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m reason = 0; switch (sta->plink_state) { /* spin_unlock as soon as state is updated at each case */ - case PLINK_LISTEN: + case NL80211_PLINK_LISTEN: switch (event) { case CLS_ACPT: mesh_plink_fsm_restart(sta); spin_unlock_bh(&sta->lock); break; case OPN_ACPT: - sta->plink_state = PLINK_OPN_RCVD; + sta->plink_state = NL80211_PLINK_OPN_RCVD; sta->plid = plid; get_random_bytes(&llid, 2); sta->llid = llid; @@ -631,7 +631,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m } break; - case PLINK_OPN_SNT: + case NL80211_PLINK_OPN_SNT: switch (event) { case OPN_RJCT: case CNF_RJCT: @@ -640,7 +640,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m if (!reason) reason = cpu_to_le16(MESH_CLOSE_RCVD); sta->reason = reason; - sta->plink_state = PLINK_HOLDING; + sta->plink_state = NL80211_PLINK_HOLDING; if (!mod_plink_timer(sta, dot11MeshHoldingTimeout(sdata))) sta->ignore_plink_timer = true; @@ -652,7 +652,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m break; case OPN_ACPT: /* retry timer is left untouched */ - sta->plink_state = PLINK_OPN_RCVD; + sta->plink_state = NL80211_PLINK_OPN_RCVD; sta->plid = plid; llid = sta->llid; spin_unlock_bh(&sta->lock); @@ -660,7 +660,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m plid, 0); break; case CNF_ACPT: - sta->plink_state = PLINK_CNF_RCVD; + sta->plink_state = NL80211_PLINK_CNF_RCVD; if (!mod_plink_timer(sta, dot11MeshConfirmTimeout(sdata))) sta->ignore_plink_timer = true; @@ -673,7 +673,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m } break; - case PLINK_OPN_RCVD: + case NL80211_PLINK_OPN_RCVD: switch (event) { case OPN_RJCT: case CNF_RJCT: @@ -682,7 +682,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m if (!reason) reason = cpu_to_le16(MESH_CLOSE_RCVD); sta->reason = reason; - sta->plink_state = PLINK_HOLDING; + sta->plink_state = NL80211_PLINK_HOLDING; if (!mod_plink_timer(sta, dot11MeshHoldingTimeout(sdata))) sta->ignore_plink_timer = true; @@ -700,7 +700,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m break; case CNF_ACPT: del_timer(&sta->plink_timer); - sta->plink_state = PLINK_ESTAB; + sta->plink_state = NL80211_PLINK_ESTAB; spin_unlock_bh(&sta->lock); mesh_plink_inc_estab_count(sdata); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON); @@ -713,7 +713,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m } break; - case PLINK_CNF_RCVD: + case NL80211_PLINK_CNF_RCVD: switch (event) { case OPN_RJCT: case CNF_RJCT: @@ -722,7 +722,7 @@ void 
mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m if (!reason) reason = cpu_to_le16(MESH_CLOSE_RCVD); sta->reason = reason; - sta->plink_state = PLINK_HOLDING; + sta->plink_state = NL80211_PLINK_HOLDING; if (!mod_plink_timer(sta, dot11MeshHoldingTimeout(sdata))) sta->ignore_plink_timer = true; @@ -734,7 +734,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m break; case OPN_ACPT: del_timer(&sta->plink_timer); - sta->plink_state = PLINK_ESTAB; + sta->plink_state = NL80211_PLINK_ESTAB; spin_unlock_bh(&sta->lock); mesh_plink_inc_estab_count(sdata); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON); @@ -749,13 +749,13 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m } break; - case PLINK_ESTAB: + case NL80211_PLINK_ESTAB: switch (event) { case CLS_ACPT: reason = cpu_to_le16(MESH_CLOSE_RCVD); sta->reason = reason; deactivated = __mesh_plink_deactivate(sta); - sta->plink_state = PLINK_HOLDING; + sta->plink_state = NL80211_PLINK_HOLDING; llid = sta->llid; mod_plink_timer(sta, dot11MeshHoldingTimeout(sdata)); spin_unlock_bh(&sta->lock); @@ -775,7 +775,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m break; } break; - case PLINK_HOLDING: + case NL80211_PLINK_HOLDING: switch (event) { case CLS_ACPT: if (del_timer(&sta->plink_timer)) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 1b9413fb3839..3a9515cb7ce1 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -490,7 +490,7 @@ ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx) * establisment frame, beacon or probe, drop the frame. */ - if (!rx->sta || sta_plink_state(rx->sta) != PLINK_ESTAB) { + if (!rx->sta || sta_plink_state(rx->sta) != NL80211_PLINK_ESTAB) { struct ieee80211_mgmt *mgmt; if (!ieee80211_is_mgmt(hdr->frame_control)) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 82ab6b4643fc..4a15f9603562 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -277,7 +277,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, #endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ #ifdef CONFIG_MAC80211_MESH - sta->plink_state = PLINK_LISTEN; + sta->plink_state = NL80211_PLINK_LISTEN; init_timer(&sta->plink_timer); #endif diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index d6b566076f05..c6ae8718bd57 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -318,7 +318,7 @@ struct sta_info { u8 plink_retries; bool ignore_plink_timer; bool plink_timer_was_running; - enum plink_state plink_state; + enum nl80211_plink_state plink_state; u32 plink_timeout; struct timer_list plink_timer; #endif @@ -336,12 +336,12 @@ struct sta_info { struct ieee80211_sta sta; }; -static inline enum plink_state sta_plink_state(struct sta_info *sta) +static inline enum nl80211_plink_state sta_plink_state(struct sta_info *sta) { #ifdef CONFIG_MAC80211_MESH return sta->plink_state; #endif - return PLINK_LISTEN; + return NL80211_PLINK_LISTEN; } static inline void set_sta_flags(struct sta_info *sta, const u32 flags) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index beac296b1fde..2222ce08ee91 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2335,7 +2335,7 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) memset(¶ms, 0, sizeof(params)); params.listen_interval = -1; - params.plink_state = PLINK_INVALID; + params.plink_state = -1; if (info->attrs[NL80211_ATTR_STA_AID]) return -EINVAL; -- cgit 
v1.2.3-71-gd317 From 2064af917b3ba7589070064ca4ed12cecd99a63c Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Fri, 29 Apr 2011 00:37:26 +0200 Subject: PM: Revert "driver core: platform_bus: allow runtime override of dev_pm_ops" The platform_bus_set_pm_ops() operation is deprecated in favor of the new device power domain infrastructure implemented in commit 7538e3db6e015e890825fbd9f8659952896ddd5b (PM: add support for device power domains). Signed-off-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- drivers/base/platform.c | 35 ----------------------------------- include/linux/platform_device.h | 3 --- 2 files changed, 38 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 079c18a5e471..48425f183029 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -916,41 +916,6 @@ struct bus_type platform_bus_type = { }; EXPORT_SYMBOL_GPL(platform_bus_type); -/** - * platform_bus_get_pm_ops() - return pointer to busses dev_pm_ops - * - * This function can be used by platform code to get the current - * set of dev_pm_ops functions used by the platform_bus_type. - */ -const struct dev_pm_ops * __init platform_bus_get_pm_ops(void) -{ - return platform_bus_type.pm; -} - -/** - * platform_bus_set_pm_ops() - update dev_pm_ops for the platform_bus_type - * - * @pm: pointer to new dev_pm_ops struct to be used for platform_bus_type - * - * Platform code can override the dev_pm_ops methods of - * platform_bus_type by using this function. It is expected that - * platform code will first do a platform_bus_get_pm_ops(), then - * kmemdup it, then customize selected methods and pass a pointer to - * the new struct dev_pm_ops to this function. - * - * Since platform-specific code is customizing methods for *all* - * devices (not just platform-specific devices) it is expected that - * any custom overrides of these functions will keep existing behavior - * and simply extend it. For example, any customization of the - * runtime PM methods should continue to call the pm_generic_* - * functions as the default ones do in addition to the - * platform-specific behavior. - */ -void __init platform_bus_set_pm_ops(const struct dev_pm_ops *pm) -{ - platform_bus_type.pm = pm; -} - int __init platform_bus_init(void) { int error; diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index e0093e061b08..ede1a80e3358 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -150,9 +150,6 @@ extern struct platform_device *platform_create_bundle(struct platform_driver *dr struct resource *res, unsigned int n_res, const void *data, size_t size); -extern const struct dev_pm_ops * platform_bus_get_pm_ops(void); -extern void platform_bus_set_pm_ops(const struct dev_pm_ops *pm); - /* early platform driver interface */ struct early_platform_driver { const char *class_str; -- cgit v1.2.3-71-gd317 From f0201738b61b1adcf6b2c4719c5c415745014c1c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 12 Apr 2011 19:06:39 -0400 Subject: ftrace: Avoid recording mcount on .init sections directly The init and exit sections should not be traced and adding a call to mcount to them is a waste of text and instruction cache. Have the macro section attributes include notrace to ignore these functions for tracing from the build. 
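As a rough illustration only (not part of the patch), the effect on a driver's teardown path would look like the sketch below; the my_driver_exit() name is hypothetical:

#include <linux/init.h>
#include <linux/module.h>

/* With notrace folded into __exit, gcc's -pg instrumentation no longer
 * emits an mcount call here, so this function is never recorded by the
 * function tracer and its mcount site wastes no space. */
static void __exit my_driver_exit(void)	/* hypothetical name */
{
	/* resource teardown would go here */
}
module_exit(my_driver_exit);
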
Link: http://lkml.kernel.org/r/20110421023738.953028219@goodmis.org Signed-off-by: Steven Rostedt --- include/linux/init.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init.h b/include/linux/init.h index 577671c55153..9146f39cdddf 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -79,29 +79,29 @@ #define __exitused __used #endif -#define __exit __section(.exit.text) __exitused __cold +#define __exit __section(.exit.text) __exitused __cold notrace /* Used for HOTPLUG */ -#define __devinit __section(.devinit.text) __cold +#define __devinit __section(.devinit.text) __cold notrace #define __devinitdata __section(.devinit.data) #define __devinitconst __section(.devinit.rodata) -#define __devexit __section(.devexit.text) __exitused __cold +#define __devexit __section(.devexit.text) __exitused __cold notrace #define __devexitdata __section(.devexit.data) #define __devexitconst __section(.devexit.rodata) /* Used for HOTPLUG_CPU */ -#define __cpuinit __section(.cpuinit.text) __cold +#define __cpuinit __section(.cpuinit.text) __cold notrace #define __cpuinitdata __section(.cpuinit.data) #define __cpuinitconst __section(.cpuinit.rodata) -#define __cpuexit __section(.cpuexit.text) __exitused __cold +#define __cpuexit __section(.cpuexit.text) __exitused __cold notrace #define __cpuexitdata __section(.cpuexit.data) #define __cpuexitconst __section(.cpuexit.rodata) /* Used for MEMORY_HOTPLUG */ -#define __meminit __section(.meminit.text) __cold +#define __meminit __section(.meminit.text) __cold notrace #define __meminitdata __section(.meminit.data) #define __meminitconst __section(.meminit.rodata) -#define __memexit __section(.memexit.text) __exitused __cold +#define __memexit __section(.memexit.text) __exitused __cold notrace #define __memexitdata __section(.memexit.data) #define __memexitconst __section(.memexit.rodata) -- cgit v1.2.3-71-gd317 From a144c6a6c924aa1da04dd77fb84b89927354fdff Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 6 May 2011 20:09:42 +0200 Subject: PM: Print a warning if firmware is requested when tasks are frozen Some drivers erroneously use request_firmware() from their ->resume() (or ->thaw(), or ->restore()) callbacks, which is not going to work unless the firmware has been built in. This causes system resume to stall until the firmware-loading timeout expires, which makes users think that the resume has failed and reboot their machines unnecessarily. For this reason, make _request_firmware() print a warning and return immediately with error code if it has been called when tasks are frozen and it's impossible to start any new usermode helpers. Signed-off-by: Rafael J. 
Wysocki Acked-by: Greg Kroah-Hartman Reviewed-by: Valdis Kletnieks --- drivers/base/firmware_class.c | 5 +++++ include/linux/kmod.h | 5 +++++ kernel/kmod.c | 9 +++++++++ 3 files changed, 19 insertions(+) (limited to 'include/linux') diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index 8c798ef7f13f..bbb03e6f7255 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -521,6 +521,11 @@ static int _request_firmware(const struct firmware **firmware_p, if (!firmware_p) return -EINVAL; + if (WARN_ON(usermodehelper_is_disabled())) { + dev_err(device, "firmware: %s will not be loaded\n", name); + return -EBUSY; + } + *firmware_p = firmware = kzalloc(sizeof(*firmware), GFP_KERNEL); if (!firmware) { dev_err(device, "%s: kmalloc(struct firmware) failed\n", diff --git a/include/linux/kmod.h b/include/linux/kmod.h index 6efd7a78de6a..7f3dbcb78116 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -111,7 +111,12 @@ call_usermodehelper(char *path, char **argv, char **envp, enum umh_wait wait) extern void usermodehelper_init(void); +#ifdef CONFIG_PM_SLEEP extern int usermodehelper_disable(void); extern void usermodehelper_enable(void); +extern bool usermodehelper_is_disabled(void); +#else +static inline bool usermodehelper_is_disabled(void) { return false; } +#endif #endif /* __LINUX_KMOD_H__ */ diff --git a/kernel/kmod.c b/kernel/kmod.c index 9cd0591c96a2..9ab513bd0c3c 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -301,6 +301,15 @@ void usermodehelper_enable(void) usermodehelper_disabled = 0; } +/** + * usermodehelper_is_disabled - check if new helpers are allowed to be started + */ +bool usermodehelper_is_disabled(void) +{ + return usermodehelper_disabled; +} +EXPORT_SYMBOL_GPL(usermodehelper_is_disabled); + static void helper_lock(void) { atomic_inc(&running_helpers); -- cgit v1.2.3-71-gd317 From 13d53f8775c6a00b070a3eef6833795412eb7fcd Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Tue, 10 May 2011 21:27:34 +0200 Subject: kmod: always provide usermodehelper_disable() We need to prevent kernel-forked processes during system poweroff. Such processes try to access the filesystem whose disks we are trying to shutdown at the same time. This causes delays and exceptions in the storage drivers. A follow-up patch will add these calls and need usermodehelper_disable() also on systems without suspend support. Signed-off-by: Kay Sievers Signed-off-by: Rafael J. 
Wysocki --- include/linux/kmod.h | 4 ---- kernel/kmod.c | 7 ------- 2 files changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kmod.h b/include/linux/kmod.h index 7f3dbcb78116..310231823852 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -111,12 +111,8 @@ call_usermodehelper(char *path, char **argv, char **envp, enum umh_wait wait) extern void usermodehelper_init(void); -#ifdef CONFIG_PM_SLEEP extern int usermodehelper_disable(void); extern void usermodehelper_enable(void); extern bool usermodehelper_is_disabled(void); -#else -static inline bool usermodehelper_is_disabled(void) { return false; } -#endif #endif /* __LINUX_KMOD_H__ */ diff --git a/kernel/kmod.c b/kernel/kmod.c index 9ab513bd0c3c..5ae0ff38425f 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -245,7 +245,6 @@ static void __call_usermodehelper(struct work_struct *work) } } -#ifdef CONFIG_PM_SLEEP /* * If set, call_usermodehelper_exec() will exit immediately returning -EBUSY * (used for preventing user land processes from being created after the user @@ -321,12 +320,6 @@ static void helper_unlock(void) if (atomic_dec_and_test(&running_helpers)) wake_up(&running_helpers_waitq); } -#else /* CONFIG_PM_SLEEP */ -#define usermodehelper_disabled 0 - -static inline void helper_lock(void) {} -static inline void helper_unlock(void) {} -#endif /* CONFIG_PM_SLEEP */ /** * call_usermodehelper_setup - prepare to call a usermode helper -- cgit v1.2.3-71-gd317 From 91e7c75ba93c48a82670d630b9daac92ff70095d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 17 May 2011 23:26:00 +0200 Subject: PM: Allow drivers to allocate memory from .prepare() callbacks safely If device drivers allocate substantial amounts of memory (above 1 MB) in their hibernate .freeze() callbacks (or in their legacy suspend callbcks during hibernation), the subsequent creation of hibernate image may fail due to the lack of memory. This is the case, because the drivers' .freeze() callbacks are executed after the hibernate memory preallocation has been carried out and the preallocated amount of memory may be too small to cover the new driver allocations. Unfortunately, the drivers' .prepare() callbacks also are executed after the hibernate memory preallocation has completed, so they are not suitable for allocating additional memory either. Thus the only way a driver can safely allocate memory during hibernation is to use a hibernate/suspend notifier. However, the notifiers are called before the freezing of user space and the drivers wanting to use them for allocating additional memory may not know how much memory needs to be allocated at that point. To let device drivers overcome this difficulty rework the hibernation sequence so that the memory preallocation is carried out after the drivers' .prepare() callbacks have been executed, so that the .prepare() callbacks can be used for allocating additional memory to be used by the drivers' .freeze() callbacks. Update documentation to match the new behavior of the code. Signed-off-by: Rafael J. 
Wysocki --- Documentation/power/devices.txt | 14 +++++++---- Documentation/power/notifiers.txt | 51 ++++++++++++++++++--------------------- drivers/base/power/main.c | 18 +++++++++----- include/linux/pm.h | 4 +++ kernel/power/hibernate.c | 17 ++++++++++--- 5 files changed, 61 insertions(+), 43 deletions(-) (limited to 'include/linux') diff --git a/Documentation/power/devices.txt b/Documentation/power/devices.txt index 1971bcf48a60..88880839ece4 100644 --- a/Documentation/power/devices.txt +++ b/Documentation/power/devices.txt @@ -279,11 +279,15 @@ When the system goes into the standby or memory sleep state, the phases are: time.) Unlike the other suspend-related phases, during the prepare phase the device tree is traversed top-down. - The prepare phase uses only a bus callback. After the callback method - returns, no new children may be registered below the device. The method - may also prepare the device or driver in some way for the upcoming - system power transition, but it should not put the device into a - low-power state. + In addition to that, if device drivers need to allocate additional + memory to be able to hadle device suspend correctly, that should be + done in the prepare phase. + + After the prepare callback method returns, no new children may be + registered below the device. The method may also prepare the device or + driver in some way for the upcoming system power transition (for + example, by allocating additional memory required for this purpose), but + it should not put the device into a low-power state. 2. The suspend methods should quiesce the device to stop it from performing I/O. They also may save the device registers and put it into the diff --git a/Documentation/power/notifiers.txt b/Documentation/power/notifiers.txt index cf980709122a..c2a4a346c0d9 100644 --- a/Documentation/power/notifiers.txt +++ b/Documentation/power/notifiers.txt @@ -1,46 +1,41 @@ Suspend notifiers - (C) 2007 Rafael J. Wysocki , GPL - -There are some operations that device drivers may want to carry out in their -.suspend() routines, but shouldn't, because they can cause the hibernation or -suspend to fail. For example, a driver may want to allocate a substantial amount -of memory (like 50 MB) in .suspend(), but that shouldn't be done after the -swsusp's memory shrinker has run. - -Also, there may be some operations, that subsystems want to carry out before a -hibernation/suspend or after a restore/resume, requiring the system to be fully -functional, so the drivers' .suspend() and .resume() routines are not suitable -for this purpose. For example, device drivers may want to upload firmware to -their devices after a restore from a hibernation image, but they cannot do it by -calling request_firmware() from their .resume() routines (user land processes -are frozen at this point). The solution may be to load the firmware into -memory before processes are frozen and upload it from there in the .resume() -routine. Of course, a hibernation notifier may be used for this purpose. - -The subsystems that have such needs can register suspend notifiers that will be -called upon the following events by the suspend core: + (C) 2007-2011 Rafael J. Wysocki , GPL + +There are some operations that subsystems or drivers may want to carry out +before hibernation/suspend or after restore/resume, but they require the system +to be fully functional, so the drivers' and subsystems' .suspend() and .resume() +or even .prepare() and .complete() callbacks are not suitable for this purpose. 
+For example, device drivers may want to upload firmware to their devices after +resume/restore, but they cannot do it by calling request_firmware() from their +.resume() or .complete() routines (user land processes are frozen at these +points). The solution may be to load the firmware into memory before processes +are frozen and upload it from there in the .resume() routine. +A suspend/hibernation notifier may be used for this purpose. + +The subsystems or drivers having such needs can register suspend notifiers that +will be called upon the following events by the PM core: PM_HIBERNATION_PREPARE The system is going to hibernate or suspend, tasks will be frozen immediately. PM_POST_HIBERNATION The system memory state has been restored from a - hibernation image or an error occurred during the - hibernation. Device drivers' .resume() callbacks have + hibernation image or an error occurred during + hibernation. Device drivers' restore callbacks have been executed and tasks have been thawed. PM_RESTORE_PREPARE The system is going to restore a hibernation image. - If all goes well the restored kernel will issue a + If all goes well, the restored kernel will issue a PM_POST_HIBERNATION notification. -PM_POST_RESTORE An error occurred during the hibernation restore. - Device drivers' .resume() callbacks have been executed +PM_POST_RESTORE An error occurred during restore from hibernation. + Device drivers' restore callbacks have been executed and tasks have been thawed. -PM_SUSPEND_PREPARE The system is preparing for a suspend. +PM_SUSPEND_PREPARE The system is preparing for suspend. PM_POST_SUSPEND The system has just resumed or an error occurred during - the suspend. Device drivers' .resume() callbacks have - been executed and tasks have been thawed. + suspend. Device drivers' resume callbacks have been + executed and tasks have been thawed. It is generally assumed that whatever the notifiers do for PM_HIBERNATION_PREPARE, should be undone for PM_POST_HIBERNATION. Analogously, diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 3b354560f306..aa6320207745 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -579,11 +579,13 @@ static bool is_async(struct device *dev) * Execute the appropriate "resume" callback for all devices whose status * indicates that they are suspended. */ -static void dpm_resume(pm_message_t state) +void dpm_resume(pm_message_t state) { struct device *dev; ktime_t starttime = ktime_get(); + might_sleep(); + mutex_lock(&dpm_list_mtx); pm_transition = state; async_error = 0; @@ -656,10 +658,12 @@ static void device_complete(struct device *dev, pm_message_t state) * Execute the ->complete() callbacks for all devices whose PM status is not * DPM_ON (this allows new devices to be registered). */ -static void dpm_complete(pm_message_t state) +void dpm_complete(pm_message_t state) { struct list_head list; + might_sleep(); + INIT_LIST_HEAD(&list); mutex_lock(&dpm_list_mtx); while (!list_empty(&dpm_prepared_list)) { @@ -688,7 +692,6 @@ static void dpm_complete(pm_message_t state) */ void dpm_resume_end(pm_message_t state) { - might_sleep(); dpm_resume(state); dpm_complete(state); } @@ -912,11 +915,13 @@ static int device_suspend(struct device *dev) * dpm_suspend - Execute "suspend" callbacks for all non-sysdev devices. * @state: PM transition of the system being carried out. 
*/ -static int dpm_suspend(pm_message_t state) +int dpm_suspend(pm_message_t state) { ktime_t starttime = ktime_get(); int error = 0; + might_sleep(); + mutex_lock(&dpm_list_mtx); pm_transition = state; async_error = 0; @@ -1003,10 +1008,12 @@ static int device_prepare(struct device *dev, pm_message_t state) * * Execute the ->prepare() callback(s) for all devices. */ -static int dpm_prepare(pm_message_t state) +int dpm_prepare(pm_message_t state) { int error = 0; + might_sleep(); + mutex_lock(&dpm_list_mtx); while (!list_empty(&dpm_list)) { struct device *dev = to_device(dpm_list.next); @@ -1055,7 +1062,6 @@ int dpm_suspend_start(pm_message_t state) { int error; - might_sleep(); error = dpm_prepare(state); if (!error) error = dpm_suspend(state); diff --git a/include/linux/pm.h b/include/linux/pm.h index 3cc3e7e589f0..dce7c7148771 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -533,10 +533,14 @@ struct dev_power_domain { extern void device_pm_lock(void); extern void dpm_resume_noirq(pm_message_t state); extern void dpm_resume_end(pm_message_t state); +extern void dpm_resume(pm_message_t state); +extern void dpm_complete(pm_message_t state); extern void device_pm_unlock(void); extern int dpm_suspend_noirq(pm_message_t state); extern int dpm_suspend_start(pm_message_t state); +extern int dpm_suspend(pm_message_t state); +extern int dpm_prepare(pm_message_t state); extern void __suspend_report_result(const char *function, void *fn, int ret); diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 95a2ac40f48c..f9bec56d8825 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -327,20 +327,25 @@ static int create_image(int platform_mode) int hibernation_snapshot(int platform_mode) { + pm_message_t msg = PMSG_RECOVER; int error; error = platform_begin(platform_mode); if (error) goto Close; + error = dpm_prepare(PMSG_FREEZE); + if (error) + goto Complete_devices; + /* Preallocate image memory before shutting down devices. */ error = hibernate_preallocate_memory(); if (error) - goto Close; + goto Complete_devices; suspend_console(); pm_restrict_gfp_mask(); - error = dpm_suspend_start(PMSG_FREEZE); + error = dpm_suspend(PMSG_FREEZE); if (error) goto Recover_platform; @@ -358,13 +363,17 @@ int hibernation_snapshot(int platform_mode) if (error || !in_suspend) swsusp_free(); - dpm_resume_end(in_suspend ? - (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); + msg = in_suspend ? (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE; + dpm_resume(msg); if (error || !in_suspend) pm_restore_gfp_mask(); resume_console(); + + Complete_devices: + dpm_complete(msg); + Close: platform_end(platform_mode); return error; -- cgit v1.2.3-71-gd317 From 6538df80194e305f1b78cafb556f4bb442f808b3 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 17 May 2011 23:26:21 +0200 Subject: PM: Introduce generic prepare and complete callbacks for subsystems Introduce generic .prepare() and .complete() power management callbacks, currently missing, that can be used by subsystems and power domains and export them. Provide NULL definitions of all the generic system sleep callbacks for CONFIG_PM_SLEEP unset. Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/generic_ops.c | 39 +++++++++++++++++++++++++++++++++++++++ include/linux/pm.h | 26 +++++++++++++++++++------- 2 files changed, 58 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/generic_ops.c b/drivers/base/power/generic_ops.c index 42f97f925629..cb3bb368681c 100644 --- a/drivers/base/power/generic_ops.c +++ b/drivers/base/power/generic_ops.c @@ -73,6 +73,23 @@ EXPORT_SYMBOL_GPL(pm_generic_runtime_resume); #endif /* CONFIG_PM_RUNTIME */ #ifdef CONFIG_PM_SLEEP +/** + * pm_generic_prepare - Generic routine preparing a device for power transition. + * @dev: Device to prepare. + * + * Prepare a device for a system-wide power transition. + */ +int pm_generic_prepare(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (drv && drv->pm && drv->pm->prepare) + ret = drv->pm->prepare(dev); + + return ret; +} + /** * __pm_generic_call - Generic suspend/freeze/poweroff/thaw subsystem callback. * @dev: Device to handle. @@ -213,16 +230,38 @@ int pm_generic_restore(struct device *dev) return __pm_generic_resume(dev, PM_EVENT_RESTORE); } EXPORT_SYMBOL_GPL(pm_generic_restore); + +/** + * pm_generic_complete - Generic routine competing a device power transition. + * @dev: Device to handle. + * + * Complete a device power transition during a system-wide power transition. + */ +void pm_generic_complete(struct device *dev) +{ + struct device_driver *drv = dev->driver; + + if (drv && drv->pm && drv->pm->complete) + drv->pm->complete(dev); + + /* + * Let runtime PM try to suspend devices that haven't been in use before + * going into the system-wide sleep state we're resuming from. + */ + pm_runtime_idle(dev); +} #endif /* CONFIG_PM_SLEEP */ struct dev_pm_ops generic_subsys_pm_ops = { #ifdef CONFIG_PM_SLEEP + .prepare = pm_generic_prepare, .suspend = pm_generic_suspend, .resume = pm_generic_resume, .freeze = pm_generic_freeze, .thaw = pm_generic_thaw, .poweroff = pm_generic_poweroff, .restore = pm_generic_restore, + .complete = pm_generic_complete, #endif #ifdef CONFIG_PM_RUNTIME .runtime_suspend = pm_generic_runtime_suspend, diff --git a/include/linux/pm.h b/include/linux/pm.h index dce7c7148771..3160648ccdda 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -550,6 +550,16 @@ extern void __suspend_report_result(const char *function, void *fn, int ret); } while (0) extern int device_pm_wait_for_dev(struct device *sub, struct device *dev); + +extern int pm_generic_prepare(struct device *dev); +extern int pm_generic_suspend(struct device *dev); +extern int pm_generic_resume(struct device *dev); +extern int pm_generic_freeze(struct device *dev); +extern int pm_generic_thaw(struct device *dev); +extern int pm_generic_restore(struct device *dev); +extern int pm_generic_poweroff(struct device *dev); +extern void pm_generic_complete(struct device *dev); + #else /* !CONFIG_PM_SLEEP */ #define device_pm_lock() do {} while (0) @@ -566,6 +576,15 @@ static inline int device_pm_wait_for_dev(struct device *a, struct device *b) { return 0; } + +#define pm_generic_prepare NULL +#define pm_generic_suspend NULL +#define pm_generic_resume NULL +#define pm_generic_freeze NULL +#define pm_generic_thaw NULL +#define pm_generic_restore NULL +#define pm_generic_poweroff NULL +#define pm_generic_complete NULL #endif /* !CONFIG_PM_SLEEP */ /* How to reorder dpm_list after device_move() */ @@ -576,11 +595,4 @@ enum dpm_order { DPM_ORDER_DEV_LAST, }; -extern int pm_generic_suspend(struct device *dev); -extern int 
pm_generic_resume(struct device *dev); -extern int pm_generic_freeze(struct device *dev); -extern int pm_generic_thaw(struct device *dev); -extern int pm_generic_restore(struct device *dev); -extern int pm_generic_poweroff(struct device *dev); - #endif /* _LINUX_PM_H */ -- cgit v1.2.3-71-gd317 From f12a20fc9bfba4218ecbc4e40c8e08dc2a85dc99 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 17 May 2011 15:44:12 -0700 Subject: procfs: add stub for proc_mkdir_mode() Provide a stub for proc_mkdir_mode() when CONFIG_PROC_FS is not enabled, just like the stub for proc_mkdir(). Fixes this linux-next build error: drivers/net/wireless/airo.c:4504: error: implicit declaration of function 'proc_mkdir_mode' Signed-off-by: Randy Dunlap Cc: Stephen Rothwell Cc: Alexey Dobriyan Cc: "John W. Linville" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/proc_fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 838c1149251a..eaf4350c0f90 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -208,6 +208,8 @@ static inline struct proc_dir_entry *proc_symlink(const char *name, struct proc_dir_entry *parent,const char *dest) {return NULL;} static inline struct proc_dir_entry *proc_mkdir(const char *name, struct proc_dir_entry *parent) {return NULL;} +static inline struct proc_dir_entry *proc_mkdir_mode(const char *name, + mode_t mode, struct proc_dir_entry *parent) { return NULL; } static inline struct proc_dir_entry *create_proc_read_entry(const char *name, mode_t mode, struct proc_dir_entry *base, -- cgit v1.2.3-71-gd317 From fe12bc2c996d3e492b2920e32ac79f7bbae3e15d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 18 May 2011 12:48:00 +0200 Subject: genirq: Uninline and sanity check generic_handle_irq() generic_handle_irq() is missing a NULL pointer check for the result of irq_to_desc(). This was not a big problem, but we want to expose it to drivers, so we better have sanity checks in place. Add a return value as well, which indicates that the irq number was valid and the handler was invoked. Based on the pure code move from Jonathan Cameron. 
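Purely as a usage sketch, not taken from the patch, a driver-side demultiplexing handler could now rely on the exported helper and honour its new return value; my_demux_handler(), struct my_chip, my_chip_pending() and MY_NR_SUBIRQS are all hypothetical names:

#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/bitops.h>

#define MY_NR_SUBIRQS	8

struct my_chip {
	int irq_base;			/* first Linux irq of the demuxed bank */
};

/* hypothetical status-register read, stubbed for the sketch */
static unsigned long my_chip_pending(struct my_chip *chip)
{
	return 0x05;			/* pretend sub-irqs 0 and 2 fired */
}

static irqreturn_t my_demux_handler(int irq, void *dev_id)
{
	struct my_chip *chip = dev_id;
	unsigned long pending = my_chip_pending(chip);
	int bit;

	for_each_set_bit(bit, &pending, MY_NR_SUBIRQS) {
		/* generic_handle_irq() now returns -EINVAL for a number
		 * with no irq_desc instead of dereferencing NULL. */
		if (generic_handle_irq(chip->irq_base + bit) < 0)
			pr_warn("my_chip: no handler for sub-irq %d\n", bit);
	}

	return pending ? IRQ_HANDLED : IRQ_NONE;
}
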
Signed-off-by: Thomas Gleixner Cc: Jonathan Cameron --- include/linux/irqdesc.h | 5 +---- kernel/irq/irqdesc.c | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index c70b1aa4b93a..2d921b35212c 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -111,10 +111,7 @@ static inline void generic_handle_irq_desc(unsigned int irq, struct irq_desc *de desc->handle_irq(irq, desc); } -static inline void generic_handle_irq(unsigned int irq) -{ - generic_handle_irq_desc(irq, irq_to_desc(irq)); -} +int generic_handle_irq(unsigned int irq); /* Test to see if a driver has successfully requested an irq */ static inline int irq_has_action(unsigned int irq) diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index e07b975fdc5a..9f65b0225d6a 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -290,6 +290,21 @@ static int irq_expand_nr_irqs(unsigned int nr) #endif /* !CONFIG_SPARSE_IRQ */ +/** + * generic_handle_irq - Invoke the handler for a particular irq + * @irq: The irq number to handle + * + */ +int generic_handle_irq(unsigned int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); + + if (!desc) + return -EINVAL; + generic_handle_irq_desc(irq, desc); + return 0; +} + /* Dynamic interrupt handling */ /** -- cgit v1.2.3-71-gd317 From b2b07e4fdbc51383cfc0ba5618c2ddf5c9d038f2 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 18 May 2011 15:08:03 +0200 Subject: signal: trivial, fix the "timespec declared inside parameter list" warning Fix the compile warning: do_sigtimedwait(struct timespec *) in signal.h needs the forward declaration of timespec. Reported-and-acked-by: Mike Frysinger Signed-off-by: Oleg Nesterov --- include/linux/signal.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/signal.h b/include/linux/signal.h index 7e2526374fd7..a44e7f062238 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -234,6 +234,9 @@ static inline int valid_signal(unsigned long sig) return sig <= _NSIG ? 1 : 0; } +struct timespec; +struct pt_regs; + extern int next_signal(struct sigpending *pending, sigset_t *mask); extern int do_send_sig_info(int sig, struct siginfo *info, struct task_struct *p, bool group); @@ -248,7 +251,6 @@ extern int sigprocmask(int, sigset_t *, sigset_t *); extern void set_current_blocked(const sigset_t *); extern int show_unhandled_signals; -struct pt_regs; extern int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, struct pt_regs *regs, void *cookie); extern void exit_signals(struct task_struct *tsk); -- cgit v1.2.3-71-gd317 From 01294d82622d6d9d64bde8e4530c7e2c6dbb6ee6 Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Wed, 18 May 2011 10:27:39 -0500 Subject: of: fix race when matching drivers If two drivers are probing devices at the same time, both will write their match table result to the dev->of_match cache at the same time. Only write the result if the device matches. In a thread titled "SBus devices sometimes detected, sometimes not", Meelis reported his SBus hme was not detected about 50% of the time. From the debug suggested by Grant it was obvious another driver matched some devices between the call to match the hme and the hme discovery failing. 
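A minimal sketch of the safer driver-side pattern, with assumed names (the "acme" compatibles and my_probe() are not taken from the patch): look the match entry up locally in probe rather than relying on a cached pointer that another concurrently probing driver could overwrite:

#include <linux/platform_device.h>
#include <linux/of_device.h>

enum { MY_TYPE_A, MY_TYPE_B };	/* hypothetical device variants */

static const struct of_device_id my_of_match[] = {
	{ .compatible = "acme,widget-a", .data = (void *)MY_TYPE_A },
	{ .compatible = "acme,widget-b", .data = (void *)MY_TYPE_B },
	{ /* sentinel */ }
};

static int __devinit my_probe(struct platform_device *pdev)
{
	const struct of_device_id *match;

	/* Re-run the match here; the result lives on this stack frame,
	 * so a second driver matching a different device at the same
	 * time cannot clobber it the way a shared dev->of_match can. */
	match = of_match_device(my_of_match, &pdev->dev);
	if (!match)
		return -EINVAL;

	dev_info(&pdev->dev, "probing variant %ld\n", (long)match->data);
	return 0;
}
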
Reported-by: Meelis Roos Signed-off-by: Milton Miller [grant.likely: modified to only call of_match_device() once] Signed-off-by: Grant Likely --- include/linux/of_device.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of_device.h b/include/linux/of_device.h index 8bfe6c1d4365..b33d68814a73 100644 --- a/include/linux/of_device.h +++ b/include/linux/of_device.h @@ -21,8 +21,12 @@ extern void of_device_make_bus_id(struct device *dev); static inline int of_driver_match_device(struct device *dev, const struct device_driver *drv) { - dev->of_match = of_match_device(drv->of_match_table, dev); - return dev->of_match != NULL; + const struct of_device_id *match; + + match = of_match_device(drv->of_match_table, dev); + if (match) + dev->of_match = match; + return match != NULL; } extern struct platform_device *of_dev_get(struct platform_device *dev); -- cgit v1.2.3-71-gd317 From b1608d69cb804e414d0887140ba08a9398e4e638 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Wed, 18 May 2011 11:19:24 -0600 Subject: drivercore: revert addition of of_match to struct device Commit b826291c, "drivercore/dt: add a match table pointer to struct device" added an of_match pointer to struct device to cache the of_match_table entry discovered at driver match time. This was unsafe because matching is not an atomic operation with probing a driver. If two or more drivers are attempted to be matched to a driver at the same time, then the cached matching entry pointer could get overwritten. This patch reverts the of_match cache pointer and reworks all users to call of_match_device() directly instead. Signed-off-by: Grant Likely --- arch/powerpc/platforms/83xx/suspend.c | 7 +++++-- arch/powerpc/sysdev/fsl_msi.c | 7 +++++-- arch/sparc/kernel/pci_sabre.c | 5 ++++- arch/sparc/kernel/pci_schizo.c | 8 ++++++-- drivers/atm/fore200e.c | 7 +++++-- drivers/char/hw_random/n2-drv.c | 7 +++++-- drivers/char/ipmi/ipmi_si_intf.c | 7 +++++-- drivers/char/xilinx_hwicap/xilinx_hwicap.c | 14 +++++++++----- drivers/edac/ppc4xx_edac.c | 2 +- drivers/i2c/busses/i2c-mpc.c | 9 ++++++--- drivers/mmc/host/sdhci-of-core.c | 7 +++++-- drivers/mtd/maps/physmap_of.c | 7 +++++-- drivers/net/can/mscan/mpc5xxx_can.c | 7 +++++-- drivers/net/fs_enet/fs_enet-main.c | 9 ++++++--- drivers/net/fs_enet/mii-fec.c | 7 +++++-- drivers/net/sunhme.c | 7 +++++-- drivers/scsi/qlogicpti.c | 7 +++++-- drivers/tty/serial/of_serial.c | 7 +++++-- drivers/usb/gadget/fsl_qe_udc.c | 7 +++++-- drivers/watchdog/mpc8xxx_wdt.c | 7 +++++-- include/linux/device.h | 1 - include/linux/of_device.h | 12 ++++++------ 22 files changed, 108 insertions(+), 50 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c index 188272934cfb..104faa8aa23c 100644 --- a/arch/powerpc/platforms/83xx/suspend.c +++ b/arch/powerpc/platforms/83xx/suspend.c @@ -318,17 +318,20 @@ static const struct platform_suspend_ops mpc83xx_suspend_ops = { .end = mpc83xx_suspend_end, }; +static struct of_device_id pmc_match[]; static int pmc_probe(struct platform_device *ofdev) { + const struct of_device_id *match; struct device_node *np = ofdev->dev.of_node; struct resource res; struct pmc_type *type; int ret = 0; - if (!ofdev->dev.of_match) + match = of_match_device(pmc_match, &ofdev->dev); + if (!match) return -EINVAL; - type = ofdev->dev.of_match->data; + type = match->data; if (!of_device_is_available(np)) return -ENODEV; diff --git a/arch/powerpc/sysdev/fsl_msi.c 
b/arch/powerpc/sysdev/fsl_msi.c index d5679dc1e20f..01cd2f089512 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -304,8 +304,10 @@ static int __devinit fsl_msi_setup_hwirq(struct fsl_msi *msi, return 0; } +static const struct of_device_id fsl_of_msi_ids[]; static int __devinit fsl_of_msi_probe(struct platform_device *dev) { + const struct of_device_id *match; struct fsl_msi *msi; struct resource res; int err, i, j, irq_index, count; @@ -316,9 +318,10 @@ static int __devinit fsl_of_msi_probe(struct platform_device *dev) u32 offset; static const u32 all_avail[] = { 0, NR_MSI_IRQS }; - if (!dev->dev.of_match) + match = of_match_device(fsl_of_msi_ids, &dev->dev); + if (!match) return -EINVAL; - features = dev->dev.of_match->data; + features = match->data; printk(KERN_DEBUG "Setting up Freescale MSI support\n"); diff --git a/arch/sparc/kernel/pci_sabre.c b/arch/sparc/kernel/pci_sabre.c index 948068a083fc..d1840dbdaa2f 100644 --- a/arch/sparc/kernel/pci_sabre.c +++ b/arch/sparc/kernel/pci_sabre.c @@ -452,8 +452,10 @@ static void __devinit sabre_pbm_init(struct pci_pbm_info *pbm, sabre_scan_bus(pbm, &op->dev); } +static const struct of_device_id sabre_match[]; static int __devinit sabre_probe(struct platform_device *op) { + const struct of_device_id *match; const struct linux_prom64_registers *pr_regs; struct device_node *dp = op->dev.of_node; struct pci_pbm_info *pbm; @@ -463,7 +465,8 @@ static int __devinit sabre_probe(struct platform_device *op) const u32 *vdma; u64 clear_irq; - hummingbird_p = op->dev.of_match && (op->dev.of_match->data != NULL); + match = of_match_device(sabre_match, &op->dev); + hummingbird_p = match && (match->data != NULL); if (!hummingbird_p) { struct device_node *cpu_dp; diff --git a/arch/sparc/kernel/pci_schizo.c b/arch/sparc/kernel/pci_schizo.c index fecfcb2063c8..283fbc329a43 100644 --- a/arch/sparc/kernel/pci_schizo.c +++ b/arch/sparc/kernel/pci_schizo.c @@ -1458,11 +1458,15 @@ out_err: return err; } +static const struct of_device_id schizo_match[]; static int __devinit schizo_probe(struct platform_device *op) { - if (!op->dev.of_match) + const struct of_device_id *match; + + match = of_match_device(schizo_match, &op->dev); + if (!match) return -EINVAL; - return __schizo_init(op, (unsigned long) op->dev.of_match->data); + return __schizo_init(op, (unsigned long)match->data); } /* The ordering of this table is very important. 
Some Tomatillo diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c index bdd2719f3f68..bc9e702186dd 100644 --- a/drivers/atm/fore200e.c +++ b/drivers/atm/fore200e.c @@ -2643,16 +2643,19 @@ fore200e_init(struct fore200e* fore200e, struct device *parent) } #ifdef CONFIG_SBUS +static const struct of_device_id fore200e_sba_match[]; static int __devinit fore200e_sba_probe(struct platform_device *op) { + const struct of_device_id *match; const struct fore200e_bus *bus; struct fore200e *fore200e; static int index = 0; int err; - if (!op->dev.of_match) + match = of_match_device(fore200e_sba_match, &op->dev); + if (!match) return -EINVAL; - bus = op->dev.of_match->data; + bus = match->data; fore200e = kzalloc(sizeof(struct fore200e), GFP_KERNEL); if (!fore200e) diff --git a/drivers/char/hw_random/n2-drv.c b/drivers/char/hw_random/n2-drv.c index 43ac61978d8b..ac6739e085e3 100644 --- a/drivers/char/hw_random/n2-drv.c +++ b/drivers/char/hw_random/n2-drv.c @@ -619,15 +619,18 @@ static void __devinit n2rng_driver_version(void) pr_info("%s", version); } +static const struct of_device_id n2rng_match[]; static int __devinit n2rng_probe(struct platform_device *op) { + const struct of_device_id *match; int victoria_falls; int err = -ENOMEM; struct n2rng *np; - if (!op->dev.of_match) + match = of_match_device(n2rng_match, &op->dev); + if (!match) return -EINVAL; - victoria_falls = (op->dev.of_match->data != NULL); + victoria_falls = (match->data != NULL); n2rng_driver_version(); np = kzalloc(sizeof(*np), GFP_KERNEL); diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index cc6c9b2546a3..64c6b8530615 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -2554,9 +2554,11 @@ static struct pci_driver ipmi_pci_driver = { }; #endif /* CONFIG_PCI */ +static struct of_device_id ipmi_match[]; static int __devinit ipmi_probe(struct platform_device *dev) { #ifdef CONFIG_OF + const struct of_device_id *match; struct smi_info *info; struct resource resource; const __be32 *regsize, *regspacing, *regshift; @@ -2566,7 +2568,8 @@ static int __devinit ipmi_probe(struct platform_device *dev) dev_info(&dev->dev, "probing via device tree\n"); - if (!dev->dev.of_match) + match = of_match_device(ipmi_match, &dev->dev); + if (!match) return -EINVAL; ret = of_address_to_resource(np, 0, &resource); @@ -2601,7 +2604,7 @@ static int __devinit ipmi_probe(struct platform_device *dev) return -ENOMEM; } - info->si_type = (enum si_type) dev->dev.of_match->data; + info->si_type = (enum si_type) match->data; info->addr_source = SI_DEVICETREE; info->irq_setup = std_irq_setup; diff --git a/drivers/char/xilinx_hwicap/xilinx_hwicap.c b/drivers/char/xilinx_hwicap/xilinx_hwicap.c index d6412c16385f..39ccdeada791 100644 --- a/drivers/char/xilinx_hwicap/xilinx_hwicap.c +++ b/drivers/char/xilinx_hwicap/xilinx_hwicap.c @@ -715,13 +715,13 @@ static int __devexit hwicap_remove(struct device *dev) } #ifdef CONFIG_OF -static int __devinit hwicap_of_probe(struct platform_device *op) +static int __devinit hwicap_of_probe(struct platform_device *op, + const struct hwicap_driver_config *config) { struct resource res; const unsigned int *id; const char *family; int rc; - const struct hwicap_driver_config *config = op->dev.of_match->data; const struct config_registers *regs; @@ -751,20 +751,24 @@ static int __devinit hwicap_of_probe(struct platform_device *op) regs); } #else -static inline int hwicap_of_probe(struct platform_device *op) +static inline int hwicap_of_probe(struct 
platform_device *op, + const struct hwicap_driver_config *config) { return -EINVAL; } #endif /* CONFIG_OF */ +static const struct of_device_id __devinitconst hwicap_of_match[]; static int __devinit hwicap_drv_probe(struct platform_device *pdev) { + const struct of_device_id *match; struct resource *res; const struct config_registers *regs; const char *family; - if (pdev->dev.of_match) - return hwicap_of_probe(pdev); + match = of_match_device(hwicap_of_match, &pdev->dev); + if (match) + return hwicap_of_probe(pdev, match->data); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) diff --git a/drivers/edac/ppc4xx_edac.c b/drivers/edac/ppc4xx_edac.c index c1f0045ceb8e..af8e7b1aa290 100644 --- a/drivers/edac/ppc4xx_edac.c +++ b/drivers/edac/ppc4xx_edac.c @@ -1019,7 +1019,7 @@ ppc4xx_edac_mc_init(struct mem_ctl_info *mci, struct ppc4xx_edac_pdata *pdata = NULL; const struct device_node *np = op->dev.of_node; - if (op->dev.of_match == NULL) + if (of_match_device(ppc4xx_edac_match, &op->dev) == NULL) return -EINVAL; /* Initial driver pointers and private data */ diff --git a/drivers/i2c/busses/i2c-mpc.c b/drivers/i2c/busses/i2c-mpc.c index 75b984c519ac..107397a606b4 100644 --- a/drivers/i2c/busses/i2c-mpc.c +++ b/drivers/i2c/busses/i2c-mpc.c @@ -560,15 +560,18 @@ static struct i2c_adapter mpc_ops = { .timeout = HZ, }; +static const struct of_device_id mpc_i2c_of_match[]; static int __devinit fsl_i2c_probe(struct platform_device *op) { + const struct of_device_id *match; struct mpc_i2c *i2c; const u32 *prop; u32 clock = MPC_I2C_CLOCK_LEGACY; int result = 0; int plen; - if (!op->dev.of_match) + match = of_match_device(mpc_i2c_of_match, &op->dev); + if (!match) return -EINVAL; i2c = kzalloc(sizeof(*i2c), GFP_KERNEL); @@ -605,8 +608,8 @@ static int __devinit fsl_i2c_probe(struct platform_device *op) clock = *prop; } - if (op->dev.of_match->data) { - struct mpc_i2c_data *data = op->dev.of_match->data; + if (match->data) { + struct mpc_i2c_data *data = match->data; data->setup(op->dev.of_node, i2c, clock, data->prescaler); } else { /* Backwards compatibility */ diff --git a/drivers/mmc/host/sdhci-of-core.c b/drivers/mmc/host/sdhci-of-core.c index f9b611fc773e..60e4186a4345 100644 --- a/drivers/mmc/host/sdhci-of-core.c +++ b/drivers/mmc/host/sdhci-of-core.c @@ -124,8 +124,10 @@ static bool __devinit sdhci_of_wp_inverted(struct device_node *np) #endif } +static const struct of_device_id sdhci_of_match[]; static int __devinit sdhci_of_probe(struct platform_device *ofdev) { + const struct of_device_id *match; struct device_node *np = ofdev->dev.of_node; struct sdhci_of_data *sdhci_of_data; struct sdhci_host *host; @@ -134,9 +136,10 @@ static int __devinit sdhci_of_probe(struct platform_device *ofdev) int size; int ret; - if (!ofdev->dev.of_match) + match = of_match_device(sdhci_of_match, &ofdev->dev); + if (!match) return -EINVAL; - sdhci_of_data = ofdev->dev.of_match->data; + sdhci_of_data = match->data; if (!of_device_is_available(np)) return -ENODEV; diff --git a/drivers/mtd/maps/physmap_of.c b/drivers/mtd/maps/physmap_of.c index bd483f0c57e1..c1d33464aee8 100644 --- a/drivers/mtd/maps/physmap_of.c +++ b/drivers/mtd/maps/physmap_of.c @@ -214,11 +214,13 @@ static void __devinit of_free_probes(const char **probes) } #endif +static struct of_device_id of_flash_match[]; static int __devinit of_flash_probe(struct platform_device *dev) { #ifdef CONFIG_MTD_PARTITIONS const char **part_probe_types; #endif + const struct of_device_id *match; struct device_node *dp = dev->dev.of_node; struct resource 
res; struct of_flash *info; @@ -232,9 +234,10 @@ static int __devinit of_flash_probe(struct platform_device *dev) struct mtd_info **mtd_list = NULL; resource_size_t res_size; - if (!dev->dev.of_match) + match = of_match_device(of_flash_match, &dev->dev); + if (!match) return -EINVAL; - probe_type = dev->dev.of_match->data; + probe_type = match->data; reg_tuple_size = (of_n_addr_cells(dp) + of_n_size_cells(dp)) * sizeof(u32); diff --git a/drivers/net/can/mscan/mpc5xxx_can.c b/drivers/net/can/mscan/mpc5xxx_can.c index bd1d811c204f..5fedc3375562 100644 --- a/drivers/net/can/mscan/mpc5xxx_can.c +++ b/drivers/net/can/mscan/mpc5xxx_can.c @@ -247,8 +247,10 @@ static u32 __devinit mpc512x_can_get_clock(struct platform_device *ofdev, } #endif /* CONFIG_PPC_MPC512x */ +static struct of_device_id mpc5xxx_can_table[]; static int __devinit mpc5xxx_can_probe(struct platform_device *ofdev) { + const struct of_device_id *match; struct mpc5xxx_can_data *data; struct device_node *np = ofdev->dev.of_node; struct net_device *dev; @@ -258,9 +260,10 @@ static int __devinit mpc5xxx_can_probe(struct platform_device *ofdev) int irq, mscan_clksrc = 0; int err = -ENOMEM; - if (!ofdev->dev.of_match) + match = of_match_device(mpc5xxx_can_table, &ofdev->dev); + if (!match) return -EINVAL; - data = (struct mpc5xxx_can_data *)ofdev->dev.of_match->data; + data = match->data; base = of_iomap(np, 0); if (!base) { diff --git a/drivers/net/fs_enet/fs_enet-main.c b/drivers/net/fs_enet/fs_enet-main.c index 24cb953900dd..5131e61c358c 100644 --- a/drivers/net/fs_enet/fs_enet-main.c +++ b/drivers/net/fs_enet/fs_enet-main.c @@ -998,8 +998,10 @@ static const struct net_device_ops fs_enet_netdev_ops = { #endif }; +static struct of_device_id fs_enet_match[]; static int __devinit fs_enet_probe(struct platform_device *ofdev) { + const struct of_device_id *match; struct net_device *ndev; struct fs_enet_private *fep; struct fs_platform_info *fpi; @@ -1007,14 +1009,15 @@ static int __devinit fs_enet_probe(struct platform_device *ofdev) const u8 *mac_addr; int privsize, len, ret = -ENODEV; - if (!ofdev->dev.of_match) + match = of_match_device(fs_enet_match, &ofdev->dev); + if (!match) return -EINVAL; fpi = kzalloc(sizeof(*fpi), GFP_KERNEL); if (!fpi) return -ENOMEM; - if (!IS_FEC(ofdev->dev.of_match)) { + if (!IS_FEC(match)) { data = of_get_property(ofdev->dev.of_node, "fsl,cpm-command", &len); if (!data || len != 4) goto out_free_fpi; @@ -1049,7 +1052,7 @@ static int __devinit fs_enet_probe(struct platform_device *ofdev) fep->dev = &ofdev->dev; fep->ndev = ndev; fep->fpi = fpi; - fep->ops = ofdev->dev.of_match->data; + fep->ops = match->data; ret = fep->ops->setup_data(ndev); if (ret) diff --git a/drivers/net/fs_enet/mii-fec.c b/drivers/net/fs_enet/mii-fec.c index 7e840d373ab3..6a2e150e75bb 100644 --- a/drivers/net/fs_enet/mii-fec.c +++ b/drivers/net/fs_enet/mii-fec.c @@ -101,17 +101,20 @@ static int fs_enet_fec_mii_reset(struct mii_bus *bus) return 0; } +static struct of_device_id fs_enet_mdio_fec_match[]; static int __devinit fs_enet_mdio_probe(struct platform_device *ofdev) { + const struct of_device_id *match; struct resource res; struct mii_bus *new_bus; struct fec_info *fec; int (*get_bus_freq)(struct device_node *); int ret = -ENOMEM, clock, speed; - if (!ofdev->dev.of_match) + match = of_match_device(fs_enet_mdio_fec_match, &ofdev->dev); + if (!match) return -EINVAL; - get_bus_freq = ofdev->dev.of_match->data; + get_bus_freq = match->data; new_bus = mdiobus_alloc(); if (!new_bus) diff --git a/drivers/net/sunhme.c 
b/drivers/net/sunhme.c index eb4f59fb01e9..bff2f7999ff0 100644 --- a/drivers/net/sunhme.c +++ b/drivers/net/sunhme.c @@ -3237,15 +3237,18 @@ static void happy_meal_pci_exit(void) #endif #ifdef CONFIG_SBUS +static const struct of_device_id hme_sbus_match[]; static int __devinit hme_sbus_probe(struct platform_device *op) { + const struct of_device_id *match; struct device_node *dp = op->dev.of_node; const char *model = of_get_property(dp, "model", NULL); int is_qfe; - if (!op->dev.of_match) + match = of_match_device(hme_sbus_match, &op->dev); + if (!match) return -EINVAL; - is_qfe = (op->dev.of_match->data != NULL); + is_qfe = (match->data != NULL); if (!is_qfe && model && !strcmp(model, "SUNW,sbus-qfe")) is_qfe = 1; diff --git a/drivers/scsi/qlogicpti.c b/drivers/scsi/qlogicpti.c index e2d45c91b8e8..9689d41c7888 100644 --- a/drivers/scsi/qlogicpti.c +++ b/drivers/scsi/qlogicpti.c @@ -1292,8 +1292,10 @@ static struct scsi_host_template qpti_template = { .use_clustering = ENABLE_CLUSTERING, }; +static const struct of_device_id qpti_match[]; static int __devinit qpti_sbus_probe(struct platform_device *op) { + const struct of_device_id *match; struct scsi_host_template *tpnt; struct device_node *dp = op->dev.of_node; struct Scsi_Host *host; @@ -1301,9 +1303,10 @@ static int __devinit qpti_sbus_probe(struct platform_device *op) static int nqptis; const char *fcode; - if (!op->dev.of_match) + match = of_match_device(qpti_match, &op->dev); + if (!match) return -EINVAL; - tpnt = op->dev.of_match->data; + tpnt = match->data; /* Sometimes Antares cards come up not completely * setup, and we get a report of a zero IRQ. diff --git a/drivers/tty/serial/of_serial.c b/drivers/tty/serial/of_serial.c index 0e8eec516df4..c911b2419abb 100644 --- a/drivers/tty/serial/of_serial.c +++ b/drivers/tty/serial/of_serial.c @@ -80,14 +80,17 @@ static int __devinit of_platform_serial_setup(struct platform_device *ofdev, /* * Try to register a serial port */ +static struct of_device_id of_platform_serial_table[]; static int __devinit of_platform_serial_probe(struct platform_device *ofdev) { + const struct of_device_id *match; struct of_serial_info *info; struct uart_port port; int port_type; int ret; - if (!ofdev->dev.of_match) + match = of_match_device(of_platform_serial_table, &ofdev->dev); + if (!match) return -EINVAL; if (of_find_property(ofdev->dev.of_node, "used-by-rtas", NULL)) @@ -97,7 +100,7 @@ static int __devinit of_platform_serial_probe(struct platform_device *ofdev) if (info == NULL) return -ENOMEM; - port_type = (unsigned long)ofdev->dev.of_match->data; + port_type = (unsigned long)match->data; ret = of_platform_serial_setup(ofdev, port_type, &port); if (ret) goto out; diff --git a/drivers/usb/gadget/fsl_qe_udc.c b/drivers/usb/gadget/fsl_qe_udc.c index 36613b37c504..3a68e09309f7 100644 --- a/drivers/usb/gadget/fsl_qe_udc.c +++ b/drivers/usb/gadget/fsl_qe_udc.c @@ -2539,15 +2539,18 @@ static void qe_udc_release(struct device *dev) } /* Driver probe functions */ +static const struct of_device_id qe_udc_match[]; static int __devinit qe_udc_probe(struct platform_device *ofdev) { + const struct of_device_id *match; struct device_node *np = ofdev->dev.of_node; struct qe_ep *ep; unsigned int ret = 0; unsigned int i; const void *prop; - if (!ofdev->dev.of_match) + match = of_match_device(qe_udc_match, &ofdev->dev); + if (!match) return -EINVAL; prop = of_get_property(np, "mode", NULL); @@ -2561,7 +2564,7 @@ static int __devinit qe_udc_probe(struct platform_device *ofdev) return -ENOMEM; } - udc_controller->soc_type 
= (unsigned long)ofdev->dev.of_match->data; + udc_controller->soc_type = (unsigned long)match->data; udc_controller->usb_regs = of_iomap(np, 0); if (!udc_controller->usb_regs) { ret = -ENOMEM; diff --git a/drivers/watchdog/mpc8xxx_wdt.c b/drivers/watchdog/mpc8xxx_wdt.c index 528bceb220fd..eed5436ffb51 100644 --- a/drivers/watchdog/mpc8xxx_wdt.c +++ b/drivers/watchdog/mpc8xxx_wdt.c @@ -185,17 +185,20 @@ static struct miscdevice mpc8xxx_wdt_miscdev = { .fops = &mpc8xxx_wdt_fops, }; +static const struct of_device_id mpc8xxx_wdt_match[]; static int __devinit mpc8xxx_wdt_probe(struct platform_device *ofdev) { int ret; + const struct of_device_id *match; struct device_node *np = ofdev->dev.of_node; struct mpc8xxx_wdt_type *wdt_type; u32 freq = fsl_get_sys_freq(); bool enabled; - if (!ofdev->dev.of_match) + match = of_match_device(mpc8xxx_wdt_match, &ofdev->dev); + if (!match) return -EINVAL; - wdt_type = ofdev->dev.of_match->data; + wdt_type = match->data; if (!freq || freq == -1) return -EINVAL; diff --git a/include/linux/device.h b/include/linux/device.h index ab8dfc095709..d08399db6e2c 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -442,7 +442,6 @@ struct device { struct dev_archdata archdata; struct device_node *of_node; /* associated device tree node */ - const struct of_device_id *of_match; /* matching of_device_id from driver */ dev_t devt; /* dev_t, creates the sysfs "dev" */ diff --git a/include/linux/of_device.h b/include/linux/of_device.h index b33d68814a73..ae5638480ef2 100644 --- a/include/linux/of_device.h +++ b/include/linux/of_device.h @@ -21,12 +21,7 @@ extern void of_device_make_bus_id(struct device *dev); static inline int of_driver_match_device(struct device *dev, const struct device_driver *drv) { - const struct of_device_id *match; - - match = of_match_device(drv->of_match_table, dev); - if (match) - dev->of_match = match; - return match != NULL; + return of_match_device(drv->of_match_table, dev) != NULL; } extern struct platform_device *of_dev_get(struct platform_device *dev); @@ -62,6 +57,11 @@ static inline int of_device_uevent(struct device *dev, static inline void of_device_node_put(struct device *dev) { } +static inline const struct of_device_id *of_match_device( + const struct of_device_id *matches, const struct device *dev) +{ + return NULL; +} #endif /* CONFIG_OF_DEVICE */ #endif /* _LINUX_OF_DEVICE_H */ -- cgit v1.2.3-71-gd317 From b448c4e3ae6d20108dba1d7833f2c0d3dbad87ce Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 29 Apr 2011 15:12:32 -0400 Subject: ftrace: Replace FTRACE_FL_NOTRACE flag with a hash of ignored functions To prepare for the accounting system that will allow multiple users of the function tracer, having FTRACE_FL_NOTRACE as a flag in the dyn_ftrace record does not make sense. All ftrace_ops will soon have a hash of functions they should trace and not trace. Making a global hash of functions not to trace makes this transition easier.
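To illustrate the mechanism (this sketch is editorial and not part of the patch), the notrace hash amounts to a bucketed set of instruction pointers. A simplified userspace model, with invented demo_* names and none of the kernel's hlist/hash_long()/RCU machinery, could look like this:

#include <stdio.h>
#include <stdlib.h>

#define DEMO_HASH_BITS	10
#define DEMO_HASH_SIZE	(1 << DEMO_HASH_BITS)

struct demo_entry {
	struct demo_entry *next;
	unsigned long ip;
};

static struct demo_entry *demo_buckets[DEMO_HASH_SIZE];

/* crude stand-in for the kernel's hash_long(ip, size_bits) */
static unsigned long demo_hash(unsigned long ip)
{
	return (ip >> 4) & (DEMO_HASH_SIZE - 1);
}

static struct demo_entry *demo_lookup(unsigned long ip)
{
	struct demo_entry *e;

	for (e = demo_buckets[demo_hash(ip)]; e; e = e->next)
		if (e->ip == ip)
			return e;
	return NULL;
}

static int demo_add(unsigned long ip)
{
	struct demo_entry *e = malloc(sizeof(*e));

	if (!e)
		return -1;
	e->ip = ip;
	e->next = demo_buckets[demo_hash(ip)];
	demo_buckets[demo_hash(ip)] = e;
	return 0;
}

int main(void)
{
	demo_add(0x81000000UL);		/* pretend this address is marked notrace */
	printf("0x81000000 traced: %s\n", demo_lookup(0x81000000UL) ? "no" : "yes");
	printf("0x81000040 traced: %s\n", demo_lookup(0x81000040UL) ? "no" : "yes");
	return 0;
}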
Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 1 - kernel/trace/ftrace.c | 176 +++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 150 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 32047449b309..fe0a90a09243 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -149,7 +149,6 @@ enum { FTRACE_FL_FREE = (1 << 0), FTRACE_FL_FILTER = (1 << 1), FTRACE_FL_ENABLED = (1 << 2), - FTRACE_FL_NOTRACE = (1 << 3), }; struct dyn_ftrace { diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index d3406346ced6..04c002a491fb 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -57,6 +57,7 @@ /* hash bits for specific function selection */ #define FTRACE_HASH_BITS 7 #define FTRACE_FUNC_HASHSIZE (1 << FTRACE_HASH_BITS) +#define FTRACE_HASH_MAX_BITS 10 /* ftrace_enabled is a method to turn ftrace on or off */ int ftrace_enabled __read_mostly; @@ -865,6 +866,22 @@ enum { FTRACE_START_FUNC_RET = (1 << 3), FTRACE_STOP_FUNC_RET = (1 << 4), }; +struct ftrace_func_entry { + struct hlist_node hlist; + unsigned long ip; +}; + +struct ftrace_hash { + unsigned long size_bits; + struct hlist_head *buckets; + unsigned long count; +}; + +static struct hlist_head notrace_buckets[1 << FTRACE_HASH_MAX_BITS]; +static struct ftrace_hash notrace_hash = { + .size_bits = FTRACE_HASH_MAX_BITS, + .buckets = notrace_buckets, +}; static int ftrace_filtered; @@ -889,6 +906,79 @@ static struct ftrace_page *ftrace_pages; static struct dyn_ftrace *ftrace_free_records; +static struct ftrace_func_entry * +ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip) +{ + unsigned long key; + struct ftrace_func_entry *entry; + struct hlist_head *hhd; + struct hlist_node *n; + + if (!hash->count) + return NULL; + + if (hash->size_bits > 0) + key = hash_long(ip, hash->size_bits); + else + key = 0; + + hhd = &hash->buckets[key]; + + hlist_for_each_entry_rcu(entry, n, hhd, hlist) { + if (entry->ip == ip) + return entry; + } + return NULL; +} + +static int add_hash_entry(struct ftrace_hash *hash, unsigned long ip) +{ + struct ftrace_func_entry *entry; + struct hlist_head *hhd; + unsigned long key; + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return -ENOMEM; + + if (hash->size_bits) + key = hash_long(ip, hash->size_bits); + else + key = 0; + + entry->ip = ip; + hhd = &hash->buckets[key]; + hlist_add_head(&entry->hlist, hhd); + hash->count++; + + return 0; +} + +static void +remove_hash_entry(struct ftrace_hash *hash, + struct ftrace_func_entry *entry) +{ + hlist_del(&entry->hlist); + kfree(entry); + hash->count--; +} + +static void ftrace_hash_clear(struct ftrace_hash *hash) +{ + struct hlist_head *hhd; + struct hlist_node *tp, *tn; + struct ftrace_func_entry *entry; + int size = 1 << hash->size_bits; + int i; + + for (i = 0; i < size; i++) { + hhd = &hash->buckets[i]; + hlist_for_each_entry_safe(entry, tp, tn, hhd, hlist) + remove_hash_entry(hash, entry); + } + FTRACE_WARN_ON(hash->count); +} + /* * This is a double for. Do not use 'break' to break out of the loop, * you must use a goto. 
@@ -1032,7 +1122,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable) * If we want to enable it and filtering is on, enable it only if * it's filtered */ - if (enable && !(rec->flags & FTRACE_FL_NOTRACE)) { + if (enable && !ftrace_lookup_ip(&notrace_hash, rec->ip)) { if (!ftrace_filtered || (rec->flags & FTRACE_FL_FILTER)) flag = FTRACE_FL_ENABLED; } @@ -1465,7 +1555,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos) !(rec->flags & FTRACE_FL_FILTER)) || ((iter->flags & FTRACE_ITER_NOTRACE) && - !(rec->flags & FTRACE_FL_NOTRACE))) { + !ftrace_lookup_ip(&notrace_hash, rec->ip))) { rec = NULL; goto retry; } @@ -1609,14 +1699,15 @@ static void ftrace_filter_reset(int enable) { struct ftrace_page *pg; struct dyn_ftrace *rec; - unsigned long type = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; mutex_lock(&ftrace_lock); - if (enable) + if (enable) { ftrace_filtered = 0; - do_for_each_ftrace_rec(pg, rec) { - rec->flags &= ~type; - } while_for_each_ftrace_rec(); + do_for_each_ftrace_rec(pg, rec) { + rec->flags &= ~FTRACE_FL_FILTER; + } while_for_each_ftrace_rec(); + } else + ftrace_hash_clear(&notrace_hash); mutex_unlock(&ftrace_lock); } @@ -1716,13 +1807,36 @@ static int ftrace_match(char *str, char *regex, int len, int type) return matched; } -static void -update_record(struct dyn_ftrace *rec, unsigned long flag, int not) +static int +update_record(struct dyn_ftrace *rec, int enable, int not) { - if (not) - rec->flags &= ~flag; - else - rec->flags |= flag; + struct ftrace_func_entry *entry; + struct ftrace_hash *hash = &notrace_hash; + int ret = 0; + + if (enable) { + if (not) + rec->flags &= ~FTRACE_FL_FILTER; + else + rec->flags |= FTRACE_FL_FILTER; + } else { + if (not) { + /* Do nothing if it doesn't exist */ + entry = ftrace_lookup_ip(hash, rec->ip); + if (!entry) + return 0; + + remove_hash_entry(hash, entry); + } else { + /* Do nothing if it exists */ + entry = ftrace_lookup_ip(hash, rec->ip); + if (entry) + return 0; + + ret = add_hash_entry(hash, rec->ip); + } + } + return ret; } static int @@ -1754,16 +1868,14 @@ static int match_records(char *buff, int len, char *mod, int enable, int not) struct dyn_ftrace *rec; int type = MATCH_FULL; char *search = buff; - unsigned long flag; int found = 0; + int ret; if (len) { type = filter_parse_regex(buff, len, &search, &not); search_len = strlen(search); } - flag = enable ?
FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; - mutex_lock(&ftrace_lock); if (unlikely(ftrace_disabled)) @@ -1772,7 +1884,11 @@ static int match_records(char *buff, int len, char *mod, int enable, int not) do_for_each_ftrace_rec(pg, rec) { if (ftrace_match_record(rec, mod, search, search_len, type)) { - update_record(rec, flag, not); + ret = update_record(rec, enable, not); + if (ret < 0) { + found = ret; + goto out_unlock; + } found = 1; } /* @@ -1821,6 +1937,7 @@ static int ftrace_mod_callback(char *func, char *cmd, char *param, int enable) { char *mod; + int ret = -EINVAL; /* * cmd == 'mod' because we only registered this func @@ -1832,15 +1949,19 @@ ftrace_mod_callback(char *func, char *cmd, char *param, int enable) /* we must have a module name */ if (!param) - return -EINVAL; + return ret; mod = strsep(&param, ":"); if (!strlen(mod)) - return -EINVAL; + return ret; - if (ftrace_match_module_records(func, mod, enable)) - return 0; - return -EINVAL; + ret = ftrace_match_module_records(func, mod, enable); + if (!ret) + ret = -EINVAL; + if (ret < 0) + return ret; + + return 0; } static struct ftrace_func_command ftrace_mod_cmd = { @@ -2132,14 +2253,17 @@ static int ftrace_process_regex(char *buff, int len, int enable) { char *func, *command, *next = buff; struct ftrace_func_command *p; - int ret = -EINVAL; + int ret; func = strsep(&next, ":"); if (!next) { - if (ftrace_match_records(func, len, enable)) - return 0; - return ret; + ret = ftrace_match_records(func, len, enable); + if (!ret) + ret = -EINVAL; + if (ret < 0) + return ret; + return 0; } /* command found */ -- cgit v1.2.3-71-gd317 From 1cf41dd79993389b012e4542ab502ce36ae7343f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 29 Apr 2011 20:59:51 -0400 Subject: ftrace: Use hash instead for FTRACE_FL_FILTER When multiple users are allowed to have their own set of functions to trace, having the FTRACE_FL_FILTER flag will not be enough to handle the accounting of those users. Each user will need their own set of functions. Replace the FTRACE_FL_FILTER with a filter_hash instead. This is temporary until the rest of the function filtering accounting gets in.
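The filtering rule these two hash conversions converge on can be checked with a small self-contained C sketch; the booleans stand in for hash->count and ftrace_lookup_ip() results, and the example is illustrative rather than kernel code:

#include <stdbool.h>
#include <stdio.h>

/* a record is enabled when it is not in the notrace hash and the
 * filter hash is either empty or contains it */
static bool record_enabled(bool filter_nonempty, bool in_filter, bool in_notrace)
{
	if (in_notrace)
		return false;
	return !filter_nonempty || in_filter;
}

int main(void)
{
	printf("%d\n", record_enabled(false, false, false));	/* 1: no filter, trace everything */
	printf("%d\n", record_enabled(true, false, false));	/* 0: some filter set, not this ip */
	printf("%d\n", record_enabled(true, true, true));	/* 0: notrace always wins */
	return 0;
}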
Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 3 +- kernel/trace/ftrace.c | 151 ++++++++++++++++++++++++++++++--------------------- 2 files changed, 70 insertions(+), 84 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index fe0a90a09243..52fc5d4e6edd 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -147,8 +147,7 @@ extern int ftrace_text_reserved(void *start, void *end); enum { FTRACE_FL_FREE = (1 << 0), - FTRACE_FL_FILTER = (1 << 1), - FTRACE_FL_ENABLED = (1 << 2), + FTRACE_FL_ENABLED = (1 << 1), }; struct dyn_ftrace { diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 04c002a491fb..222eca4c3022 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -883,7 +883,11 @@ static struct ftrace_hash notrace_hash = { .buckets = notrace_buckets, }; -static int ftrace_filtered; +static struct hlist_head filter_buckets[1 << FTRACE_HASH_MAX_BITS]; +static struct ftrace_hash filter_hash = { + .size_bits = FTRACE_HASH_MAX_BITS, + .buckets = filter_buckets, +}; static struct dyn_ftrace *ftrace_new_addrs; @@ -1123,7 +1127,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable) * it's filtered */ if (enable && !ftrace_lookup_ip(&notrace_hash, rec->ip)) { - if (!ftrace_filtered || (rec->flags & FTRACE_FL_FILTER)) + if (!filter_hash.count || ftrace_lookup_ip(&filter_hash, rec->ip)) flag = FTRACE_FL_ENABLED; } @@ -1430,6 +1434,7 @@ struct ftrace_iterator { struct dyn_ftrace *func; struct ftrace_func_probe *probe; struct trace_parser parser; + struct ftrace_hash *hash; int hidx; int idx; unsigned flags; @@ -1552,7 +1557,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos) if ((rec->flags & FTRACE_FL_FREE) || ((iter->flags & FTRACE_ITER_FILTER) && - !(rec->flags & FTRACE_FL_FILTER)) || + !(ftrace_lookup_ip(&filter_hash, rec->ip))) || ((iter->flags & FTRACE_ITER_NOTRACE) && !ftrace_lookup_ip(&notrace_hash, rec->ip))) { @@ -1598,7 +1603,7 @@ static void *t_start(struct seq_file *m, loff_t *pos) * off, we can short cut and just print out that all * functions are enabled. */ - if (iter->flags & FTRACE_ITER_FILTER && !ftrace_filtered) { + if (iter->flags & FTRACE_ITER_FILTER && !filter_hash.count) { if (*pos > 0) return t_hash_start(m, pos); iter->flags |= FTRACE_ITER_PRINTALL; @@ -1695,24 +1700,16 @@ ftrace_avail_open(struct inode *inode, struct file *file) return ret; } -static void ftrace_filter_reset(int enable) +static void ftrace_filter_reset(struct ftrace_hash *hash) { - struct ftrace_page *pg; - struct dyn_ftrace *rec; - mutex_lock(&ftrace_lock); - if (enable) { ftrace_filtered = 0; - do_for_each_ftrace_rec(pg, rec) { - rec->flags &= ~FTRACE_FL_FILTER; - } while_for_each_ftrace_rec(); - } else - ftrace_hash_clear(&notrace_hash); + ftrace_hash_clear(hash); mutex_unlock(&ftrace_lock); } static int -ftrace_regex_open(struct inode *inode, struct file *file, int enable) +ftrace_regex_open(struct ftrace_hash *hash, int flag, + struct inode *inode, struct file *file) { struct ftrace_iterator *iter; int ret = 0; @@ -1729,15 +1726,16 @@ ftrace_regex_open(struct inode *inode, struct file *file, int enable) return -ENOMEM; } + iter->hash = hash; + mutex_lock(&ftrace_regex_lock); if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) - ftrace_filter_reset(enable); + ftrace_filter_reset(hash); if (file->f_mode & FMODE_READ) { iter->pg = ftrace_pages_start; - iter->flags = enable ?
FTRACE_ITER_FILTER : - FTRACE_ITER_NOTRACE; + iter->flags = flag; ret = seq_open(file, &show_ftrace_seq_ops); if (!ret) { @@ -1757,13 +1755,15 @@ ftrace_regex_open(struct ftrace_hash *hash, int flag, static int ftrace_filter_open(struct inode *inode, struct file *file) { - return ftrace_regex_open(inode, file, 1); + return ftrace_regex_open(&filter_hash, FTRACE_ITER_FILTER, + inode, file); } static int ftrace_notrace_open(struct inode *inode, struct file *file) { - return ftrace_regex_open(inode, file, 0); + return ftrace_regex_open(&notrace_hash, FTRACE_ITER_NOTRACE, + inode, file); } static loff_t @@ -1808,33 +1808,24 @@ static int ftrace_match(char *str, char *regex, int len, int type) } static int -update_record(struct dyn_ftrace *rec, int enable, int not) +enter_record(struct ftrace_hash *hash, struct dyn_ftrace *rec, int not) { struct ftrace_func_entry *entry; - struct ftrace_hash *hash = &notrace_hash; int ret = 0; - if (enable) { - if (not) - rec->flags &= ~FTRACE_FL_FILTER; - else - rec->flags |= FTRACE_FL_FILTER; - } else { - if (not) { - /* Do nothing if it doesn't exist */ - entry = ftrace_lookup_ip(hash, rec->ip); - if (!entry) - return 0; + entry = ftrace_lookup_ip(hash, rec->ip); + if (not) { + /* Do nothing if it doesn't exist */ + if (!entry) + return 0; - remove_hash_entry(hash, entry); - } else { - /* Do nothing if it exists */ - entry = ftrace_lookup_ip(hash, rec->ip); - if (entry) - return 0; + remove_hash_entry(hash, entry); + } else { + /* Do nothing if it exists */ + if (entry) + return 0; - ret = add_hash_entry(hash, rec->ip); - } + ret = add_hash_entry(hash, rec->ip); } return ret; } @@ -1861,7 +1852,9 @@ ftrace_match_record(struct dyn_ftrace *rec, char *mod, return ftrace_match(str, regex, len, type); } -static int match_records(char *buff, int len, char *mod, int enable, int not) +static int +match_records(struct ftrace_hash *hash, char *buff, + int len, char *mod, int not) { unsigned search_len = 0; struct ftrace_page *pg; @@ -1884,20 +1877,13 @@ static int match_records(char *buff, int len, char *mod, int enable, int not) do_for_each_ftrace_rec(pg, rec) { if (ftrace_match_record(rec, mod, search, search_len, type)) { - ret = update_record(rec, enable, not); + ret = enter_record(hash, rec, not); if (ret < 0) { found = ret; goto out_unlock; } found = 1; } - /* - * Only enable filtering if we have a function that - * is filtered on.
- */ - if (enable && (rec->flags & FTRACE_FL_FILTER)) - ftrace_filtered = 1; - } while_for_each_ftrace_rec(); out_unlock: mutex_unlock(&ftrace_lock); @@ -1906,12 +1892,13 @@ static int match_records(char *buff, int len, char *mod, int enable, int not) } static int -ftrace_match_records(char *buff, int len, int enable) +ftrace_match_records(struct ftrace_hash *hash, char *buff, int len) { - return match_records(buff, len, NULL, enable, 0); + return match_records(hash, buff, len, NULL, 0); } -static int ftrace_match_module_records(char *buff, char *mod, int enable) +static int +ftrace_match_module_records(struct ftrace_hash *hash, char *buff, char *mod) { int not = 0; @@ -1925,7 +1912,7 @@ static int ftrace_match_module_records(char *buff, char *mod, int enable) not = 1; } - return match_records(buff, strlen(buff), mod, enable, not); + return match_records(hash, buff, strlen(buff), mod, not); } /* @@ -1936,6 +1923,7 @@ static int ftrace_match_module_records(char *buff, char *mod, int enable) static int ftrace_mod_callback(char *func, char *cmd, char *param, int enable) { + struct ftrace_hash *hash; char *mod; int ret = -EINVAL; @@ -1955,7 +1943,12 @@ ftrace_mod_callback(char *func, char *cmd, char *param, int enable) if (!strlen(mod)) return ret; - ret = ftrace_match_module_records(func, mod, enable); + if (enable) + hash = &filter_hash; + else + hash = &notrace_hash; + + ret = ftrace_match_module_records(hash, func, mod); if (!ret) ret = -EINVAL; if (ret < 0) @@ -2253,12 +2246,18 @@ static int ftrace_process_regex(char *buff, int len, int enable) { char *func, *command, *next = buff; struct ftrace_func_command *p; + struct ftrace_hash *hash; int ret; + if (enable) + hash = &filter_hash; + else + hash = &notrace_hash; + func = strsep(&next, ":"); if (!next) { - ret = ftrace_match_records(func, len, enable); + ret = ftrace_match_records(hash, func, len); if (!ret) ret = -EINVAL; if (ret < 0) @@ -2340,16 +2339,16 @@ ftrace_notrace_write(struct file *file, const char __user *ubuf, } static void -ftrace_set_regex(unsigned char *buf, int len, int reset, int enable) +ftrace_set_regex(struct ftrace_hash *hash, unsigned char *buf, int len, int reset) { if (unlikely(ftrace_disabled)) return; mutex_lock(&ftrace_regex_lock); if (reset) - ftrace_filter_reset(enable); + ftrace_filter_reset(hash); if (buf) - ftrace_match_records(buf, len, enable); + ftrace_match_records(hash, buf, len); mutex_unlock(&ftrace_regex_lock); } @@ -2364,7 +2363,7 @@ ftrace_set_regex(unsigned char *buf, int len, int reset, int enable) */ void ftrace_set_filter(unsigned char *buf, int len, int reset) { - ftrace_set_regex(buf, len, reset, 1); + ftrace_set_regex(&filter_hash, buf, len, reset); } /** @@ -2379,7 +2378,7 @@ void ftrace_set_filter(unsigned char *buf, int len, int reset) */ void ftrace_set_notrace(unsigned char *buf, int len, int reset) { - ftrace_set_regex(buf, len, reset, 0); + ftrace_set_regex(&notrace_hash, buf, len, reset); } /* @@ -2431,22 +2430,22 @@ static void __init set_ftrace_early_graph(char *buf) } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -static void __init set_ftrace_early_filter(char *buf, int enable) +static void __init set_ftrace_early_filter(struct ftrace_hash *hash, char *buf) { char *func; while (buf) { func = strsep(&buf, ","); - ftrace_set_regex(func, strlen(func), 0, enable); + ftrace_set_regex(hash, func, strlen(func), 0); } } static void __init set_ftrace_early_filters(void) { if (ftrace_filter_buf[0]) - set_ftrace_early_filter(ftrace_filter_buf, 1); + set_ftrace_early_filter(&filter_hash,
ftrace_filter_buf); if (ftrace_notrace_buf[0]) - set_ftrace_early_filter(ftrace_notrace_buf, 0); + set_ftrace_early_filter(&notrace_hash, ftrace_notrace_buf); #ifdef CONFIG_FUNCTION_GRAPH_TRACER if (ftrace_graph_buf[0]) set_ftrace_early_graph(ftrace_graph_buf); @@ -2454,7 +2453,7 @@ static void __init set_ftrace_early_filters(void) } static int -ftrace_regex_release(struct inode *inode, struct file *file, int enable) +ftrace_regex_release(struct inode *inode, struct file *file) { struct seq_file *m = (struct seq_file *)file->private_data; struct ftrace_iterator *iter; @@ -2471,7 +2470,7 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable) parser = &iter->parser; if (trace_parser_loaded(parser)) { parser->buffer[parser->idx] = 0; - ftrace_match_records(parser->buffer, parser->idx, enable); + ftrace_match_records(iter->hash, parser->buffer, parser->idx); } trace_parser_put(parser); @@ -2488,18 +2487,6 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable) return 0; } -static int -ftrace_filter_release(struct inode *inode, struct file *file) -{ - return ftrace_regex_release(inode, file, 1); -} - -static int -ftrace_notrace_release(struct inode *inode, struct file *file) -{ - return ftrace_regex_release(inode, file, 0); -} - static const struct file_operations ftrace_avail_fops = { .open = ftrace_avail_open, .read = seq_read, @@ -2512,7 +2499,7 @@ static const struct file_operations ftrace_filter_fops = { .read = seq_read, .write = ftrace_filter_write, .llseek = ftrace_regex_lseek, - .release = ftrace_filter_release, + .release = ftrace_regex_release, }; static const struct file_operations ftrace_notrace_fops = { .read = seq_read, .write = ftrace_notrace_write, .llseek = ftrace_regex_lseek, - .release = ftrace_notrace_release, + .release = ftrace_regex_release, }; #ifdef CONFIG_FUNCTION_GRAPH_TRACER -- cgit v1.2.3-71-gd317 From f45948e898e7bc76a73a468796d2ce80dd040058 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 2 May 2011 12:29:25 -0400 Subject: ftrace: Create a global_ops to hold the filter and notrace hashes Combine the filter and notrace hashes to be accessed by a single entity, the global_ops. The global_ops is an ftrace_ops structure that is passed to the different functions that can read or modify the filtering of the function tracer. The ftrace_ops structure was modified to hold filter and notrace hashes so that later patches may allow each ftrace_ops to have its own set of rules as to which functions may be filtered.
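A rough userspace sketch of the global_ops idea, with hypothetical demo_* names, is simply a structure that bundles the two hashes so every helper reaches them through one handle:

#include <stdio.h>

struct demo_hash { int count; };

struct demo_ops {
	struct demo_hash *filter_hash;
	struct demo_hash *notrace_hash;
};

static struct demo_hash filter_set, notrace_set;
static struct demo_ops global_demo_ops = {
	.filter_hash = &filter_set,
	.notrace_hash = &notrace_set,
};

/* helpers stop reaching for file-scope hashes and go through the ops */
static struct demo_hash *pick_hash(struct demo_ops *ops, int enable)
{
	return enable ? ops->filter_hash : ops->notrace_hash;
}

int main(void)
{
	printf("%s\n", pick_hash(&global_demo_ops, 1) == &filter_set ? "filter" : "notrace");
	return 0;
}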
Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 10 ++++++-- kernel/trace/ftrace.c | 65 +++++++++++++++++++++++++++++++++++--------------- 2 files changed, 54 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 52fc5d4e6edd..6658a51390fe 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -29,9 +29,15 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip); +struct ftrace_hash; + struct ftrace_ops { - ftrace_func_t func; - struct ftrace_ops *next; + ftrace_func_t func; + struct ftrace_ops *next; +#ifdef CONFIG_DYNAMIC_FTRACE + struct ftrace_hash *notrace_hash; + struct ftrace_hash *filter_hash; +#endif }; extern int function_trace_stop; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 222eca4c3022..a517a6c40645 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -889,6 +889,12 @@ static struct ftrace_hash filter_hash = { .buckets = filter_buckets, }; +struct ftrace_ops global_ops = { + .func = ftrace_stub, + .notrace_hash = &notrace_hash, + .filter_hash = &filter_hash, +}; + static struct dyn_ftrace *ftrace_new_addrs; static DEFINE_MUTEX(ftrace_regex_lock); @@ -1112,6 +1118,7 @@ int ftrace_text_reserved(void *start, void *end) static int __ftrace_replace_code(struct dyn_ftrace *rec, int enable) { + struct ftrace_ops *ops = &global_ops; unsigned long ftrace_addr; unsigned long flag = 0UL; @@ -1126,8 +1133,9 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable) * If we want to enable it and filtering is on, enable it only if * it's filtered */ - if (enable && !ftrace_lookup_ip(&notrace_hash, rec->ip)) { - if (!filter_hash.count || ftrace_lookup_ip(&filter_hash, rec->ip)) + if (enable && !ftrace_lookup_ip(ops->notrace_hash, rec->ip)) { + if (!ops->filter_hash->count || + ftrace_lookup_ip(ops->filter_hash, rec->ip)) flag = FTRACE_FL_ENABLED; } @@ -1531,6 +1539,7 @@ static void * t_next(struct seq_file *m, void *v, loff_t *pos) { struct ftrace_iterator *iter = m->private; + struct ftrace_ops *ops = &global_ops; struct dyn_ftrace *rec = NULL; if (unlikely(ftrace_disabled)) @@ -1557,10 +1566,10 @@ t_next(struct seq_file *m, void *v, loff_t *pos) if ((rec->flags & FTRACE_FL_FREE) || ((iter->flags & FTRACE_ITER_FILTER) && - !(ftrace_lookup_ip(&filter_hash, rec->ip))) || + !(ftrace_lookup_ip(ops->filter_hash, rec->ip))) || ((iter->flags & FTRACE_ITER_NOTRACE) && - !ftrace_lookup_ip(&notrace_hash, rec->ip))) { + !ftrace_lookup_ip(ops->notrace_hash, rec->ip))) { rec = NULL; goto retry; } @@ -1584,6 +1593,7 @@ static void reset_iter_read(struct ftrace_iterator *iter) static void *t_start(struct seq_file *m, loff_t *pos) { struct ftrace_iterator *iter = m->private; + struct ftrace_ops *ops = &global_ops; void *p = NULL; loff_t l; @@ -1603,7 +1613,7 @@ static void *t_start(struct seq_file *m, loff_t *pos) * off, we can short cut and just print out that all * functions are enabled.
*/ - if (iter->flags & FTRACE_ITER_FILTER && !filter_hash.count) { + if (iter->flags & FTRACE_ITER_FILTER && !ops->filter_hash->count) { if (*pos > 0) return t_hash_start(m, pos); iter->flags |= FTRACE_ITER_PRINTALL; @@ -1708,10 +1718,11 @@ static void ftrace_filter_reset(struct ftrace_hash *hash) } static int -ftrace_regex_open(struct ftrace_hash *hash, int flag, +ftrace_regex_open(struct ftrace_ops *ops, int flag, struct inode *inode, struct file *file) { struct ftrace_iterator *iter; + struct ftrace_hash *hash; int ret = 0; if (unlikely(ftrace_disabled)) @@ -1726,6 +1737,11 @@ ftrace_regex_open(struct ftrace_hash *hash, int flag, return -ENOMEM; } + if (flag & FTRACE_ITER_NOTRACE) + hash = ops->notrace_hash; + else + hash = ops->filter_hash; + iter->hash = hash; mutex_lock(&ftrace_regex_lock); @@ -1755,14 +1771,14 @@ ftrace_regex_open(struct ftrace_hash *hash, int flag, static int ftrace_filter_open(struct inode *inode, struct file *file) { - return ftrace_regex_open(&filter_hash, FTRACE_ITER_FILTER, + return ftrace_regex_open(&global_ops, FTRACE_ITER_FILTER, inode, file); } static int ftrace_notrace_open(struct inode *inode, struct file *file) { - return ftrace_regex_open(&notrace_hash, FTRACE_ITER_NOTRACE, + return ftrace_regex_open(&global_ops, FTRACE_ITER_NOTRACE, inode, file); } @@ -1923,6 +1939,7 @@ ftrace_match_module_records(struct ftrace_hash *hash, char *buff, char *mod) static int ftrace_mod_callback(char *func, char *cmd, char *param, int enable) { + struct ftrace_ops *ops = &global_ops; struct ftrace_hash *hash; char *mod; int ret = -EINVAL; @@ -1944,9 +1961,9 @@ ftrace_mod_callback(char *func, char *cmd, char *param, int enable) return ret; if (enable) - hash = &filter_hash; + hash = ops->filter_hash; else - hash = &notrace_hash; + hash = ops->notrace_hash; ret = ftrace_match_module_records(hash, func, mod); if (!ret) @@ -2245,14 +2262,15 @@ int unregister_ftrace_command(struct ftrace_func_command *cmd) static int ftrace_process_regex(char *buff, int len, int enable) { char *func, *command, *next = buff; + struct ftrace_ops *ops = &global_ops; struct ftrace_func_command *p; struct ftrace_hash *hash; int ret; if (enable) - hash = &filter_hash; + hash = ops->filter_hash; else - hash = &notrace_hash; + hash = ops->notrace_hash; func = strsep(&next, ":"); @@ -2339,11 +2357,19 @@ ftrace_notrace_write(struct file *file, const char __user *ubuf, } static void -ftrace_set_regex(struct ftrace_hash *hash, unsigned char *buf, int len, int reset) +ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len, + int reset, int enable) { + struct ftrace_hash *hash; + if (unlikely(ftrace_disabled)) return; + if (enable) + hash = ops->filter_hash; + else + hash = ops->notrace_hash; + mutex_lock(&ftrace_regex_lock); if (reset) ftrace_filter_reset(hash); @@ -2363,7 +2389,7 @@ ftrace_set_regex(struct ftrace_hash *hash, unsigned char *buf, int len, int rese */ void ftrace_set_filter(unsigned char *buf, int len, int reset) { - ftrace_set_regex(&filter_hash, buf, len, reset); + ftrace_set_regex(&global_ops, buf, len, reset, 1); } /** @@ -2378,7 +2404,7 @@ void ftrace_set_filter(unsigned char *buf, int len, int reset) */ void ftrace_set_notrace(unsigned char *buf, int len, int reset) { - ftrace_set_regex(&notrace_hash, buf, len, reset); + ftrace_set_regex(&global_ops, buf, len, reset, 0); } /* @@ -2430,22 +2456,23 @@ static void __init set_ftrace_early_graph(char *buf) } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -static void __init set_ftrace_early_filter(struct ftrace_hash *hash, char *buf) +static
void __init +set_ftrace_early_filter(struct ftrace_ops *ops, char *buf, int enable) { char *func; while (buf) { func = strsep(&buf, ","); - ftrace_set_regex(hash, func, strlen(func), 0); + ftrace_set_regex(ops, func, strlen(func), 0, enable); } } static void __init set_ftrace_early_filters(void) { if (ftrace_filter_buf[0]) - set_ftrace_early_filter(&filter_hash, ftrace_filter_buf); + set_ftrace_early_filter(&global_ops, ftrace_filter_buf, 1); if (ftrace_notrace_buf[0]) - set_ftrace_early_filter(&notrace_hash, ftrace_notrace_buf); + set_ftrace_early_filter(&global_ops, ftrace_notrace_buf, 0); #ifdef CONFIG_FUNCTION_GRAPH_TRACER if (ftrace_graph_buf[0]) set_ftrace_early_graph(ftrace_graph_buf); -- cgit v1.2.3-71-gd317 From ed926f9b35cda0988234c356e16a7cb30f4e5338 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 3 May 2011 13:25:24 -0400 Subject: ftrace: Use counters to enable functions to trace Every function has its own record that stores the instruction pointer and flags for the function to be traced. There are only two flags: enabled and free. The enabled flag states that tracing for the function has been enabled (actively traced), and the free flag states that the record no longer points to a function and can be used by new functions (loaded modules). These flags are now moved to the most significant bits of the flags field (the top two bits of the 32-bit value). The rest of the bits (30 bits) are now used as a ref counter. Every time a tracer registers functions to trace, those functions have their counters incremented. When tracing is enabled, to determine if a function should be traced, the counter is examined, and if it is non-zero it is set to trace. When an ftrace_ops is registered to trace functions, its hashes are examined. If the ftrace_ops filter_hash count is zero, then all functions are set to be traced, otherwise only the functions in the hash are to be traced. The exception to this is if a function is also in the ftrace_ops notrace_hash. Then that function's counter is not incremented for this ftrace_ops.
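The resulting flags layout can be modelled with a short standalone C program that reuses the constants this patch defines; this is an editorial sketch of the bit arithmetic only, not kernel code:

#include <stdio.h>

#define FL_ENABLED	(1UL << 30)
#define FL_FREE		(1UL << 31)
#define FL_MASK		(0x3UL << 30)		/* the two flag bits */
#define REF_MAX		((1UL << 30) - 1)	/* low 30 bits hold the ref count */

static unsigned long flags;

static int ref_inc(void)
{
	if ((flags & ~FL_MASK) == REF_MAX)
		return -1;	/* counter would overflow into the flag bits */
	flags++;
	return 0;
}

static void ref_dec(void)
{
	if (flags & ~FL_MASK)
		flags--;
}

int main(void)
{
	ref_inc();
	ref_inc();
	/* a record is a candidate for tracing whenever its ref count is non-zero */
	printf("refs=%lu trace=%d\n", flags & ~FL_MASK, (flags & ~FL_MASK) != 0);
	ref_dec();
	ref_dec();
	printf("refs=%lu trace=%d\n", flags & ~FL_MASK, (flags & ~FL_MASK) != 0);
	return 0;
}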
Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 8 ++- kernel/trace/ftrace.c | 158 ++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 148 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 6658a51390fe..ab1c46e70bb6 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -37,6 +37,7 @@ struct ftrace_ops { #ifdef CONFIG_DYNAMIC_FTRACE struct ftrace_hash *notrace_hash; struct ftrace_hash *filter_hash; + unsigned long flags; #endif }; @@ -152,10 +153,13 @@ extern void unregister_ftrace_function_probe_all(char *glob); extern int ftrace_text_reserved(void *start, void *end); enum { - FTRACE_FL_FREE = (1 << 0), - FTRACE_FL_ENABLED = (1 << 1), + FTRACE_FL_ENABLED = (1 << 30), + FTRACE_FL_FREE = (1 << 31), }; +#define FTRACE_FL_MASK (0x3UL << 30) +#define FTRACE_REF_MAX ((1 << 30) - 1) + struct dyn_ftrace { union { unsigned long ip; /* address of mcount call-site */ diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 46f08264980b..5dd332cc5aa8 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -890,6 +890,10 @@ static const struct ftrace_hash empty_hash = { }; #define EMPTY_HASH ((struct ftrace_hash *)&empty_hash) +enum { + FTRACE_OPS_FL_ENABLED = 1, +}; + struct ftrace_ops global_ops = { .func = ftrace_stub, .notrace_hash = EMPTY_HASH, @@ -1161,6 +1165,105 @@ ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src) } \ } +static void __ftrace_hash_rec_update(struct ftrace_ops *ops, + int filter_hash, + bool inc) +{ + struct ftrace_hash *hash; + struct ftrace_hash *other_hash; + struct ftrace_page *pg; + struct dyn_ftrace *rec; + int count = 0; + int all = 0; + + /* Only update if the ops has been registered */ + if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) + return; + + /* + * In the filter_hash case: + * If the count is zero, we update all records. + * Otherwise we just update the items in the hash. + * + * In the notrace_hash case: + * We enable the update in the hash. + * As disabling notrace means enabling the tracing, + * and enabling notrace means disabling, the inc variable + * gets inversed. + */ + if (filter_hash) { + hash = ops->filter_hash; + other_hash = ops->notrace_hash; + if (!hash->count) + all = 1; + } else { + inc = !inc; + hash = ops->notrace_hash; + other_hash = ops->filter_hash; + /* + * If the notrace hash has no items, + * then there's nothing to do. + */ + if (!hash->count) + return; + } + + do_for_each_ftrace_rec(pg, rec) { + int in_other_hash = 0; + int in_hash = 0; + int match = 0; + + if (all) { + /* + * Only the filter_hash affects all records. + * Update if the record is not in the notrace hash. + */ + if (!ftrace_lookup_ip(other_hash, rec->ip)) + match = 1; + } else { + in_hash = !!ftrace_lookup_ip(hash, rec->ip); + in_other_hash = !!ftrace_lookup_ip(other_hash, rec->ip); + + /* + * + */ + if (filter_hash && in_hash && !in_other_hash) + match = 1; + else if (!filter_hash && in_hash && + (in_other_hash || !other_hash->count)) + match = 1; + } + if (!match) + continue; + + if (inc) { + rec->flags++; + if (FTRACE_WARN_ON((rec->flags & ~FTRACE_FL_MASK) == FTRACE_REF_MAX)) + return; + } else { + if (FTRACE_WARN_ON((rec->flags & ~FTRACE_FL_MASK) == 0)) + return; + rec->flags--; + } + count++; + /* Shortcut, if we handled all records, we are done. 
*/ + if (!all && count == hash->count) + return; + } while_for_each_ftrace_rec(); +} + +static void ftrace_hash_rec_disable(struct ftrace_ops *ops, + int filter_hash) +{ + __ftrace_hash_rec_update(ops, filter_hash, 0); +} + +static void ftrace_hash_rec_enable(struct ftrace_ops *ops, + int filter_hash) +{ + __ftrace_hash_rec_update(ops, filter_hash, 1); +} + static void ftrace_free_rec(struct dyn_ftrace *rec) { rec->freelist = ftrace_free_records; @@ -1276,26 +1379,24 @@ int ftrace_text_reserved(void *start, void *end) static int __ftrace_replace_code(struct dyn_ftrace *rec, int enable) { - struct ftrace_ops *ops = &global_ops; unsigned long ftrace_addr; unsigned long flag = 0UL; ftrace_addr = (unsigned long)FTRACE_ADDR; /* - * If this record is not to be traced or we want to disable it, - * then disable it. + * If we are enabling tracing: * - * If we want to enable it and filtering is off, then enable it. + * If the record has a ref count, then we need to enable it + * because someone is using it. * - * If we want to enable it and filtering is on, enable it only if - * it's filtered + * Otherwise we make sure its disabled. + * + * If we are disabling tracing, then disable all records that + * are enabled. */ - if (enable && !ftrace_lookup_ip(ops->notrace_hash, rec->ip)) { - if (!ops->filter_hash->count || - ftrace_lookup_ip(ops->filter_hash, rec->ip)) - flag = FTRACE_FL_ENABLED; - } + if (enable && (rec->flags & ~FTRACE_FL_MASK)) + flag = FTRACE_FL_ENABLED; /* If the state of this record hasn't changed, then do nothing */ if ((rec->flags & FTRACE_FL_ENABLED) == flag) @@ -1423,17 +1524,25 @@ static void ftrace_startup_enable(int command) static void ftrace_startup(int command) { + struct ftrace_ops *ops = &global_ops; + if (unlikely(ftrace_disabled)) return; ftrace_start_up++; command |= FTRACE_ENABLE_CALLS; + ops->flags |= FTRACE_OPS_FL_ENABLED; + if (ftrace_start_up == 1) + ftrace_hash_rec_enable(ops, 1); + ftrace_startup_enable(command); } static void ftrace_shutdown(int command) { + struct ftrace_ops *ops = &global_ops; + if (unlikely(ftrace_disabled)) return; @@ -1446,7 +1555,12 @@ static void ftrace_shutdown(int command) WARN_ON_ONCE(ftrace_start_up < 0); if (!ftrace_start_up) + ftrace_hash_rec_disable(ops, 1); + + if (!ftrace_start_up) { command |= FTRACE_DISABLE_CALLS; + ops->flags &= ~FTRACE_OPS_FL_ENABLED; + } if (saved_ftrace_func != ftrace_trace_function) { saved_ftrace_func = ftrace_trace_function; @@ -2668,6 +2782,7 @@ ftrace_regex_release(struct inode *inode, struct file *file) struct ftrace_iterator *iter; struct ftrace_hash **orig_hash; struct trace_parser *parser; + int filter_hash; int ret; mutex_lock(&ftrace_regex_lock); @@ -2687,15 +2802,26 @@ ftrace_regex_release(struct inode *inode, struct file *file) trace_parser_put(parser); if (file->f_mode & FMODE_WRITE) { - if (iter->flags & FTRACE_ITER_NOTRACE) - orig_hash = &iter->ops->notrace_hash; - else + filter_hash = !!(iter->flags & FTRACE_ITER_FILTER); + + if (filter_hash) orig_hash = &iter->ops->filter_hash; + else + orig_hash = &iter->ops->notrace_hash; mutex_lock(&ftrace_lock); + /* + * Remove the current set, update the hash and add + * them back. 
+ */ + ftrace_hash_rec_disable(iter->ops, filter_hash); ret = ftrace_hash_move(orig_hash, iter->hash); - if (!ret && ftrace_start_up && ftrace_enabled) - ftrace_run_update_code(FTRACE_ENABLE_CALLS); + if (!ret) { + ftrace_hash_rec_enable(iter->ops, filter_hash); + if (iter->ops->flags & FTRACE_OPS_FL_ENABLED + && ftrace_enabled) + ftrace_run_update_code(FTRACE_ENABLE_CALLS); + } mutex_unlock(&ftrace_lock); } free_ftrace_hash(iter->hash); -- cgit v1.2.3-71-gd317 From b848914ce39589d89ee0078a6d1ef452b464729e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 4 May 2011 09:27:52 -0400 Subject: ftrace: Implement separate user function filtering ftrace_ops that are registered to trace functions can now be agnostic to each other in respect to what functions they trace. Each ops has their own hash of the functions they want to trace and a hash to what they do not want to trace. A empty hash for the functions they want to trace denotes all functions should be traced that are not in the notrace hash. Cc: Paul E. McKenney Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 7 +- kernel/trace/ftrace.c | 193 ++++++++++++++++++++++++++++++-------- kernel/trace/trace_functions.c | 2 + kernel/trace/trace_irqsoff.c | 1 + kernel/trace/trace_sched_wakeup.c | 1 + kernel/trace/trace_stack.c | 1 + 6 files changed, 166 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index ab1c46e70bb6..4609c0ece79a 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -31,13 +31,18 @@ typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip); struct ftrace_hash; +enum { + FTRACE_OPS_FL_ENABLED = 1 << 0, + FTRACE_OPS_FL_GLOBAL = 1 << 1, +}; + struct ftrace_ops { ftrace_func_t func; struct ftrace_ops *next; + unsigned long flags; #ifdef CONFIG_DYNAMIC_FTRACE struct ftrace_hash *notrace_hash; struct ftrace_hash *filter_hash; - unsigned long flags; #endif }; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 92b6fdf49ae5..6c7e1df39b57 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -87,24 +87,29 @@ static struct ftrace_ops ftrace_list_end __read_mostly = .func = ftrace_stub, }; -static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end; +static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end; +static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end; ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub; ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; static struct ftrace_ops global_ops; +static void +ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip); + /* - * Traverse the ftrace_list, invoking all entries. The reason that we + * Traverse the ftrace_global_list, invoking all entries. The reason that we * can use rcu_dereference_raw() is that elements removed from this list * are simply leaked, so there is no need to interact with a grace-period * mechanism. The rcu_dereference_raw() calls are needed to handle - * concurrent insertions into the ftrace_list. + * concurrent insertions into the ftrace_global_list. * * Silly Alpha and silly pointer-speculation compiler optimizations! 
*/ -static void ftrace_list_func(unsigned long ip, unsigned long parent_ip) +static void ftrace_global_list_func(unsigned long ip, + unsigned long parent_ip) { - struct ftrace_ops *op = rcu_dereference_raw(ftrace_list); /*see above*/ + struct ftrace_ops *op = rcu_dereference_raw(ftrace_global_list); /*see above*/ while (op != &ftrace_list_end) { op->func(ip, parent_ip); @@ -163,11 +168,11 @@ static void update_global_ops(void) * function directly. Otherwise, we need to iterate over the * registered callers. */ - if (ftrace_list == &ftrace_list_end || - ftrace_list->next == &ftrace_list_end) - func = ftrace_list->func; + if (ftrace_global_list == &ftrace_list_end || + ftrace_global_list->next == &ftrace_list_end) + func = ftrace_global_list->func; else - func = ftrace_list_func; + func = ftrace_global_list_func; /* If we filter on pids, update to use the pid function */ if (!list_empty(&ftrace_pids)) { @@ -184,7 +189,11 @@ static void update_ftrace_function(void) update_global_ops(); - func = global_ops.func; + if (ftrace_ops_list == &ftrace_list_end || + ftrace_ops_list->next == &ftrace_list_end) + func = ftrace_ops_list->func; + else + func = ftrace_ops_list_func; #ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST ftrace_trace_function = func; @@ -198,10 +207,10 @@ static void add_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops) { ops->next = *list; /* - * We are entering ops into the ftrace_list but another + * We are entering ops into the list but another * CPU might be walking that list. We need to make sure * the ops->next pointer is valid before another CPU sees - * the ops pointer included into the ftrace_list. + * the ops pointer included into the list. */ rcu_assign_pointer(*list, ops); } @@ -238,7 +247,18 @@ static int __register_ftrace_function(struct ftrace_ops *ops) if (FTRACE_WARN_ON(ops == &global_ops)) return -EINVAL; - add_ftrace_ops(&ftrace_list, ops); + if (WARN_ON(ops->flags & FTRACE_OPS_FL_ENABLED)) + return -EBUSY; + + if (ops->flags & FTRACE_OPS_FL_GLOBAL) { + int first = ftrace_global_list == &ftrace_list_end; + add_ftrace_ops(&ftrace_global_list, ops); + ops->flags |= FTRACE_OPS_FL_ENABLED; + if (first) + add_ftrace_ops(&ftrace_ops_list, &global_ops); + } else + add_ftrace_ops(&ftrace_ops_list, ops); + if (ftrace_enabled) update_ftrace_function(); @@ -252,12 +272,24 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) if (ftrace_disabled) return -ENODEV; + if (WARN_ON(!(ops->flags & FTRACE_OPS_FL_ENABLED))) + return -EBUSY; + if (FTRACE_WARN_ON(ops == &global_ops)) return -EINVAL; - ret = remove_ftrace_ops(&ftrace_list, ops); + if (ops->flags & FTRACE_OPS_FL_GLOBAL) { + ret = remove_ftrace_ops(&ftrace_global_list, ops); + if (!ret && ftrace_global_list == &ftrace_list_end) + ret = remove_ftrace_ops(&ftrace_ops_list, &global_ops); + if (!ret) + ops->flags &= ~FTRACE_OPS_FL_ENABLED; + } else + ret = remove_ftrace_ops(&ftrace_ops_list, ops); + if (ret < 0) return ret; + if (ftrace_enabled) update_ftrace_function(); @@ -928,10 +960,6 @@ static const struct ftrace_hash empty_hash = { }; #define EMPTY_HASH ((struct ftrace_hash *)&empty_hash) -enum { - FTRACE_OPS_FL_ENABLED = 1, -}; - static struct ftrace_ops global_ops = { .func = ftrace_stub, .notrace_hash = EMPTY_HASH, @@ -1189,6 +1217,40 @@ ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src) return 0; } +/* + * Test the hashes for this ops to see if we want to call + * the ops->func or not. 
+ * + * It's a match if the ip is in the ops->filter_hash or + * the filter_hash does not exist or is empty, + * AND + * the ip is not in the ops->notrace_hash. + */ +static int +ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip) +{ + struct ftrace_hash *filter_hash; + struct ftrace_hash *notrace_hash; + int ret; + + /* The hashes are freed with call_rcu_sched() */ + preempt_disable_notrace(); + + filter_hash = rcu_dereference_raw(ops->filter_hash); + notrace_hash = rcu_dereference_raw(ops->notrace_hash); + + if ((!filter_hash || !filter_hash->count || + ftrace_lookup_ip(filter_hash, ip)) && + (!notrace_hash || !notrace_hash->count || + !ftrace_lookup_ip(notrace_hash, ip))) + ret = 1; + else + ret = 0; + preempt_enable_notrace(); + + return ret; +} + /* * This is a double for. Do not use 'break' to break out of the loop, * you must use a goto. @@ -1232,7 +1294,7 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops, if (filter_hash) { hash = ops->filter_hash; other_hash = ops->notrace_hash; - if (!hash->count) + if (!hash || !hash->count) all = 1; } else { inc = !inc; @@ -1242,7 +1304,7 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops, * If the notrace hash has no items, * then there's nothing to do. */ - if (!hash->count) + if (hash && !hash->count) return; } @@ -1256,11 +1318,11 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops, * Only the filter_hash affects all records. * Update if the record is not in the notrace hash. */ - if (!ftrace_lookup_ip(other_hash, rec->ip)) + if (!other_hash || !ftrace_lookup_ip(other_hash, rec->ip)) match = 1; } else { - in_hash = !!ftrace_lookup_ip(hash, rec->ip); - in_other_hash = !!ftrace_lookup_ip(other_hash, rec->ip); + in_hash = hash && !!ftrace_lookup_ip(hash, rec->ip); + in_other_hash = other_hash && !!ftrace_lookup_ip(other_hash, rec->ip); /* * @@ -1546,6 +1608,7 @@ static void ftrace_run_update_code(int command) static ftrace_func_t saved_ftrace_func; static int ftrace_start_up; +static int global_start_up; static void ftrace_startup_enable(int command) { @@ -1562,14 +1625,25 @@ static void ftrace_startup_enable(int command) static void ftrace_startup(struct ftrace_ops *ops, int command) { + bool hash_enable = true; + if (unlikely(ftrace_disabled)) return; ftrace_start_up++; command |= FTRACE_ENABLE_CALLS; + /* ops marked global share the filter hashes */ + if (ops->flags & FTRACE_OPS_FL_GLOBAL) { + ops = &global_ops; + /* Don't update hash if global is already set */ + if (global_start_up) + hash_enable = false; + global_start_up++; + } + ops->flags |= FTRACE_OPS_FL_ENABLED; - if (ftrace_start_up == 1) + if (hash_enable) ftrace_hash_rec_enable(ops, 1); ftrace_startup_enable(command); @@ -1577,6 +1651,8 @@ static void ftrace_startup(struct ftrace_ops *ops, int command) static void ftrace_shutdown(struct ftrace_ops *ops, int command) { + bool hash_disable = true; + if (unlikely(ftrace_disabled)) return; @@ -1588,13 +1664,25 @@ static void ftrace_shutdown(struct ftrace_ops *ops, int command) */ WARN_ON_ONCE(ftrace_start_up < 0); - if (!ftrace_start_up) + if (ops->flags & FTRACE_OPS_FL_GLOBAL) { + ops = &global_ops; + global_start_up--; + WARN_ON_ONCE(global_start_up < 0); + /* Don't update hash if global still has users */ + if (global_start_up) { + WARN_ON_ONCE(!ftrace_start_up); + hash_disable = false; + } + } + + if (hash_disable) ftrace_hash_rec_disable(ops, 1); - if (!ftrace_start_up) { - command |= FTRACE_DISABLE_CALLS; + if (ops != &global_ops || !global_start_up) ops->flags &= 
~FTRACE_OPS_FL_ENABLED; - } + + if (!ftrace_start_up) + command |= FTRACE_DISABLE_CALLS; if (saved_ftrace_func != ftrace_trace_function) { saved_ftrace_func = ftrace_trace_function; @@ -2381,6 +2469,7 @@ static int ftrace_probe_registered; static void __enable_ftrace_function_probe(void) { + int ret; int i; if (ftrace_probe_registered) @@ -2395,13 +2484,16 @@ static void __enable_ftrace_function_probe(void) if (i == FTRACE_FUNC_HASHSIZE) return; - __register_ftrace_function(&trace_probe_ops); - ftrace_startup(&global_ops, 0); + ret = __register_ftrace_function(&trace_probe_ops); + if (!ret) + ftrace_startup(&trace_probe_ops, 0); + ftrace_probe_registered = 1; } static void __disable_ftrace_function_probe(void) { + int ret; int i; if (!ftrace_probe_registered) @@ -2414,8 +2506,10 @@ static void __disable_ftrace_function_probe(void) } /* no more funcs left */ - __unregister_ftrace_function(&trace_probe_ops); - ftrace_shutdown(&global_ops, 0); + ret = __unregister_ftrace_function(&trace_probe_ops); + if (!ret) + ftrace_shutdown(&trace_probe_ops, 0); + ftrace_probe_registered = 0; } @@ -3319,8 +3413,28 @@ static inline void ftrace_startup_enable(int command) { } # define ftrace_shutdown(ops, command) do { } while (0) # define ftrace_startup_sysctl() do { } while (0) # define ftrace_shutdown_sysctl() do { } while (0) + +static inline int +ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip) +{ + return 1; +} + #endif /* CONFIG_DYNAMIC_FTRACE */ +static void +ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip) +{ + /* see comment above ftrace_global_list_func */ + struct ftrace_ops *op = rcu_dereference_raw(ftrace_ops_list); + + while (op != &ftrace_list_end) { + if (ftrace_ops_test(op, ip)) + op->func(ip, parent_ip); + op = rcu_dereference_raw(op->next); + }; +} + static void clear_ftrace_swapper(void) { struct task_struct *p; @@ -3621,7 +3735,9 @@ int register_ftrace_function(struct ftrace_ops *ops) goto out_unlock; ret = __register_ftrace_function(ops); - ftrace_startup(&global_ops, 0); + if (!ret) + ftrace_startup(ops, 0); + out_unlock: mutex_unlock(&ftrace_lock); @@ -3640,7 +3756,8 @@ int unregister_ftrace_function(struct ftrace_ops *ops) mutex_lock(&ftrace_lock); ret = __unregister_ftrace_function(ops); - ftrace_shutdown(&global_ops, 0); + if (!ret) + ftrace_shutdown(ops, 0); mutex_unlock(&ftrace_lock); return ret; @@ -3670,11 +3787,11 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, ftrace_startup_sysctl(); /* we are starting ftrace again */ - if (ftrace_list != &ftrace_list_end) { - if (ftrace_list->next == &ftrace_list_end) - ftrace_trace_function = ftrace_list->func; + if (ftrace_ops_list != &ftrace_list_end) { + if (ftrace_ops_list->next == &ftrace_list_end) + ftrace_trace_function = ftrace_ops_list->func; else - ftrace_trace_function = ftrace_list_func; + ftrace_trace_function = ftrace_ops_list_func; } } else { diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 16aee4d44e8f..8d0e1cc4e974 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -149,11 +149,13 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip) static struct ftrace_ops trace_ops __read_mostly = { .func = function_trace_call, + .flags = FTRACE_OPS_FL_GLOBAL, }; static struct ftrace_ops trace_stack_ops __read_mostly = { .func = function_stack_trace_call, + .flags = FTRACE_OPS_FL_GLOBAL, }; /* Our two options */ diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index a4969b47afc1..c77424be284d 
100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -153,6 +153,7 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip) static struct ftrace_ops trace_ops __read_mostly = { .func = irqsoff_tracer_call, + .flags = FTRACE_OPS_FL_GLOBAL, }; #endif /* CONFIG_FUNCTION_TRACER */ diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 7319559ed59f..f029dd4fd2ca 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -129,6 +129,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip) static struct ftrace_ops trace_ops __read_mostly = { .func = wakeup_tracer_call, + .flags = FTRACE_OPS_FL_GLOBAL, }; #endif /* CONFIG_FUNCTION_TRACER */ diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 4c5dead0c239..b0b53b8e4c25 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -133,6 +133,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip) static struct ftrace_ops trace_ops __read_mostly = { .func = stack_trace_call, + .flags = FTRACE_OPS_FL_GLOBAL, }; static ssize_t -- cgit v1.2.3-71-gd317 From cdbe61bfe70440939e457fb4a8d0995eaaed17de Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 5 May 2011 21:14:55 -0400 Subject: ftrace: Allow dynamically allocated function tracers Now that functions may be selected individually, it only makes sense that we should allow dynamically allocated trace structures to be traced. This will allow perf to allocate a ftrace_ops structure at runtime and use it to pick and choose which functions that structure will trace. Note, a dynamically allocated ftrace_ops will always be called indirectly instead of being called directly from the mcount in entry.S. This is because there's no safe way to prevent mcount from being preempted before calling the function, unless we modify every entry.S to do so (not likely). Thus, dynamically allocated functions will now be called by the ftrace_ops_list_func() that loops through the ops that are allocated if there are more than one op allocated at a time. This loop is protected with a preempt_disable. To determine if an ftrace_ops structure is allocated or not, a new util function was added to the kernel/extable.c called core_kernel_data(), which returns 1 if the address is between _sdata and _edata. Cc: Paul E. 
McKenney Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 1 + include/linux/kernel.h | 1 + kernel/extable.c | 8 ++++++++ kernel/trace/ftrace.c | 37 ++++++++++++++++++++++++++++++------- 4 files changed, 40 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 4609c0ece79a..caba694a62b6 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -34,6 +34,7 @@ struct ftrace_hash; enum { FTRACE_OPS_FL_ENABLED = 1 << 0, FTRACE_OPS_FL_GLOBAL = 1 << 1, + FTRACE_OPS_FL_DYNAMIC = 1 << 2, }; struct ftrace_ops { diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 00cec4dc0ae2..f37ba716ef8b 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -283,6 +283,7 @@ extern char *get_options(const char *str, int nints, int *ints); extern unsigned long long memparse(const char *ptr, char **retptr); extern int core_kernel_text(unsigned long addr); +extern int core_kernel_data(unsigned long addr); extern int __kernel_text_address(unsigned long addr); extern int kernel_text_address(unsigned long addr); extern int func_ptr_is_kernel_text(void *ptr); diff --git a/kernel/extable.c b/kernel/extable.c index 7f8f263f8524..c2d625fcda77 100644 --- a/kernel/extable.c +++ b/kernel/extable.c @@ -72,6 +72,14 @@ int core_kernel_text(unsigned long addr) return 0; } +int core_kernel_data(unsigned long addr) +{ + if (addr >= (unsigned long)_sdata && + addr < (unsigned long)_edata) + return 1; + return 0; +} + int __kernel_text_address(unsigned long addr) { if (core_kernel_text(addr)) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 6c7e1df39b57..5b3ee04e39d9 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -189,8 +189,14 @@ static void update_ftrace_function(void) update_global_ops(); + /* + * If we are at the end of the list and this ops is + * not dynamic, then have the mcount trampoline call + * the function directly + */ if (ftrace_ops_list == &ftrace_list_end || - ftrace_ops_list->next == &ftrace_list_end) + (ftrace_ops_list->next == &ftrace_list_end && + !(ftrace_ops_list->flags & FTRACE_OPS_FL_DYNAMIC))) func = ftrace_ops_list->func; else func = ftrace_ops_list_func; @@ -250,6 +256,9 @@ static int __register_ftrace_function(struct ftrace_ops *ops) if (WARN_ON(ops->flags & FTRACE_OPS_FL_ENABLED)) return -EBUSY; + if (!core_kernel_data((unsigned long)ops)) + ops->flags |= FTRACE_OPS_FL_DYNAMIC; + if (ops->flags & FTRACE_OPS_FL_GLOBAL) { int first = ftrace_global_list == &ftrace_list_end; add_ftrace_ops(&ftrace_global_list, ops); @@ -293,6 +302,13 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) if (ftrace_enabled) update_ftrace_function(); + /* + * Dynamic ops may be freed, we must make sure that all + * callers are done before leaving this function. + */ + if (ops->flags & FTRACE_OPS_FL_DYNAMIC) + synchronize_sched(); + return 0; } @@ -1225,6 +1241,9 @@ ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src) * the filter_hash does not exist or is empty, * AND * the ip is not in the ops->notrace_hash. + * + * This needs to be called with preemption disabled as + * the hashes are freed with call_rcu_sched(). 
*/ static int ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip) @@ -1233,9 +1252,6 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip) struct ftrace_hash *notrace_hash; int ret; - /* The hashes are freed with call_rcu_sched() */ - preempt_disable_notrace(); - filter_hash = rcu_dereference_raw(ops->filter_hash); notrace_hash = rcu_dereference_raw(ops->notrace_hash); @@ -1246,7 +1262,6 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip) ret = 1; else ret = 0; - preempt_enable_notrace(); return ret; } @@ -3425,14 +3440,20 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip) static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip) { - /* see comment above ftrace_global_list_func */ - struct ftrace_ops *op = rcu_dereference_raw(ftrace_ops_list); + struct ftrace_ops *op; + /* + * Some of the ops may be dynamically allocated, + * they must be freed after a synchronize_sched(). + */ + preempt_disable_notrace(); + op = rcu_dereference_raw(ftrace_ops_list); while (op != &ftrace_list_end) { if (ftrace_ops_test(op, ip)) op->func(ip, parent_ip); op = rcu_dereference_raw(op->next); }; + preempt_enable_notrace(); } static void clear_ftrace_swapper(void) @@ -3743,6 +3764,7 @@ int register_ftrace_function(struct ftrace_ops *ops) mutex_unlock(&ftrace_lock); return ret; } +EXPORT_SYMBOL_GPL(register_ftrace_function); /** * unregister_ftrace_function - unregister a function for profiling. @@ -3762,6 +3784,7 @@ int unregister_ftrace_function(struct ftrace_ops *ops) return ret; } +EXPORT_SYMBOL_GPL(unregister_ftrace_function); int ftrace_enable_sysctl(struct ctl_table *table, int write, -- cgit v1.2.3-71-gd317 From 936e074b286ae779f134312178dbab139ee7ea52 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 5 May 2011 22:54:01 -0400 Subject: ftrace: Modify ftrace_set_filter/notrace to take ops Since users of the function tracer can now pick and choose which functions they want to trace agnostically from other users of the function tracer, we need to pass the ops struct to the ftrace_set_filter() functions. The functions ftrace_set_global_filter() and ftrace_set_global_notrace() is added to keep the old filter functions which are used to modify the generic function tracers. 
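As a rough sketch of how a caller might use the new per-ops interface (the probe and the target function name are invented for illustration, and error handling is omitted):

static void my_probe(unsigned long ip, unsigned long parent_ip)
{
	/* runs for every hit on the functions this ops filters on */
}

static struct ftrace_ops my_probe_ops __read_mostly = {
	.func	= my_probe,
};

static int __init my_probe_init(void)
{
	char *func = "do_sys_open";	/* hypothetical target */

	/* the filter applies to this ops only; other ftrace users are untouched */
	ftrace_set_filter(&my_probe_ops, func, strlen(func), 1);
	return register_ftrace_function(&my_probe_ops);
}

Callers that really mean the shared function tracer switch to ftrace_set_global_filter()/ftrace_set_global_notrace() instead, as the selftest update below does.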
Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 7 ++++++- kernel/trace/ftrace.c | 46 +++++++++++++++++++++++++++++++++++++++++-- kernel/trace/trace_selftest.c | 4 ++-- 3 files changed, 52 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index caba694a62b6..9d88e1cb5dbb 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -179,7 +179,12 @@ struct dyn_ftrace { }; int ftrace_force_update(void); -void ftrace_set_filter(unsigned char *buf, int len, int reset); +void ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, + int len, int reset); +void ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, + int len, int reset); +void ftrace_set_global_filter(unsigned char *buf, int len, int reset); +void ftrace_set_global_notrace(unsigned char *buf, int len, int reset); int register_ftrace_command(struct ftrace_func_command *cmd); int unregister_ftrace_command(struct ftrace_func_command *cmd); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 5b3ee04e39d9..d017c2c82c44 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2826,6 +2826,10 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len, struct ftrace_hash *hash; int ret; + /* All global ops uses the global ops filters */ + if (ops->flags & FTRACE_OPS_FL_GLOBAL) + ops = &global_ops; + if (unlikely(ftrace_disabled)) return -ENODEV; @@ -2856,6 +2860,41 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len, /** * ftrace_set_filter - set a function to filter on in ftrace + * @ops - the ops to set the filter with + * @buf - the string that holds the function filter text. + * @len - the length of the string. + * @reset - non zero to reset all filters before applying this filter. + * + * Filters denote which functions should be enabled when tracing is enabled. + * If @buf is NULL and reset is set, all functions will be enabled for tracing. + */ +void ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, + int len, int reset) +{ + ftrace_set_regex(ops, buf, len, reset, 1); +} +EXPORT_SYMBOL_GPL(ftrace_set_filter); + +/** + * ftrace_set_notrace - set a function to not trace in ftrace + * @ops - the ops to set the notrace filter with + * @buf - the string that holds the function notrace text. + * @len - the length of the string. + * @reset - non zero to reset all filters before applying this filter. + * + * Notrace Filters denote which functions should not be enabled when tracing + * is enabled. If @buf is NULL and reset is set, all functions will be enabled + * for tracing. + */ +void ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, + int len, int reset) +{ + ftrace_set_regex(ops, buf, len, reset, 0); +} +EXPORT_SYMBOL_GPL(ftrace_set_notrace); +/** + * ftrace_set_filter - set a function to filter on in ftrace + * @ops - the ops to set the filter with * @buf - the string that holds the function filter text. * @len - the length of the string. * @reset - non zero to reset all filters before applying this filter. @@ -2863,13 +2902,15 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len, * Filters denote which functions should be enabled when tracing is enabled. * If @buf is NULL and reset is set, all functions will be enabled for tracing. 
*/ -void ftrace_set_filter(unsigned char *buf, int len, int reset) +void ftrace_set_global_filter(unsigned char *buf, int len, int reset) { ftrace_set_regex(&global_ops, buf, len, reset, 1); } +EXPORT_SYMBOL_GPL(ftrace_set_global_filter); /** * ftrace_set_notrace - set a function to not trace in ftrace + * @ops - the ops to set the notrace filter with * @buf - the string that holds the function notrace text. * @len - the length of the string. * @reset - non zero to reset all filters before applying this filter. @@ -2878,10 +2919,11 @@ void ftrace_set_filter(unsigned char *buf, int len, int reset) * is enabled. If @buf is NULL and reset is set, all functions will be enabled * for tracing. */ -void ftrace_set_notrace(unsigned char *buf, int len, int reset) +void ftrace_set_global_notrace(unsigned char *buf, int len, int reset) { ftrace_set_regex(&global_ops, buf, len, reset, 0); } +EXPORT_SYMBOL_GPL(ftrace_set_global_notrace); /* * command line interface to allow users to set filters on boot up. diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 659732eba07c..0fa2db305b7c 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -131,7 +131,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, func_name = "*" __stringify(DYN_FTRACE_TEST_NAME); /* filter only on our function */ - ftrace_set_filter(func_name, strlen(func_name), 1); + ftrace_set_global_filter(func_name, strlen(func_name), 1); /* enable tracing */ ret = tracer_init(trace, tr); @@ -181,7 +181,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, tracer_enabled = save_tracer_enabled; /* Enable tracing on all functions again */ - ftrace_set_filter(NULL, 0, 1); + ftrace_set_global_filter(NULL, 0, 1); return ret; } -- cgit v1.2.3-71-gd317 From b4bc842802db3314f9a657094da0450a903ea619 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 7 Feb 2011 16:02:25 -0800 Subject: module: deal with alignment issues in built-in module versions On m68k natural alignment is 2-byte boundary but we are trying to align structures in __modver section on sizeof(void *) boundary. This causes trouble when we try to access elements in this section in array-like fashion when create "version" attributes for built-in modules. Moreover, as DaveM said, we can't reliably put structures into independent objects, put them into a special section, and then expect array access over them (via the section boundaries) after linking the objects together to just "work" due to variable alignment choices in different situations. The only solution that seems to work reliably is to make an array of plain pointers to the objects in question and put those pointers in the special section. 
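Reduced to a generic toy example (the names and section below are invented; this is not the kernel's __modver machinery), the pattern looks like this: the object keeps whatever natural alignment the compiler gives it, and only a pointer, which is always sizeof(void *) aligned, is emitted into the special section, so the section can safely be walked as an array after linking.

struct my_ver {
	const char *module_name;
	const char *version;
};

static struct my_ver my_ver_obj = { "example", "1.0" };

/* only the pointer goes into the dedicated section */
static const struct my_ver *my_ver_ptr
	__attribute__((used, section("__my_vers"))) = &my_ver_obj;

/* section boundaries provided by the linker */
extern const struct my_ver *__start___my_vers[];
extern const struct my_ver *__stop___my_vers[];

static void walk_versions(void)
{
	const struct my_ver **p;

	for (p = __start___my_vers; p < __stop___my_vers; p++)
		printk(KERN_INFO "%s: %s\n", (*p)->module_name, (*p)->version);
}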
Reported-by: Geert Uytterhoeven Signed-off-by: Dmitry Torokhov Signed-off-by: Rusty Russell --- include/linux/module.h | 10 +++++----- kernel/params.c | 9 ++++++--- 2 files changed, 11 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 5de42043dff0..4cfebc211695 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -174,10 +174,7 @@ extern struct module __this_module; #define MODULE_VERSION(_version) \ extern ssize_t __modver_version_show(struct module_attribute *, \ struct module *, char *); \ - static struct module_version_attribute __modver_version_attr \ - __used \ - __attribute__ ((__section__ ("__modver"),aligned(sizeof(void *)))) \ - = { \ + static struct module_version_attribute ___modver_attr = { \ .mattr = { \ .attr = { \ .name = "version", \ @@ -187,7 +184,10 @@ extern struct module __this_module; }, \ .module_name = KBUILD_MODNAME, \ .version = _version, \ - } + }; \ + static const struct module_version_attribute \ + __used __attribute__ ((__section__ ("__modver"))) \ + * __moduleparam_const __modver_attr = &___modver_attr #endif /* Optional firmware file (or files) needed by the module diff --git a/kernel/params.c b/kernel/params.c index 7ab388a48a2e..28c5d5c83f6b 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -821,15 +821,18 @@ ssize_t __modver_version_show(struct module_attribute *mattr, return sprintf(buf, "%s\n", vattr->version); } -extern struct module_version_attribute __start___modver[], __stop___modver[]; +extern const struct module_version_attribute *__start___modver[]; +extern const struct module_version_attribute *__stop___modver[]; static void __init version_sysfs_builtin(void) { - const struct module_version_attribute *vattr; + const struct module_version_attribute **p; struct module_kobject *mk; int err; - for (vattr = __start___modver; vattr < __stop___modver; vattr++) { + for (p = __start___modver; p < __stop___modver; p++) { + const struct module_version_attribute *vattr = *p; + mk = locate_module_kobject(vattr->module_name); if (mk) { err = sysfs_create_file(&mk->kobj, &vattr->mattr.attr); -- cgit v1.2.3-71-gd317 From 9b73a5840c7d5f77e5766626716df13787cb258c Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 7 Feb 2011 16:02:27 -0800 Subject: module: do not hide __modver_version_show declaration behind ifdef Doing so prevents the following warning from sparse: CHECK kernel/params.c kernel/params.c:817:9: warning: symbol '__modver_version_show' was not declared. Should it be static? since kernel/params.c is never compiled with MODULE being set. 
Signed-off-by: Dmitry Torokhov Signed-off-by: Rusty Russell --- include/linux/module.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 4cfebc211695..23996ad147e7 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -64,6 +64,9 @@ struct module_version_attribute { const char *version; } __attribute__ ((__aligned__(sizeof(void *)))); +extern ssize_t __modver_version_show(struct module_attribute *, + struct module *, char *); + struct module_kobject { struct kobject kobj; @@ -172,8 +175,6 @@ extern struct module __this_module; #define MODULE_VERSION(_version) MODULE_INFO(version, _version) #else #define MODULE_VERSION(_version) \ - extern ssize_t __modver_version_show(struct module_attribute *, \ - struct module *, char *); \ static struct module_version_attribute ___modver_attr = { \ .mattr = { \ .attr = { \ -- cgit v1.2.3-71-gd317 From a288bd651f4180c224cfddf837a0416157a36661 Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Thu, 19 May 2011 16:55:25 -0600 Subject: module: remove 64 bit alignment padding from struct module with CONFIG_TRACE* Reorder struct module to remove 24 bytes of alignment padding on 64 bit builds when the CONFIG_TRACE options are selected. This allows the structure to fit into one fewer cache lines, and its size drops from 592 to 568 on x86_64. Signed-off-by: Richard Kennedy Signed-off-by: Rusty Russell --- include/linux/module.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 23996ad147e7..65cc6cc73ca8 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -368,34 +368,35 @@ struct module struct module_notes_attrs *notes_attrs; #endif + /* The command line arguments (may be mangled). People like + keeping pointers to this stuff */ + char *args; + #ifdef CONFIG_SMP /* Per-cpu data. */ void __percpu *percpu; unsigned int percpu_size; #endif - /* The command line arguments (may be mangled). People like - keeping pointers to this stuff */ - char *args; #ifdef CONFIG_TRACEPOINTS - struct tracepoint * const *tracepoints_ptrs; unsigned int num_tracepoints; + struct tracepoint * const *tracepoints_ptrs; #endif #ifdef HAVE_JUMP_LABEL struct jump_entry *jump_entries; unsigned int num_jump_entries; #endif #ifdef CONFIG_TRACING - const char **trace_bprintk_fmt_start; unsigned int num_trace_bprintk_fmt; + const char **trace_bprintk_fmt_start; #endif #ifdef CONFIG_EVENT_TRACING struct ftrace_event_call **trace_events; unsigned int num_trace_events; #endif #ifdef CONFIG_FTRACE_MCOUNT_RECORD - unsigned long *ftrace_callsites; unsigned int num_ftrace_callsites; + unsigned long *ftrace_callsites; #endif #ifdef CONFIG_MODULE_UNLOAD -- cgit v1.2.3-71-gd317 From c5be0b2eb1ca05e0cd747f9c0ba552c6ee8827a0 Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Thu, 19 May 2011 16:55:25 -0600 Subject: module: reorder kparam_array to remove alignment padding on 64 bit builds Reorder structure kparam_array to remove 8 bytes of alignment padding on 64 bit builds, dropping its size from 40 to 32 bytes. Also update the macro module_param_array_named to initialise the structure using its member names to allow it to be changed without touching all its call sites. 'git grep' finds module_param_array in 1037 places so this patch will save a small amount of data space across many modules. 
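For illustration, assuming the usual LP64 sizes (4-byte unsigned int, 8-byte pointers), the before and after layouts look roughly like this, the padding comments being the point of the change:

struct kernel_param_ops;			/* opaque here, only pointed to */

struct kparam_array_old {
	unsigned int max;			/* 4 bytes */
						/* 4 bytes padding */
	unsigned int *num;			/* 8 bytes */
	const struct kernel_param_ops *ops;	/* 8 bytes */
	unsigned int elemsize;			/* 4 bytes */
						/* 4 bytes padding */
	void *elem;				/* 8 bytes */
};						/* 40 bytes */

struct kparam_array_new {
	unsigned int max;			/* 4 bytes */
	unsigned int elemsize;			/* 4 bytes, fills the hole */
	unsigned int *num;			/* 8 bytes */
	const struct kernel_param_ops *ops;	/* 8 bytes */
	void *elem;				/* 8 bytes */
};						/* 32 bytes */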
Signed-off-by: Richard Kennedy Signed-off-by: Rusty Russell --- include/linux/moduleparam.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 07b41951e3fa..ddaae98c53f9 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -67,9 +67,9 @@ struct kparam_string { struct kparam_array { unsigned int max; + unsigned int elemsize; unsigned int *num; const struct kernel_param_ops *ops; - unsigned int elemsize; void *elem; }; @@ -371,8 +371,9 @@ extern int param_get_invbool(char *buffer, const struct kernel_param *kp); */ #define module_param_array_named(name, array, type, nump, perm) \ static const struct kparam_array __param_arr_##name \ - = { ARRAY_SIZE(array), nump, ¶m_ops_##type, \ - sizeof(array[0]), array }; \ + = { .max = ARRAY_SIZE(array), .num = nump, \ + .ops = ¶m_ops_##type, \ + .elemsize = sizeof(array[0]), .elem = array }; \ __module_param_call(MODULE_PARAM_PREFIX, name, \ ¶m_array_ops, \ .arr = &__param_arr_##name, \ -- cgit v1.2.3-71-gd317 From de4d8d53465483168d6a627d409ee2d09d8e3308 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 19 Apr 2011 21:49:58 +0200 Subject: module: each_symbol_section instead of each_symbol Instead of having a callback function for each symbol in the kernel, have a callback for each array of symbols. This eases the logic when we move to sorted symbols and binary search. Signed-off-by: Rusty Russell Signed-off-by: Alessio Igor Bogani --- include/linux/module.h | 5 +++-- kernel/module.c | 42 +++++++++++++++++++++++++++--------------- 2 files changed, 30 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 65cc6cc73ca8..49f4ad0ddec2 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -477,8 +477,9 @@ const struct kernel_symbol *find_symbol(const char *name, bool warn); /* Walk the exported symbol table */ -bool each_symbol(bool (*fn)(const struct symsearch *arr, struct module *owner, - unsigned int symnum, void *data), void *data); +bool each_symbol_section(bool (*fn)(const struct symsearch *arr, + struct module *owner, + void *data), void *data); /* Returns 0 and fills in value, defined and namebuf, or -ERANGE if symnum out of range. */ diff --git a/kernel/module.c b/kernel/module.c index 0e6f97f43c88..e8aa462301e7 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -240,23 +240,24 @@ static bool each_symbol_in_section(const struct symsearch *arr, struct module *owner, bool (*fn)(const struct symsearch *syms, struct module *owner, - unsigned int symnum, void *data), + void *data), void *data) { - unsigned int i, j; + unsigned int j; for (j = 0; j < arrsize; j++) { - for (i = 0; i < arr[j].stop - arr[j].start; i++) - if (fn(&arr[j], owner, i, data)) - return true; + if (fn(&arr[j], owner, data)) + return true; } return false; } /* Returns true as soon as fn returns true, otherwise false. 
*/ -bool each_symbol(bool (*fn)(const struct symsearch *arr, struct module *owner, - unsigned int symnum, void *data), void *data) +bool each_symbol_section(bool (*fn)(const struct symsearch *arr, + struct module *owner, + void *data), + void *data) { struct module *mod; static const struct symsearch arr[] = { @@ -309,7 +310,7 @@ bool each_symbol(bool (*fn)(const struct symsearch *arr, struct module *owner, } return false; } -EXPORT_SYMBOL_GPL(each_symbol); +EXPORT_SYMBOL_GPL(each_symbol_section); struct find_symbol_arg { /* Input */ @@ -323,15 +324,12 @@ struct find_symbol_arg { const struct kernel_symbol *sym; }; -static bool find_symbol_in_section(const struct symsearch *syms, - struct module *owner, - unsigned int symnum, void *data) +static bool check_symbol(const struct symsearch *syms, + struct module *owner, + unsigned int symnum, void *data) { struct find_symbol_arg *fsa = data; - if (strcmp(syms->start[symnum].name, fsa->name) != 0) - return false; - if (!fsa->gplok) { if (syms->licence == GPL_ONLY) return false; @@ -365,6 +363,20 @@ static bool find_symbol_in_section(const struct symsearch *syms, return true; } +static bool find_symbol_in_section(const struct symsearch *syms, + struct module *owner, + void *data) +{ + struct find_symbol_arg *fsa = data; + unsigned int i; + + for (i = 0; i < syms->stop - syms->start; i++) { + if (strcmp(syms->start[i].name, fsa->name) == 0) + return check_symbol(syms, owner, i, data); + } + return false; +} + /* Find a symbol and return it, along with, (optional) crc and * (optional) module which owns it. Needs preempt disabled or module_mutex. */ const struct kernel_symbol *find_symbol(const char *name, @@ -379,7 +391,7 @@ const struct kernel_symbol *find_symbol(const char *name, fsa.gplok = gplok; fsa.warn = warn; - if (each_symbol(find_symbol_in_section, &fsa)) { + if (each_symbol_section(find_symbol_in_section, &fsa)) { if (owner) *owner = fsa.owner; if (crc) -- cgit v1.2.3-71-gd317 From f02e8a6596b7dc9b2171f7ff5654039ef0950cdc Mon Sep 17 00:00:00 2001 From: Alessio Igor Bogani Date: Thu, 14 Apr 2011 14:59:39 +0200 Subject: module: Sort exported symbols This patch places every exported symbol in its own section (i.e. "___ksymtab+printk"). Thus the linker will use its SORT() directive to sort and finally merge all symbol in the right and final section (i.e. "__ksymtab"). The symbol prefixed archs use an underscore as prefix for symbols. To avoid collision we use a different character to create the temporary section names. This work was supported by a hardware donation from the CE Linux Forum. 
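A toy illustration of the mechanism (the entry names and section prefix are invented; this is not the real EXPORT_SYMBOL macro): every entry is placed in its own input section named after the entry, and the linker's SORT() directive gathers them back into one output section in name order.

#define DECLARE_ENTRY(name, val)					\
	static const int __entry_##name					\
	__attribute__((used, section("___entries+" #name))) = (val)

DECLARE_ENTRY(zebra, 3);
DECLARE_ENTRY(apple, 1);
DECLARE_ENTRY(mango, 2);

with the linker script side collecting them as

	__entries : { *(SORT(___entries+*)) }

so after linking the three entries appear in apple, mango, zebra order no matter where they were defined.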
Signed-off-by: Alessio Igor Bogani Signed-off-by: Rusty Russell (folded in '+' fixup) Tested-by: Dirk Behme --- include/asm-generic/vmlinux.lds.h | 20 ++++++++++---------- include/linux/module.h | 4 ++-- scripts/module-common.lds | 11 +++++++++++ 3 files changed, 23 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index bd297a20ab98..b27445e00b6c 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -274,70 +274,70 @@ /* Kernel symbol table: Normal symbols */ \ __ksymtab : AT(ADDR(__ksymtab) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___ksymtab) = .; \ - *(__ksymtab) \ + *(SORT(___ksymtab+*)) \ VMLINUX_SYMBOL(__stop___ksymtab) = .; \ } \ \ /* Kernel symbol table: GPL-only symbols */ \ __ksymtab_gpl : AT(ADDR(__ksymtab_gpl) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___ksymtab_gpl) = .; \ - *(__ksymtab_gpl) \ + *(SORT(___ksymtab_gpl+*)) \ VMLINUX_SYMBOL(__stop___ksymtab_gpl) = .; \ } \ \ /* Kernel symbol table: Normal unused symbols */ \ __ksymtab_unused : AT(ADDR(__ksymtab_unused) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___ksymtab_unused) = .; \ - *(__ksymtab_unused) \ + *(SORT(___ksymtab_unused+*)) \ VMLINUX_SYMBOL(__stop___ksymtab_unused) = .; \ } \ \ /* Kernel symbol table: GPL-only unused symbols */ \ __ksymtab_unused_gpl : AT(ADDR(__ksymtab_unused_gpl) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___ksymtab_unused_gpl) = .; \ - *(__ksymtab_unused_gpl) \ + *(SORT(___ksymtab_unused_gpl+*)) \ VMLINUX_SYMBOL(__stop___ksymtab_unused_gpl) = .; \ } \ \ /* Kernel symbol table: GPL-future-only symbols */ \ __ksymtab_gpl_future : AT(ADDR(__ksymtab_gpl_future) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___ksymtab_gpl_future) = .; \ - *(__ksymtab_gpl_future) \ + *(SORT(___ksymtab_gpl_future+*)) \ VMLINUX_SYMBOL(__stop___ksymtab_gpl_future) = .; \ } \ \ /* Kernel symbol table: Normal symbols */ \ __kcrctab : AT(ADDR(__kcrctab) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___kcrctab) = .; \ - *(__kcrctab) \ + *(SORT(___kcrctab+*)) \ VMLINUX_SYMBOL(__stop___kcrctab) = .; \ } \ \ /* Kernel symbol table: GPL-only symbols */ \ __kcrctab_gpl : AT(ADDR(__kcrctab_gpl) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___kcrctab_gpl) = .; \ - *(__kcrctab_gpl) \ + *(SORT(___kcrctab_gpl+*)) \ VMLINUX_SYMBOL(__stop___kcrctab_gpl) = .; \ } \ \ /* Kernel symbol table: Normal unused symbols */ \ __kcrctab_unused : AT(ADDR(__kcrctab_unused) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___kcrctab_unused) = .; \ - *(__kcrctab_unused) \ + *(SORT(___kcrctab_unused+*)) \ VMLINUX_SYMBOL(__stop___kcrctab_unused) = .; \ } \ \ /* Kernel symbol table: GPL-only unused symbols */ \ __kcrctab_unused_gpl : AT(ADDR(__kcrctab_unused_gpl) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___kcrctab_unused_gpl) = .; \ - *(__kcrctab_unused_gpl) \ + *(SORT(___kcrctab_unused_gpl+*)) \ VMLINUX_SYMBOL(__stop___kcrctab_unused_gpl) = .; \ } \ \ /* Kernel symbol table: GPL-future-only symbols */ \ __kcrctab_gpl_future : AT(ADDR(__kcrctab_gpl_future) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___kcrctab_gpl_future) = .; \ - *(__kcrctab_gpl_future) \ + *(SORT(___kcrctab_gpl_future+*)) \ VMLINUX_SYMBOL(__stop___kcrctab_gpl_future) = .; \ } \ \ diff --git a/include/linux/module.h b/include/linux/module.h index 49f4ad0ddec2..d9ca2d5dc6d0 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -224,7 +224,7 @@ struct module_use { extern void *__crc_##sym __attribute__((weak)); \ static const unsigned long __kcrctab_##sym \ __used \ - 
__attribute__((section("__kcrctab" sec), unused)) \ + __attribute__((section("___kcrctab" sec "+" #sym), unused)) \ = (unsigned long) &__crc_##sym; #else #define __CRC_SYMBOL(sym, sec) @@ -239,7 +239,7 @@ struct module_use { = MODULE_SYMBOL_PREFIX #sym; \ static const struct kernel_symbol __ksymtab_##sym \ __used \ - __attribute__((section("__ksymtab" sec), unused)) \ + __attribute__((section("___ksymtab" sec "+" #sym), unused)) \ = { (unsigned long)&sym, __kstrtab_##sym } #define EXPORT_SYMBOL(sym) \ diff --git a/scripts/module-common.lds b/scripts/module-common.lds index 47a1f9ae0ede..0865b3e752be 100644 --- a/scripts/module-common.lds +++ b/scripts/module-common.lds @@ -5,4 +5,15 @@ */ SECTIONS { /DISCARD/ : { *(.discard) } + + __ksymtab : { *(SORT(___ksymtab+*)) } + __ksymtab_gpl : { *(SORT(___ksymtab_gpl+*)) } + __ksymtab_unused : { *(SORT(___ksymtab_unused+*)) } + __ksymtab_unused_gpl : { *(SORT(___ksymtab_unused_gpl+*)) } + __ksymtab_gpl_future : { *(SORT(___ksymtab_gpl_future+*)) } + __kcrctab : { *(SORT(___kcrctab+*)) } + __kcrctab_gpl : { *(SORT(___kcrctab_gpl+*)) } + __kcrctab_unused : { *(SORT(___kcrctab_unused+*)) } + __kcrctab_unused_gpl : { *(SORT(___kcrctab_unused_gpl+*)) } + __kcrctab_gpl_future : { *(SORT(___kcrctab_gpl_future+*)) } } -- cgit v1.2.3-71-gd317 From 1a94dc35bc5c166d89913dc01a49d27a3c21a455 Mon Sep 17 00:00:00 2001 From: Tim Abbott Date: Thu, 14 Apr 2011 20:00:19 +0200 Subject: lib: Add generic binary search function to the kernel. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There a large number hand-coded binary searches in the kernel (run "git grep search | grep binary" to find many of them). Since in my experience, hand-coding binary searches can be error-prone, it seems worth cleaning this up by providing a generic binary search function. This generic binary search implementation comes from Ksplice. It has the same basic API as the C library bsearch() function. Ksplice uses it in half a dozen places with 4 different comparison functions, and I think our code is substantially cleaner because of this. 
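A minimal usage sketch (not from the patch): the caller's table must already be sorted ascending under the same comparison function it passes in.

#include <linux/bsearch.h>
#include <linux/types.h>

static int cmp_u32(const void *key, const void *elt)
{
	u32 k = *(const u32 *)key;
	u32 e = *(const u32 *)elt;

	if (k < e)
		return -1;
	if (k > e)
		return 1;
	return 0;
}

static bool table_contains(const u32 *table, size_t n, u32 val)
{
	return bsearch(&val, table, n, sizeof(*table), cmp_u32) != NULL;
}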
Signed-off-by: Tim Abbott Extra-bikeshedding-by: Alan Jenkins Extra-bikeshedding-by: André Goddard Rosa Extra-bikeshedding-by: Rusty Russell Signed-off-by: Rusty Russell Signed-off-by: Alessio Igor Bogani Signed-off-by: Rusty Russell --- include/linux/bsearch.h | 9 +++++++++ lib/Makefile | 3 ++- lib/bsearch.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 64 insertions(+), 1 deletion(-) create mode 100644 include/linux/bsearch.h create mode 100644 lib/bsearch.c (limited to 'include/linux') diff --git a/include/linux/bsearch.h b/include/linux/bsearch.h new file mode 100644 index 000000000000..90b1aa867224 --- /dev/null +++ b/include/linux/bsearch.h @@ -0,0 +1,9 @@ +#ifndef _LINUX_BSEARCH_H +#define _LINUX_BSEARCH_H + +#include + +void *bsearch(const void *key, const void *base, size_t num, size_t size, + int (*cmp)(const void *key, const void *elt)); + +#endif /* _LINUX_BSEARCH_H */ diff --git a/lib/Makefile b/lib/Makefile index ef0f28571156..4b49a249064b 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -21,7 +21,8 @@ lib-y += kobject.o kref.o klist.o obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ - string_helpers.o gcd.o lcm.o list_sort.o uuid.o flex_array.o + string_helpers.o gcd.o lcm.o list_sort.o uuid.o flex_array.o \ + bsearch.o obj-y += kstrtox.o obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o diff --git a/lib/bsearch.c b/lib/bsearch.c new file mode 100644 index 000000000000..5b54758e2afb --- /dev/null +++ b/lib/bsearch.c @@ -0,0 +1,53 @@ +/* + * A generic implementation of binary search for the Linux kernel + * + * Copyright (C) 2008-2009 Ksplice, Inc. + * Author: Tim Abbott + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; version 2. + */ + +#include +#include + +/* + * bsearch - binary search an array of elements + * @key: pointer to item being searched for + * @base: pointer to first element to search + * @num: number of elements + * @size: size of each element + * @cmp: pointer to comparison function + * + * This function does a binary search on the given array. The + * contents of the array should already be in ascending sorted order + * under the provided comparison function. + * + * Note that the key need not have the same type as the elements in + * the array, e.g. key could be a string and the comparison function + * could compare the string with the struct's name field. However, if + * the key and elements in the array are of the same type, you can use + * the same comparison function for both sort() and bsearch(). 
+ */ +void *bsearch(const void *key, const void *base, size_t num, size_t size, + int (*cmp)(const void *key, const void *elt)) +{ + size_t start = 0, end = num; + int result; + + while (start < end) { + size_t mid = start + (end - start) / 2; + + result = cmp(key, base + mid * size); + if (result < 0) + end = mid; + else if (result > 0) + start = mid + 1; + else + return (void *)base + mid * size; + } + + return NULL; +} +EXPORT_SYMBOL(bsearch); -- cgit v1.2.3-71-gd317 From d0f1fed29e6e73d9d17f4c91a5896a4ce3938d45 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Tue, 19 Apr 2011 12:43:45 +0100 Subject: Add a strtobool function matching semantics of existing in kernel equivalents This is a rename of the usr_strtobool proposal, which was a renamed, relocated and fixed version of previous kstrtobool RFC Signed-off-by: Jonathan Cameron Signed-off-by: Rusty Russell --- include/linux/string.h | 1 + lib/string.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) (limited to 'include/linux') diff --git a/include/linux/string.h b/include/linux/string.h index a716ee2a8adb..a176db2f2c85 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -123,6 +123,7 @@ extern char **argv_split(gfp_t gfp, const char *str, int *argcp); extern void argv_free(char **argv); extern bool sysfs_streq(const char *s1, const char *s2); +extern int strtobool(const char *s, bool *res); #ifdef CONFIG_BINARY_PRINTF int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args); diff --git a/lib/string.c b/lib/string.c index f71bead1be3e..01fad9b203e1 100644 --- a/lib/string.c +++ b/lib/string.c @@ -535,6 +535,35 @@ bool sysfs_streq(const char *s1, const char *s2) } EXPORT_SYMBOL(sysfs_streq); +/** + * strtobool - convert common user inputs into boolean values + * @s: input string + * @res: result + * + * This routine returns 0 iff the first character is one of 'Yy1Nn0'. + * Otherwise it will return -EINVAL. Value pointed to by res is + * updated upon finding a match. + */ +int strtobool(const char *s, bool *res) +{ + switch (s[0]) { + case 'y': + case 'Y': + case '1': + *res = true; + break; + case 'n': + case 'N': + case '0': + *res = false; + break; + default: + return -EINVAL; + } + return 0; +} +EXPORT_SYMBOL(strtobool); + #ifndef __HAVE_ARCH_MEMSET /** * memset - Fill a region of memory with the given value -- cgit v1.2.3-71-gd317 From b3ae52b6b0335eba547221aad2cb3c50902e3d2d Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Tue, 10 May 2011 23:31:30 +0200 Subject: SSB: Change fallback sprom to callback mechanism. Some embedded devices like the Netgear WNDR3300 have two SSB based cards without an own sprom on the pci bus. We have to provide two different fallback sproms for these and this was not possible with the old solution. In the bcm47xx architecture the sprom data is stored in the nvram in the main flash storage. The architecture code will be able to fill the sprom with the stored data based on the bus where the device was found. The bcm63xx code should do the same thing as before, just using the new API. 
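For a board with two SPROM-less cards, the callback can key off the bus it is asked about; a rough sketch, in which the two sprom images, the slot number and the init hook are all hypothetical:

static struct ssb_sprom sprom_wlan0, sprom_wlan1;	/* filled from nvram */

static int board_get_fallback_sprom(struct ssb_bus *bus, struct ssb_sprom *out)
{
	if (bus->bustype != SSB_BUSTYPE_PCI)
		return -EINVAL;

	if (PCI_SLOT(bus->host_pci->devfn) == 1)	/* hypothetical slot */
		memcpy(out, &sprom_wlan0, sizeof(*out));
	else
		memcpy(out, &sprom_wlan1, sizeof(*out));
	return 0;
}

static int __init board_sprom_init(void)
{
	return ssb_arch_register_fallback_sprom(&board_get_fallback_sprom);
}

The bcm63xx conversion below is the single-card version of the same idea.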
Acked-by: Michael Buesch Cc: netdev@vger.kernel.org Cc: linux-wireless@vger.kernel.org Cc: Florian Fainelli Signed-off-by: Hauke Mehrtens Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/2362/ Signed-off-by: Ralf Baechle --- arch/mips/bcm63xx/boards/board_bcm963xx.c | 16 ++++++++++-- drivers/ssb/pci.c | 16 ++++++++---- drivers/ssb/sprom.c | 43 ++++++++++++++++++------------- drivers/ssb/ssb_private.h | 3 ++- include/linux/ssb/ssb.h | 4 ++- 5 files changed, 55 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/arch/mips/bcm63xx/boards/board_bcm963xx.c b/arch/mips/bcm63xx/boards/board_bcm963xx.c index 8dba8cfb752f..40b223b603be 100644 --- a/arch/mips/bcm63xx/boards/board_bcm963xx.c +++ b/arch/mips/bcm63xx/boards/board_bcm963xx.c @@ -643,6 +643,17 @@ static struct ssb_sprom bcm63xx_sprom = { .boardflags_lo = 0x2848, .boardflags_hi = 0x0000, }; + +int bcm63xx_get_fallback_sprom(struct ssb_bus *bus, struct ssb_sprom *out) +{ + if (bus->bustype == SSB_BUSTYPE_PCI) { + memcpy(out, &bcm63xx_sprom, sizeof(struct ssb_sprom)); + return 0; + } else { + printk(KERN_ERR PFX "unable to fill SPROM for given bustype.\n"); + return -EINVAL; + } +} #endif /* @@ -793,8 +804,9 @@ void __init board_prom_init(void) if (!board_get_mac_address(bcm63xx_sprom.il0mac)) { memcpy(bcm63xx_sprom.et0mac, bcm63xx_sprom.il0mac, ETH_ALEN); memcpy(bcm63xx_sprom.et1mac, bcm63xx_sprom.il0mac, ETH_ALEN); - if (ssb_arch_set_fallback_sprom(&bcm63xx_sprom) < 0) - printk(KERN_ERR "failed to register fallback SPROM\n"); + if (ssb_arch_register_fallback_sprom( + &bcm63xx_get_fallback_sprom) < 0) + printk(KERN_ERR PFX "failed to register fallback SPROM\n"); } #endif } diff --git a/drivers/ssb/pci.c b/drivers/ssb/pci.c index 6f34963b3c64..7ad48585c5e6 100644 --- a/drivers/ssb/pci.c +++ b/drivers/ssb/pci.c @@ -662,7 +662,6 @@ static int sprom_extract(struct ssb_bus *bus, struct ssb_sprom *out, static int ssb_pci_sprom_get(struct ssb_bus *bus, struct ssb_sprom *sprom) { - const struct ssb_sprom *fallback; int err; u16 *buf; @@ -707,10 +706,17 @@ static int ssb_pci_sprom_get(struct ssb_bus *bus, if (err) { /* All CRC attempts failed. * Maybe there is no SPROM on the device? - * If we have a fallback, use that. */ - fallback = ssb_get_fallback_sprom(); - if (fallback) { - memcpy(sprom, fallback, sizeof(*sprom)); + * Now we ask the arch code if there is some sprom + * available for this device in some other storage */ + err = ssb_fill_sprom_with_fallback(bus, sprom); + if (err) { + ssb_printk(KERN_WARNING PFX "WARNING: Using" + " fallback SPROM failed (err %d)\n", + err); + } else { + ssb_dprintk(KERN_DEBUG PFX "Using SPROM" + " revision %d provided by" + " platform.\n", sprom->revision); err = 0; goto out_free; } diff --git a/drivers/ssb/sprom.c b/drivers/ssb/sprom.c index 5f34d7a3e3a5..45ff0e3a3828 100644 --- a/drivers/ssb/sprom.c +++ b/drivers/ssb/sprom.c @@ -17,7 +17,7 @@ #include -static const struct ssb_sprom *fallback_sprom; +static int(*get_fallback_sprom)(struct ssb_bus *dev, struct ssb_sprom *out); static int sprom2hex(const u16 *sprom, char *buf, size_t buf_len, @@ -145,36 +145,43 @@ out: } /** - * ssb_arch_set_fallback_sprom - Set a fallback SPROM for use if no SPROM is found. + * ssb_arch_register_fallback_sprom - Registers a method providing a + * fallback SPROM if no SPROM is found. * - * @sprom: The SPROM data structure to register. + * @sprom_callback: The callback function. 
* - * With this function the architecture implementation may register a fallback - * SPROM data structure. The fallback is only used for PCI based SSB devices, - * where no valid SPROM can be found in the shadow registers. + * With this function the architecture implementation may register a + * callback handler which fills the SPROM data structure. The fallback is + * only used for PCI based SSB devices, where no valid SPROM can be found + * in the shadow registers. * - * This function is useful for weird architectures that have a half-assed SSB device - * hardwired to their PCI bus. + * This function is useful for weird architectures that have a half-assed + * SSB device hardwired to their PCI bus. * - * Note that it does only work with PCI attached SSB devices. PCMCIA devices currently - * don't use this fallback. - * Architectures must provide the SPROM for native SSB devices anyway, - * so the fallback also isn't used for native devices. + * Note that it does only work with PCI attached SSB devices. PCMCIA + * devices currently don't use this fallback. + * Architectures must provide the SPROM for native SSB devices anyway, so + * the fallback also isn't used for native devices. * - * This function is available for architecture code, only. So it is not exported. + * This function is available for architecture code, only. So it is not + * exported. */ -int ssb_arch_set_fallback_sprom(const struct ssb_sprom *sprom) +int ssb_arch_register_fallback_sprom(int (*sprom_callback)(struct ssb_bus *bus, + struct ssb_sprom *out)) { - if (fallback_sprom) + if (get_fallback_sprom) return -EEXIST; - fallback_sprom = sprom; + get_fallback_sprom = sprom_callback; return 0; } -const struct ssb_sprom *ssb_get_fallback_sprom(void) +int ssb_fill_sprom_with_fallback(struct ssb_bus *bus, struct ssb_sprom *out) { - return fallback_sprom; + if (!get_fallback_sprom) + return -ENOENT; + + return get_fallback_sprom(bus, out); } /* http://bcm-v4.sipsolutions.net/802.11/IsSpromAvailable */ diff --git a/drivers/ssb/ssb_private.h b/drivers/ssb/ssb_private.h index 0331139a726f..77653014db0b 100644 --- a/drivers/ssb/ssb_private.h +++ b/drivers/ssb/ssb_private.h @@ -171,7 +171,8 @@ ssize_t ssb_attr_sprom_store(struct ssb_bus *bus, const char *buf, size_t count, int (*sprom_check_crc)(const u16 *sprom, size_t size), int (*sprom_write)(struct ssb_bus *bus, const u16 *sprom)); -extern const struct ssb_sprom *ssb_get_fallback_sprom(void); +extern int ssb_fill_sprom_with_fallback(struct ssb_bus *bus, + struct ssb_sprom *out); /* core.c */ diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h index 9659eff52ca2..045f72ab5dfd 100644 --- a/include/linux/ssb/ssb.h +++ b/include/linux/ssb/ssb.h @@ -404,7 +404,9 @@ extern bool ssb_is_sprom_available(struct ssb_bus *bus); /* Set a fallback SPROM. * See kdoc at the function definition for complete documentation. */ -extern int ssb_arch_set_fallback_sprom(const struct ssb_sprom *sprom); +extern int ssb_arch_register_fallback_sprom( + int (*sprom_callback)(struct ssb_bus *bus, + struct ssb_sprom *out)); /* Suspend a SSB bus. * Call this from the parent bus suspend routine. */ -- cgit v1.2.3-71-gd317 From 369db4c9524b7487faf1ff89646eee396c1363e1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 18 May 2011 21:33:40 +0000 Subject: clocksource: Restructure clocksource struct members Group the hot path members of struct clocksource together so we have a better cache line footprint. Make it cacheline aligned. 
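For a sense of the numbers (assuming the usual LP64 sizes, which the patch itself does not spell out): the hot members read (8-byte function pointer), cycle_last (8), mask (8), mult (4), shift (4) and max_idle_ns (8) add up to 40 bytes, so with the structure ____cacheline_aligned they all fit in a single 64-byte cache line, while the rarely touched bookkeeping members follow behind it.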
Signed-off-by: Thomas Gleixner Cc: John Stultz Cc: Eric Dumazet Reviewed-by: Ingo Molnar Link: http://lkml.kernel.org/r/%3C20110518210136.003081882%40linutronix.de%3E --- include/linux/clocksource.h | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 0fb0b7e79394..c918fbd33ee5 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -159,42 +159,38 @@ extern u64 timecounter_cyc2time(struct timecounter *tc, */ struct clocksource { /* - * First part of structure is read mostly + * Hotpath data, fits in a single cache line when the + * clocksource itself is cacheline aligned. */ - const char *name; - struct list_head list; - int rating; cycle_t (*read)(struct clocksource *cs); - int (*enable)(struct clocksource *cs); - void (*disable)(struct clocksource *cs); + cycle_t cycle_last; cycle_t mask; u32 mult; u32 shift; u64 max_idle_ns; - unsigned long flags; - cycle_t (*vread)(void); - void (*suspend)(struct clocksource *cs); - void (*resume)(struct clocksource *cs); + #ifdef CONFIG_IA64 void *fsys_mmio; /* used by fsyscall asm code */ #define CLKSRC_FSYS_MMIO_SET(mmio, addr) ((mmio) = (addr)) #else #define CLKSRC_FSYS_MMIO_SET(mmio, addr) do { } while (0) #endif - - /* - * Second part is written at each timer interrupt - * Keep it in a different cache line to dirty no - * more than one cache line. - */ - cycle_t cycle_last ____cacheline_aligned_in_smp; + const char *name; + struct list_head list; + int rating; + cycle_t (*vread)(void); + int (*enable)(struct clocksource *cs); + void (*disable)(struct clocksource *cs); + unsigned long flags; + void (*suspend)(struct clocksource *cs); + void (*resume)(struct clocksource *cs); #ifdef CONFIG_CLOCKSOURCE_WATCHDOG /* Watchdog related data, used by the framework */ struct list_head wd_list; cycle_t wd_last; #endif -}; +} ____cacheline_aligned; /* * Clock source flags bits:: -- cgit v1.2.3-71-gd317 From 847b2f42be203f3cff7f243fdd3ee50c1e06c882 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 18 May 2011 21:33:41 +0000 Subject: clockevents: Restructure clock_event_device members Group the hot path members of struct clock_event_device together so we have a better cache line footprint. Make it cacheline aligned. 
Signed-off-by: Thomas Gleixner Cc: John Stultz Reviewed-by: Ingo Molnar Link: http://lkml.kernel.org/r/%3C20110518210136.223607682%40linutronix.de%3E --- include/linux/clockchips.h | 45 +++++++++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index fc53492b6ad7..9466eebc8e19 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -56,46 +56,47 @@ enum clock_event_nofitiers { /** * struct clock_event_device - clock event device descriptor - * @name: ptr to clock event name - * @features: features + * @event_handler: Assigned by the framework to be called by the low + * level handler of the event source + * @set_next_event: set next event function + * @next_event: local storage for the next event in oneshot mode * @max_delta_ns: maximum delta value in ns * @min_delta_ns: minimum delta value in ns * @mult: nanosecond to cycles multiplier * @shift: nanoseconds to cycles divisor (power of two) + * @mode: operating mode assigned by the management code + * @features: features + * @retries: number of forced programming retries + * @set_mode: set mode function + * @broadcast: function to broadcast events + * @name: ptr to clock event name * @rating: variable to rate clock event devices * @irq: IRQ number (only for non CPU local devices) * @cpumask: cpumask to indicate for which CPUs this device works - * @set_next_event: set next event function - * @set_mode: set mode function - * @event_handler: Assigned by the framework to be called by the low - * level handler of the event source - * @broadcast: function to broadcast events * @list: list head for the management code - * @mode: operating mode assigned by the management code - * @next_event: local storage for the next event in oneshot mode - * @retries: number of forced programming retries */ struct clock_event_device { - const char *name; - unsigned int features; + void (*event_handler)(struct clock_event_device *); + int (*set_next_event)(unsigned long evt, + struct clock_event_device *); + ktime_t next_event; u64 max_delta_ns; u64 min_delta_ns; u32 mult; u32 shift; + enum clock_event_mode mode; + unsigned int features; + unsigned long retries; + + void (*broadcast)(const struct cpumask *mask); + void (*set_mode)(enum clock_event_mode mode, + struct clock_event_device *); + const char *name; int rating; int irq; const struct cpumask *cpumask; - int (*set_next_event)(unsigned long evt, - struct clock_event_device *); - void (*set_mode)(enum clock_event_mode mode, - struct clock_event_device *); - void (*event_handler)(struct clock_event_device *); - void (*broadcast)(const struct cpumask *mask); struct list_head list; - enum clock_event_mode mode; - ktime_t next_event; - unsigned long retries; -}; +} ____cacheline_aligned; /* * Calculate a multiplication factor for scaled math, which is used to convert -- cgit v1.2.3-71-gd317 From 57f0fcbe1dea8a36c9d1673086326059991c5f81 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 18 May 2011 21:33:41 +0000 Subject: clockevents: Provide combined configure and register function All clockevent devices have the same open coded initialization functions. Provide an interface which does all necessary initialization in the core code. 
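A usage sketch (the device, clock rate and tick limits below are made up; a 32-bit down-counter with a minimum programmable delta of 0xf ticks is assumed):

static struct clock_event_device my_evt;	/* hypothetical per-cpu timer */

static void __init my_timer_init(u32 rate)
{
	/*
	 * Previously open coded along the lines of:
	 *	clockevents_calc_mult_shift(&my_evt, rate, 4);
	 *	my_evt.max_delta_ns = clockevent_delta2ns(0xffffffff, &my_evt);
	 *	my_evt.min_delta_ns = clockevent_delta2ns(0xf, &my_evt);
	 *	clockevents_register_device(&my_evt);
	 */
	clockevents_config_and_register(&my_evt, rate, 0xf, 0xffffffff);
}

Note that the min/max deltas are given in timer ticks; the conversion to nanoseconds now happens inside clockevents_config().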
Signed-off-by: Thomas Gleixner Cc: John Stultz Reviewed-by: Ingo Molnar Link: http://lkml.kernel.org/r/%3C20110518210136.331975870%40linutronix.de%3E --- include/linux/clockchips.h | 9 +++++++++ kernel/time/clockevents.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 9466eebc8e19..80acc79e0dc5 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -69,6 +69,8 @@ enum clock_event_nofitiers { * @retries: number of forced programming retries * @set_mode: set mode function * @broadcast: function to broadcast events + * @min_delta_ticks: minimum delta value in ticks stored for reconfiguration + * @max_delta_ticks: maximum delta value in ticks stored for reconfiguration * @name: ptr to clock event name * @rating: variable to rate clock event devices * @irq: IRQ number (only for non CPU local devices) @@ -91,6 +93,9 @@ struct clock_event_device { void (*broadcast)(const struct cpumask *mask); void (*set_mode)(enum clock_event_mode mode, struct clock_event_device *); + unsigned long min_delta_ticks; + unsigned long max_delta_ticks; + const char *name; int rating; int irq; @@ -123,6 +128,10 @@ extern u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt); extern void clockevents_register_device(struct clock_event_device *dev); +extern void clockevents_config_and_register(struct clock_event_device *dev, + u32 freq, unsigned long min_delta, + unsigned long max_delta); + extern void clockevents_exchange_device(struct clock_event_device *old, struct clock_event_device *new); extern void clockevents_set_mode(struct clock_event_device *dev, diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 0d74b9ba90c8..c69e88c94446 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -194,6 +194,50 @@ void clockevents_register_device(struct clock_event_device *dev) } EXPORT_SYMBOL_GPL(clockevents_register_device); +static void clockevents_config(struct clock_event_device *dev, + u32 freq) +{ + unsigned long sec; + + if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT)) + return; + + /* + * Calculate the maximum number of seconds we can sleep. Limit + * to 10 minutes for hardware which can program more than + * 32bit ticks so we still get reasonable conversion values. + */ + sec = dev->max_delta_ticks; + do_div(sec, freq); + if (!sec) + sec = 1; + else if (sec > 600 && dev->max_delta_ticks > UINT_MAX) + sec = 600; + + clockevents_calc_mult_shift(dev, freq, sec); + dev->min_delta_ns = clockevent_delta2ns(dev->min_delta_ticks, dev); + dev->max_delta_ns = clockevent_delta2ns(dev->max_delta_ticks, dev); +} + +/** + * clockevents_config_and_register - Configure and register a clock event device + * @dev: device to register + * @freq: The clock frequency + * @min_delta: The minimum clock ticks to program in oneshot mode + * @max_delta: The maximum clock ticks to program in oneshot mode + * + * min/max_delta can be 0 for devices which do not support oneshot mode. 
+ */ +void clockevents_config_and_register(struct clock_event_device *dev, + u32 freq, unsigned long min_delta, + unsigned long max_delta) +{ + dev->min_delta_ticks = min_delta; + dev->max_delta_ticks = max_delta; + clockevents_config(dev, freq); + clockevents_register_device(dev); +} + /* * Noop handler when we shut down an event device */ -- cgit v1.2.3-71-gd317 From 80b816b736cfa5b9582279127099b20a479ab7d9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 18 May 2011 21:33:42 +0000 Subject: clockevents: Provide interface to reconfigure an active clock event device Some ARM SoCs have clock event devices which have their frequency modified due to frequency scaling. Provide an interface which allows to reconfigure an active device. After reconfiguration reprogram the current pending event. Signed-off-by: Thomas Gleixner Cc: LAK Cc: John Stultz Acked-by: Linus Walleij Reviewed-by: Ingo Molnar Link: http://lkml.kernel.org/r/%3C20110518210136.437459958%40linutronix.de%3E --- include/linux/clockchips.h | 2 ++ kernel/time/clockevents.c | 20 ++++++++++++++++++++ 2 files changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 80acc79e0dc5..d6733e27af34 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -132,6 +132,8 @@ extern void clockevents_config_and_register(struct clock_event_device *dev, u32 freq, unsigned long min_delta, unsigned long max_delta); +extern int clockevents_update_freq(struct clock_event_device *ce, u32 freq); + extern void clockevents_exchange_device(struct clock_event_device *old, struct clock_event_device *new); extern void clockevents_set_mode(struct clock_event_device *dev, diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index c69e88c94446..22a9da9a9c96 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -238,6 +238,26 @@ void clockevents_config_and_register(struct clock_event_device *dev, clockevents_register_device(dev); } +/** + * clockevents_update_freq - Update frequency and reprogram a clock event device. + * @dev: device to modify + * @freq: new device frequency + * + * Reconfigure and reprogram a clock event device in oneshot + * mode. Must be called on the cpu for which the device delivers per + * cpu timer events with interrupts disabled! Returns 0 on success, + * -ETIME when the event is in the past. + */ +int clockevents_update_freq(struct clock_event_device *dev, u32 freq) +{ + clockevents_config(dev, freq); + + if (dev->mode != CLOCK_EVT_MODE_ONESHOT) + return 0; + + return clockevents_program_event(dev, dev->next_event, ktime_get()); +} + /* * Noop handler when we shut down an event device */ -- cgit v1.2.3-71-gd317 From 99a0378de9f887fd4d501f1baa50aaf16d01a8e8 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Tue, 12 Apr 2011 15:34:36 -0400 Subject: hwmon: (sht15) general code clean-up * Add a documentation file for the device. * Respect a bit more the kernel-doc syntax. * Rename some variables for clarity. * Use bool type for flags. * Use an enum for states (actions being done). 
Signed-off-by: Vivien Didelot Acked-by: Jonathan Cameron Signed-off-by: Guenter Roeck --- Documentation/hwmon/sht15 | 42 ++++++++ drivers/hwmon/sht15.c | 268 ++++++++++++++++++++++++++-------------------- include/linux/sht15.h | 12 ++- 3 files changed, 203 insertions(+), 119 deletions(-) create mode 100644 Documentation/hwmon/sht15 (limited to 'include/linux') diff --git a/Documentation/hwmon/sht15 b/Documentation/hwmon/sht15 new file mode 100644 index 000000000000..2919c516fdbc --- /dev/null +++ b/Documentation/hwmon/sht15 @@ -0,0 +1,42 @@ +Kernel driver sht15 +=================== + +Authors: + * Wouter Horre + * Jonathan Cameron + +Supported chips: + * Sensirion SHT10 + Prefix: 'sht10' + + * Sensirion SHT11 + Prefix: 'sht11' + + * Sensirion SHT15 + Prefix: 'sht15' + + * Sensirion SHT71 + Prefix: 'sht71' + + * Sensirion SHT75 + Prefix: 'sht75' + +Datasheet: Publicly available at the Sensirion website +http://www.sensirion.ch/en/pdf/product_information/Datasheet-humidity-sensor-SHT1x.pdf + +Description +----------- + +The SHT10, SHT11, SHT15, SHT71, and SHT75 are humidity and temperature +sensors. + +The devices communicate using two GPIO lines and use the default +resolution settings of 14 bits for temperature and 12 bits for humidity. + +Note: The regulator supply name is set to "vcc". + +Sysfs interface +--------------- + +* temp1_input: temperature input +* humidity1_input: humidity input diff --git a/drivers/hwmon/sht15.c b/drivers/hwmon/sht15.c index f4e617adb220..080af75c517b 100644 --- a/drivers/hwmon/sht15.c +++ b/drivers/hwmon/sht15.c @@ -9,16 +9,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. * - * Currently ignoring checksum on readings. - * Default resolution only (14bit temp, 12bit humidity) - * Ignoring battery status. - * Heater not enabled. - * Timings are all conservative. - * - * Data sheet available (1/2009) at - * http://www.sensirion.ch/en/pdf/product_information/Datasheet-humidity-sensor-SHT1x.pdf - * - * Regulator supply name = vcc + * For further information, see the Documentation/hwmon/sht15 file. */ #include @@ -39,17 +30,21 @@ #include #include -#define SHT15_MEASURE_TEMP 3 -#define SHT15_MEASURE_RH 5 +/* Commands */ +#define SHT15_MEASURE_TEMP 0x03 +#define SHT15_MEASURE_RH 0x05 -#define SHT15_READING_NOTHING 0 -#define SHT15_READING_TEMP 1 -#define SHT15_READING_HUMID 2 +/* Min timings */ +#define SHT15_TSCKL 100 /* (nsecs) clock low */ +#define SHT15_TSCKH 100 /* (nsecs) clock high */ +#define SHT15_TSU 150 /* (nsecs) data setup time */ -/* Min timings in nsecs */ -#define SHT15_TSCKL 100 /* clock low */ -#define SHT15_TSCKH 100 /* clock high */ -#define SHT15_TSU 150 /* data setup time */ +/* Actions the driver may be doing */ +enum sht15_state { + SHT15_READING_NOTHING, + SHT15_READING_TEMP, + SHT15_READING_HUMID +}; /** * struct sht15_temppair - elements of voltage dependent temp calc @@ -61,9 +56,7 @@ struct sht15_temppair { int d1; }; -/* Table 9 from data sheet - relates temperature calculation - * to supply voltage. 
- */ +/* Table 9 from datasheet - relates temperature calculation to supply voltage */ static const struct sht15_temppair temppoints[] = { { 2500000, -39400 }, { 3000000, -39600 }, @@ -74,27 +67,27 @@ static const struct sht15_temppair temppoints[] = { /** * struct sht15_data - device instance specific data - * @pdata: platform data (gpio's etc) - * @read_work: bh of interrupt handler - * @wait_queue: wait queue for getting values from device - * @val_temp: last temperature value read from device - * @val_humid: last humidity value read from device - * @flag: status flag used to identify what the last request was - * @valid: are the current stored values valid (start condition) - * @last_updat: time of last update - * @read_lock: mutex to ensure only one read in progress - * at a time. - * @dev: associate device structure - * @hwmon_dev: device associated with hwmon subsystem - * @reg: associated regulator (if specified) - * @nb: notifier block to handle notifications of voltage changes - * @supply_uV: local copy of supply voltage used to allow - * use of regulator consumer if available - * @supply_uV_valid: indicates that an updated value has not yet - * been obtained from the regulator and so any calculations - * based upon it will be invalid. - * @update_supply_work: work struct that is used to update the supply_uV - * @interrupt_handled: flag used to indicate a hander has been scheduled + * @pdata: platform data (gpio's etc). + * @read_work: bh of interrupt handler. + * @wait_queue: wait queue for getting values from device. + * @val_temp: last temperature value read from device. + * @val_humid: last humidity value read from device. + * @state: state identifying the action the driver is doing. + * @measurements_valid: are the current stored measures valid (start condition). + * @last_measurement: time of last measure. + * @read_lock: mutex to ensure only one read in progress at a time. + * @dev: associate device structure. + * @hwmon_dev: device associated with hwmon subsystem. + * @reg: associated regulator (if specified). + * @nb: notifier block to handle notifications of voltage + * changes. + * @supply_uV: local copy of supply voltage used to allow use of + * regulator consumer if available. + * @supply_uV_valid: indicates that an updated value has not yet been + * obtained from the regulator and so any calculations + * based upon it will be invalid. + * @update_supply_work: work struct that is used to update the supply_uV. + * @interrupt_handled: flag used to indicate a handler has been scheduled. 
*/ struct sht15_data { struct sht15_platform_data *pdata; @@ -102,16 +95,16 @@ struct sht15_data { wait_queue_head_t wait_queue; uint16_t val_temp; uint16_t val_humid; - u8 flag; - u8 valid; - unsigned long last_updat; + enum sht15_state state; + bool measurements_valid; + unsigned long last_measurement; struct mutex read_lock; struct device *dev; struct device *hwmon_dev; struct regulator *reg; struct notifier_block nb; int supply_uV; - int supply_uV_valid; + bool supply_uV_valid; struct work_struct update_supply_work; atomic_t interrupt_handled; }; @@ -125,6 +118,7 @@ struct sht15_data { static void sht15_connection_reset(struct sht15_data *data) { int i; + gpio_direction_output(data->pdata->gpio_data, 1); ndelay(SHT15_TSCKL); gpio_set_value(data->pdata->gpio_sck, 0); @@ -136,14 +130,14 @@ static void sht15_connection_reset(struct sht15_data *data) ndelay(SHT15_TSCKL); } } + /** * sht15_send_bit() - send an individual bit to the device * @data: device state data * @val: value of bit to be sent - **/ + */ static inline void sht15_send_bit(struct sht15_data *data, int val) { - gpio_set_value(data->pdata->gpio_data, val); ndelay(SHT15_TSU); gpio_set_value(data->pdata->gpio_sck, 1); @@ -154,12 +148,12 @@ static inline void sht15_send_bit(struct sht15_data *data, int val) /** * sht15_transmission_start() - specific sequence for new transmission - * * @data: device state data + * * Timings for this are not documented on the data sheet, so very * conservative ones used in implementation. This implements * figure 12 on the data sheet. - **/ + */ static void sht15_transmission_start(struct sht15_data *data) { /* ensure data is high and output */ @@ -180,23 +174,26 @@ static void sht15_transmission_start(struct sht15_data *data) gpio_set_value(data->pdata->gpio_sck, 0); ndelay(SHT15_TSCKL); } + /** * sht15_send_byte() - send a single byte to the device * @data: device state * @byte: value to be sent - **/ + */ static void sht15_send_byte(struct sht15_data *data, u8 byte) { int i; + for (i = 0; i < 8; i++) { sht15_send_bit(data, !!(byte & 0x80)); byte <<= 1; } } + /** * sht15_wait_for_response() - checks for ack from device * @data: device state - **/ + */ static int sht15_wait_for_response(struct sht15_data *data) { gpio_direction_input(data->pdata->gpio_data); @@ -220,27 +217,30 @@ static int sht15_wait_for_response(struct sht15_data *data) * * On entry, sck is output low, data is output pull high * and the interrupt disabled. - **/ + */ static int sht15_send_cmd(struct sht15_data *data, u8 cmd) { int ret = 0; + sht15_transmission_start(data); sht15_send_byte(data, cmd); ret = sht15_wait_for_response(data); return ret; } + /** - * sht15_update_single_val() - get a new value from device + * sht15_measurement() - get a new value from device * @data: device instance specific data * @command: command sent to request value * @timeout_msecs: timeout after which comms are assumed * to have failed are reset. 
- **/ -static inline int sht15_update_single_val(struct sht15_data *data, - int command, - int timeout_msecs) + */ +static int sht15_measurement(struct sht15_data *data, + int command, + int timeout_msecs) { int ret; + ret = sht15_send_cmd(data, command); if (ret) return ret; @@ -256,7 +256,7 @@ static inline int sht15_update_single_val(struct sht15_data *data, schedule_work(&data->read_work); } ret = wait_event_timeout(data->wait_queue, - (data->flag == SHT15_READING_NOTHING), + (data->state == SHT15_READING_NOTHING), msecs_to_jiffies(timeout_msecs)); if (ret == 0) {/* timeout occurred */ disable_irq_nosync(gpio_to_irq(data->pdata->gpio_data)); @@ -267,27 +267,27 @@ static inline int sht15_update_single_val(struct sht15_data *data, } /** - * sht15_update_vals() - get updated readings from device if too old + * sht15_update_measurements() - get updated measures from device if too old * @data: device state - **/ -static int sht15_update_vals(struct sht15_data *data) + */ +static int sht15_update_measurements(struct sht15_data *data) { int ret = 0; int timeout = HZ; mutex_lock(&data->read_lock); - if (time_after(jiffies, data->last_updat + timeout) - || !data->valid) { - data->flag = SHT15_READING_HUMID; - ret = sht15_update_single_val(data, SHT15_MEASURE_RH, 160); + if (time_after(jiffies, data->last_measurement + timeout) + || !data->measurements_valid) { + data->state = SHT15_READING_HUMID; + ret = sht15_measurement(data, SHT15_MEASURE_RH, 160); if (ret) goto error_ret; - data->flag = SHT15_READING_TEMP; - ret = sht15_update_single_val(data, SHT15_MEASURE_TEMP, 400); + data->state = SHT15_READING_TEMP; + ret = sht15_measurement(data, SHT15_MEASURE_TEMP, 400); if (ret) goto error_ret; - data->valid = 1; - data->last_updat = jiffies; + data->measurements_valid = true; + data->last_measurement = jiffies; } error_ret: mutex_unlock(&data->read_lock); @@ -300,7 +300,7 @@ error_ret: * @data: device state * * As per section 4.3 of the data sheet. - **/ + */ static inline int sht15_calc_temp(struct sht15_data *data) { int d1 = temppoints[0].d1; @@ -316,7 +316,7 @@ static inline int sht15_calc_temp(struct sht15_data *data) break; } - return data->val_temp*10 + d1; + return data->val_temp * 10 + d1; } /** @@ -325,23 +325,35 @@ static inline int sht15_calc_temp(struct sht15_data *data) * * This is the temperature compensated version as per section 4.2 of * the data sheet. - **/ + * + * The sensor is assumed to be V3, which is compatible with V4. + * Humidity conversion coefficients are shown in table 7 of the datasheet. + */ static inline int sht15_calc_humid(struct sht15_data *data) { - int RHlinear; /* milli percent */ + int rh_linear; /* milli percent */ int temp = sht15_calc_temp(data); const int c1 = -4; const int c2 = 40500; /* x 10 ^ -6 */ - const int c3 = -28; /* x 10 ^ -7 */ + const int c3 = -28; /* x 10 ^ -7 */ - RHlinear = c1*1000 - + c2 * data->val_humid/1000 + rh_linear = c1 * 1000 + + c2 * data->val_humid / 1000 + (data->val_humid * data->val_humid * c3) / 10000; return (temp - 25000) * (10000 + 80 * data->val_humid) - / 1000000 + RHlinear; + / 1000000 + rh_linear; } +/** + * sht15_show_temp() - show temperature measurement value in sysfs + * @dev: device. + * @attr: device attribute. + * @buf: sysfs buffer where measurement values are written to. + * + * Will be called on read access to temp1_input sysfs attribute. + * Returns number of bytes written into buffer, negative errno on error. 
+ */ static ssize_t sht15_show_temp(struct device *dev, struct device_attribute *attr, char *buf) @@ -350,12 +362,21 @@ static ssize_t sht15_show_temp(struct device *dev, struct sht15_data *data = dev_get_drvdata(dev); /* Technically no need to read humidity as well */ - ret = sht15_update_vals(data); + ret = sht15_update_measurements(data); return ret ? ret : sprintf(buf, "%d\n", sht15_calc_temp(data)); } +/** + * sht15_show_humidity() - show humidity measurement value in sysfs + * @dev: device. + * @attr: device attribute. + * @buf: sysfs buffer where measurement values are written to. + * + * Will be called on read access to humidity1_input sysfs attribute. + * Returns number of bytes written into buffer, negative errno on error. + */ static ssize_t sht15_show_humidity(struct device *dev, struct device_attribute *attr, char *buf) @@ -363,11 +384,12 @@ static ssize_t sht15_show_humidity(struct device *dev, int ret; struct sht15_data *data = dev_get_drvdata(dev); - ret = sht15_update_vals(data); + ret = sht15_update_measurements(data); return ret ? ret : sprintf(buf, "%d\n", sht15_calc_humid(data)); -}; +} + static ssize_t show_name(struct device *dev, struct device_attribute *attr, char *buf) @@ -376,12 +398,10 @@ static ssize_t show_name(struct device *dev, return sprintf(buf, "%s\n", pdev->name); } -static SENSOR_DEVICE_ATTR(temp1_input, - S_IRUGO, sht15_show_temp, - NULL, 0); -static SENSOR_DEVICE_ATTR(humidity1_input, - S_IRUGO, sht15_show_humidity, - NULL, 0); +static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, + sht15_show_temp, NULL, 0); +static SENSOR_DEVICE_ATTR(humidity1_input, S_IRUGO, + sht15_show_humidity, NULL, 0); static DEVICE_ATTR(name, S_IRUGO, show_name, NULL); static struct attribute *sht15_attrs[] = { &sensor_dev_attr_temp1_input.dev_attr.attr, @@ -397,16 +417,20 @@ static const struct attribute_group sht15_attr_group = { static irqreturn_t sht15_interrupt_fired(int irq, void *d) { struct sht15_data *data = d; + /* First disable the interrupt */ disable_irq_nosync(irq); atomic_inc(&data->interrupt_handled); /* Then schedule a reading work struct */ - if (data->flag != SHT15_READING_NOTHING) + if (data->state != SHT15_READING_NOTHING) schedule_work(&data->read_work); return IRQ_HANDLED; } -/* Each byte of data is acknowledged by pulling the data line +/** + * sht15_ack() - Send an ack to the device + * + * Each byte of data is acknowledged by pulling the data line * low for one clock pulse. */ static void sht15_ack(struct sht15_data *data) @@ -421,12 +445,13 @@ static void sht15_ack(struct sht15_data *data) gpio_direction_input(data->pdata->gpio_data); } + /** * sht15_end_transmission() - notify device of end of transmission * @data: device state * * This is basically a NAK. (single clock pulse, data high) - **/ + */ static void sht15_end_transmission(struct sht15_data *data) { gpio_direction_output(data->pdata->gpio_data, 1); @@ -444,12 +469,13 @@ static void sht15_bh_read_data(struct work_struct *work_s) struct sht15_data *data = container_of(work_s, struct sht15_data, read_work); + /* Firstly, verify the line is low */ if (gpio_get_value(data->pdata->gpio_data)) { - /* If not, then start the interrupt again - care - here as could have gone low in meantime so verify - it hasn't! - */ + /* + * If not, then start the interrupt again - care here as could + * have gone low in meantime so verify it hasn't! 
+ */ atomic_set(&data->interrupt_handled, 0); enable_irq(gpio_to_irq(data->pdata->gpio_data)); /* If still not occurred or another handler has been scheduled */ @@ -457,6 +483,7 @@ static void sht15_bh_read_data(struct work_struct *work_s) || atomic_read(&data->interrupt_handled)) return; } + /* Read the data back from the device */ for (i = 0; i < 16; ++i) { val <<= 1; @@ -468,19 +495,22 @@ static void sht15_bh_read_data(struct work_struct *work_s) if (i == 7) sht15_ack(data); } + /* Tell the device we are done */ sht15_end_transmission(data); - switch (data->flag) { + switch (data->state) { case SHT15_READING_TEMP: data->val_temp = val; break; case SHT15_READING_HUMID: data->val_humid = val; break; + default: + break; } - data->flag = SHT15_READING_NOTHING; + data->state = SHT15_READING_NOTHING; wake_up(&data->wait_queue); } @@ -500,10 +530,10 @@ static void sht15_update_voltage(struct work_struct *work_s) * * Note that as the notification code holds the regulator lock, we have * to schedule an update of the supply voltage rather than getting it directly. - **/ + */ static int sht15_invalidate_voltage(struct notifier_block *nb, - unsigned long event, - void *ignored) + unsigned long event, + void *ignored) { struct sht15_data *data = container_of(nb, struct sht15_data, nb); @@ -521,7 +551,7 @@ static int __devinit sht15_probe(struct platform_device *pdev) if (!data) { ret = -ENOMEM; - dev_err(&pdev->dev, "kzalloc failed"); + dev_err(&pdev->dev, "kzalloc failed\n"); goto error_ret; } @@ -533,13 +563,16 @@ static int __devinit sht15_probe(struct platform_device *pdev) init_waitqueue_head(&data->wait_queue); if (pdev->dev.platform_data == NULL) { - dev_err(&pdev->dev, "no platform data supplied"); + dev_err(&pdev->dev, "no platform data supplied\n"); goto err_free_data; } data->pdata = pdev->dev.platform_data; - data->supply_uV = data->pdata->supply_mv*1000; + data->supply_uV = data->pdata->supply_mv * 1000; -/* If a regulator is available, query what the supply voltage actually is!*/ + /* + * If a regulator is available, + * query what the supply voltage actually is! 
+ */ data->reg = regulator_get(data->dev, "vcc"); if (!IS_ERR(data->reg)) { int voltage; @@ -549,21 +582,25 @@ static int __devinit sht15_probe(struct platform_device *pdev) data->supply_uV = voltage; regulator_enable(data->reg); - /* setup a notifier block to update this if another device - * causes the voltage to change */ + /* + * Setup a notifier block to update this if another device + * causes the voltage to change + */ data->nb.notifier_call = &sht15_invalidate_voltage; ret = regulator_register_notifier(data->reg, &data->nb); } -/* Try requesting the GPIOs */ + + /* Try requesting the GPIOs */ ret = gpio_request(data->pdata->gpio_sck, "SHT15 sck"); if (ret) { - dev_err(&pdev->dev, "gpio request failed"); + dev_err(&pdev->dev, "gpio request failed\n"); goto err_free_data; } gpio_direction_output(data->pdata->gpio_sck, 0); + ret = gpio_request(data->pdata->gpio_data, "SHT15 data"); if (ret) { - dev_err(&pdev->dev, "gpio request failed"); + dev_err(&pdev->dev, "gpio request failed\n"); goto err_release_gpio_sck; } ret = sysfs_create_group(&pdev->dev.kobj, &sht15_attr_group); @@ -578,7 +615,7 @@ static int __devinit sht15_probe(struct platform_device *pdev) "sht15 data", data); if (ret) { - dev_err(&pdev->dev, "failed to get irq for data line"); + dev_err(&pdev->dev, "failed to get irq for data line\n"); goto err_release_gpio_data; } disable_irq_nosync(gpio_to_irq(data->pdata->gpio_data)); @@ -590,6 +627,7 @@ static int __devinit sht15_probe(struct platform_device *pdev) ret = PTR_ERR(data->hwmon_dev); goto err_release_irq; } + return 0; err_release_irq: @@ -601,7 +639,6 @@ err_release_gpio_sck: err_free_data: kfree(data); error_ret: - return ret; } @@ -609,8 +646,10 @@ static int __devexit sht15_remove(struct platform_device *pdev) { struct sht15_data *data = platform_get_drvdata(pdev); - /* Make sure any reads from the device are done and - * prevent new ones from beginning */ + /* + * Make sure any reads from the device are done and + * prevent new ones beginning + */ mutex_lock(&data->read_lock); hwmon_device_unregister(data->hwmon_dev); sysfs_remove_group(&pdev->dev.kobj, &sht15_attr_group); @@ -625,10 +664,10 @@ static int __devexit sht15_remove(struct platform_device *pdev) gpio_free(data->pdata->gpio_sck); mutex_unlock(&data->read_lock); kfree(data); + return 0; } - /* * sht_drivers simultaneously refers to __devinit and __devexit function * which causes spurious section mismatch warning. So use __refdata to @@ -673,7 +712,6 @@ static struct platform_driver __refdata sht_drivers[] = { }, }; - static int __init sht15_init(void) { int ret; diff --git a/include/linux/sht15.h b/include/linux/sht15.h index 046bce05ecab..1e302146bdc8 100644 --- a/include/linux/sht15.h +++ b/include/linux/sht15.h @@ -8,14 +8,18 @@ * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. + * + * For further information, see the Documentation/hwmon/sht15 file. */ /** * struct sht15_platform_data - sht15 connectivity info - * @gpio_data: no. of gpio to which bidirectional data line is connected - * @gpio_sck: no. of gpio to which the data clock is connected. - * @supply_mv: supply voltage in mv. Overridden by regulator if available. - **/ + * @gpio_data: no. of gpio to which bidirectional data line is + * connected. + * @gpio_sck: no. of gpio to which the data clock is connected. + * @supply_mv: supply voltage in mv. Overridden by regulator if + * available. 
+ */ struct sht15_platform_data { int gpio_data; int gpio_sck; -- cgit v1.2.3-71-gd317 From cc15c7ebb424e45ba2c5ceecbe52d025219ee970 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Tue, 12 Apr 2011 15:34:38 -0400 Subject: hwmon: (sht15) add support for the status register * Add support for: - Heater. - End of battery notice. - Ability not to reload from OTP. - Low resolution (12bit temp, 8bit humidity). * Add an utility function to read individual bytes from the device. Signed-off-by: Vivien Didelot Acked-by: Jonathan Cameron Signed-off-by: Guenter Roeck --- Documentation/hwmon/sht15 | 29 +++++- drivers/hwmon/sht15.c | 247 +++++++++++++++++++++++++++++++++++++++------- include/linux/sht15.h | 4 + 3 files changed, 245 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/Documentation/hwmon/sht15 b/Documentation/hwmon/sht15 index 2919c516fdbc..d1939b25eb16 100644 --- a/Documentation/hwmon/sht15 +++ b/Documentation/hwmon/sht15 @@ -4,6 +4,7 @@ Kernel driver sht15 Authors: * Wouter Horre * Jonathan Cameron + * Vivien Didelot Supported chips: * Sensirion SHT10 @@ -30,13 +31,37 @@ Description The SHT10, SHT11, SHT15, SHT71, and SHT75 are humidity and temperature sensors. -The devices communicate using two GPIO lines and use the default -resolution settings of 14 bits for temperature and 12 bits for humidity. +The devices communicate using two GPIO lines. + +Supported resolutions for the measurements are 14 bits for temperature and 12 +bits for humidity, or 12 bits for temperature and 8 bits for humidity. + +The humidity calibration coefficients are programmed into an OTP memory on the +chip. These coefficients are used to internally calibrate the signals from the +sensors. Disabling the reload of those coefficients allows saving 10ms for each +measurement and decrease power consumption, while loosing on precision. + +Some options may be set directly in the sht15_platform_data structure +or via sysfs attributes. Note: The regulator supply name is set to "vcc". +Platform data +------------- + +* no_otp_reload: + flag to indicate not to reload from OTP (default to false). +* low_resolution: + flag to indicate the temp/humidity resolution to use (default to false). + Sysfs interface --------------- * temp1_input: temperature input * humidity1_input: humidity input +* heater_enable: write 1 in this attribute to enable the on-chip heater, + 0 to disable it. Be careful not to enable the heater + for too long. +* temp1_fault: if 1, this means that the voltage is low (below 2.47V) and + measurement may be invalid. +* humidity1_fault: same as temp1_fault. diff --git a/drivers/hwmon/sht15.c b/drivers/hwmon/sht15.c index 3182b3f578e2..49da089b5de9 100644 --- a/drivers/hwmon/sht15.c +++ b/drivers/hwmon/sht15.c @@ -1,6 +1,9 @@ /* * sht15.c - support for the SHT15 Temperature and Humidity Sensor * + * Portions Copyright (c) 2010-2011 Savoir-faire Linux Inc. 
+ * Vivien Didelot + * * Copyright (c) 2009 Jonathan Cameron * * Copyright (c) 2007 Wouter Horre @@ -33,6 +36,8 @@ /* Commands */ #define SHT15_MEASURE_TEMP 0x03 #define SHT15_MEASURE_RH 0x05 +#define SHT15_WRITE_STATUS 0x06 +#define SHT15_READ_STATUS 0x07 #define SHT15_SOFT_RESET 0x1E /* Min timings */ @@ -41,6 +46,12 @@ #define SHT15_TSU 150 /* (nsecs) data setup time */ #define SHT15_TSRST 11 /* (msecs) soft reset time */ +/* Status Register Bits */ +#define SHT15_STATUS_LOW_RESOLUTION 0x01 +#define SHT15_STATUS_NO_OTP_RELOAD 0x02 +#define SHT15_STATUS_HEATER 0x04 +#define SHT15_STATUS_LOW_BATTERY 0x40 + /* Actions the driver may be doing */ enum sht15_state { SHT15_READING_NOTHING, @@ -74,9 +85,12 @@ static const struct sht15_temppair temppoints[] = { * @wait_queue: wait queue for getting values from device. * @val_temp: last temperature value read from device. * @val_humid: last humidity value read from device. + * @val_status: last status register value read from device. * @state: state identifying the action the driver is doing. * @measurements_valid: are the current stored measures valid (start condition). + * @status_valid: is the current stored status valid (start condition). * @last_measurement: time of last measure. + * @last_status: time of last status reading. * @read_lock: mutex to ensure only one read in progress at a time. * @dev: associate device structure. * @hwmon_dev: device associated with hwmon subsystem. @@ -97,9 +111,12 @@ struct sht15_data { wait_queue_head_t wait_queue; uint16_t val_temp; uint16_t val_humid; + u8 val_status; enum sht15_state state; bool measurements_valid; + bool status_valid; unsigned long last_measurement; + unsigned long last_status; struct mutex read_lock; struct device *dev; struct device *hwmon_dev; @@ -244,10 +261,105 @@ static int sht15_soft_reset(struct sht15_data *data) if (ret) return ret; msleep(SHT15_TSRST); + /* device resets default hardware status register value */ + data->val_status = 0; + + return ret; +} + +/** + * sht15_end_transmission() - notify device of end of transmission + * @data: device state. + * + * This is basically a NAK (single clock pulse, data high). + */ +static void sht15_end_transmission(struct sht15_data *data) +{ + gpio_direction_output(data->pdata->gpio_data, 1); + ndelay(SHT15_TSU); + gpio_set_value(data->pdata->gpio_sck, 1); + ndelay(SHT15_TSCKH); + gpio_set_value(data->pdata->gpio_sck, 0); + ndelay(SHT15_TSCKL); +} + +/** + * sht15_read_byte() - Read a byte back from the device + * @data: device state. + */ +static u8 sht15_read_byte(struct sht15_data *data) +{ + int i; + u8 byte = 0; + + for (i = 0; i < 8; ++i) { + byte <<= 1; + gpio_set_value(data->pdata->gpio_sck, 1); + ndelay(SHT15_TSCKH); + byte |= !!gpio_get_value(data->pdata->gpio_data); + gpio_set_value(data->pdata->gpio_sck, 0); + ndelay(SHT15_TSCKL); + } + return byte; +} + +/** + * sht15_send_status() - write the status register byte + * @data: sht15 specific data. + * @status: the byte to set the status register with. + * + * As described in figure 14 and table 5 of the datasheet. 
+ */ +static int sht15_send_status(struct sht15_data *data, u8 status) +{ + int ret; + + ret = sht15_send_cmd(data, SHT15_WRITE_STATUS); + if (ret) + return ret; + gpio_direction_output(data->pdata->gpio_data, 1); + ndelay(SHT15_TSU); + sht15_send_byte(data, status); + ret = sht15_wait_for_response(data); + if (ret) + return ret; + data->val_status = status; return 0; } +/** + * sht15_update_status() - get updated status register from device if too old + * @data: device instance specific data. + * + * As described in figure 15 and table 5 of the datasheet. + */ +static int sht15_update_status(struct sht15_data *data) +{ + int ret = 0; + u8 status; + int timeout = HZ; + + mutex_lock(&data->read_lock); + if (time_after(jiffies, data->last_status + timeout) + || !data->status_valid) { + ret = sht15_send_cmd(data, SHT15_READ_STATUS); + if (ret) + goto error_ret; + status = sht15_read_byte(data); + + sht15_end_transmission(data); + + data->val_status = status; + data->status_valid = true; + data->last_status = jiffies; + } +error_ret: + mutex_unlock(&data->read_lock); + + return ret; +} + /** * sht15_measurement() - get a new value from device * @data: device instance specific data @@ -324,6 +436,7 @@ error_ret: static inline int sht15_calc_temp(struct sht15_data *data) { int d1 = temppoints[0].d1; + int d2 = (data->val_status & SHT15_STATUS_LOW_RESOLUTION) ? 40 : 10; int i; for (i = ARRAY_SIZE(temppoints) - 1; i > 0; i--) @@ -336,7 +449,7 @@ static inline int sht15_calc_temp(struct sht15_data *data) break; } - return data->val_temp * 10 + d1; + return data->val_temp * d2 + d1; } /** @@ -353,18 +466,85 @@ static inline int sht15_calc_humid(struct sht15_data *data) { int rh_linear; /* milli percent */ int temp = sht15_calc_temp(data); - + int c2, c3; + int t2; const int c1 = -4; - const int c2 = 40500; /* x 10 ^ -6 */ - const int c3 = -28; /* x 10 ^ -7 */ + + if (data->val_status & SHT15_STATUS_LOW_RESOLUTION) { + c2 = 648000; /* x 10 ^ -6 */ + c3 = -7200; /* x 10 ^ -7 */ + t2 = 1280; + } else { + c2 = 40500; /* x 10 ^ -6 */ + c3 = -28; /* x 10 ^ -7 */ + t2 = 80; + } rh_linear = c1 * 1000 + c2 * data->val_humid / 1000 + (data->val_humid * data->val_humid * c3) / 10000; - return (temp - 25000) * (10000 + 80 * data->val_humid) + return (temp - 25000) * (10000 + t2 * data->val_humid) / 1000000 + rh_linear; } +/** + * sht15_show_status() - show status information in sysfs + * @dev: device. + * @attr: device attribute. + * @buf: sysfs buffer where information is written to. + * + * Will be called on read access to temp1_fault, humidity1_fault + * and heater_enable sysfs attributes. + * Returns number of bytes written into buffer, negative errno on error. + */ +static ssize_t sht15_show_status(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int ret; + struct sht15_data *data = dev_get_drvdata(dev); + u8 bit = to_sensor_dev_attr(attr)->index; + + ret = sht15_update_status(data); + + return ret ? ret : sprintf(buf, "%d\n", !!(data->val_status & bit)); +} + +/** + * sht15_store_heater() - change heater state via sysfs + * @dev: device. + * @attr: device attribute. + * @buf: sysfs buffer to read the new heater state from. + * @count: length of the data. + * + * Will be called on read access to heater_enable sysfs attribute. + * Returns number of bytes actually decoded, negative errno on error. 
+ */ +static ssize_t sht15_store_heater(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int ret; + struct sht15_data *data = dev_get_drvdata(dev); + long value; + u8 status; + + if (strict_strtol(buf, 10, &value)) + return -EINVAL; + + mutex_lock(&data->read_lock); + status = data->val_status & 0x07; + if (!!value) + status |= SHT15_STATUS_HEATER; + else + status &= ~SHT15_STATUS_HEATER; + + ret = sht15_send_status(data, status); + mutex_unlock(&data->read_lock); + + return ret ? ret : count; +} + /** * sht15_show_temp() - show temperature measurement value in sysfs * @dev: device. @@ -407,7 +587,6 @@ static ssize_t sht15_show_humidity(struct device *dev, ret = sht15_update_measurements(data); return ret ? ret : sprintf(buf, "%d\n", sht15_calc_humid(data)); - } static ssize_t show_name(struct device *dev, @@ -422,10 +601,19 @@ static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, sht15_show_temp, NULL, 0); static SENSOR_DEVICE_ATTR(humidity1_input, S_IRUGO, sht15_show_humidity, NULL, 0); +static SENSOR_DEVICE_ATTR(temp1_fault, S_IRUGO, sht15_show_status, NULL, + SHT15_STATUS_LOW_BATTERY); +static SENSOR_DEVICE_ATTR(humidity1_fault, S_IRUGO, sht15_show_status, NULL, + SHT15_STATUS_LOW_BATTERY); +static SENSOR_DEVICE_ATTR(heater_enable, S_IRUGO | S_IWUSR, sht15_show_status, + sht15_store_heater, SHT15_STATUS_HEATER); static DEVICE_ATTR(name, S_IRUGO, show_name, NULL); static struct attribute *sht15_attrs[] = { &sensor_dev_attr_temp1_input.dev_attr.attr, &sensor_dev_attr_humidity1_input.dev_attr.attr, + &sensor_dev_attr_temp1_fault.dev_attr.attr, + &sensor_dev_attr_humidity1_fault.dev_attr.attr, + &sensor_dev_attr_heater_enable.dev_attr.attr, &dev_attr_name.attr, NULL, }; @@ -466,25 +654,8 @@ static void sht15_ack(struct sht15_data *data) gpio_direction_input(data->pdata->gpio_data); } -/** - * sht15_end_transmission() - notify device of end of transmission - * @data: device state - * - * This is basically a NAK. 
(single clock pulse, data high) - */ -static void sht15_end_transmission(struct sht15_data *data) -{ - gpio_direction_output(data->pdata->gpio_data, 1); - ndelay(SHT15_TSU); - gpio_set_value(data->pdata->gpio_sck, 1); - ndelay(SHT15_TSCKH); - gpio_set_value(data->pdata->gpio_sck, 0); - ndelay(SHT15_TSCKL); -} - static void sht15_bh_read_data(struct work_struct *work_s) { - int i; uint16_t val = 0; struct sht15_data *data = container_of(work_s, struct sht15_data, @@ -505,16 +676,10 @@ static void sht15_bh_read_data(struct work_struct *work_s) } /* Read the data back from the device */ - for (i = 0; i < 16; ++i) { - val <<= 1; - gpio_set_value(data->pdata->gpio_sck, 1); - ndelay(SHT15_TSCKH); - val |= !!gpio_get_value(data->pdata->gpio_data); - gpio_set_value(data->pdata->gpio_sck, 0); - ndelay(SHT15_TSCKL); - if (i == 7) - sht15_ack(data); - } + val = sht15_read_byte(data); + val <<= 8; + sht15_ack(data); + val |= sht15_read_byte(data); /* Tell the device we are done */ sht15_end_transmission(data); @@ -568,6 +733,7 @@ static int __devinit sht15_probe(struct platform_device *pdev) { int ret = 0; struct sht15_data *data = kzalloc(sizeof(*data), GFP_KERNEL); + u8 status = 0; if (!data) { ret = -ENOMEM; @@ -588,6 +754,10 @@ static int __devinit sht15_probe(struct platform_device *pdev) } data->pdata = pdev->dev.platform_data; data->supply_uV = data->pdata->supply_mv * 1000; + if (data->pdata->no_otp_reload) + status |= SHT15_STATUS_NO_OTP_RELOAD; + if (data->pdata->low_resolution) + status |= SHT15_STATUS_LOW_RESOLUTION; /* * If a regulator is available, @@ -646,6 +816,13 @@ static int __devinit sht15_probe(struct platform_device *pdev) if (ret) goto err_release_irq; + /* write status with platform data options */ + if (status) { + ret = sht15_send_status(data, status); + if (ret) + goto err_release_irq; + } + ret = sysfs_create_group(&pdev->dev.kobj, &sht15_attr_group); if (ret) { dev_err(&pdev->dev, "sysfs create failed\n"); @@ -689,6 +866,10 @@ static int __devexit sht15_remove(struct platform_device *pdev) * prevent new ones beginning */ mutex_lock(&data->read_lock); + if (sht15_soft_reset(data)) { + mutex_unlock(&data->read_lock); + return -EFAULT; + } hwmon_device_unregister(data->hwmon_dev); sysfs_remove_group(&pdev->dev.kobj, &sht15_attr_group); if (!IS_ERR(data->reg)) { diff --git a/include/linux/sht15.h b/include/linux/sht15.h index 1e302146bdc8..d836ca617323 100644 --- a/include/linux/sht15.h +++ b/include/linux/sht15.h @@ -19,10 +19,14 @@ * @gpio_sck: no. of gpio to which the data clock is connected. * @supply_mv: supply voltage in mv. Overridden by regulator if * available. + * @no_otp_reload: flag to indicate no reload from OTP. + * @low_resolution: flag to indicate the temp/humidity resolution to use. */ struct sht15_platform_data { int gpio_data; int gpio_sck; int supply_mv; + bool no_otp_reload; + bool low_resolution; }; -- cgit v1.2.3-71-gd317 From 82c7465b4dd013d19858bfeac084849ae17682c0 Mon Sep 17 00:00:00 2001 From: Jerome Oufella Date: Tue, 12 Apr 2011 15:34:39 -0400 Subject: hwmon: (sht15) add support for CRC validation The sht15 sensor allows validating exchanges to and from the device using a crc8 function. An utility function to reverse a byte has also been added. 
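For readers unfamiliar with the scheme, a minimal stand-alone user-space sketch of the CRC-8 computation follows (editorial illustration only, not part of the patch; function names are hypothetical). The lookup table added by the patch is this bitwise loop pre-computed for every byte value, using the Sensirion generator polynomial 0x31 (x^8 + x^5 + x^4 + 1), and the CRC is seeded with the bit-reversed low nibble of the status register, as the driver does.

#include <stdint.h>
#include <stdio.h>

/* Bit-reverse a byte; mirrors what sht15_reverse() does in the patch. */
static uint8_t reverse8(uint8_t b)
{
	uint8_t r = 0;
	int i;

	for (i = 0; i < 8; i++)
		r |= ((b >> i) & 1) << (7 - i);
	return r;
}

/* MSB-first CRC-8 with polynomial 0x31; sht15_crc8_table[i] in the patch
 * equals this loop run over the single byte i with an initial CRC of 0. */
static uint8_t crc8_sht1x(uint8_t crc, const uint8_t *buf, int len)
{
	int i;

	while (len--) {
		crc ^= *buf++;
		for (i = 0; i < 8; i++)
			crc = (crc & 0x80) ? (crc << 1) ^ 0x31 : crc << 1;
	}
	return crc;
}

int main(void)
{
	/* Checksum of a measurement command plus a 16-bit reading, seeded
	 * with the bit-reversed low nibble of the status register. */
	uint8_t status = 0x00;
	uint8_t msg[] = { 0x03 /* MEASURE_TEMP */, 0x12, 0x34 };
	uint8_t crc = crc8_sht1x(reverse8(status & 0x0f), msg, sizeof(msg));

	printf("crc = 0x%02x\n", crc);
	return 0;
}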
Signed-off-by: Jerome Oufella Acked-by: Jonathan Cameron Signed-off-by: Guenter Roeck --- Documentation/hwmon/sht15 | 9 ++- drivers/hwmon/sht15.c | 192 +++++++++++++++++++++++++++++++++++++++++----- include/linux/sht15.h | 2 + 3 files changed, 183 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/Documentation/hwmon/sht15 b/Documentation/hwmon/sht15 index d1939b25eb16..02850bdfac18 100644 --- a/Documentation/hwmon/sht15 +++ b/Documentation/hwmon/sht15 @@ -5,6 +5,7 @@ Authors: * Wouter Horre * Jonathan Cameron * Vivien Didelot + * Jerome Oufella Supported chips: * Sensirion SHT10 @@ -44,11 +45,17 @@ measurement and decrease power consumption, while loosing on precision. Some options may be set directly in the sht15_platform_data structure or via sysfs attributes. -Note: The regulator supply name is set to "vcc". +Notes: + * The regulator supply name is set to "vcc". + * If a CRC validation fails, a soft reset command is sent, which resets + status register to its hardware default value, but the driver will try to + restore the previous device configuration. Platform data ------------- +* checksum: + set it to true to enable CRC validation of the readings (default to false). * no_otp_reload: flag to indicate not to reload from OTP (default to false). * low_resolution: diff --git a/drivers/hwmon/sht15.c b/drivers/hwmon/sht15.c index 49da089b5de9..cf4330b352ef 100644 --- a/drivers/hwmon/sht15.c +++ b/drivers/hwmon/sht15.c @@ -2,6 +2,7 @@ * sht15.c - support for the SHT15 Temperature and Humidity Sensor * * Portions Copyright (c) 2010-2011 Savoir-faire Linux Inc. + * Jerome Oufella * Vivien Didelot * * Copyright (c) 2009 Jonathan Cameron @@ -78,6 +79,42 @@ static const struct sht15_temppair temppoints[] = { { 5000000, -40100 }, }; +/* Table from CRC datasheet, section 2.4 */ +static const u8 sht15_crc8_table[] = { + 0, 49, 98, 83, 196, 245, 166, 151, + 185, 136, 219, 234, 125, 76, 31, 46, + 67, 114, 33, 16, 135, 182, 229, 212, + 250, 203, 152, 169, 62, 15, 92, 109, + 134, 183, 228, 213, 66, 115, 32, 17, + 63, 14, 93, 108, 251, 202, 153, 168, + 197, 244, 167, 150, 1, 48, 99, 82, + 124, 77, 30, 47, 184, 137, 218, 235, + 61, 12, 95, 110, 249, 200, 155, 170, + 132, 181, 230, 215, 64, 113, 34, 19, + 126, 79, 28, 45, 186, 139, 216, 233, + 199, 246, 165, 148, 3, 50, 97, 80, + 187, 138, 217, 232, 127, 78, 29, 44, + 2, 51, 96, 81, 198, 247, 164, 149, + 248, 201, 154, 171, 60, 13, 94, 111, + 65, 112, 35, 18, 133, 180, 231, 214, + 122, 75, 24, 41, 190, 143, 220, 237, + 195, 242, 161, 144, 7, 54, 101, 84, + 57, 8, 91, 106, 253, 204, 159, 174, + 128, 177, 226, 211, 68, 117, 38, 23, + 252, 205, 158, 175, 56, 9, 90, 107, + 69, 116, 39, 22, 129, 176, 227, 210, + 191, 142, 221, 236, 123, 74, 25, 40, + 6, 55, 100, 85, 194, 243, 160, 145, + 71, 118, 37, 20, 131, 178, 225, 208, + 254, 207, 156, 173, 58, 11, 88, 105, + 4, 53, 102, 87, 192, 241, 162, 147, + 189, 140, 223, 238, 121, 72, 27, 42, + 193, 240, 163, 146, 5, 52, 103, 86, + 120, 73, 26, 43, 188, 141, 222, 239, + 130, 179, 224, 209, 70, 119, 36, 21, + 59, 10, 89, 104, 255, 206, 157, 172 +}; + /** * struct sht15_data - device instance specific data * @pdata: platform data (gpio's etc). @@ -86,6 +123,8 @@ static const struct sht15_temppair temppoints[] = { * @val_temp: last temperature value read from device. * @val_humid: last humidity value read from device. * @val_status: last status register value read from device. + * @checksum_ok: last value read from the device passed CRC validation. 
+ * @checksumming: flag used to enable the data validation with CRC. * @state: state identifying the action the driver is doing. * @measurements_valid: are the current stored measures valid (start condition). * @status_valid: is the current stored status valid (start condition). @@ -112,6 +151,8 @@ struct sht15_data { uint16_t val_temp; uint16_t val_humid; u8 val_status; + bool checksum_ok; + bool checksumming; enum sht15_state state; bool measurements_valid; bool status_valid; @@ -128,6 +169,40 @@ struct sht15_data { atomic_t interrupt_handled; }; +/** + * sht15_reverse() - reverse a byte + * @byte: byte to reverse. + */ +static u8 sht15_reverse(u8 byte) +{ + u8 i, c; + + for (c = 0, i = 0; i < 8; i++) + c |= (!!(byte & (1 << i))) << (7 - i); + return c; +} + +/** + * sht15_crc8() - compute crc8 + * @data: sht15 specific data. + * @value: sht15 retrieved data. + * + * This implements section 2 of the CRC datasheet. + */ +static u8 sht15_crc8(struct sht15_data *data, + const u8 *value, + int len) +{ + u8 crc = sht15_reverse(data->val_status & 0x0F); + + while (len--) { + crc = sht15_crc8_table[*value ^ crc]; + value++; + } + + return crc; +} + /** * sht15_connection_reset() - reset the comms interface * @data: sht15 specific data @@ -267,6 +342,26 @@ static int sht15_soft_reset(struct sht15_data *data) return ret; } +/** + * sht15_ack() - send a ack + * @data: sht15 specific data. + * + * Each byte of data is acknowledged by pulling the data line + * low for one clock pulse. + */ +static void sht15_ack(struct sht15_data *data) +{ + gpio_direction_output(data->pdata->gpio_data, 0); + ndelay(SHT15_TSU); + gpio_set_value(data->pdata->gpio_sck, 1); + ndelay(SHT15_TSU); + gpio_set_value(data->pdata->gpio_sck, 0); + ndelay(SHT15_TSU); + gpio_set_value(data->pdata->gpio_data, 1); + + gpio_direction_input(data->pdata->gpio_data); +} + /** * sht15_end_transmission() - notify device of end of transmission * @data: device state. @@ -338,6 +433,9 @@ static int sht15_update_status(struct sht15_data *data) { int ret = 0; u8 status; + u8 previous_config; + u8 dev_checksum = 0; + u8 checksum_vals[2]; int timeout = HZ; mutex_lock(&data->read_lock); @@ -348,8 +446,40 @@ static int sht15_update_status(struct sht15_data *data) goto error_ret; status = sht15_read_byte(data); + if (data->checksumming) { + sht15_ack(data); + dev_checksum = sht15_reverse(sht15_read_byte(data)); + checksum_vals[0] = SHT15_READ_STATUS; + checksum_vals[1] = status; + data->checksum_ok = (sht15_crc8(data, checksum_vals, 2) + == dev_checksum); + } + sht15_end_transmission(data); + /* + * Perform checksum validation on the received data. + * Specification mentions that in case a checksum verification + * fails, a soft reset command must be sent to the device. 
+ */ + if (data->checksumming && !data->checksum_ok) { + previous_config = data->val_status & 0x07; + ret = sht15_soft_reset(data); + if (ret) + goto error_ret; + if (previous_config) { + ret = sht15_send_status(data, previous_config); + if (ret) { + dev_err(data->dev, + "CRC validation failed, unable " + "to restore device settings\n"); + goto error_ret; + } + } + ret = -EAGAIN; + goto error_ret; + } + data->val_status = status; data->status_valid = true; data->last_status = jiffies; @@ -372,6 +502,7 @@ static int sht15_measurement(struct sht15_data *data, int timeout_msecs) { int ret; + u8 previous_config; ret = sht15_send_cmd(data, command); if (ret) @@ -395,6 +526,29 @@ static int sht15_measurement(struct sht15_data *data, sht15_connection_reset(data); return -ETIME; } + + /* + * Perform checksum validation on the received data. + * Specification mentions that in case a checksum verification fails, + * a soft reset command must be sent to the device. + */ + if (data->checksumming && !data->checksum_ok) { + previous_config = data->val_status & 0x07; + ret = sht15_soft_reset(data); + if (ret) + return ret; + if (previous_config) { + ret = sht15_send_status(data, previous_config); + if (ret) { + dev_err(data->dev, + "CRC validation failed, unable " + "to restore device settings\n"); + return ret; + } + } + return -EAGAIN; + } + return 0; } @@ -635,28 +789,11 @@ static irqreturn_t sht15_interrupt_fired(int irq, void *d) return IRQ_HANDLED; } -/** - * sht15_ack() - Send an ack to the device - * - * Each byte of data is acknowledged by pulling the data line - * low for one clock pulse. - */ -static void sht15_ack(struct sht15_data *data) -{ - gpio_direction_output(data->pdata->gpio_data, 0); - ndelay(SHT15_TSU); - gpio_set_value(data->pdata->gpio_sck, 1); - ndelay(SHT15_TSU); - gpio_set_value(data->pdata->gpio_sck, 0); - ndelay(SHT15_TSU); - gpio_set_value(data->pdata->gpio_data, 1); - - gpio_direction_input(data->pdata->gpio_data); -} - static void sht15_bh_read_data(struct work_struct *work_s) { uint16_t val = 0; + u8 dev_checksum = 0; + u8 checksum_vals[3]; struct sht15_data *data = container_of(work_s, struct sht15_data, read_work); @@ -681,6 +818,21 @@ static void sht15_bh_read_data(struct work_struct *work_s) sht15_ack(data); val |= sht15_read_byte(data); + if (data->checksumming) { + /* + * Ask the device for a checksum and read it back. + * Note: the device sends the checksum byte reversed. + */ + sht15_ack(data); + dev_checksum = sht15_reverse(sht15_read_byte(data)); + checksum_vals[0] = (data->state == SHT15_READING_TEMP) ? + SHT15_MEASURE_TEMP : SHT15_MEASURE_RH; + checksum_vals[1] = (u8) (val >> 8); + checksum_vals[2] = (u8) val; + data->checksum_ok + = (sht15_crc8(data, checksum_vals, 3) == dev_checksum); + } + /* Tell the device we are done */ sht15_end_transmission(data); @@ -754,6 +906,8 @@ static int __devinit sht15_probe(struct platform_device *pdev) } data->pdata = pdev->dev.platform_data; data->supply_uV = data->pdata->supply_mv * 1000; + if (data->pdata->checksum) + data->checksumming = true; if (data->pdata->no_otp_reload) status |= SHT15_STATUS_NO_OTP_RELOAD; if (data->pdata->low_resolution) diff --git a/include/linux/sht15.h b/include/linux/sht15.h index d836ca617323..f85c7c523da0 100644 --- a/include/linux/sht15.h +++ b/include/linux/sht15.h @@ -19,6 +19,7 @@ * @gpio_sck: no. of gpio to which the data clock is connected. * @supply_mv: supply voltage in mv. Overridden by regulator if * available. + * @checksum: flag to indicate the checksum should be validated. 
* @no_otp_reload: flag to indicate no reload from OTP. * @low_resolution: flag to indicate the temp/humidity resolution to use. */ @@ -26,6 +27,7 @@ struct sht15_platform_data { int gpio_data; int gpio_sck; int supply_mv; + bool checksum; bool no_otp_reload; bool low_resolution; }; -- cgit v1.2.3-71-gd317 From 75d65a425c0163d3ec476ddc12b51087217a070c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 19 May 2011 13:50:07 -0700 Subject: hlist: remove software prefetching in hlist iterators They not only increase the code footprint, they actually make things slower rather than faster. On internationally acclaimed benchmarks ("make -j16" on an already fully built kernel source tree) the hlist prefetching slows down the build by up to 1%. (Almost all of it comes from hlist_for_each_entry_rcu() as used by avc_has_perm_noaudit(), which is very hot due to all the pathname lookups to see if there is anything to do). The cause seems to be two-fold: - on at least some Intel cores, prefetch(NULL) ends up with some microarchitectural stall due to the TLB miss that it incurs. The hlist case triggers this very commonly, since the NULL pointer is the last entry in the list. - the prefetch appears to cause more D$ activity, probably because it prefetches hash list entries that are never actually used (because we ended the search early due to a hit). Regardless, the numbers clearly say that the implicit prefetching is simply a bad idea. If some _particular_ user of the hlist iterators wants to prefetch the next list entry, they can do so themselves explicitly, rather than depend on all list iterators doing so implicitly. Acked-by: Ingo Molnar Acked-by: David S. Miller Cc: linux-arch@vger.kernel.org Signed-off-by: Linus Torvalds --- include/linux/list.h | 9 ++++----- include/linux/rculist.h | 10 +++++----- 2 files changed, 9 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index 3a54266a1e85..9ac11148e037 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -664,8 +664,7 @@ static inline void hlist_move_list(struct hlist_head *old, #define hlist_entry(ptr, type, member) container_of(ptr,type,member) #define hlist_for_each(pos, head) \ - for (pos = (head)->first; pos && ({ prefetch(pos->next); 1; }); \ - pos = pos->next) + for (pos = (head)->first; pos ; pos = pos->next) #define hlist_for_each_safe(pos, n, head) \ for (pos = (head)->first; pos && ({ n = pos->next; 1; }); \ @@ -680,7 +679,7 @@ static inline void hlist_move_list(struct hlist_head *old, */ #define hlist_for_each_entry(tpos, pos, head, member) \ for (pos = (head)->first; \ - pos && ({ prefetch(pos->next); 1;}) && \ + pos && \ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ pos = pos->next) @@ -692,7 +691,7 @@ static inline void hlist_move_list(struct hlist_head *old, */ #define hlist_for_each_entry_continue(tpos, pos, member) \ for (pos = (pos)->next; \ - pos && ({ prefetch(pos->next); 1;}) && \ + pos && \ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ pos = pos->next) @@ -703,7 +702,7 @@ static inline void hlist_move_list(struct hlist_head *old, * @member: the name of the hlist_node within the struct. 
*/ #define hlist_for_each_entry_from(tpos, pos, member) \ - for (; pos && ({ prefetch(pos->next); 1;}) && \ + for (; pos && \ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ pos = pos->next) diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 2dea94fc4402..900a97a44769 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -427,7 +427,7 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, #define __hlist_for_each_rcu(pos, head) \ for (pos = rcu_dereference(hlist_first_rcu(head)); \ - pos && ({ prefetch(pos->next); 1; }); \ + pos; \ pos = rcu_dereference(hlist_next_rcu(pos))) /** @@ -443,7 +443,7 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, */ #define hlist_for_each_entry_rcu(tpos, pos, head, member) \ for (pos = rcu_dereference_raw(hlist_first_rcu(head)); \ - pos && ({ prefetch(pos->next); 1; }) && \ + pos && \ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ pos = rcu_dereference_raw(hlist_next_rcu(pos))) @@ -460,7 +460,7 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, */ #define hlist_for_each_entry_rcu_bh(tpos, pos, head, member) \ for (pos = rcu_dereference_bh((head)->first); \ - pos && ({ prefetch(pos->next); 1; }) && \ + pos && \ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ pos = rcu_dereference_bh(pos->next)) @@ -472,7 +472,7 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, */ #define hlist_for_each_entry_continue_rcu(tpos, pos, member) \ for (pos = rcu_dereference((pos)->next); \ - pos && ({ prefetch(pos->next); 1; }) && \ + pos && \ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ pos = rcu_dereference(pos->next)) @@ -484,7 +484,7 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, */ #define hlist_for_each_entry_continue_rcu_bh(tpos, pos, member) \ for (pos = rcu_dereference_bh((pos)->next); \ - pos && ({ prefetch(pos->next); 1; }) && \ + pos && \ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ pos = rcu_dereference_bh(pos->next)) -- cgit v1.2.3-71-gd317 From e66eed651fd18a961f11cda62f3b5286c8cc4f9f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 19 May 2011 14:15:29 -0700 Subject: list: remove prefetching from regular list iterators This is removes the use of software prefetching from the regular list iterators. We don't want it. If you do want to prefetch in some iterator of yours, go right ahead. Just don't expect the iterator to do it, since normally the downsides are bigger than the upsides. It also replaces with , because the use of LIST_POISON ends up needing it. is sadly not self-contained, and including prefetch.h just happened to hide that. Suggested by David Miller (networking has a lot of regular lists that are often empty or a single entry, and prefetching is not going to do anything but add useless instructions). Acked-by: Ingo Molnar Acked-by: David S. Miller Cc: linux-arch@vger.kernel.org Signed-off-by: Linus Torvalds --- include/linux/list.h | 26 +++++++++++--------------- include/linux/rculist.h | 6 +++--- 2 files changed, 14 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index 9ac11148e037..cc6d2aa6b415 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -4,7 +4,7 @@ #include #include #include -#include +#include /* * Simple doubly linked list implementation. @@ -367,18 +367,15 @@ static inline void list_splice_tail_init(struct list_head *list, * @head: the head for your list. 
*/ #define list_for_each(pos, head) \ - for (pos = (head)->next; prefetch(pos->next), pos != (head); \ - pos = pos->next) + for (pos = (head)->next; pos != (head); pos = pos->next) /** * __list_for_each - iterate over a list * @pos: the &struct list_head to use as a loop cursor. * @head: the head for your list. * - * This variant differs from list_for_each() in that it's the - * simplest possible list iteration code, no prefetching is done. - * Use this for code that knows the list to be very short (empty - * or 1 entry) most of the time. + * This variant doesn't differ from list_for_each() any more. + * We don't do prefetching in either case. */ #define __list_for_each(pos, head) \ for (pos = (head)->next; pos != (head); pos = pos->next) @@ -389,8 +386,7 @@ static inline void list_splice_tail_init(struct list_head *list, * @head: the head for your list. */ #define list_for_each_prev(pos, head) \ - for (pos = (head)->prev; prefetch(pos->prev), pos != (head); \ - pos = pos->prev) + for (pos = (head)->prev; pos != (head); pos = pos->prev) /** * list_for_each_safe - iterate over a list safe against removal of list entry @@ -410,7 +406,7 @@ static inline void list_splice_tail_init(struct list_head *list, */ #define list_for_each_prev_safe(pos, n, head) \ for (pos = (head)->prev, n = pos->prev; \ - prefetch(pos->prev), pos != (head); \ + pos != (head); \ pos = n, n = pos->prev) /** @@ -421,7 +417,7 @@ static inline void list_splice_tail_init(struct list_head *list, */ #define list_for_each_entry(pos, head, member) \ for (pos = list_entry((head)->next, typeof(*pos), member); \ - prefetch(pos->member.next), &pos->member != (head); \ + &pos->member != (head); \ pos = list_entry(pos->member.next, typeof(*pos), member)) /** @@ -432,7 +428,7 @@ static inline void list_splice_tail_init(struct list_head *list, */ #define list_for_each_entry_reverse(pos, head, member) \ for (pos = list_entry((head)->prev, typeof(*pos), member); \ - prefetch(pos->member.prev), &pos->member != (head); \ + &pos->member != (head); \ pos = list_entry(pos->member.prev, typeof(*pos), member)) /** @@ -457,7 +453,7 @@ static inline void list_splice_tail_init(struct list_head *list, */ #define list_for_each_entry_continue(pos, head, member) \ for (pos = list_entry(pos->member.next, typeof(*pos), member); \ - prefetch(pos->member.next), &pos->member != (head); \ + &pos->member != (head); \ pos = list_entry(pos->member.next, typeof(*pos), member)) /** @@ -471,7 +467,7 @@ static inline void list_splice_tail_init(struct list_head *list, */ #define list_for_each_entry_continue_reverse(pos, head, member) \ for (pos = list_entry(pos->member.prev, typeof(*pos), member); \ - prefetch(pos->member.prev), &pos->member != (head); \ + &pos->member != (head); \ pos = list_entry(pos->member.prev, typeof(*pos), member)) /** @@ -483,7 +479,7 @@ static inline void list_splice_tail_init(struct list_head *list, * Iterate over list of given type, continuing from current position. 
*/ #define list_for_each_entry_from(pos, head, member) \ - for (; prefetch(pos->member.next), &pos->member != (head); \ + for (; &pos->member != (head); \ pos = list_entry(pos->member.next, typeof(*pos), member)) /** diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 900a97a44769..e3beb315517a 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -253,7 +253,7 @@ static inline void list_splice_init_rcu(struct list_head *list, */ #define list_for_each_entry_rcu(pos, head, member) \ for (pos = list_entry_rcu((head)->next, typeof(*pos), member); \ - prefetch(pos->member.next), &pos->member != (head); \ + &pos->member != (head); \ pos = list_entry_rcu(pos->member.next, typeof(*pos), member)) @@ -270,7 +270,7 @@ static inline void list_splice_init_rcu(struct list_head *list, */ #define list_for_each_continue_rcu(pos, head) \ for ((pos) = rcu_dereference_raw(list_next_rcu(pos)); \ - prefetch((pos)->next), (pos) != (head); \ + (pos) != (head); \ (pos) = rcu_dereference_raw(list_next_rcu(pos))) /** @@ -284,7 +284,7 @@ static inline void list_splice_init_rcu(struct list_head *list, */ #define list_for_each_entry_continue_rcu(pos, head, member) \ for (pos = list_entry_rcu(pos->member.next, typeof(*pos), member); \ - prefetch(pos->member.next), &pos->member != (head); \ + &pos->member != (head); \ pos = list_entry_rcu(pos->member.next, typeof(*pos), member)) /** -- cgit v1.2.3-71-gd317 From 4539c24fe4f92c09ee668ef959d3e8180df619b9 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Tue, 17 May 2011 16:12:36 -0600 Subject: tty/serial: Add explicit PORT_TEGRA type Tegra's UART is currently auto-detected as PORT_XSCALE due to register bit UART_IER.UUE being writable. However, the Tegra documentation states that this register bit is reserved. Hence, we should not program it. Instead, the documentation specifies that the UART is 16550 compatible. However, Tegra does need register bit UART_IER.RTOIE set, which is not enabled by any 16550 port type. This was not noticed before, since PORT_XSCALE enables CAP_UUE, which conflates both UUE and RTOIE bit programming. This change defines PORT_TEGRA that doesn't set UART_CAP_UUE, but does set UART_CAP_RTOIE, which is a new capability indicating that the RTOIE bit needs to be enabled. 
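To make the capability split concrete, here is a small self-contained sketch (editorial illustration, not code from the patch; bit positions follow the 8250.h comments in the hunks below, helper name is hypothetical):

#include <stdint.h>
#include <stdio.h>

#define UART_IER_RTOIE	0x10		/* IER bit 4: RX timeout interrupt enable */
#define UART_IER_UUE	0x40		/* IER bit 6: Xscale UART unit enable */

#define UART_CAP_UUE	(1 << 12)	/* port needs UUE written */
#define UART_CAP_RTOIE	(1 << 13)	/* port needs RTOIE written */

/*
 * Derive the extra IER bits from a port's capabilities.  PORT_XSCALE keeps
 * both capabilities, PORT_TEGRA only UART_CAP_RTOIE, so the bit that is
 * reserved on Tegra (UUE) is never written there.
 */
static uint8_t ier_for_caps(unsigned int capabilities, uint8_t ier)
{
	if (capabilities & UART_CAP_UUE)
		ier |= UART_IER_UUE;
	if (capabilities & UART_CAP_RTOIE)
		ier |= UART_IER_RTOIE;
	return ier;
}

int main(void)
{
	printf("xscale: IER |= 0x%02x\n",
	       ier_for_caps(UART_CAP_UUE | UART_CAP_RTOIE, 0));
	printf("tegra:  IER |= 0x%02x\n",
	       ier_for_caps(UART_CAP_RTOIE, 0));
	return 0;
}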
Based-on-code-by: Laxman Dewangan Cc: Laxman Dewangan Signed-off-by: Stephen Warren Acked-by: Alan Cox Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250.c | 14 ++++++++++++-- drivers/tty/serial/8250.h | 1 + include/linux/serial_core.h | 3 ++- include/linux/serial_reg.h | 1 + 4 files changed, 16 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/serial/8250.c b/drivers/tty/serial/8250.c index 54482d724fee..a5e290de8c93 100644 --- a/drivers/tty/serial/8250.c +++ b/drivers/tty/serial/8250.c @@ -271,7 +271,7 @@ static const struct serial8250_config uart_config[] = { .fifo_size = 32, .tx_loadsz = 32, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, - .flags = UART_CAP_FIFO | UART_CAP_UUE, + .flags = UART_CAP_FIFO | UART_CAP_UUE | UART_CAP_RTOIE, }, [PORT_RM9000] = { .name = "RM9000", @@ -301,6 +301,14 @@ static const struct serial8250_config uart_config[] = { .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, .flags = UART_CAP_FIFO | UART_CAP_AFE, }, + [PORT_TEGRA] = { + .name = "Tegra", + .fifo_size = 32, + .tx_loadsz = 8, + .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_01 | + UART_FCR_T_TRIG_01, + .flags = UART_CAP_FIFO | UART_CAP_RTOIE, + }, }; #if defined(CONFIG_MIPS_ALCHEMY) @@ -2403,7 +2411,9 @@ serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios, UART_ENABLE_MS(&up->port, termios->c_cflag)) up->ier |= UART_IER_MSI; if (up->capabilities & UART_CAP_UUE) - up->ier |= UART_IER_UUE | UART_IER_RTOIE; + up->ier |= UART_IER_UUE; + if (up->capabilities & UART_CAP_RTOIE) + up->ier |= UART_IER_RTOIE; serial_out(up, UART_IER, up->ier); diff --git a/drivers/tty/serial/8250.h b/drivers/tty/serial/8250.h index d13b586c0f72..6edf4a6a22d4 100644 --- a/drivers/tty/serial/8250.h +++ b/drivers/tty/serial/8250.h @@ -42,6 +42,7 @@ struct serial8250_config { #define UART_CAP_SLEEP (1 << 10) /* UART has IER sleep */ #define UART_CAP_AFE (1 << 11) /* MCR-based hw flow control */ #define UART_CAP_UUE (1 << 12) /* UART needs IER bit 6 set (Xscale) */ +#define UART_CAP_RTOIE (1 << 13) /* UART needs IER bit 4 set (Xscale, Tegra) */ #define UART_BUG_QUOT (1 << 0) /* UART has buggy quot LSB */ #define UART_BUG_TXEN (1 << 1) /* UART has buggy TX IIR status */ diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 95d479ba514e..a5c31146a337 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -45,7 +45,8 @@ #define PORT_OCTEON 17 /* Cavium OCTEON internal UART */ #define PORT_AR7 18 /* Texas Instruments AR7 internal UART */ #define PORT_U6_16550A 19 /* ST-Ericsson U6xxx internal UART */ -#define PORT_MAX_8250 19 /* max port ID */ +#define PORT_TEGRA 20 /* NVIDIA Tegra internal UART */ +#define PORT_MAX_8250 20 /* max port ID */ /* * ARM specific type numbers. These are not currently guaranteed diff --git a/include/linux/serial_reg.h b/include/linux/serial_reg.h index 3ecb71a9e505..5f66e8499fb9 100644 --- a/include/linux/serial_reg.h +++ b/include/linux/serial_reg.h @@ -57,6 +57,7 @@ * ST16C654: 8 16 56 60 8 16 32 56 PORT_16654 * TI16C750: 1 16 32 56 xx xx xx xx PORT_16750 * TI16C752: 8 16 56 60 8 16 32 56 + * Tegra: 1 4 8 14 16 8 4 1 PORT_TEGRA */ #define UART_FCR_R_TRIG_00 0x00 #define UART_FCR_R_TRIG_01 0x40 -- cgit v1.2.3-71-gd317 From 5f873bae704cf8b7cbd64b5720912266286c9146 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Tue, 17 May 2011 16:12:37 -0600 Subject: tty/serial: Fix break handling for PORT_TEGRA When a break is received, Tegra's UART apparently fills the FIFO with 0 bytes. 
These must be drained so that they aren't interpreted as actual data received. This allows e.g. MAGIC_SYSRQ to work on Tegra's UARTs. v2: Added FIXME comment to clear_rx_fifo Originally-by: Laxman Dewangan Cc: Laxman Dewangan Signed-off-by: Stephen Warren Acked-by: Alan Cox Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250.c | 28 ++++++++++++++++++++++++++++ include/linux/serial_reg.h | 1 + 2 files changed, 29 insertions(+) (limited to 'include/linux') diff --git a/drivers/tty/serial/8250.c b/drivers/tty/serial/8250.c index a5e290de8c93..b40f7b90c81d 100644 --- a/drivers/tty/serial/8250.c +++ b/drivers/tty/serial/8250.c @@ -1433,6 +1433,27 @@ static void serial8250_enable_ms(struct uart_port *port) serial_out(up, UART_IER, up->ier); } +/* + * Clear the Tegra rx fifo after a break + * + * FIXME: This needs to become a port specific callback once we have a + * framework for this + */ +static void clear_rx_fifo(struct uart_8250_port *up) +{ + unsigned int status, tmout = 10000; + do { + status = serial_in(up, UART_LSR); + if (status & (UART_LSR_FIFOE | UART_LSR_BRK_ERROR_BITS)) + status = serial_in(up, UART_RX); + else + break; + if (--tmout == 0) + break; + udelay(1); + } while (1); +} + static void receive_chars(struct uart_8250_port *up, unsigned int *status) { @@ -1467,6 +1488,13 @@ receive_chars(struct uart_8250_port *up, unsigned int *status) if (lsr & UART_LSR_BI) { lsr &= ~(UART_LSR_FE | UART_LSR_PE); up->port.icount.brk++; + /* + * If tegra port then clear the rx fifo to + * accept another break/character. + */ + if (up->port.type == PORT_TEGRA) + clear_rx_fifo(up); + /* * We do the SysRQ and SAK checking * here because otherwise the break diff --git a/include/linux/serial_reg.h b/include/linux/serial_reg.h index 5f66e8499fb9..c75bda37c18e 100644 --- a/include/linux/serial_reg.h +++ b/include/linux/serial_reg.h @@ -119,6 +119,7 @@ #define UART_MCR_DTR 0x01 /* DTR complement */ #define UART_LSR 5 /* In: Line Status Register */ +#define UART_LSR_FIFOE 0x80 /* Fifo error */ #define UART_LSR_TEMT 0x40 /* Transmitter empty */ #define UART_LSR_THRE 0x20 /* Transmit-hold-register empty */ #define UART_LSR_BI 0x10 /* Break interrupt indicator */ -- cgit v1.2.3-71-gd317 From 8a745f1f39b7a20047a362b67ce9151c07d14440 Mon Sep 17 00:00:00 2001 From: Kristen Carlson Accardi Date: Fri, 4 Mar 2011 10:24:11 -0800 Subject: libata: Power off empty ports Give users the option of completely powering off unoccupied SATA ports using the existing min_power link_power_management_policy option. When the use selects this option on an empty port, we will power the port off by setting DET to off. For occupied ports, behavior is unchanged. 
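A minimal sketch of the SControl update described above (editorial illustration with hypothetical names; it assumes the standard SATA SControl layout, DET in bits 3:0 and IPM in bits 11:8, where DET = 0x4 puts the PHY offline):

#include <stdint.h>
#include <stdio.h>

static uint32_t min_power_scontrol(uint32_t scontrol, int nr_enabled_devices)
{
	if (nr_enabled_devices > 0) {
		/* occupied port: clear IPM, no restrictions on LPM transitions */
		scontrol &= ~(0x3u << 8);
	} else {
		/* empty port: set DET to 0x4 to power the PHY off */
		scontrol &= ~0xfu;
		scontrol |= 0x1u << 2;
	}
	return scontrol;
}

int main(void)
{
	printf("occupied: 0x%08x\n",
	       (unsigned int)min_power_scontrol(0x00000300, 1));
	printf("empty:    0x%08x\n",
	       (unsigned int)min_power_scontrol(0x00000300, 0));
	return 0;
}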
Signed-off-by: Kristen Carlson Accardi Signed-off-by: Jeff Garzik --- drivers/ata/libata-core.c | 10 ++++++++-- drivers/ata/libata-eh.c | 2 +- include/linux/libata.h | 1 + 3 files changed, 10 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 76c3c15cb1e6..736bee5dafeb 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -3619,8 +3619,14 @@ int sata_link_scr_lpm(struct ata_link *link, enum ata_lpm_policy policy, scontrol |= (0x2 << 8); break; case ATA_LPM_MIN_POWER: - /* no restrictions on LPM transitions */ - scontrol &= ~(0x3 << 8); + if (ata_link_nr_enabled(link) > 0) + /* no restrictions on LPM transitions */ + scontrol &= ~(0x3 << 8); + else { + /* empty port, power off */ + scontrol &= ~0xf; + scontrol |= (0x1 << 2); + } break; default: WARN_ON(1); diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index dad9fd660f37..dfb6e9d3d759 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -3423,7 +3423,7 @@ fail: return rc; } -static int ata_link_nr_enabled(struct ata_link *link) +int ata_link_nr_enabled(struct ata_link *link) { struct ata_device *dev; int cnt = 0; diff --git a/include/linux/libata.h b/include/linux/libata.h index 04f32a3eb26b..5a9926b34072 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1151,6 +1151,7 @@ extern void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset, ata_reset_fn_t softreset, ata_reset_fn_t hardreset, ata_postreset_fn_t postreset); extern void ata_std_error_handler(struct ata_port *ap); +extern int ata_link_nr_enabled(struct ata_link *link); /* * Base operations to inherit from and initializers for sht -- cgit v1.2.3-71-gd317 From 1477fcc290b3d5c2614bde98bf3b1154c538860d Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Fri, 20 May 2011 11:11:53 +1000 Subject: signal.h need a definition of struct task_struct This fixes these build errors on powerpc: In file included from arch/powerpc/mm/fault.c:18: include/linux/signal.h:239: error: 'struct task_struct' declared inside parameter list include/linux/signal.h:239: error: its scope is only this definition or declaration, which is probably not what you want include/linux/signal.h:240: error: 'struct task_struct' declared inside parameter list .. Exposed by commit e66eed651fd1 ("list: remove prefetching from regular list iterators"), which removed the include of from . Without that, linux/signal.h no longer accidentally got the declaration of 'struct task_struct'. Fix by properly declaring the struct, rather than introducing any new header file dependency. Signed-off-by: Stephen Rothwell Signed-off-by: Linus Torvalds --- include/linux/signal.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/signal.h b/include/linux/signal.h index fcd2b14b1932..29a68ac7af83 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -7,6 +7,8 @@ #ifdef __KERNEL__ #include +struct task_struct; + /* for sysctl */ extern int print_fatal_signals; /* -- cgit v1.2.3-71-gd317 From 449f4544267e73d5db372971da63634707c32299 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 19 May 2011 12:24:16 +0000 Subject: macvlan: remove one synchronize_rcu() call When one macvlan device is dismantled, we can avoid one synchronize_rcu() call done after deletion from hash list, since caller will perform a synchronize_net() call after its ndo_stop() call. Add a new netdev->dismantle field to signal this dismantle intent. 
Reduces RTNL hold time. Signed-off-by: Eric Dumazet CC: Patrick McHardy CC: Ben Greear Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 9 +++++---- include/linux/netdevice.h | 4 +++- net/core/dev.c | 2 +- 3 files changed, 9 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index d7c0bc62da7f..07bcb8084d78 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -70,16 +70,17 @@ static void macvlan_hash_add(struct macvlan_dev *vlan) hlist_add_head_rcu(&vlan->hlist, &port->vlan_hash[addr[5]]); } -static void macvlan_hash_del(struct macvlan_dev *vlan) +static void macvlan_hash_del(struct macvlan_dev *vlan, bool sync) { hlist_del_rcu(&vlan->hlist); - synchronize_rcu(); + if (sync) + synchronize_rcu(); } static void macvlan_hash_change_addr(struct macvlan_dev *vlan, const unsigned char *addr) { - macvlan_hash_del(vlan); + macvlan_hash_del(vlan, true); /* Now that we are unhashed it is safe to change the device * address without confusing packet delivery. */ @@ -345,7 +346,7 @@ static int macvlan_stop(struct net_device *dev) dev_uc_del(lowerdev, dev->dev_addr); hash_del: - macvlan_hash_del(vlan); + macvlan_hash_del(vlan, !dev->dismantle); return 0; } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a134d809125b..ca333e79e10f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1293,7 +1293,9 @@ struct net_device { NETREG_UNREGISTERED, /* completed unregister todo */ NETREG_RELEASED, /* called free_netdev */ NETREG_DUMMY, /* dummy device for NAPI poll */ - } reg_state:16; + } reg_state:8; + + bool dismantle; /* device is going do be freed */ enum { RTNL_LINK_INITIALIZED, diff --git a/net/core/dev.c b/net/core/dev.c index 155de2094e71..d94537914a71 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5126,7 +5126,7 @@ static void rollback_registered_many(struct list_head *head) list_del(&dev->unreg_list); continue; } - + dev->dismantle = true; BUG_ON(dev->reg_state != NETREG_REGISTERED); } -- cgit v1.2.3-71-gd317 From 1399fa7807a1a5998bbf147e80668e9950661dfa Mon Sep 17 00:00:00 2001 From: Nikhil Rao Date: Wed, 18 May 2011 10:09:39 -0700 Subject: sched: Introduce SCHED_POWER_SCALE to scale cpu_power calculations SCHED_LOAD_SCALE is used to increase nice resolution and to scale cpu_power calculations in the scheduler. This patch introduces SCHED_POWER_SCALE and converts all uses of SCHED_LOAD_SCALE for scaling cpu_power to use SCHED_POWER_SCALE instead. This is a preparatory patch for increasing the resolution of SCHED_LOAD_SCALE, and there is no need to increase resolution for cpu_power calculations. Signed-off-by: Nikhil Rao Acked-by: Peter Zijlstra Cc: Nikunj A. 
Dadhania Cc: Srivatsa Vaddagiri Cc: Stephan Barwolf Cc: Mike Galbraith Link: http://lkml.kernel.org/r/1305738580-9924-3-git-send-email-ncrao@google.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 13 ++++++++----- kernel/sched.c | 4 ++-- kernel/sched_fair.c | 52 ++++++++++++++++++++++++++------------------------- 3 files changed, 37 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 12211e1666e2..f2f440221b70 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -787,18 +787,21 @@ enum cpu_idle_type { CPU_MAX_IDLE_TYPES }; -/* - * sched-domains (multiprocessor balancing) declarations: - */ - /* * Increase resolution of nice-level calculations: */ #define SCHED_LOAD_SHIFT 10 #define SCHED_LOAD_SCALE (1L << SCHED_LOAD_SHIFT) -#define SCHED_LOAD_SCALE_FUZZ SCHED_LOAD_SCALE +/* + * Increase resolution of cpu_power calculations + */ +#define SCHED_POWER_SHIFT 10 +#define SCHED_POWER_SCALE (1L << SCHED_POWER_SHIFT) +/* + * sched-domains (multiprocessor balancing) declarations: + */ #ifdef CONFIG_SMP #define SD_LOAD_BALANCE 0x0001 /* Do load balancing on this domain. */ #define SD_BALANCE_NEWIDLE 0x0002 /* Balance when about to become idle */ diff --git a/kernel/sched.c b/kernel/sched.c index d036048b59a4..375e9c677d58 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6530,7 +6530,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group)); printk(KERN_CONT " %s", str); - if (group->cpu_power != SCHED_LOAD_SCALE) { + if (group->cpu_power != SCHED_POWER_SCALE) { printk(KERN_CONT " (cpu_power = %d)", group->cpu_power); } @@ -7905,7 +7905,7 @@ void __init sched_init(void) #ifdef CONFIG_SMP rq->sd = NULL; rq->rd = NULL; - rq->cpu_power = SCHED_LOAD_SCALE; + rq->cpu_power = SCHED_POWER_SCALE; rq->post_schedule = 0; rq->active_balance = 0; rq->next_balance = jiffies; diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 37f22626225e..e32a9b70ee9c 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1584,7 +1584,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, } /* Adjust by relative CPU power of the group */ - avg_load = (avg_load * SCHED_LOAD_SCALE) / group->cpu_power; + avg_load = (avg_load * SCHED_POWER_SCALE) / group->cpu_power; if (local_group) { this_load = avg_load; @@ -1722,7 +1722,7 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags) nr_running += cpu_rq(i)->cfs.nr_running; } - capacity = DIV_ROUND_CLOSEST(power, SCHED_LOAD_SCALE); + capacity = DIV_ROUND_CLOSEST(power, SCHED_POWER_SCALE); if (tmp->flags & SD_POWERSAVINGS_BALANCE) nr_running /= 2; @@ -2570,7 +2570,7 @@ static inline int check_power_save_busiest_group(struct sd_lb_stats *sds, unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu) { - return SCHED_LOAD_SCALE; + return SCHED_POWER_SCALE; } unsigned long __weak arch_scale_freq_power(struct sched_domain *sd, int cpu) @@ -2607,10 +2607,10 @@ unsigned long scale_rt_power(int cpu) available = total - rq->rt_avg; } - if (unlikely((s64)total < SCHED_LOAD_SCALE)) - total = SCHED_LOAD_SCALE; + if (unlikely((s64)total < SCHED_POWER_SCALE)) + total = SCHED_POWER_SCALE; - total >>= SCHED_LOAD_SHIFT; + total >>= SCHED_POWER_SHIFT; return div_u64(available, total); } @@ -2618,7 +2618,7 @@ unsigned long scale_rt_power(int cpu) static void update_cpu_power(struct sched_domain *sd, int cpu) { unsigned long weight = sd->span_weight; - 
unsigned long power = SCHED_LOAD_SCALE; + unsigned long power = SCHED_POWER_SCALE; struct sched_group *sdg = sd->groups; if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) { @@ -2627,7 +2627,7 @@ static void update_cpu_power(struct sched_domain *sd, int cpu) else power *= default_scale_smt_power(sd, cpu); - power >>= SCHED_LOAD_SHIFT; + power >>= SCHED_POWER_SHIFT; } sdg->cpu_power_orig = power; @@ -2637,10 +2637,10 @@ static void update_cpu_power(struct sched_domain *sd, int cpu) else power *= default_scale_freq_power(sd, cpu); - power >>= SCHED_LOAD_SHIFT; + power >>= SCHED_POWER_SHIFT; power *= scale_rt_power(cpu); - power >>= SCHED_LOAD_SHIFT; + power >>= SCHED_POWER_SHIFT; if (!power) power = 1; @@ -2682,7 +2682,7 @@ static inline int fix_small_capacity(struct sched_domain *sd, struct sched_group *group) { /* - * Only siblings can have significantly less than SCHED_LOAD_SCALE + * Only siblings can have significantly less than SCHED_POWER_SCALE */ if (!(sd->flags & SD_SHARE_CPUPOWER)) return 0; @@ -2770,7 +2770,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, } /* Adjust by relative CPU power of the group */ - sgs->avg_load = (sgs->group_load * SCHED_LOAD_SCALE) / group->cpu_power; + sgs->avg_load = (sgs->group_load*SCHED_POWER_SCALE) / group->cpu_power; /* * Consider the group unbalanced when the imbalance is larger @@ -2787,7 +2787,8 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, if ((max_cpu_load - min_cpu_load) >= avg_load_per_task && max_nr_running > 1) sgs->group_imb = 1; - sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE); + sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, + SCHED_POWER_SCALE); if (!sgs->group_capacity) sgs->group_capacity = fix_small_capacity(sd, group); sgs->group_weight = group->group_weight; @@ -2961,7 +2962,7 @@ static int check_asym_packing(struct sched_domain *sd, return 0; *imbalance = DIV_ROUND_CLOSEST(sds->max_load * sds->busiest->cpu_power, - SCHED_LOAD_SCALE); + SCHED_POWER_SCALE); return 1; } @@ -2990,7 +2991,7 @@ static inline void fix_small_imbalance(struct sd_lb_stats *sds, cpu_avg_load_per_task(this_cpu); scaled_busy_load_per_task = sds->busiest_load_per_task - * SCHED_LOAD_SCALE; + * SCHED_POWER_SCALE; scaled_busy_load_per_task /= sds->busiest->cpu_power; if (sds->max_load - sds->this_load + scaled_busy_load_per_task >= @@ -3009,10 +3010,10 @@ static inline void fix_small_imbalance(struct sd_lb_stats *sds, min(sds->busiest_load_per_task, sds->max_load); pwr_now += sds->this->cpu_power * min(sds->this_load_per_task, sds->this_load); - pwr_now /= SCHED_LOAD_SCALE; + pwr_now /= SCHED_POWER_SCALE; /* Amount of load we'd subtract */ - tmp = (sds->busiest_load_per_task * SCHED_LOAD_SCALE) / + tmp = (sds->busiest_load_per_task * SCHED_POWER_SCALE) / sds->busiest->cpu_power; if (sds->max_load > tmp) pwr_move += sds->busiest->cpu_power * @@ -3020,15 +3021,15 @@ static inline void fix_small_imbalance(struct sd_lb_stats *sds, /* Amount of load we'd add */ if (sds->max_load * sds->busiest->cpu_power < - sds->busiest_load_per_task * SCHED_LOAD_SCALE) + sds->busiest_load_per_task * SCHED_POWER_SCALE) tmp = (sds->max_load * sds->busiest->cpu_power) / sds->this->cpu_power; else - tmp = (sds->busiest_load_per_task * SCHED_LOAD_SCALE) / + tmp = (sds->busiest_load_per_task * SCHED_POWER_SCALE) / sds->this->cpu_power; pwr_move += sds->this->cpu_power * min(sds->this_load_per_task, sds->this_load + tmp); - pwr_move /= SCHED_LOAD_SCALE; + pwr_move /= SCHED_POWER_SCALE; /* Move if we gain 
throughput */ if (pwr_move > pwr_now) @@ -3070,7 +3071,7 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu, load_above_capacity = (sds->busiest_nr_running - sds->busiest_group_capacity); - load_above_capacity *= (SCHED_LOAD_SCALE * SCHED_LOAD_SCALE); + load_above_capacity *= (SCHED_LOAD_SCALE * SCHED_POWER_SCALE); load_above_capacity /= sds->busiest->cpu_power; } @@ -3090,7 +3091,7 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu, /* How much load to actually move to equalise the imbalance */ *imbalance = min(max_pull * sds->busiest->cpu_power, (sds->avg_load - sds->this_load) * sds->this->cpu_power) - / SCHED_LOAD_SCALE; + / SCHED_POWER_SCALE; /* * if *imbalance is less than the average load per runnable task @@ -3159,7 +3160,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, if (!sds.busiest || sds.busiest_nr_running == 0) goto out_balanced; - sds.avg_load = (SCHED_LOAD_SCALE * sds.total_load) / sds.total_pwr; + sds.avg_load = (SCHED_POWER_SCALE * sds.total_load) / sds.total_pwr; /* * If the busiest group is imbalanced the below checks don't @@ -3238,7 +3239,8 @@ find_busiest_queue(struct sched_domain *sd, struct sched_group *group, for_each_cpu(i, sched_group_cpus(group)) { unsigned long power = power_of(i); - unsigned long capacity = DIV_ROUND_CLOSEST(power, SCHED_LOAD_SCALE); + unsigned long capacity = DIV_ROUND_CLOSEST(power, + SCHED_POWER_SCALE); unsigned long wl; if (!capacity) @@ -3263,7 +3265,7 @@ find_busiest_queue(struct sched_domain *sd, struct sched_group *group, * the load can be moved away from the cpu that is potentially * running at a lower capacity. */ - wl = (wl * SCHED_LOAD_SCALE) / power; + wl = (wl * SCHED_POWER_SCALE) / power; if (wl > max_load) { max_load = wl; -- cgit v1.2.3-71-gd317 From c8b281161dfa4bb5d5be63fb036ce19347b88c63 Mon Sep 17 00:00:00 2001 From: Nikhil Rao Date: Wed, 18 May 2011 14:37:48 -0700 Subject: sched: Increase SCHED_LOAD_SCALE resolution Introduce SCHED_LOAD_RESOLUTION, which scales is added to SCHED_LOAD_SHIFT and increases the resolution of SCHED_LOAD_SCALE. This patch sets the value of SCHED_LOAD_RESOLUTION to 10, scaling up the weights for all sched entities by a factor of 1024. With this extra resolution, we can handle deeper cgroup hiearchies and the scheduler can do better shares distribution and load load balancing on larger systems (especially for low weight task groups). This does not change the existing user interface, the scaled weights are only used internally. We do not modify prio_to_weight values or inverses, but use the original weights when calculating the inverse which is used to scale execution time delta in calc_delta_mine(). This ensures we do not lose accuracy when accounting time to the sched entities. Thanks to Nikunj Dadhania for fixing an bug in c_d_m() that broken fairness. Below is some analysis of the performance costs/improvements of this patch. 1. Micro-arch performance costs: Experiment was to run Ingo's pipe_test_100k 200 times with the task pinned to one cpu. I measured instruction, cycles and stalled-cycles for the runs. See: http://thread.gmane.org/gmane.linux.kernel/1129232/focus=1129389 for more info. 
-tip (baseline): Performance counter stats for '/root/load-scale/pipe-test-100k' (200 runs): 964,991,769 instructions # 0.82 insns per cycle # 0.33 stalled cycles per insn # ( +- 0.05% ) 1,171,186,635 cycles # 0.000 GHz ( +- 0.08% ) 306,373,664 stalled-cycles-backend # 26.16% backend cycles idle ( +- 0.28% ) 314,933,621 stalled-cycles-frontend # 26.89% frontend cycles idle ( +- 0.34% ) 1.122405684 seconds time elapsed ( +- 0.05% ) -tip+patches: Performance counter stats for './load-scale/pipe-test-100k' (200 runs): 963,624,821 instructions # 0.82 insns per cycle # 0.33 stalled cycles per insn # ( +- 0.04% ) 1,175,215,649 cycles # 0.000 GHz ( +- 0.08% ) 315,321,126 stalled-cycles-backend # 26.83% backend cycles idle ( +- 0.28% ) 316,835,873 stalled-cycles-frontend # 26.96% frontend cycles idle ( +- 0.29% ) 1.122238659 seconds time elapsed ( +- 0.06% ) With this patch, instructions decrease by ~0.10% and cycles increase by 0.27%. This doesn't look statistically significant. The number of stalled cycles in the backend increased from 26.16% to 26.83%. This can be attributed to the shifts we do in c_d_m() and other places. The fraction of stalled cycles in the frontend remains about the same, at 26.96% compared to 26.89% in -tip. 2. Balancing low-weight task groups Test setup: run 50 tasks with random sleep/busy times (biased around 100ms) in a low weight container (with cpu.shares = 2). Measure %idle as reported by mpstat over a 10s window. -tip (baseline): 06:47:48 PM CPU %usr %nice %sys %iowait %irq %soft %steal %guest %idle intr/s 06:47:49 PM all 94.32 0.00 0.06 0.00 0.00 0.00 0.00 0.00 5.62 15888.00 06:47:50 PM all 94.57 0.00 0.62 0.00 0.00 0.00 0.00 0.00 4.81 16180.00 06:47:51 PM all 94.69 0.00 0.06 0.00 0.00 0.00 0.00 0.00 5.25 15966.00 06:47:52 PM all 95.81 0.00 0.00 0.00 0.00 0.00 0.00 0.00 4.19 16053.00 06:47:53 PM all 94.88 0.06 0.00 0.00 0.00 0.00 0.00 0.00 5.06 15984.00 06:47:54 PM all 93.31 0.00 0.00 0.00 0.00 0.00 0.00 0.00 6.69 15806.00 06:47:55 PM all 94.19 0.00 0.06 0.00 0.00 0.00 0.00 0.00 5.75 15896.00 06:47:56 PM all 92.87 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.13 15716.00 06:47:57 PM all 94.88 0.00 0.00 0.00 0.00 0.00 0.00 0.00 5.12 15982.00 06:47:58 PM all 95.44 0.00 0.00 0.00 0.00 0.00 0.00 0.00 4.56 16075.00 Average: all 94.49 0.01 0.08 0.00 0.00 0.00 0.00 0.00 5.42 15954.60 -tip+patches: 06:47:03 PM CPU %usr %nice %sys %iowait %irq %soft %steal %guest %idle intr/s 06:47:04 PM all 100.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 16630.00 06:47:05 PM all 99.69 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.31 16580.20 06:47:06 PM all 99.69 0.00 0.06 0.00 0.00 0.00 0.00 0.00 0.25 16596.00 06:47:07 PM all 99.20 0.00 0.74 0.00 0.00 0.06 0.00 0.00 0.00 17838.61 06:47:08 PM all 100.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 16540.00 06:47:09 PM all 100.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 16575.00 06:47:10 PM all 100.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 16614.00 06:47:11 PM all 99.94 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.06 16588.00 06:47:12 PM all 99.94 0.00 0.06 0.00 0.00 0.00 0.00 0.00 0.00 16593.00 06:47:13 PM all 99.94 0.00 0.06 0.00 0.00 0.00 0.00 0.00 0.00 16551.00 Average: all 99.84 0.00 0.09 0.00 0.00 0.01 0.00 0.00 0.06 16711.58 We see an improvement in idle% on the system (drops from 5.42% on -tip to 0.06% with the patches). We see an improvement in idle% on the system (drops from 5.42% on -tip to 0.06% with the patches). Signed-off-by: Nikhil Rao Acked-by: Peter Zijlstra Cc: Nikunj A. 
Dadhania Cc: Srivatsa Vaddagiri Cc: Stephan Barwolf Cc: Mike Galbraith Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/1305754668-18792-1-git-send-email-ncrao@google.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 23 +++++++++++++++++++++-- kernel/sched.c | 28 ++++++++++++++++++++-------- 2 files changed, 41 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index f2f440221b70..c34a718e20dd 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -788,9 +788,28 @@ enum cpu_idle_type { }; /* - * Increase resolution of nice-level calculations: + * Increase resolution of nice-level calculations for 64-bit architectures. + * The extra resolution improves shares distribution and load balancing of + * low-weight task groups (eg. nice +19 on an autogroup), deeper taskgroup + * hierarchies, especially on larger systems. This is not a user-visible change + * and does not change the user-interface for setting shares/weights. + * + * We increase resolution only if we have enough bits to allow this increased + * resolution (i.e. BITS_PER_LONG > 32). The costs for increasing resolution + * when BITS_PER_LONG <= 32 are pretty high and the returns do not justify the + * increased costs. */ -#define SCHED_LOAD_SHIFT 10 +#if BITS_PER_LONG > 32 +# define SCHED_LOAD_RESOLUTION 10 +# define scale_load(w) ((w) << SCHED_LOAD_RESOLUTION) +# define scale_load_down(w) ((w) >> SCHED_LOAD_RESOLUTION) +#else +# define SCHED_LOAD_RESOLUTION 0 +# define scale_load(w) (w) +# define scale_load_down(w) (w) +#endif + +#define SCHED_LOAD_SHIFT (10 + SCHED_LOAD_RESOLUTION) #define SCHED_LOAD_SCALE (1L << SCHED_LOAD_SHIFT) /* diff --git a/kernel/sched.c b/kernel/sched.c index 375e9c677d58..bb504e1839e5 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -293,7 +293,7 @@ static DEFINE_SPINLOCK(task_group_lock); * limitation from this.) */ #define MIN_SHARES 2 -#define MAX_SHARES (1UL << 18) +#define MAX_SHARES (1UL << (18 + SCHED_LOAD_RESOLUTION)) static int root_task_group_load = ROOT_TASK_GROUP_LOAD; #endif @@ -1330,13 +1330,25 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight, { u64 tmp; - tmp = (u64)delta_exec * weight; + /* + * weight can be less than 2^SCHED_LOAD_RESOLUTION for task group sched + * entities since MIN_SHARES = 2. Treat weight as 1 if less than + * 2^SCHED_LOAD_RESOLUTION. 
+ */ + if (likely(weight > (1UL << SCHED_LOAD_RESOLUTION))) + tmp = (u64)delta_exec * scale_load_down(weight); + else + tmp = (u64)delta_exec; if (!lw->inv_weight) { - if (BITS_PER_LONG > 32 && unlikely(lw->weight >= WMULT_CONST)) + unsigned long w = scale_load_down(lw->weight); + + if (BITS_PER_LONG > 32 && unlikely(w >= WMULT_CONST)) lw->inv_weight = 1; + else if (unlikely(!w)) + lw->inv_weight = WMULT_CONST; else - lw->inv_weight = WMULT_CONST / lw->weight; + lw->inv_weight = WMULT_CONST / w; } /* @@ -1785,12 +1797,12 @@ static void set_load_weight(struct task_struct *p) * SCHED_IDLE tasks get minimal weight: */ if (p->policy == SCHED_IDLE) { - load->weight = WEIGHT_IDLEPRIO; + load->weight = scale_load(WEIGHT_IDLEPRIO); load->inv_weight = WMULT_IDLEPRIO; return; } - load->weight = prio_to_weight[prio]; + load->weight = scale_load(prio_to_weight[prio]); load->inv_weight = prio_to_wmult[prio]; } @@ -8809,14 +8821,14 @@ cpu_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp, static int cpu_shares_write_u64(struct cgroup *cgrp, struct cftype *cftype, u64 shareval) { - return sched_group_set_shares(cgroup_tg(cgrp), shareval); + return sched_group_set_shares(cgroup_tg(cgrp), scale_load(shareval)); } static u64 cpu_shares_read_u64(struct cgroup *cgrp, struct cftype *cft) { struct task_group *tg = cgroup_tg(cgrp); - return (u64) tg->shares; + return (u64) scale_load_down(tg->shares); } #endif /* CONFIG_FAIR_GROUP_SCHED */ -- cgit v1.2.3-71-gd317 From 8bb36c2139f7bcea32a78472272f1d0de3b00f7b Mon Sep 17 00:00:00 2001 From: Antonio Ospite Date: Thu, 7 Apr 2011 12:45:51 -0300 Subject: [media] Add Y10B, a 10 bpp bit-packed greyscale format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a 10 bits per pixel greyscale format in a bit-packed array representation, naming it Y10B. Such pixel format is supplied for instance by the Kinect sensor device. Signed-off-by: Antonio Ospite Signed-off-by: Jean-François Moine Signed-off-by: Mauro Carvalho Chehab --- Documentation/DocBook/media-entities.tmpl | 1 + Documentation/DocBook/v4l/pixfmt-y10b.xml | 43 +++++++++++++++++++++++++++++++ Documentation/DocBook/v4l/pixfmt.xml | 1 + Documentation/DocBook/v4l/videodev2.h.xml | 3 +++ include/linux/videodev2.h | 3 +++ 5 files changed, 51 insertions(+) create mode 100644 Documentation/DocBook/v4l/pixfmt-y10b.xml (limited to 'include/linux') diff --git a/Documentation/DocBook/media-entities.tmpl b/Documentation/DocBook/media-entities.tmpl index fea63b45471a..7a9570887d21 100644 --- a/Documentation/DocBook/media-entities.tmpl +++ b/Documentation/DocBook/media-entities.tmpl @@ -295,6 +295,7 @@ + diff --git a/Documentation/DocBook/v4l/pixfmt-y10b.xml b/Documentation/DocBook/v4l/pixfmt-y10b.xml new file mode 100644 index 000000000000..adb0ad808c93 --- /dev/null +++ b/Documentation/DocBook/v4l/pixfmt-y10b.xml @@ -0,0 +1,43 @@ + + + V4L2_PIX_FMT_Y10BPACK ('Y10B') + &manvol; + + + V4L2_PIX_FMT_Y10BPACK + Grey-scale image as a bit-packed array + + + Description + + This is a packed grey-scale image format with a depth of 10 bits per + pixel. Pixels are stored in a bit-packed array of 10bit bits per pixel, + with no padding between them and with the most significant bits coming + first from the left. + + + <constant>V4L2_PIX_FMT_Y10BPACK</constant> 4 pixel data stream taking 5 bytes + + + Bit-packed representation + pixels cross the byte boundary and have a ratio of 5 bytes for each 4 + pixels. 
+ + + + + + Y'00[9:2] + Y'00[1:0]Y'01[9:4] + Y'01[3:0]Y'02[9:6] + Y'02[5:0]Y'03[9:8] + Y'03[7:0] + + + + + + + + + diff --git a/Documentation/DocBook/v4l/pixfmt.xml b/Documentation/DocBook/v4l/pixfmt.xml index 40af4beb48b9..3486a068fe46 100644 --- a/Documentation/DocBook/v4l/pixfmt.xml +++ b/Documentation/DocBook/v4l/pixfmt.xml @@ -697,6 +697,7 @@ information. &sub-grey; &sub-y10; &sub-y12; + &sub-y10b; &sub-y16; &sub-yuyv; &sub-uyvy; diff --git a/Documentation/DocBook/v4l/videodev2.h.xml b/Documentation/DocBook/v4l/videodev2.h.xml index 2b796a2ee98a..937acf54da9c 100644 --- a/Documentation/DocBook/v4l/videodev2.h.xml +++ b/Documentation/DocBook/v4l/videodev2.h.xml @@ -311,6 +311,9 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_Y10 v4l2_fourcc('Y', '1', '0', ' ') /* 10 Greyscale */ #define V4L2_PIX_FMT_Y16 v4l2_fourcc('Y', '1', '6', ' ') /* 16 Greyscale */ +/* Grey bit-packed formats */ +#define V4L2_PIX_FMT_Y10BPACK v4l2_fourcc('Y', '1', '0', 'B') /* 10 Greyscale bit-packed */ + /* Palette formats */ #define V4L2_PIX_FMT_PAL8 v4l2_fourcc('P', 'A', 'L', '8') /* 8 8-bit palette */ diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h index be82c8ead1af..a417270b337f 100644 --- a/include/linux/videodev2.h +++ b/include/linux/videodev2.h @@ -311,6 +311,9 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_Y12 v4l2_fourcc('Y', '1', '2', ' ') /* 12 Greyscale */ #define V4L2_PIX_FMT_Y16 v4l2_fourcc('Y', '1', '6', ' ') /* 16 Greyscale */ +/* Grey bit-packed formats */ +#define V4L2_PIX_FMT_Y10BPACK v4l2_fourcc('Y', '1', '0', 'B') /* 10 Greyscale bit-packed */ + /* Palette formats */ #define V4L2_PIX_FMT_PAL8 v4l2_fourcc('P', 'A', 'L', '8') /* 8 8-bit palette */ -- cgit v1.2.3-71-gd317 From 5f7088127e800df2cd5ff08140bdca087ab0fbac Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Sun, 21 Nov 2010 17:15:44 -0300 Subject: [media] uvcvideo: Make the API public Move the public API definitions to include/linux/uvcvideo.h and bump the version number to 1.1.0. Compatibility with the old API is kept, application can still be compiled against the private header and will not break. Signed-off-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab --- Documentation/ioctl/ioctl-number.txt | 2 +- drivers/media/video/uvc/uvc_v4l2.c | 13 +++---- drivers/media/video/uvc/uvcvideo.h | 68 ++++++++++++++++------------------- include/linux/Kbuild | 1 + include/linux/uvcvideo.h | 69 ++++++++++++++++++++++++++++++++++++ 5 files changed, 108 insertions(+), 45 deletions(-) create mode 100644 include/linux/uvcvideo.h (limited to 'include/linux') diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt index a0a5d82b6b0b..2d1ad12e2b3e 100644 --- a/Documentation/ioctl/ioctl-number.txt +++ b/Documentation/ioctl/ioctl-number.txt @@ -166,7 +166,6 @@ Code Seq#(hex) Include File Comments 'T' all arch/x86/include/asm/ioctls.h conflict! 'T' C0-DF linux/if_tun.h conflict! 'U' all sound/asound.h conflict! -'U' 00-0F drivers/media/video/uvc/uvcvideo.h conflict! 'U' 00-CF linux/uinput.h conflict! 'U' 00-EF linux/usbdevice_fs.h 'U' C0-CF drivers/bluetooth/hci_uart.h @@ -259,6 +258,7 @@ Code Seq#(hex) Include File Comments 't' 80-8F linux/isdn_ppp.h 't' 90 linux/toshiba.h 'u' 00-1F linux/smb_fs.h gone +'u' 20-3F linux/uvcvideo.h USB video class host driver 'v' 00-1F linux/ext2_fs.h conflict! 'v' 00-1F linux/fs.h conflict! 'v' 00-0F linux/sonypi.h conflict! 
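Editor's illustration of how an application exercises the now-public API through the 'u' 0x20-0x3F ioctl range reserved above; the device node, unit, selector and control size are placeholders, and only definitions from the new linux/uvcvideo.h header plus UVC_GET_CUR from linux/usb/video.h are assumed:

/* Hypothetical user-space query of an extension-unit control. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/usb/video.h>	/* UVC_GET_CUR */
#include <linux/uvcvideo.h>	/* UVCIOC_CTRL_QUERY, struct uvc_xu_control_query */

int main(void)
{
	__u8 value[2] = { 0 };	/* placeholder: assume a two-byte control */
	struct uvc_xu_control_query q;
	int fd = open("/dev/video0", O_RDWR);

	if (fd < 0)
		return 1;

	memset(&q, 0, sizeof(q));
	q.unit = 4;		/* placeholder extension unit id */
	q.selector = 1;		/* placeholder control selector */
	q.query = UVC_GET_CUR;
	q.size = sizeof(value);
	q.data = value;

	if (ioctl(fd, UVCIOC_CTRL_QUERY, &q) < 0)
		perror("UVCIOC_CTRL_QUERY");
	else
		printf("current value: 0x%02x%02x\n", value[1], value[0]);

	close(fd);
	return 0;
}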
diff --git a/drivers/media/video/uvc/uvc_v4l2.c b/drivers/media/video/uvc/uvc_v4l2.c index 2e2a556d1666..6e8aad44c4bd 100644 --- a/drivers/media/video/uvc/uvc_v4l2.c +++ b/drivers/media/video/uvc/uvc_v4l2.c @@ -1036,24 +1036,25 @@ static long uvc_v4l2_do_ioctl(struct file *file, unsigned int cmd, void *arg) * UVCIOC_CTRL_GET and UVCIOC_CTRL_SET are deprecated and scheduled for * removal in 2.6.42. */ - case UVCIOC_CTRL_ADD: + case __UVCIOC_CTRL_ADD: uvc_v4l2_ioctl_warn(); return -EEXIST; - case UVCIOC_CTRL_MAP_OLD: + case __UVCIOC_CTRL_MAP_OLD: uvc_v4l2_ioctl_warn(); + case __UVCIOC_CTRL_MAP: case UVCIOC_CTRL_MAP: return uvc_ioctl_ctrl_map(chain, arg, - cmd == UVCIOC_CTRL_MAP_OLD); + cmd == __UVCIOC_CTRL_MAP_OLD); - case UVCIOC_CTRL_GET: - case UVCIOC_CTRL_SET: + case __UVCIOC_CTRL_GET: + case __UVCIOC_CTRL_SET: { struct uvc_xu_control *xctrl = arg; struct uvc_xu_control_query xqry = { .unit = xctrl->unit, .selector = xctrl->selector, - .query = cmd == UVCIOC_CTRL_GET + .query = cmd == __UVCIOC_CTRL_GET ? UVC_GET_CUR : UVC_SET_CUR, .size = xctrl->size, .data = xctrl->data, diff --git a/drivers/media/video/uvc/uvcvideo.h b/drivers/media/video/uvc/uvcvideo.h index cd58ea81320b..38dd4e18a2ca 100644 --- a/drivers/media/video/uvc/uvcvideo.h +++ b/drivers/media/video/uvc/uvcvideo.h @@ -4,6 +4,14 @@ #include #include +#ifndef __KERNEL__ +/* + * This header provides binary compatibility with applications using the private + * uvcvideo API. This API is deprecated and will be removed in 2.6.42. + * Applications should be recompiled against the public linux/uvcvideo.h header. + */ +#warn "The uvcvideo.h header is deprecated, use linux/uvcvideo.h instead." + /* * Dynamic controls */ @@ -17,23 +25,6 @@ #define UVC_CTRL_DATA_TYPE_BITMASK 5 /* Control flags */ -#define UVC_CTRL_FLAG_SET_CUR (1 << 0) -#define UVC_CTRL_FLAG_GET_CUR (1 << 1) -#define UVC_CTRL_FLAG_GET_MIN (1 << 2) -#define UVC_CTRL_FLAG_GET_MAX (1 << 3) -#define UVC_CTRL_FLAG_GET_RES (1 << 4) -#define UVC_CTRL_FLAG_GET_DEF (1 << 5) -/* Control should be saved at suspend and restored at resume. */ -#define UVC_CTRL_FLAG_RESTORE (1 << 6) -/* Control can be updated by the camera. 
*/ -#define UVC_CTRL_FLAG_AUTO_UPDATE (1 << 7) - -#define UVC_CTRL_FLAG_GET_RANGE \ - (UVC_CTRL_FLAG_GET_CUR | UVC_CTRL_FLAG_GET_MIN | \ - UVC_CTRL_FLAG_GET_MAX | UVC_CTRL_FLAG_GET_RES | \ - UVC_CTRL_FLAG_GET_DEF) - -/* Old control flags, don't use */ #define UVC_CONTROL_SET_CUR (1 << 0) #define UVC_CONTROL_GET_CUR (1 << 1) #define UVC_CONTROL_GET_MIN (1 << 2) @@ -47,23 +38,11 @@ UVC_CONTROL_GET_MAX | UVC_CONTROL_GET_RES | \ UVC_CONTROL_GET_DEF) -struct uvc_xu_control_info { - __u8 entity[16]; - __u8 index; - __u8 selector; - __u16 size; - __u32 flags; -}; - struct uvc_menu_info { __u32 value; __u8 name[32]; }; -struct uvc_xu_control_mapping_old { - __u8 reserved[64]; -}; - struct uvc_xu_control_mapping { __u32 id; __u8 name[32]; @@ -81,32 +60,46 @@ struct uvc_xu_control_mapping { __u32 reserved[4]; }; -struct uvc_xu_control { - __u8 unit; +#endif + +struct uvc_xu_control_info { + __u8 entity[16]; + __u8 index; __u8 selector; __u16 size; - __u8 __user *data; + __u32 flags; }; -struct uvc_xu_control_query { +struct uvc_xu_control_mapping_old { + __u8 reserved[64]; +}; + +struct uvc_xu_control { __u8 unit; __u8 selector; - __u8 query; __u16 size; __u8 __user *data; }; +#ifndef __KERNEL__ #define UVCIOC_CTRL_ADD _IOW('U', 1, struct uvc_xu_control_info) #define UVCIOC_CTRL_MAP_OLD _IOWR('U', 2, struct uvc_xu_control_mapping_old) #define UVCIOC_CTRL_MAP _IOWR('U', 2, struct uvc_xu_control_mapping) #define UVCIOC_CTRL_GET _IOWR('U', 3, struct uvc_xu_control) #define UVCIOC_CTRL_SET _IOW('U', 4, struct uvc_xu_control) -#define UVCIOC_CTRL_QUERY _IOWR('U', 5, struct uvc_xu_control_query) +#else +#define __UVCIOC_CTRL_ADD _IOW('U', 1, struct uvc_xu_control_info) +#define __UVCIOC_CTRL_MAP_OLD _IOWR('U', 2, struct uvc_xu_control_mapping_old) +#define __UVCIOC_CTRL_MAP _IOWR('U', 2, struct uvc_xu_control_mapping) +#define __UVCIOC_CTRL_GET _IOWR('U', 3, struct uvc_xu_control) +#define __UVCIOC_CTRL_SET _IOW('U', 4, struct uvc_xu_control) +#endif #ifdef __KERNEL__ #include #include +#include /* -------------------------------------------------------------------------- * UVC constants @@ -184,8 +177,8 @@ struct uvc_xu_control_query { * Driver specific constants. */ -#define DRIVER_VERSION_NUMBER KERNEL_VERSION(1, 0, 0) -#define DRIVER_VERSION "v1.0.0" +#define DRIVER_VERSION_NUMBER KERNEL_VERSION(1, 1, 0) +#define DRIVER_VERSION "v1.1.0" /* Number of isochronous URBs. 
*/ #define UVC_URBS 5 @@ -682,4 +675,3 @@ void uvc_video_decode_isight(struct urb *urb, struct uvc_streaming *stream, #endif /* __KERNEL__ */ #endif - diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 75cf611641e6..cb1ded2bd545 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -372,6 +372,7 @@ header-y += unistd.h header-y += usbdevice_fs.h header-y += utime.h header-y += utsname.h +header-y += uvcvideo.h header-y += v4l2-mediabus.h header-y += v4l2-subdev.h header-y += veth.h diff --git a/include/linux/uvcvideo.h b/include/linux/uvcvideo.h new file mode 100644 index 000000000000..f46a53f060d7 --- /dev/null +++ b/include/linux/uvcvideo.h @@ -0,0 +1,69 @@ +#ifndef __LINUX_UVCVIDEO_H_ +#define __LINUX_UVCVIDEO_H_ + +#include +#include + +/* + * Dynamic controls + */ + +/* Data types for UVC control data */ +#define UVC_CTRL_DATA_TYPE_RAW 0 +#define UVC_CTRL_DATA_TYPE_SIGNED 1 +#define UVC_CTRL_DATA_TYPE_UNSIGNED 2 +#define UVC_CTRL_DATA_TYPE_BOOLEAN 3 +#define UVC_CTRL_DATA_TYPE_ENUM 4 +#define UVC_CTRL_DATA_TYPE_BITMASK 5 + +/* Control flags */ +#define UVC_CTRL_FLAG_SET_CUR (1 << 0) +#define UVC_CTRL_FLAG_GET_CUR (1 << 1) +#define UVC_CTRL_FLAG_GET_MIN (1 << 2) +#define UVC_CTRL_FLAG_GET_MAX (1 << 3) +#define UVC_CTRL_FLAG_GET_RES (1 << 4) +#define UVC_CTRL_FLAG_GET_DEF (1 << 5) +/* Control should be saved at suspend and restored at resume. */ +#define UVC_CTRL_FLAG_RESTORE (1 << 6) +/* Control can be updated by the camera. */ +#define UVC_CTRL_FLAG_AUTO_UPDATE (1 << 7) + +#define UVC_CTRL_FLAG_GET_RANGE \ + (UVC_CTRL_FLAG_GET_CUR | UVC_CTRL_FLAG_GET_MIN | \ + UVC_CTRL_FLAG_GET_MAX | UVC_CTRL_FLAG_GET_RES | \ + UVC_CTRL_FLAG_GET_DEF) + +struct uvc_menu_info { + __u32 value; + __u8 name[32]; +}; + +struct uvc_xu_control_mapping { + __u32 id; + __u8 name[32]; + __u8 entity[16]; + __u8 selector; + + __u8 size; + __u8 offset; + __u32 v4l2_type; + __u32 data_type; + + struct uvc_menu_info __user *menu_info; + __u32 menu_count; + + __u32 reserved[4]; +}; + +struct uvc_xu_control_query { + __u8 unit; + __u8 selector; + __u8 query; + __u16 size; + __u8 __user *data; +}; + +#define UVCIOC_CTRL_MAP _IOWR('u', 0x20, struct uvc_xu_control_mapping) +#define UVCIOC_CTRL_QUERY _IOWR('u', 0x21, struct uvc_xu_control_query) + +#endif -- cgit v1.2.3-71-gd317 From 16846524afc200e795aaae659356001780fa91db Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Fri, 25 Mar 2011 12:09:43 -0300 Subject: [media] v4l: Add V4L2_MBUS_FMT_JPEG_1X8 media bus format Add V4L2_MBUS_FMT_JPEG_1X8 format and the corresponding Docbook documentation. Signed-off-by: Sylwester Nawrocki Signed-off-by: Kyungmin Park Acked-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab --- Documentation/DocBook/v4l/subdev-formats.xml | 46 ++++++++++++++++++++++++++++ include/linux/v4l2-mediabus.h | 3 ++ 2 files changed, 49 insertions(+) (limited to 'include/linux') diff --git a/Documentation/DocBook/v4l/subdev-formats.xml b/Documentation/DocBook/v4l/subdev-formats.xml index d7ccd25edcc1..a26b10c07857 100644 --- a/Documentation/DocBook/v4l/subdev-formats.xml +++ b/Documentation/DocBook/v4l/subdev-formats.xml @@ -2522,5 +2522,51 @@ + +
+ JPEG Compressed Formats + + Those data formats consist of an ordered sequence of 8-bit bytes + obtained from JPEG compression process. Additionally to the + _JPEG prefix the format code is made of + the following information. + + The number of bus samples per entropy encoded byte. + The bus width. + + + For instance, for a JPEG baseline process and an 8-bit bus width + the format will be named V4L2_MBUS_FMT_JPEG_1X8. + + + + The following table lists existing JPEG compressed formats. + + + JPEG Formats + + + + + + + Identifier + Code + Remarks + + + + + V4L2_MBUS_FMT_JPEG_1X8 + 0x4001 + Besides of its usage for the parallel bus this format is + recommended for transmission of JPEG data over MIPI CSI bus + using the User Defined 8-bit Data types. + + + + +
+
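Editor's sketch of how a media-bus code such as the one added by this patch is typically negotiated on a sub-device pad from user space; it assumes a standard v4l2 sub-device node, and the pad number and frame size are placeholders:

/* Hypothetical pad-format negotiation using the new JPEG code. */
#include <string.h>
#include <sys/ioctl.h>
#include <linux/v4l2-mediabus.h>
#include <linux/v4l2-subdev.h>

static int set_jpeg_pad_format(int subdev_fd)
{
	struct v4l2_subdev_format fmt;

	memset(&fmt, 0, sizeof(fmt));
	fmt.pad = 0;				/* placeholder source pad */
	fmt.which = V4L2_SUBDEV_FORMAT_ACTIVE;
	fmt.format.width = 640;			/* placeholder frame size */
	fmt.format.height = 480;
	fmt.format.code = V4L2_MBUS_FMT_JPEG_1X8;

	return ioctl(subdev_fd, VIDIOC_SUBDEV_S_FMT, &fmt);
}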
diff --git a/include/linux/v4l2-mediabus.h b/include/linux/v4l2-mediabus.h index de5c15921025..5ea7f753a348 100644 --- a/include/linux/v4l2-mediabus.h +++ b/include/linux/v4l2-mediabus.h @@ -89,6 +89,9 @@ enum v4l2_mbus_pixelcode { V4L2_MBUS_FMT_SGBRG12_1X12 = 0x3010, V4L2_MBUS_FMT_SGRBG12_1X12 = 0x3011, V4L2_MBUS_FMT_SRGGB12_1X12 = 0x3012, + + /* JPEG compressed formats - next is 0x4002 */ + V4L2_MBUS_FMT_JPEG_1X8 = 0x4001, }; /** -- cgit v1.2.3-71-gd317 From 0e59fd0592430cee4595c600427899a3404861e7 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 11 May 2011 09:56:20 -0300 Subject: [media] v4l: Add M420 format definition M420 is a hybrid YUV 4:2:0 packet/planar format. Two Y lines are followed by an interleaved U/V line. Signed-off-by: Hans de Goede [laurent.pinchart@ideasonboard.com: split into v4l/uvcvideo patches] [laurent.pinchart@ideasonboard.com: add documentation] Signed-off-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab --- Documentation/DocBook/media-entities.tmpl | 1 + Documentation/DocBook/v4l/pixfmt-m420.xml | 147 ++++++++++++++++++++++++++++++ Documentation/DocBook/v4l/pixfmt.xml | 1 + Documentation/DocBook/v4l/videodev2.h.xml | 1 + include/linux/videodev2.h | 1 + 5 files changed, 151 insertions(+) create mode 100644 Documentation/DocBook/v4l/pixfmt-m420.xml (limited to 'include/linux') diff --git a/Documentation/DocBook/media-entities.tmpl b/Documentation/DocBook/media-entities.tmpl index 7a9570887d21..c8abb23ef1e7 100644 --- a/Documentation/DocBook/media-entities.tmpl +++ b/Documentation/DocBook/media-entities.tmpl @@ -270,6 +270,7 @@ + diff --git a/Documentation/DocBook/v4l/pixfmt-m420.xml b/Documentation/DocBook/v4l/pixfmt-m420.xml new file mode 100644 index 000000000000..ce4bc019e5c0 --- /dev/null +++ b/Documentation/DocBook/v4l/pixfmt-m420.xml @@ -0,0 +1,147 @@ + + + V4L2_PIX_FMT_M420 ('M420') + &manvol; + + + V4L2_PIX_FMT_M420 + Format with ½ horizontal and vertical chroma + resolution, also known as YUV 4:2:0. Hybrid plane line-interleaved + layout. + + + Description + + M420 is a YUV format with ½ horizontal and vertical chroma + subsampling (YUV 4:2:0). Pixels are organized as interleaved luma and + chroma planes. Two lines of luma data are followed by one line of chroma + data. + The luma plane has one byte per pixel. The chroma plane contains + interleaved CbCr pixels subsampled by ½ in the horizontal and + vertical directions. Each CbCr pair belongs to four pixels. For example, +Cb0/Cr0 belongs to +Y'00, Y'01, +Y'10, Y'11. + + All line lengths are identical: if the Y lines include pad bytes + so do the CbCr lines. + + + <constant>V4L2_PIX_FMT_M420</constant> 4 × 4 +pixel image + + + Byte Order. + Each cell is one byte. + + + + + + start + 0: + Y'00 + Y'01 + Y'02 + Y'03 + + + start + 4: + Y'10 + Y'11 + Y'12 + Y'13 + + + start + 8: + Cb00 + Cr00 + Cb01 + Cr01 + + + start + 16: + Y'20 + Y'21 + Y'22 + Y'23 + + + start + 20: + Y'30 + Y'31 + Y'32 + Y'33 + + + start + 24: + Cb10 + Cr10 + Cb11 + Cr11 + + + + + + + + + Color Sample Location. + + + + + + + 01 + 23 + + + 0 + YY + YY + + + + C + C + + + 1 + YY + YY + + + + + + 2 + YY + YY + + + + C + C + + + 3 + YY + YY + + + + + + + + + + + diff --git a/Documentation/DocBook/v4l/pixfmt.xml b/Documentation/DocBook/v4l/pixfmt.xml index 3486a068fe46..dbfe3b08435f 100644 --- a/Documentation/DocBook/v4l/pixfmt.xml +++ b/Documentation/DocBook/v4l/pixfmt.xml @@ -713,6 +713,7 @@ information. &sub-nv12m; &sub-nv12mt; &sub-nv16; + &sub-m420;
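Editor's note on the arithmetic implied by the layout described above (two full-length luma lines followed by one full-length CbCr line): a small sketch, with invented helper names, of the offsets an application would use when walking an M420 buffer, assuming no padding beyond the common line stride:

#include <stddef.h>

/* Offset of luma line y: every pair of Y lines is followed by one CbCr line. */
static size_t m420_luma_offset(size_t stride, size_t y)
{
	return (3 * (y / 2) + (y % 2)) * stride;
}

/* Offset of the CbCr line shared by luma lines 2*c and 2*c + 1. */
static size_t m420_chroma_offset(size_t stride, size_t c)
{
	return (3 * c + 2) * stride;
}

/* Whole image: three lines of 'stride' bytes for every two lines of video. */
static size_t m420_sizeimage(size_t stride, size_t height)
{
	return stride * height * 3 / 2;
}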
diff --git a/Documentation/DocBook/v4l/videodev2.h.xml b/Documentation/DocBook/v4l/videodev2.h.xml index 937acf54da9c..c50536a4f596 100644 --- a/Documentation/DocBook/v4l/videodev2.h.xml +++ b/Documentation/DocBook/v4l/videodev2.h.xml @@ -336,6 +336,7 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_YUV420 v4l2_fourcc('Y', 'U', '1', '2') /* 12 YUV 4:2:0 */ #define V4L2_PIX_FMT_HI240 v4l2_fourcc('H', 'I', '2', '4') /* 8 8-bit color */ #define V4L2_PIX_FMT_HM12 v4l2_fourcc('H', 'M', '1', '2') /* 8 YUV 4:2:0 16x16 macroblocks */ +#define V4L2_PIX_FMT_M420 v4l2_fourcc('M', '4', '2', '0') /* 12 YUV 4:2:0 2 lines y, 1 line uv interleaved */ /* two planes -- one Y, one Cr + Cb interleaved */ #define V4L2_PIX_FMT_NV12 v4l2_fourcc('N', 'V', '1', '2') /* 12 Y/CbCr 4:2:0 */ diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h index a417270b337f..8a4c309d2344 100644 --- a/include/linux/videodev2.h +++ b/include/linux/videodev2.h @@ -336,6 +336,7 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_YUV420 v4l2_fourcc('Y', 'U', '1', '2') /* 12 YUV 4:2:0 */ #define V4L2_PIX_FMT_HI240 v4l2_fourcc('H', 'I', '2', '4') /* 8 8-bit color */ #define V4L2_PIX_FMT_HM12 v4l2_fourcc('H', 'M', '1', '2') /* 8 YUV 4:2:0 16x16 macroblocks */ +#define V4L2_PIX_FMT_M420 v4l2_fourcc('M', '4', '2', '0') /* 12 YUV 4:2:0 2 lines y, 1 line uv interleaved */ /* two planes -- one Y, one Cr + Cb interleaved */ #define V4L2_PIX_FMT_NV12 v4l2_fourcc('N', 'V', '1', '2') /* 12 Y/CbCr 4:2:0 */ -- cgit v1.2.3-71-gd317 From d974d905cbfc1039a73ba0c7eea3f4d4e13c0624 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Fri, 20 May 2011 15:48:17 +1000 Subject: spinlock_up.h: include asm/processor.h in for cpu_relax Commit e66eed651fd1 ("list: remove prefetching from regular list iterators") removed the include of prefetch.h from list.h and this was a path to including asm/processor.h. We need to include it excplicitly now. Fixes this build error on sparc32 (at least): In file included from include/linux/seqlock.h:29, from include/linux/time.h:8, from include/linux/timex.h:56, from include/linux/sched.h:57, from arch/sparc/kernel/asm-offsets.c:13: include/linux/spinlock.h: In function 'spin_unlock_wait': include/linux/spinlock.h:360: error: implicit declaration of function 'cpu_relax Signed-off-by: Stephen Rothwell Signed-off-by: Linus Torvalds --- include/linux/spinlock_up.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h index b14f6a91e19f..a26e2fb604e6 100644 --- a/include/linux/spinlock_up.h +++ b/include/linux/spinlock_up.h @@ -5,6 +5,8 @@ # error "please don't include this file directly" #endif +#include /* for cpu_relax() */ + /* * include/linux/spinlock_up.h - UP-debug version of spinlocks. 
* -- cgit v1.2.3-71-gd317 From b7281ca2a4b00044c60c25059f467d05772cdbe3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 18 May 2011 10:51:48 +0100 Subject: ARM: 6904/1: MTD: Add integrator-flash feature to physmap In the process of moving platforms away from integrator-flash (aka armflash), add to physmap the few features that make armflash unique: - optionnal probing for the AFS partition type - init() and exit() methods, used by Integrator to control write access to the various onboard programmable components Signed-off-by: Marc Zyngier Acked-by: Catalin Marinas Acked-by: Artem Bityutskiy Acked-by: David Woodhouse Signed-off-by: Russell King --- drivers/mtd/maps/physmap.c | 16 +++++++++++++++- include/linux/mtd/physmap.h | 2 ++ 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/mtd/maps/physmap.c b/drivers/mtd/maps/physmap.c index 7522df4f71f1..49676b7a53a4 100644 --- a/drivers/mtd/maps/physmap.c +++ b/drivers/mtd/maps/physmap.c @@ -67,6 +67,10 @@ static int physmap_flash_remove(struct platform_device *dev) if (info->mtd[i] != NULL) map_destroy(info->mtd[i]); } + + if (physmap_data->exit) + physmap_data->exit(dev); + return 0; } @@ -77,7 +81,11 @@ static const char *rom_probe_types[] = { "map_rom", NULL }; #ifdef CONFIG_MTD_PARTITIONS -static const char *part_probe_types[] = { "cmdlinepart", "RedBoot", NULL }; +static const char *part_probe_types[] = { "cmdlinepart", "RedBoot", +#ifdef CONFIG_MTD_AFS_PARTS + "afs", +#endif + NULL }; #endif static int physmap_flash_probe(struct platform_device *dev) @@ -100,6 +108,12 @@ static int physmap_flash_probe(struct platform_device *dev) goto err_out; } + if (physmap_data->init) { + err = physmap_data->init(dev); + if (err) + goto err_out; + } + platform_set_drvdata(dev, info); for (i = 0; i < dev->num_resources; i++) { diff --git a/include/linux/mtd/physmap.h b/include/linux/mtd/physmap.h index bcfd9f777454..d37cca05e62c 100644 --- a/include/linux/mtd/physmap.h +++ b/include/linux/mtd/physmap.h @@ -22,6 +22,8 @@ struct map_info; struct physmap_flash_data { unsigned int width; + int (*init)(struct platform_device *); + void (*exit)(struct platform_device *); void (*set_vpp)(struct map_info *, int); unsigned int nr_parts; unsigned int pfow_base; -- cgit v1.2.3-71-gd317 From 667f390bee987d45351402e42008c52cdfb77d76 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 18 May 2011 10:51:55 +0100 Subject: ARM: 6910/1: MTD: physmap: let set_vpp() pass a platform_device instead of a map_info The set_vpp() method provided by physmap passes a map_info back to the platform code, which has little relevance as far as the platform is concerned (this parameter is completely unused). Instead, pass the platform_device, which can be used in the pismo driver to retrieve some important information in a nicer way, instead of the hack that was in place. The empty set_vpp function in board-at572d940hf_ek.c is left untouched, as the board/SoC is scheduled for removal. 
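To make the new calling convention concrete, here is a hedged board-file sketch (the names and GPIO number are invented for illustration) of platform data that uses both the init/exit hooks added by the previous patch and the set_vpp() prototype introduced here:

#include <linux/gpio.h>
#include <linux/mtd/physmap.h>
#include <linux/platform_device.h>

#define EXAMPLE_FLASH_VPP_GPIO	42	/* placeholder */

static int example_flash_init(struct platform_device *pdev)
{
	/* claim whatever board resource gates write access to the flash */
	return gpio_request(EXAMPLE_FLASH_VPP_GPIO, "flash vpp");
}

static void example_flash_exit(struct platform_device *pdev)
{
	gpio_free(EXAMPLE_FLASH_VPP_GPIO);
}

static void example_flash_set_vpp(struct platform_device *pdev, int on)
{
	gpio_set_value(EXAMPLE_FLASH_VPP_GPIO, !!on);
}

static struct physmap_flash_data example_flash_data = {
	.width		= 2,
	.init		= example_flash_init,
	.exit		= example_flash_exit,
	.set_vpp	= example_flash_set_vpp,
};

On the driver side, the physmap_set_vpp() wrapper added in the physmap.c hunk below recovers the platform_device from map->map_priv_1 before calling back into board code like the above.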
Cc: Andrew Victor Cc: Nicolas Ferre Acked-by: Jean-Christophe PLAGNIOL-VILLARD Cc: Philipp Zabel Cc: Eric Miao Cc: Ben Dooks Acked-by: Artem Bityutskiy Acked-by: David Woodhouse Signed-off-by: Marc Zyngier Signed-off-by: Russell King --- arch/arm/mach-integrator/integrator_ap.c | 2 +- arch/arm/mach-integrator/integrator_cp.c | 2 +- arch/arm/mach-omap1/flash.c | 2 +- arch/arm/mach-pxa/hx4700.c | 2 +- arch/arm/mach-pxa/magician.c | 2 +- arch/arm/mach-realview/core.c | 2 +- arch/arm/mach-s3c2410/nor-simtec.c | 2 +- arch/arm/mach-versatile/core.c | 2 +- arch/arm/mach-vexpress/v2m.c | 2 +- arch/arm/plat-omap/include/plat/flash.h | 2 +- drivers/mtd/maps/physmap.c | 20 ++++++++++++---- drivers/mtd/maps/pismo.c | 40 +++----------------------------- include/linux/mtd/physmap.h | 2 +- 13 files changed, 29 insertions(+), 53 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-integrator/integrator_ap.c b/arch/arm/mach-integrator/integrator_ap.c index 11bd49e8b663..2aa98ee41b8d 100644 --- a/arch/arm/mach-integrator/integrator_ap.c +++ b/arch/arm/mach-integrator/integrator_ap.c @@ -263,7 +263,7 @@ static void ap_flash_exit(struct platform_device *dev) } } -static void ap_flash_set_vpp(struct map_info *map, int on) +static void ap_flash_set_vpp(struct platform_device *pdev, int on) { void __iomem *reg = on ? SC_CTRLS : SC_CTRLC; diff --git a/arch/arm/mach-integrator/integrator_cp.c b/arch/arm/mach-integrator/integrator_cp.c index ec9628fe7109..b676b41d70e2 100644 --- a/arch/arm/mach-integrator/integrator_cp.c +++ b/arch/arm/mach-integrator/integrator_cp.c @@ -259,7 +259,7 @@ static void intcp_flash_exit(struct platform_device *dev) writel(val, INTCP_VA_CTRL_BASE + INTCP_FLASHPROG); } -static void intcp_flash_set_vpp(struct map_info *map, int on) +static void intcp_flash_set_vpp(struct platform_device *pdev, int on) { u32 val; diff --git a/arch/arm/mach-omap1/flash.c b/arch/arm/mach-omap1/flash.c index acd161666408..1749cb37dda0 100644 --- a/arch/arm/mach-omap1/flash.c +++ b/arch/arm/mach-omap1/flash.c @@ -13,7 +13,7 @@ #include #include -void omap1_set_vpp(struct map_info *map, int enable) +void omap1_set_vpp(struct platform_device *pdev, int enable) { static int count; u32 l; diff --git a/arch/arm/mach-pxa/hx4700.c b/arch/arm/mach-pxa/hx4700.c index 9cdcca597924..f941a495a4a8 100644 --- a/arch/arm/mach-pxa/hx4700.c +++ b/arch/arm/mach-pxa/hx4700.c @@ -735,7 +735,7 @@ static struct platform_device bq24022 = { * StrataFlash */ -static void hx4700_set_vpp(struct map_info *map, int vpp) +static void hx4700_set_vpp(struct platform_device *pdev, int vpp) { gpio_set_value(GPIO91_HX4700_FLASH_VPEN, vpp); } diff --git a/arch/arm/mach-pxa/magician.c b/arch/arm/mach-pxa/magician.c index 9984ef70bd79..e1920572948a 100644 --- a/arch/arm/mach-pxa/magician.c +++ b/arch/arm/mach-pxa/magician.c @@ -662,7 +662,7 @@ static struct pxaohci_platform_data magician_ohci_info = { * StrataFlash */ -static void magician_set_vpp(struct map_info *map, int vpp) +static void magician_set_vpp(struct platform_device *pdev, int vpp) { gpio_set_value(EGPIO_MAGICIAN_FLASH_VPP, vpp); } diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c index d3f1dde70fc9..c8ec08886633 100644 --- a/arch/arm/mach-realview/core.c +++ b/arch/arm/mach-realview/core.c @@ -77,7 +77,7 @@ void __init realview_adjust_zones(unsigned long *size, unsigned long *hole) #define REALVIEW_FLASHCTRL (__io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_FLASH_OFFSET) -static void realview_flash_set_vpp(struct map_info *map, int on) +static 
void realview_flash_set_vpp(struct platform_device *pdev, int on) { u32 val; diff --git a/arch/arm/mach-s3c2410/nor-simtec.c b/arch/arm/mach-s3c2410/nor-simtec.c index 598d130633dc..ad9f750f1e55 100644 --- a/arch/arm/mach-s3c2410/nor-simtec.c +++ b/arch/arm/mach-s3c2410/nor-simtec.c @@ -32,7 +32,7 @@ #include "nor-simtec.h" -static void simtec_nor_vpp(struct map_info *map, int vpp) +static void simtec_nor_vpp(struct platform_device *pdev, int vpp) { unsigned int val; unsigned long flags; diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c index 06f406ad8edd..335d8250e364 100644 --- a/arch/arm/mach-versatile/core.c +++ b/arch/arm/mach-versatile/core.c @@ -190,7 +190,7 @@ void __init versatile_map_io(void) #define VERSATILE_FLASHCTRL (__io_address(VERSATILE_SYS_BASE) + VERSATILE_SYS_FLASH_OFFSET) -static void versatile_flash_set_vpp(struct map_info *map, int on) +static void versatile_flash_set_vpp(struct platform_device *pdev, int on) { u32 val; diff --git a/arch/arm/mach-vexpress/v2m.c b/arch/arm/mach-vexpress/v2m.c index e3268152c834..f860314ef7f6 100644 --- a/arch/arm/mach-vexpress/v2m.c +++ b/arch/arm/mach-vexpress/v2m.c @@ -206,7 +206,7 @@ static struct platform_device v2m_usb_device = { .dev.platform_data = &v2m_usb_config, }; -static void v2m_flash_set_vpp(struct map_info *map, int on) +static void v2m_flash_set_vpp(struct platform_device *pdev, int on) { writel(on != 0, MMIO_P2V(V2M_SYS_FLASH)); } diff --git a/arch/arm/plat-omap/include/plat/flash.h b/arch/arm/plat-omap/include/plat/flash.h index 3e6327016b40..3083195123ea 100644 --- a/arch/arm/plat-omap/include/plat/flash.h +++ b/arch/arm/plat-omap/include/plat/flash.h @@ -11,6 +11,6 @@ #include -extern void omap1_set_vpp(struct map_info *map, int enable); +extern void omap1_set_vpp(struct platform_device *pdev, int enable); #endif diff --git a/drivers/mtd/maps/physmap.c b/drivers/mtd/maps/physmap.c index 49676b7a53a4..1a9b94f0ee54 100644 --- a/drivers/mtd/maps/physmap.c +++ b/drivers/mtd/maps/physmap.c @@ -74,6 +74,18 @@ static int physmap_flash_remove(struct platform_device *dev) return 0; } +static void physmap_set_vpp(struct map_info *map, int state) +{ + struct platform_device *pdev; + struct physmap_flash_data *physmap_data; + + pdev = (struct platform_device *)map->map_priv_1; + physmap_data = pdev->dev.platform_data; + + if (physmap_data->set_vpp) + physmap_data->set_vpp(pdev, state); +} + static const char *rom_probe_types[] = { "cfi_probe", "jedec_probe", @@ -81,10 +93,7 @@ static const char *rom_probe_types[] = { "map_rom", NULL }; #ifdef CONFIG_MTD_PARTITIONS -static const char *part_probe_types[] = { "cmdlinepart", "RedBoot", -#ifdef CONFIG_MTD_AFS_PARTS - "afs", -#endif +static const char *part_probe_types[] = { "cmdlinepart", "RedBoot", "afs", NULL }; #endif @@ -134,8 +143,9 @@ static int physmap_flash_probe(struct platform_device *dev) info->map[i].phys = dev->resource[i].start; info->map[i].size = resource_size(&dev->resource[i]); info->map[i].bankwidth = physmap_data->width; - info->map[i].set_vpp = physmap_data->set_vpp; + info->map[i].set_vpp = physmap_set_vpp; info->map[i].pfow_base = physmap_data->pfow_base; + info->map[i].map_priv_1 = (unsigned long)dev; info->map[i].virt = devm_ioremap(&dev->dev, info->map[i].phys, info->map[i].size); diff --git a/drivers/mtd/maps/pismo.c b/drivers/mtd/maps/pismo.c index f4ce273e93fd..65bd1cd4d627 100644 --- a/drivers/mtd/maps/pismo.c +++ b/drivers/mtd/maps/pismo.c @@ -50,39 +50,13 @@ struct pismo_data { struct platform_device *dev[PISMO_NUM_CS]; }; 
-/* FIXME: set_vpp could do with a better calling convention */ -static struct pismo_data *vpp_pismo; -static DEFINE_MUTEX(pismo_mutex); - -static int pismo_setvpp_probe_fix(struct pismo_data *pismo) +static void pismo_set_vpp(struct platform_device *pdev, int on) { - mutex_lock(&pismo_mutex); - if (vpp_pismo) { - mutex_unlock(&pismo_mutex); - kfree(pismo); - return -EBUSY; - } - vpp_pismo = pismo; - mutex_unlock(&pismo_mutex); - return 0; -} - -static void pismo_setvpp_remove_fix(struct pismo_data *pismo) -{ - mutex_lock(&pismo_mutex); - if (vpp_pismo == pismo) - vpp_pismo = NULL; - mutex_unlock(&pismo_mutex); -} - -static void pismo_set_vpp(struct map_info *map, int on) -{ - struct pismo_data *pismo = vpp_pismo; + struct i2c_client *client = to_i2c_client(pdev->dev.parent); + struct pismo_data *pismo = i2c_get_clientdata(client); pismo->vpp(pismo->vpp_data, on); } -/* end of hack */ - static unsigned int __devinit pismo_width_to_bytes(unsigned int width) { @@ -231,9 +205,6 @@ static int __devexit pismo_remove(struct i2c_client *client) for (i = 0; i < ARRAY_SIZE(pismo->dev); i++) platform_device_unregister(pismo->dev[i]); - /* FIXME: set_vpp needs saner arguments */ - pismo_setvpp_remove_fix(pismo); - kfree(pismo); return 0; @@ -257,11 +228,6 @@ static int __devinit pismo_probe(struct i2c_client *client, if (!pismo) return -ENOMEM; - /* FIXME: set_vpp needs saner arguments */ - ret = pismo_setvpp_probe_fix(pismo); - if (ret) - return ret; - pismo->client = client; if (pdata) { pismo->vpp = pdata->set_vpp; diff --git a/include/linux/mtd/physmap.h b/include/linux/mtd/physmap.h index d37cca05e62c..49b959029417 100644 --- a/include/linux/mtd/physmap.h +++ b/include/linux/mtd/physmap.h @@ -24,7 +24,7 @@ struct physmap_flash_data { unsigned int width; int (*init)(struct platform_device *); void (*exit)(struct platform_device *); - void (*set_vpp)(struct map_info *, int); + void (*set_vpp)(struct platform_device *, int); unsigned int nr_parts; unsigned int pfow_base; char *probe_type; -- cgit v1.2.3-71-gd317 From 94d56ffa0a9bf11dfb602dae9223089e09a8e050 Mon Sep 17 00:00:00 2001 From: Andreas Oberritter Date: Thu, 12 May 2011 18:11:06 -0300 Subject: [media] DVB: Add basic API support for DVB-T2 and bump minor version [steve@stevekerrison.com: Remove private definitions from cxd2820r that existed before API was defined] Signed-off-by: Andreas Oberritter Signed-off-by: Steve Kerrison Signed-off-by: Mauro Carvalho Chehab --- drivers/media/dvb/dvb-core/dvb_frontend.c | 13 +++++++++---- drivers/media/dvb/dvb-core/dvb_frontend.h | 3 +++ drivers/media/dvb/frontends/cxd2820r_priv.h | 12 ------------ include/linux/dvb/frontend.h | 20 ++++++++++++++++---- include/linux/dvb/version.h | 2 +- 5 files changed, 29 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/media/dvb/dvb-core/dvb_frontend.c b/drivers/media/dvb/dvb-core/dvb_frontend.c index 95c3fec6ae40..3639edce65e0 100644 --- a/drivers/media/dvb/dvb-core/dvb_frontend.c +++ b/drivers/media/dvb/dvb-core/dvb_frontend.c @@ -1148,10 +1148,9 @@ static void dtv_property_adv_params_sync(struct dvb_frontend *fe) break; } - if(c->delivery_system == SYS_ISDBT) { - /* Fake out a generic DVB-T request so we pass validation in the ioctl */ - p->frequency = c->frequency; - p->inversion = c->inversion; + /* Fake out a generic DVB-T request so we pass validation in the ioctl */ + if ((c->delivery_system == SYS_ISDBT) || + (c->delivery_system == SYS_DVBT2)) { p->u.ofdm.constellation = QAM_AUTO; p->u.ofdm.code_rate_HP = FEC_AUTO; 
p->u.ofdm.code_rate_LP = FEC_AUTO; @@ -1324,6 +1323,9 @@ static int dtv_property_process_get(struct dvb_frontend *fe, case DTV_ISDBS_TS_ID: tvp->u.data = fe->dtv_property_cache.isdbs_ts_id; break; + case DTV_DVBT2_PLP_ID: + tvp->u.data = fe->dtv_property_cache.dvbt2_plp_id; + break; default: return -EINVAL; } @@ -1479,6 +1481,9 @@ static int dtv_property_process_set(struct dvb_frontend *fe, case DTV_ISDBS_TS_ID: fe->dtv_property_cache.isdbs_ts_id = tvp->u.data; break; + case DTV_DVBT2_PLP_ID: + fe->dtv_property_cache.dvbt2_plp_id = tvp->u.data; + break; default: return -EINVAL; } diff --git a/drivers/media/dvb/dvb-core/dvb_frontend.h b/drivers/media/dvb/dvb-core/dvb_frontend.h index 3b860504bf04..5590eb6eb408 100644 --- a/drivers/media/dvb/dvb-core/dvb_frontend.h +++ b/drivers/media/dvb/dvb-core/dvb_frontend.h @@ -358,6 +358,9 @@ struct dtv_frontend_properties { /* ISDB-T specifics */ u32 isdbs_ts_id; + + /* DVB-T2 specifics */ + u32 dvbt2_plp_id; }; struct dvb_frontend { diff --git a/drivers/media/dvb/frontends/cxd2820r_priv.h b/drivers/media/dvb/frontends/cxd2820r_priv.h index d4e2e0b76c10..25adbeefa6d3 100644 --- a/drivers/media/dvb/frontends/cxd2820r_priv.h +++ b/drivers/media/dvb/frontends/cxd2820r_priv.h @@ -40,18 +40,6 @@ #undef warn #define warn(f, arg...) printk(KERN_WARNING LOG_PREFIX": " f "\n" , ## arg) -/* - * FIXME: These are totally wrong and must be added properly to the API. - * Only temporary solution in order to get driver compile. - */ -#define SYS_DVBT2 SYS_DAB -#define TRANSMISSION_MODE_1K 0 -#define TRANSMISSION_MODE_16K 0 -#define TRANSMISSION_MODE_32K 0 -#define GUARD_INTERVAL_1_128 0 -#define GUARD_INTERVAL_19_128 0 -#define GUARD_INTERVAL_19_256 0 - struct reg_val_mask { u32 reg; u8 val; diff --git a/include/linux/dvb/frontend.h b/include/linux/dvb/frontend.h index 493a2bf85f62..36a3ed63f571 100644 --- a/include/linux/dvb/frontend.h +++ b/include/linux/dvb/frontend.h @@ -175,14 +175,20 @@ typedef enum fe_transmit_mode { TRANSMISSION_MODE_2K, TRANSMISSION_MODE_8K, TRANSMISSION_MODE_AUTO, - TRANSMISSION_MODE_4K + TRANSMISSION_MODE_4K, + TRANSMISSION_MODE_1K, + TRANSMISSION_MODE_16K, + TRANSMISSION_MODE_32K, } fe_transmit_mode_t; typedef enum fe_bandwidth { BANDWIDTH_8_MHZ, BANDWIDTH_7_MHZ, BANDWIDTH_6_MHZ, - BANDWIDTH_AUTO + BANDWIDTH_AUTO, + BANDWIDTH_5_MHZ, + BANDWIDTH_10_MHZ, + BANDWIDTH_1_712_MHZ, } fe_bandwidth_t; @@ -191,7 +197,10 @@ typedef enum fe_guard_interval { GUARD_INTERVAL_1_16, GUARD_INTERVAL_1_8, GUARD_INTERVAL_1_4, - GUARD_INTERVAL_AUTO + GUARD_INTERVAL_AUTO, + GUARD_INTERVAL_1_128, + GUARD_INTERVAL_19_128, + GUARD_INTERVAL_19_256, } fe_guard_interval_t; @@ -305,7 +314,9 @@ struct dvb_frontend_event { #define DTV_ISDBS_TS_ID 42 -#define DTV_MAX_COMMAND DTV_ISDBS_TS_ID +#define DTV_DVBT2_PLP_ID 43 + +#define DTV_MAX_COMMAND DTV_DVBT2_PLP_ID typedef enum fe_pilot { PILOT_ON, @@ -337,6 +348,7 @@ typedef enum fe_delivery_system { SYS_DMBTH, SYS_CMMB, SYS_DAB, + SYS_DVBT2, } fe_delivery_system_t; struct dtv_cmds_h { diff --git a/include/linux/dvb/version.h b/include/linux/dvb/version.h index 5a7546c12688..1421cc84afaa 100644 --- a/include/linux/dvb/version.h +++ b/include/linux/dvb/version.h @@ -24,6 +24,6 @@ #define _DVBVERSION_H_ #define DVB_API_VERSION 5 -#define DVB_API_VERSION_MINOR 2 +#define DVB_API_VERSION_MINOR 3 #endif /*_DVBVERSION_H_*/ -- cgit v1.2.3-71-gd317 From 3e0c2ab67e48f77c2da0a5c826aac397792a214e Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Fri, 20 May 2011 09:42:48 -0500 Subject: slub: Deal with hyperthetical case of 
PAGE_SIZE > 2M kmalloc_index() currently returns -1 if the PAGE_SIZE is larger than 2M which seems to cause some concern since the callers do not check for -1. Insert a BUG() and add a comment to the -1 explaining that the code cannot be reached. Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- include/linux/slub_def.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index ca0c076b2374..c8668d161dd8 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -177,7 +177,8 @@ static __always_inline int kmalloc_index(size_t size) if (size <= 4 * 1024) return 12; /* * The following is only needed to support architectures with a larger page - * size than 4k. + * size than 4k. We need to support 2 * PAGE_SIZE here. So for a 64k page + * size we would have to go up to 128k. */ if (size <= 8 * 1024) return 13; if (size <= 16 * 1024) return 14; @@ -188,7 +189,8 @@ static __always_inline int kmalloc_index(size_t size) if (size <= 512 * 1024) return 19; if (size <= 1024 * 1024) return 20; if (size <= 2 * 1024 * 1024) return 21; - return -1; + BUG(); + return -1; /* Will never be reached */ /* * What we really wanted to do and cannot do because of compiler issues is: -- cgit v1.2.3-71-gd317 From 9f728f53dd70396f3183d2f0861022259471824b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 12 May 2011 17:11:47 -0700 Subject: PCI/e1000e: Add and use pci_disable_link_state_locked() Need to use it in _e1000e_disable_aspm. This routine is used for error recovery, where the pci_bus_sem is already held, and we don't want pci_disable_link_state to try to take it again. So add a locked variant for use in cases like this. Found lock up: [ 2374.654557] kworker/32:1 D ffff881027f6b0f0 0 6075 2 0x00000000 [ 2374.654816] ffff88503f099a68 0000000000000046 ffff88503f098000 0000000000004000 [ 2374.654837] 00000000001d1ec0 ffff88503f099fd8 00000000001d1ec0 ffff88503f099fd8 [ 2374.654860] 0000000000004000 00000000001d1ec0 ffff88503dcc8000 ffff88503f090000 [ 2374.654880] Call Trace: [ 2374.654898] [] ? __lock_acquired+0x3a/0x224 [ 2374.654914] [] ? _raw_spin_unlock_irq+0x30/0x36 [ 2374.654925] [] ? trace_hardirqs_on_caller+0x1f/0x178 [ 2374.654936] [] rwsem_down_failed_common+0xd3/0x103 [ 2374.654945] [] ? __lock_contended+0x3a/0x2a2 [ 2374.654955] [] rwsem_down_read_failed+0x12/0x14 [ 2374.654967] [] call_rwsem_down_read_failed+0x14/0x30 [ 2374.654981] [] ? pci_disable_link_state+0x5f/0xf5 [ 2374.654990] [] ? down_read+0x7e/0x91 [ 2374.654999] [] ? pci_disable_link_state+0x5f/0xf5 [ 2374.655008] [] pci_disable_link_state+0x5f/0xf5 [ 2374.655024] [] e1000e_disable_aspm+0x55/0x5a [ 2374.655037] [] e1000_io_slot_reset+0x59/0xea [ 2374.655048] [] ? report_mmio_enabled+0x5d/0x5d [ 2374.655057] [] report_slot_reset+0x2e/0x5d [ 2374.655072] [] pci_walk_bus+0x8a/0xb7 [ 2374.655081] [] ? report_mmio_enabled+0x5d/0x5d [ 2374.655091] [] broadcast_error_message+0xa4/0xb2 [ 2374.655101] [] ? pci_bus_read_config_dword+0x72/0x80 [ 2374.655110] [] do_recovery+0x9e/0xf9 [ 2374.655120] [] handle_error_source+0x4c/0x51 [ 2374.655129] [] aer_isr_one_error+0x1e9/0x21a [ 2374.655138] [] aer_isr+0xc7/0xcc [ 2374.655147] [] ? aer_isr_one_error+0x21a/0x21a [ 2374.655159] [] process_one_work+0x237/0x3ec [ 2374.655168] [] ? process_one_work+0x1a8/0x3ec [ 2374.655178] [] worker_thread+0x17c/0x240 [ 2374.655186] [] ? trace_hardirqs_on+0xd/0xf [ 2374.655196] [] ? 
manage_workers+0xab/0xab [ 2374.655209] [] kthread+0xa0/0xa8 [ 2374.655223] [] kernel_thread_helper+0x4/0x10 [ 2374.655232] [] ? retint_restore_args+0xe/0xe [ 2374.655243] [] ? __init_kthread_worker+0x5b/0x5b [ 2374.655252] [] ? gs_change+0xb/0xb when aer happens, pci_walk_bus already have down_read(&pci_bus_sem)... then report_slot_reset ==> e1000_io_slot_reset ==> e1000e_disable_aspm ==> pci_disable_link_state... We can not use pci_disable_link_state, and it will try to hold pci_bus_sem again. Try to have __pci_disable_link_state that will not need to hold pci_bus_sem. -v2: change name to pci_disable_link_state_locked() according to Jesse. [jbarnes: make sure new function is exported for modules] Signed-off-by: Yinghai Lu Signed-off-by: Jesse Barnes --- drivers/net/e1000e/netdev.c | 2 +- drivers/pci/pcie/aspm.c | 19 ++++++++++++++++--- include/linux/pci-aspm.h | 1 + 3 files changed, 18 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c index 506a0a0043b3..5fb43f098f17 100644 --- a/drivers/net/e1000e/netdev.c +++ b/drivers/net/e1000e/netdev.c @@ -5347,7 +5347,7 @@ static void e1000_complete_shutdown(struct pci_dev *pdev, bool sleep, #ifdef CONFIG_PCIEASPM static void __e1000e_disable_aspm(struct pci_dev *pdev, u16 state) { - pci_disable_link_state(pdev, state); + pci_disable_link_state_locked(pdev, state); } #else static void __e1000e_disable_aspm(struct pci_dev *pdev, u16 state) diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 3eb667b24787..6892601fc76f 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -734,7 +734,7 @@ void pcie_aspm_powersave_config_link(struct pci_dev *pdev) * pci_disable_link_state - disable pci device's link state, so the link will * never enter specific states */ -void pci_disable_link_state(struct pci_dev *pdev, int state) +static void __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem) { struct pci_dev *parent = pdev->bus->self; struct pcie_link_state *link; @@ -747,7 +747,8 @@ void pci_disable_link_state(struct pci_dev *pdev, int state) if (!parent || !parent->link_state) return; - down_read(&pci_bus_sem); + if (sem) + down_read(&pci_bus_sem); mutex_lock(&aspm_lock); link = parent->link_state; if (state & PCIE_LINK_STATE_L0S) @@ -761,7 +762,19 @@ void pci_disable_link_state(struct pci_dev *pdev, int state) pcie_set_clkpm(link, 0); } mutex_unlock(&aspm_lock); - up_read(&pci_bus_sem); + if (sem) + up_read(&pci_bus_sem); +} + +void pci_disable_link_state_locked(struct pci_dev *pdev, int state) +{ + __pci_disable_link_state(pdev, state, false); +} +EXPORT_SYMBOL(pci_disable_link_state_locked); + +void pci_disable_link_state(struct pci_dev *pdev, int state) +{ + __pci_disable_link_state(pdev, state, true); } EXPORT_SYMBOL(pci_disable_link_state); diff --git a/include/linux/pci-aspm.h b/include/linux/pci-aspm.h index 67cb3ae38016..7cea7b6c1413 100644 --- a/include/linux/pci-aspm.h +++ b/include/linux/pci-aspm.h @@ -28,6 +28,7 @@ extern void pcie_aspm_exit_link_state(struct pci_dev *pdev); extern void pcie_aspm_pm_state_change(struct pci_dev *pdev); extern void pcie_aspm_powersave_config_link(struct pci_dev *pdev); extern void pci_disable_link_state(struct pci_dev *pdev, int state); +extern void pci_disable_link_state_locked(struct pci_dev *pdev, int state); extern void pcie_clear_aspm(void); extern void pcie_no_aspm(void); #else -- cgit v1.2.3-71-gd317 From 24a4742f0be6226eb0106fbb17caf4d711d1ad43 Mon Sep 17 00:00:00 2001 From: Alex 
Williamson Date: Tue, 10 May 2011 10:02:11 -0600 Subject: PCI: Track the size of each saved capability data area This will allow us to store and load it later. Signed-off-by: Alex Williamson Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 12 +++++++----- include/linux/pci.h | 11 ++++++++--- 2 files changed, 15 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 53302cbdb94c..d6e5b8ea9194 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -830,7 +830,7 @@ static int pci_save_pcie_state(struct pci_dev *dev) dev_err(&dev->dev, "buffer not found in %s\n", __func__); return -ENOMEM; } - cap = (u16 *)&save_state->data[0]; + cap = (u16 *)&save_state->cap.data[0]; pci_read_config_word(dev, pos + PCI_EXP_FLAGS, &flags); @@ -863,7 +863,7 @@ static void pci_restore_pcie_state(struct pci_dev *dev) pos = pci_find_capability(dev, PCI_CAP_ID_EXP); if (!save_state || pos <= 0) return; - cap = (u16 *)&save_state->data[0]; + cap = (u16 *)&save_state->cap.data[0]; pci_read_config_word(dev, pos + PCI_EXP_FLAGS, &flags); @@ -899,7 +899,8 @@ static int pci_save_pcix_state(struct pci_dev *dev) return -ENOMEM; } - pci_read_config_word(dev, pos + PCI_X_CMD, (u16 *)save_state->data); + pci_read_config_word(dev, pos + PCI_X_CMD, + (u16 *)save_state->cap.data); return 0; } @@ -914,7 +915,7 @@ static void pci_restore_pcix_state(struct pci_dev *dev) pos = pci_find_capability(dev, PCI_CAP_ID_PCIX); if (!save_state || pos <= 0) return; - cap = (u16 *)&save_state->data[0]; + cap = (u16 *)&save_state->cap.data[0]; pci_write_config_word(dev, pos + PCI_X_CMD, cap[i++]); } @@ -1771,7 +1772,8 @@ static int pci_add_cap_save_buffer( if (!save_state) return -ENOMEM; - save_state->cap_nr = cap; + save_state->cap.cap_nr = cap; + save_state->cap.size = size; pci_add_saved_cap(dev, save_state); return 0; diff --git a/include/linux/pci.h b/include/linux/pci.h index df4d69b82144..61ef8f2f9b19 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -214,12 +214,17 @@ enum pci_bus_speed { PCI_SPEED_UNKNOWN = 0xff, }; -struct pci_cap_saved_state { - struct hlist_node next; +struct pci_cap_saved_data { char cap_nr; + unsigned int size; u32 data[0]; }; +struct pci_cap_saved_state { + struct hlist_node next; + struct pci_cap_saved_data cap; +}; + struct pcie_link_state; struct pci_vpd; struct pci_sriov; @@ -366,7 +371,7 @@ static inline struct pci_cap_saved_state *pci_find_saved_cap( struct hlist_node *pos; hlist_for_each_entry(tmp, pos, &pci_dev->saved_cap_space, next) { - if (tmp->cap_nr == cap) + if (tmp->cap.cap_nr == cap) return tmp; } return NULL; -- cgit v1.2.3-71-gd317 From ffbdd3f7931fb7cb7e36d00d16303ec433be5145 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 10 May 2011 10:02:27 -0600 Subject: PCI: Add interfaces to store and load the device saved state For KVM device assignment, we'd like to save off the state of a device prior to passing it to the guest and restore it later. We also want to allow pci_reset_funciton() to be called while the device is owned by the guest. This however overwrites and invalidates the struct pci_dev buffers, so we can't just manually call save and restore. Add generic interfaces for the saved state to be stored and reloaded back into struct pci_dev at a later time. 
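Paired with the existing pci_save_state()/pci_restore_state() calls, a caller might use the new interfaces roughly as sketched below. This is illustrative only (the function names and the captured pointer are assumptions, not part of the patch), but it mirrors the KVM assigned-device usage in the follow-on patch:

#include <linux/pci.h>

static struct pci_saved_state *captured;	/* illustrative per-device storage */

static int example_take_device(struct pci_dev *pdev)
{
	pci_save_state(pdev);			/* fill the in-kernel save buffers */
	captured = pci_store_saved_state(pdev);	/* snapshot them; may return NULL */
	if (!captured)
		dev_warn(&pdev->dev, "no saved state captured\n");
	return 0;
}

static void example_give_back_device(struct pci_dev *pdev)
{
	/* Reload the snapshot (which also frees it), then write it to the device. */
	if (pci_load_and_free_saved_state(pdev, &captured))
		dev_info(&pdev->dev, "couldn't reload saved state\n");
	else
		pci_restore_state(pdev);
}
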
Signed-off-by: Alex Williamson Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 98 +++++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/pci.h | 4 +++ 2 files changed, 102 insertions(+) (limited to 'include/linux') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index d6e5b8ea9194..22c9b27fdd8d 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -976,6 +976,104 @@ void pci_restore_state(struct pci_dev *dev) dev->state_saved = false; } +struct pci_saved_state { + u32 config_space[16]; + struct pci_cap_saved_data cap[0]; +}; + +/** + * pci_store_saved_state - Allocate and return an opaque struct containing + * the device saved state. + * @dev: PCI device that we're dealing with + * + * Rerturn NULL if no state or error. + */ +struct pci_saved_state *pci_store_saved_state(struct pci_dev *dev) +{ + struct pci_saved_state *state; + struct pci_cap_saved_state *tmp; + struct pci_cap_saved_data *cap; + struct hlist_node *pos; + size_t size; + + if (!dev->state_saved) + return NULL; + + size = sizeof(*state) + sizeof(struct pci_cap_saved_data); + + hlist_for_each_entry(tmp, pos, &dev->saved_cap_space, next) + size += sizeof(struct pci_cap_saved_data) + tmp->cap.size; + + state = kzalloc(size, GFP_KERNEL); + if (!state) + return NULL; + + memcpy(state->config_space, dev->saved_config_space, + sizeof(state->config_space)); + + cap = state->cap; + hlist_for_each_entry(tmp, pos, &dev->saved_cap_space, next) { + size_t len = sizeof(struct pci_cap_saved_data) + tmp->cap.size; + memcpy(cap, &tmp->cap, len); + cap = (struct pci_cap_saved_data *)((u8 *)cap + len); + } + /* Empty cap_save terminates list */ + + return state; +} +EXPORT_SYMBOL_GPL(pci_store_saved_state); + +/** + * pci_load_saved_state - Reload the provided save state into struct pci_dev. + * @dev: PCI device that we're dealing with + * @state: Saved state returned from pci_store_saved_state() + */ +int pci_load_saved_state(struct pci_dev *dev, struct pci_saved_state *state) +{ + struct pci_cap_saved_data *cap; + + dev->state_saved = false; + + if (!state) + return 0; + + memcpy(dev->saved_config_space, state->config_space, + sizeof(state->config_space)); + + cap = state->cap; + while (cap->size) { + struct pci_cap_saved_state *tmp; + + tmp = pci_find_saved_cap(dev, cap->cap_nr); + if (!tmp || tmp->cap.size != cap->size) + return -EINVAL; + + memcpy(tmp->cap.data, cap->data, tmp->cap.size); + cap = (struct pci_cap_saved_data *)((u8 *)cap + + sizeof(struct pci_cap_saved_data) + cap->size); + } + + dev->state_saved = true; + return 0; +} +EXPORT_SYMBOL_GPL(pci_load_saved_state); + +/** + * pci_load_and_free_saved_state - Reload the save state pointed to by state, + * and free the memory allocated for it. 
+ * @dev: PCI device that we're dealing with + * @state: Pointer to saved state returned from pci_store_saved_state() + */ +int pci_load_and_free_saved_state(struct pci_dev *dev, + struct pci_saved_state **state) +{ + int ret = pci_load_saved_state(dev, *state); + kfree(*state); + *state = NULL; + return ret; +} +EXPORT_SYMBOL_GPL(pci_load_and_free_saved_state); + static int do_pci_enable_device(struct pci_dev *dev, int bars) { int err; diff --git a/include/linux/pci.h b/include/linux/pci.h index 61ef8f2f9b19..4604d1d5514d 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -812,6 +812,10 @@ size_t pci_get_rom_size(struct pci_dev *pdev, void __iomem *rom, size_t size); /* Power management related routines */ int pci_save_state(struct pci_dev *dev); void pci_restore_state(struct pci_dev *dev); +struct pci_saved_state *pci_store_saved_state(struct pci_dev *dev); +int pci_load_saved_state(struct pci_dev *dev, struct pci_saved_state *state); +int pci_load_and_free_saved_state(struct pci_dev *dev, + struct pci_saved_state **state); int __pci_complete_power_transition(struct pci_dev *dev, pci_power_t state); int pci_set_power_state(struct pci_dev *dev, pci_power_t state); pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state); -- cgit v1.2.3-71-gd317 From f8fcfd775523347afe460dc3a0f45d0479e784a2 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 10 May 2011 10:02:39 -0600 Subject: KVM: Use pci_store/load_saved_state() around VM device usage Store the device saved state so that we can reload the device back to the original state when it's unassigned. This has the benefit that the state survives across pci_reset_function() calls via the PCI sysfs reset interface while the VM is using the device. Signed-off-by: Alex Williamson Acked-by: Avi Kivity Signed-off-by: Jesse Barnes --- include/linux/kvm_host.h | 1 + virt/kvm/assigned-dev.c | 18 ++++++++++++++---- 2 files changed, 15 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ab428552af8e..9272db03a3e5 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -513,6 +513,7 @@ struct kvm_assigned_dev_kernel { struct kvm *kvm; spinlock_t intx_lock; char irq_name[32]; + struct pci_saved_state *pci_saved_state; }; struct kvm_irq_mask_notifier { diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c index ae72ae604c89..6cc4b97ec458 100644 --- a/virt/kvm/assigned-dev.c +++ b/virt/kvm/assigned-dev.c @@ -197,8 +197,13 @@ static void kvm_free_assigned_device(struct kvm *kvm, { kvm_free_assigned_irq(kvm, assigned_dev); - __pci_reset_function(assigned_dev->dev); - pci_restore_state(assigned_dev->dev); + pci_reset_function(assigned_dev->dev); + if (pci_load_and_free_saved_state(assigned_dev->dev, + &assigned_dev->pci_saved_state)) + printk(KERN_INFO "%s: Couldn't reload %s saved state\n", + __func__, dev_name(&assigned_dev->dev->dev)); + else + pci_restore_state(assigned_dev->dev); pci_release_regions(assigned_dev->dev); pci_disable_device(assigned_dev->dev); @@ -516,7 +521,10 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, pci_reset_function(dev); pci_save_state(dev); - + match->pci_saved_state = pci_store_saved_state(dev); + if (!match->pci_saved_state) + printk(KERN_DEBUG "%s: Couldn't store %s saved state\n", + __func__, dev_name(&dev->dev)); match->assigned_dev_id = assigned_dev->assigned_dev_id; match->host_segnr = assigned_dev->segnr; match->host_busnr = assigned_dev->busnr; @@ -546,7 +554,9 @@ out: 
mutex_unlock(&kvm->lock); return r; out_list_del: - pci_restore_state(dev); + if (pci_load_and_free_saved_state(dev, &match->pci_saved_state)) + printk(KERN_INFO "%s: Couldn't reload %s saved state\n", + __func__, dev_name(&dev->dev)); list_del(&match->list); pci_release_regions(dev); out_disable: -- cgit v1.2.3-71-gd317 From 5ce941ee4258b836cf818d2ac159d8cf3ebad648 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Wed, 27 Apr 2011 17:24:21 -0500 Subject: KVM: PPC: booke: add sregs support Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- Documentation/kvm/api.txt | 6 +- arch/powerpc/include/asm/kvm.h | 184 ++++++++++++++++++++++++++++++++++++ arch/powerpc/include/asm/kvm_44x.h | 1 - arch/powerpc/include/asm/kvm_e500.h | 1 + arch/powerpc/include/asm/kvm_host.h | 3 + arch/powerpc/include/asm/kvm_ppc.h | 9 ++ arch/powerpc/kvm/44x.c | 10 ++ arch/powerpc/kvm/booke.c | 154 +++++++++++++++++++++++++++++- arch/powerpc/kvm/e500.c | 75 +++++++++++++++ arch/powerpc/kvm/e500_emulate.c | 5 +- arch/powerpc/kvm/e500_tlb.c | 8 ++ arch/powerpc/kvm/emulate.c | 13 ++- arch/powerpc/kvm/powerpc.c | 4 + include/linux/kvm.h | 1 + 14 files changed, 461 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt index 1b9eaa7e8856..f64c41f8ba61 100644 --- a/Documentation/kvm/api.txt +++ b/Documentation/kvm/api.txt @@ -261,7 +261,7 @@ See KVM_GET_REGS for the data structure. 4.13 KVM_GET_SREGS Capability: basic -Architectures: x86 +Architectures: x86, ppc Type: vcpu ioctl Parameters: struct kvm_sregs (out) Returns: 0 on success, -1 on error @@ -279,6 +279,8 @@ struct kvm_sregs { __u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64]; }; +/* ppc -- see arch/powerpc/include/asm/kvm.h */ + interrupt_bitmap is a bitmap of pending external interrupts. At most one bit may be set. This interrupt has been acknowledged by the APIC but not yet injected into the cpu core. @@ -286,7 +288,7 @@ but not yet injected into the cpu core. 4.14 KVM_SET_SREGS Capability: basic -Architectures: x86 +Architectures: x86, ppc Type: vcpu ioctl Parameters: struct kvm_sregs (in) Returns: 0 on success, -1 on error diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index 18ea6963ad77..d2ca5ed3877b 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h @@ -45,6 +45,114 @@ struct kvm_regs { __u64 gpr[32]; }; +#define KVM_SREGS_E_IMPL_NONE 0 +#define KVM_SREGS_E_IMPL_FSL 1 + +#define KVM_SREGS_E_FSL_PIDn (1 << 0) /* PID1/PID2 */ + +/* + * Feature bits indicate which sections of the sregs struct are valid, + * both in KVM_GET_SREGS and KVM_SET_SREGS. On KVM_SET_SREGS, registers + * corresponding to unset feature bits will not be modified. This allows + * restoring a checkpoint made without that feature, while keeping the + * default values of the new registers. + * + * KVM_SREGS_E_BASE contains: + * CSRR0/1 (refers to SRR2/3 on 40x) + * ESR + * DEAR + * MCSR + * TSR + * TCR + * DEC + * TB + * VRSAVE (USPRG0) + */ +#define KVM_SREGS_E_BASE (1 << 0) + +/* + * KVM_SREGS_E_ARCH206 contains: + * + * PIR + * MCSRR0/1 + * DECAR + * IVPR + */ +#define KVM_SREGS_E_ARCH206 (1 << 1) + +/* + * Contains EPCR, plus the upper half of 64-bit registers + * that are 32-bit on 32-bit implementations. + */ +#define KVM_SREGS_E_64 (1 << 2) + +#define KVM_SREGS_E_SPRG8 (1 << 3) +#define KVM_SREGS_E_MCIVPR (1 << 4) + +/* + * IVORs are used -- contains IVOR0-15, plus additional IVORs + * in combination with an appropriate feature bit. 
+ */ +#define KVM_SREGS_E_IVOR (1 << 5) + +/* + * Contains MAS0-4, MAS6-7, TLBnCFG, MMUCFG. + * Also TLBnPS if MMUCFG[MAVN] = 1. + */ +#define KVM_SREGS_E_ARCH206_MMU (1 << 6) + +/* DBSR, DBCR, IAC, DAC, DVC */ +#define KVM_SREGS_E_DEBUG (1 << 7) + +/* Enhanced debug -- DSRR0/1, SPRG9 */ +#define KVM_SREGS_E_ED (1 << 8) + +/* Embedded Floating Point (SPE) -- IVOR32-34 if KVM_SREGS_E_IVOR */ +#define KVM_SREGS_E_SPE (1 << 9) + +/* External Proxy (EXP) -- EPR */ +#define KVM_SREGS_EXP (1 << 10) + +/* External PID (E.PD) -- EPSC/EPLC */ +#define KVM_SREGS_E_PD (1 << 11) + +/* Processor Control (E.PC) -- IVOR36-37 if KVM_SREGS_E_IVOR */ +#define KVM_SREGS_E_PC (1 << 12) + +/* Page table (E.PT) -- EPTCFG */ +#define KVM_SREGS_E_PT (1 << 13) + +/* Embedded Performance Monitor (E.PM) -- IVOR35 if KVM_SREGS_E_IVOR */ +#define KVM_SREGS_E_PM (1 << 14) + +/* + * Special updates: + * + * Some registers may change even while a vcpu is not running. + * To avoid losing these changes, by default these registers are + * not updated by KVM_SET_SREGS. To force an update, set the bit + * in u.e.update_special corresponding to the register to be updated. + * + * The update_special field is zero on return from KVM_GET_SREGS. + * + * When restoring a checkpoint, the caller can set update_special + * to 0xffffffff to ensure that everything is restored, even new features + * that the caller doesn't know about. + */ +#define KVM_SREGS_E_UPDATE_MCSR (1 << 0) +#define KVM_SREGS_E_UPDATE_TSR (1 << 1) +#define KVM_SREGS_E_UPDATE_DEC (1 << 2) +#define KVM_SREGS_E_UPDATE_DBSR (1 << 3) + +/* + * In KVM_SET_SREGS, reserved/pad fields must be left untouched from a + * previous KVM_GET_REGS. + * + * Unless otherwise indicated, setting any register with KVM_SET_SREGS + * directly sets its value. It does not trigger any special semantics such + * as write-one-to-clear. Calling KVM_SET_SREGS on an unmodified struct + * just received from KVM_GET_SREGS is always a no-op. + */ struct kvm_sregs { __u32 pvr; union { @@ -62,6 +170,82 @@ struct kvm_sregs { __u64 dbat[8]; } ppc32; } s; + struct { + union { + struct { /* KVM_SREGS_E_IMPL_FSL */ + __u32 features; /* KVM_SREGS_E_FSL_ */ + __u32 svr; + __u64 mcar; + __u32 hid0; + + /* KVM_SREGS_E_FSL_PIDn */ + __u32 pid1, pid2; + } fsl; + __u8 pad[256]; + } impl; + + __u32 features; /* KVM_SREGS_E_ */ + __u32 impl_id; /* KVM_SREGS_E_IMPL_ */ + __u32 update_special; /* KVM_SREGS_E_UPDATE_ */ + __u32 pir; /* read-only */ + __u64 sprg8; + __u64 sprg9; /* E.ED */ + __u64 csrr0; + __u64 dsrr0; /* E.ED */ + __u64 mcsrr0; + __u32 csrr1; + __u32 dsrr1; /* E.ED */ + __u32 mcsrr1; + __u32 esr; + __u64 dear; + __u64 ivpr; + __u64 mcivpr; + __u64 mcsr; /* KVM_SREGS_E_UPDATE_MCSR */ + + __u32 tsr; /* KVM_SREGS_E_UPDATE_TSR */ + __u32 tcr; + __u32 decar; + __u32 dec; /* KVM_SREGS_E_UPDATE_DEC */ + + /* + * Userspace can read TB directly, but the + * value reported here is consistent with "dec". + * + * Read-only. + */ + __u64 tb; + + __u32 dbsr; /* KVM_SREGS_E_UPDATE_DBSR */ + __u32 dbcr[3]; + __u32 iac[4]; + __u32 dac[2]; + __u32 dvc[2]; + __u8 num_iac; /* read-only */ + __u8 num_dac; /* read-only */ + __u8 num_dvc; /* read-only */ + __u8 pad; + + __u32 epr; /* EXP */ + __u32 vrsave; /* a.k.a. 
USPRG0 */ + __u32 epcr; /* KVM_SREGS_E_64 */ + + __u32 mas0; + __u32 mas1; + __u64 mas2; + __u64 mas7_3; + __u32 mas4; + __u32 mas6; + + __u32 ivor_low[16]; /* IVOR0-15 */ + __u32 ivor_high[18]; /* IVOR32+, plus room to expand */ + + __u32 mmucfg; /* read-only */ + __u32 eptcfg; /* E.PT, read-only */ + __u32 tlbcfg[4];/* read-only */ + __u32 tlbps[4]; /* read-only */ + + __u32 eplc, epsc; /* E.PD */ + } e; __u8 pad[1020]; } u; }; diff --git a/arch/powerpc/include/asm/kvm_44x.h b/arch/powerpc/include/asm/kvm_44x.h index d22d39942a92..a0e57618ff33 100644 --- a/arch/powerpc/include/asm/kvm_44x.h +++ b/arch/powerpc/include/asm/kvm_44x.h @@ -61,7 +61,6 @@ static inline struct kvmppc_vcpu_44x *to_44x(struct kvm_vcpu *vcpu) return container_of(vcpu, struct kvmppc_vcpu_44x, vcpu); } -void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid); void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu); void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/include/asm/kvm_e500.h b/arch/powerpc/include/asm/kvm_e500.h index bb2a0890600f..7a2a565f88c4 100644 --- a/arch/powerpc/include/asm/kvm_e500.h +++ b/arch/powerpc/include/asm/kvm_e500.h @@ -59,6 +59,7 @@ struct kvmppc_vcpu_e500 { u32 hid1; u32 tlb0cfg; u32 tlb1cfg; + u64 mcar; struct kvm_vcpu vcpu; }; diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index a16804399165..186f150b9b89 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -233,6 +233,9 @@ struct kvm_vcpu_arch { ulong csrr1; ulong dsrr0; ulong dsrr1; + ulong mcsrr0; + ulong mcsrr1; + ulong mcsr; ulong esr; u32 dec; u32 decar; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index ecb3bc74c344..9345238edecf 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -61,6 +61,7 @@ extern int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu); extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu); +extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb); /* Core-specific hooks */ @@ -142,4 +143,12 @@ static inline u32 kvmppc_set_field(u64 inst, int msb, int lsb, int value) return r; } +void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); +int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); + +void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); +int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); + +void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid); + #endif /* __POWERPC_KVM_PPC_H__ */ diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c index 74d0e7421143..da3a1225c0ac 100644 --- a/arch/powerpc/kvm/44x.c +++ b/arch/powerpc/kvm/44x.c @@ -107,6 +107,16 @@ int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, return 0; } +void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + kvmppc_get_sregs_ivor(vcpu, sregs); +} + +int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + return kvmppc_set_sregs_ivor(vcpu, sregs); +} + struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) { struct kvmppc_vcpu_44x *vcpu_44x; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index ef76acb455c3..8462b3a1c1c7 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -569,6 +569,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 
kvmppc_set_msr(vcpu, regs->msr); vcpu->arch.shared->srr0 = regs->srr0; vcpu->arch.shared->srr1 = regs->srr1; + kvmppc_set_pid(vcpu, regs->pid); vcpu->arch.shared->sprg0 = regs->sprg0; vcpu->arch.shared->sprg1 = regs->sprg1; vcpu->arch.shared->sprg2 = regs->sprg2; @@ -584,16 +585,165 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) return 0; } +static void get_sregs_base(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + u64 tb = get_tb(); + + sregs->u.e.features |= KVM_SREGS_E_BASE; + + sregs->u.e.csrr0 = vcpu->arch.csrr0; + sregs->u.e.csrr1 = vcpu->arch.csrr1; + sregs->u.e.mcsr = vcpu->arch.mcsr; + sregs->u.e.esr = vcpu->arch.esr; + sregs->u.e.dear = vcpu->arch.shared->dar; + sregs->u.e.tsr = vcpu->arch.tsr; + sregs->u.e.tcr = vcpu->arch.tcr; + sregs->u.e.dec = kvmppc_get_dec(vcpu, tb); + sregs->u.e.tb = tb; + sregs->u.e.vrsave = vcpu->arch.vrsave; +} + +static int set_sregs_base(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + if (!(sregs->u.e.features & KVM_SREGS_E_BASE)) + return 0; + + vcpu->arch.csrr0 = sregs->u.e.csrr0; + vcpu->arch.csrr1 = sregs->u.e.csrr1; + vcpu->arch.mcsr = sregs->u.e.mcsr; + vcpu->arch.esr = sregs->u.e.esr; + vcpu->arch.shared->dar = sregs->u.e.dear; + vcpu->arch.vrsave = sregs->u.e.vrsave; + vcpu->arch.tcr = sregs->u.e.tcr; + + if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC) + vcpu->arch.dec = sregs->u.e.dec; + + kvmppc_emulate_dec(vcpu); + + if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) { + /* + * FIXME: existing KVM timer handling is incomplete. + * TSR cannot be read by the guest, and its value in + * vcpu->arch is always zero. For now, just handle + * the case where the caller is trying to inject a + * decrementer interrupt. + */ + + if ((sregs->u.e.tsr & TSR_DIS) && + (vcpu->arch.tcr & TCR_DIE)) + kvmppc_core_queue_dec(vcpu); + } + + return 0; +} + +static void get_sregs_arch206(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + sregs->u.e.features |= KVM_SREGS_E_ARCH206; + + sregs->u.e.pir = 0; + sregs->u.e.mcsrr0 = vcpu->arch.mcsrr0; + sregs->u.e.mcsrr1 = vcpu->arch.mcsrr1; + sregs->u.e.decar = vcpu->arch.decar; + sregs->u.e.ivpr = vcpu->arch.ivpr; +} + +static int set_sregs_arch206(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + if (!(sregs->u.e.features & KVM_SREGS_E_ARCH206)) + return 0; + + if (sregs->u.e.pir != 0) + return -EINVAL; + + vcpu->arch.mcsrr0 = sregs->u.e.mcsrr0; + vcpu->arch.mcsrr1 = sregs->u.e.mcsrr1; + vcpu->arch.decar = sregs->u.e.decar; + vcpu->arch.ivpr = sregs->u.e.ivpr; + + return 0; +} + +void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + sregs->u.e.features |= KVM_SREGS_E_IVOR; + + sregs->u.e.ivor_low[0] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]; + sregs->u.e.ivor_low[1] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK]; + sregs->u.e.ivor_low[2] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]; + sregs->u.e.ivor_low[3] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE]; + sregs->u.e.ivor_low[4] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL]; + sregs->u.e.ivor_low[5] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT]; + sregs->u.e.ivor_low[6] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM]; + sregs->u.e.ivor_low[7] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL]; + sregs->u.e.ivor_low[8] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]; + sregs->u.e.ivor_low[9] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL]; + sregs->u.e.ivor_low[10] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER]; + sregs->u.e.ivor_low[11] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT]; + sregs->u.e.ivor_low[12] = 
vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG]; + sregs->u.e.ivor_low[13] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS]; + sregs->u.e.ivor_low[14] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS]; + sregs->u.e.ivor_low[15] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]; +} + +int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + if (!(sregs->u.e.features & KVM_SREGS_E_IVOR)) + return 0; + + vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = sregs->u.e.ivor_low[0]; + vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = sregs->u.e.ivor_low[1]; + vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = sregs->u.e.ivor_low[2]; + vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = sregs->u.e.ivor_low[3]; + vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = sregs->u.e.ivor_low[4]; + vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = sregs->u.e.ivor_low[5]; + vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = sregs->u.e.ivor_low[6]; + vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = sregs->u.e.ivor_low[7]; + vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = sregs->u.e.ivor_low[8]; + vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = sregs->u.e.ivor_low[9]; + vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = sregs->u.e.ivor_low[10]; + vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = sregs->u.e.ivor_low[11]; + vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = sregs->u.e.ivor_low[12]; + vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = sregs->u.e.ivor_low[13]; + vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = sregs->u.e.ivor_low[14]; + vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = sregs->u.e.ivor_low[15]; + + return 0; +} + int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { - return -ENOTSUPP; + sregs->pvr = vcpu->arch.pvr; + + get_sregs_base(vcpu, sregs); + get_sregs_arch206(vcpu, sregs); + kvmppc_core_get_sregs(vcpu, sregs); + return 0; } int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { - return -ENOTSUPP; + int ret; + + if (vcpu->arch.pvr != sregs->pvr) + return -EINVAL; + + ret = set_sregs_base(vcpu, sregs); + if (ret < 0) + return ret; + + ret = set_sregs_arch206(vcpu, sregs); + if (ret < 0) + return ret; + + return kvmppc_core_set_sregs(vcpu, sregs); } int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index 0c1af1267843..318dbc61ba44 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -97,6 +97,81 @@ int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, return 0; } +void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + sregs->u.e.features |= KVM_SREGS_E_ARCH206_MMU | KVM_SREGS_E_SPE | + KVM_SREGS_E_PM; + sregs->u.e.impl_id = KVM_SREGS_E_IMPL_FSL; + + sregs->u.e.impl.fsl.features = 0; + sregs->u.e.impl.fsl.svr = vcpu_e500->svr; + sregs->u.e.impl.fsl.hid0 = vcpu_e500->hid0; + sregs->u.e.impl.fsl.mcar = vcpu_e500->mcar; + + sregs->u.e.mas0 = vcpu_e500->mas0; + sregs->u.e.mas1 = vcpu_e500->mas1; + sregs->u.e.mas2 = vcpu_e500->mas2; + sregs->u.e.mas7_3 = ((u64)vcpu_e500->mas7 << 32) | vcpu_e500->mas3; + sregs->u.e.mas4 = vcpu_e500->mas4; + sregs->u.e.mas6 = vcpu_e500->mas6; + + sregs->u.e.mmucfg = mfspr(SPRN_MMUCFG); + sregs->u.e.tlbcfg[0] = vcpu_e500->tlb0cfg; + sregs->u.e.tlbcfg[1] = vcpu_e500->tlb1cfg; + sregs->u.e.tlbcfg[2] = 0; + sregs->u.e.tlbcfg[3] = 0; + + sregs->u.e.ivor_high[0] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]; + sregs->u.e.ivor_high[1] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA]; + sregs->u.e.ivor_high[2] = 
vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND]; + sregs->u.e.ivor_high[3] = + vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]; + + kvmppc_get_sregs_ivor(vcpu, sregs); +} + +int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + if (sregs->u.e.impl_id == KVM_SREGS_E_IMPL_FSL) { + vcpu_e500->svr = sregs->u.e.impl.fsl.svr; + vcpu_e500->hid0 = sregs->u.e.impl.fsl.hid0; + vcpu_e500->mcar = sregs->u.e.impl.fsl.mcar; + } + + if (sregs->u.e.features & KVM_SREGS_E_ARCH206_MMU) { + vcpu_e500->mas0 = sregs->u.e.mas0; + vcpu_e500->mas1 = sregs->u.e.mas1; + vcpu_e500->mas2 = sregs->u.e.mas2; + vcpu_e500->mas7 = sregs->u.e.mas7_3 >> 32; + vcpu_e500->mas3 = (u32)sregs->u.e.mas7_3; + vcpu_e500->mas4 = sregs->u.e.mas4; + vcpu_e500->mas6 = sregs->u.e.mas6; + } + + if (!(sregs->u.e.features & KVM_SREGS_E_IVOR)) + return 0; + + if (sregs->u.e.features & KVM_SREGS_E_SPE) { + vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = + sregs->u.e.ivor_high[0]; + vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA] = + sregs->u.e.ivor_high[1]; + vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] = + sregs->u.e.ivor_high[2]; + } + + if (sregs->u.e.features & KVM_SREGS_E_PM) { + vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = + sregs->u.e.ivor_high[3]; + } + + return kvmppc_set_sregs_ivor(vcpu, sregs); +} + struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) { struct kvmppc_vcpu_e500 *vcpu_e500; diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index e2fb47f035a5..69cd665a0caf 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved. + * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved. 
* * Author: Yu Liu, * @@ -78,8 +78,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) switch (sprn) { case SPRN_PID: - vcpu_e500->pid[0] = vcpu->arch.shadow_pid = - vcpu->arch.pid = spr_val; + kvmppc_set_pid(vcpu, spr_val); break; case SPRN_PID1: vcpu_e500->pid[1] = spr_val; break; diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index 56ac4523857d..b18fe353397d 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -675,6 +675,14 @@ int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu, return -1; } +void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + vcpu_e500->pid[0] = vcpu->arch.shadow_pid = + vcpu->arch.pid = pid; +} + void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500) { struct tlbe *tlbe; diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index 8f7a3aa03c26..141dce3c6810 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -114,6 +114,12 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) } } +u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb) +{ + u64 jd = tb - vcpu->arch.dec_jiffies; + return vcpu->arch.dec - jd; +} + /* XXX to do: * lhax * lhaux @@ -279,11 +285,8 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) case SPRN_DEC: { - u64 jd = get_tb() - vcpu->arch.dec_jiffies; - kvmppc_set_gpr(vcpu, rt, vcpu->arch.dec - jd); - pr_debug("mfDEC: %x - %llx = %lx\n", - vcpu->arch.dec, jd, - kvmppc_get_gpr(vcpu, rt)); + kvmppc_set_gpr(vcpu, rt, + kvmppc_get_dec(vcpu, get_tb())); break; } default: diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 9e6aa8bfd160..616dd516ca1f 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -175,7 +175,11 @@ int kvm_dev_ioctl_check_extension(long ext) int r; switch (ext) { +#ifdef CONFIG_BOOKE + case KVM_CAP_PPC_BOOKE_SREGS: +#else case KVM_CAP_PPC_SEGSTATE: +#endif case KVM_CAP_PPC_PAIRED_SINGLES: case KVM_CAP_PPC_UNSET_IRQ: case KVM_CAP_PPC_IRQ_LEVEL: diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 2f63ebeac639..55ef181521ff 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -543,6 +543,7 @@ struct kvm_ppc_pvinfo { #define KVM_CAP_ASYNC_PF 59 #define KVM_CAP_TSC_CONTROL 60 #define KVM_CAP_GET_TSC_KHZ 61 +#define KVM_CAP_PPC_BOOKE_SREGS 62 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3-71-gd317 From 8fa2206821953a50a3a02ea33fcfb3ced2fd9997 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 4 May 2011 16:31:04 +0300 Subject: KVM: make guest mode entry to be rcu quiescent state KVM does not hold any references to rcu protected data when it switches CPU into a guest mode. In fact switching to a guest mode is very similar to exiting to userspase from rcu point of view. In addition CPU may stay in a guest mode for quite a long time (up to one time slice). Lets treat guest mode as quiescent state, just like we do with user-mode execution. 
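As a rough, illustrative sketch (not from this patch) of where this runs: an arch vcpu run loop calls kvm_guest_enter() with preemption, and typically interrupts, already disabled just before switching into the guest, which is why the hunk below can assert BUG_ON(preemptible()) and report a quiescent state for the current CPU. The function name here is made up:

static int example_vcpu_run(struct kvm_vcpu *vcpu)
{
	preempt_disable();
	local_irq_disable();

	kvm_guest_enter();	/* accounts vtime and now also notes an RCU quiescent state */

	/* ... switch into the guest and run until the next VM exit ... */

	kvm_guest_exit();

	local_irq_enable();
	preempt_enable();
	return 0;
}
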
Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 0bc3d372e3cb..b9c3299c6a55 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -591,8 +591,17 @@ static inline int kvm_deassign_device(struct kvm *kvm, static inline void kvm_guest_enter(void) { + BUG_ON(preemptible()); account_system_vtime(current); current->flags |= PF_VCPU; + /* KVM does not hold any references to rcu protected data when it + * switches CPU into a guest mode. In fact switching to a guest mode + * is very similar to exiting to userspase from rcu point of view. In + * addition CPU may stay in a guest mode for quite a long time (up to + * one time slice). Lets treat guest mode as quiescent state, just like + * we do with user-mode execution. + */ + rcu_virt_note_context_switch(smp_processor_id()); } static inline void kvm_guest_exit(void) -- cgit v1.2.3-71-gd317 From 34ea646c9f8c18fd2e4332ff3b2b509f878c56f1 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 22 May 2011 18:55:10 +0200 Subject: net: add missing prefetch.h include Fixes build errors on s390 and probably other archs as well: In file included from net/ipv4/ip_forward.c:32:0: include/net/udp.h: In function 'udp_csum_outgoing': include/net/udp.h:141:2: error: implicit declaration of function 'prefetch' Signed-off-by: Heiko Carstens Signed-off-by: Linus Torvalds --- include/linux/skbuff.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 79aafbbf430a..827681540d6f 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -28,6 +28,7 @@ #include #include #include +#include #include /* Don't change this without changing skb_csum_unnecessary! */ -- cgit v1.2.3-71-gd317 From 0fcbe742eaac14bd5032b369c09e9d94be9058ad Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 22 May 2011 20:35:29 -0400 Subject: net: Remove prefetches from SKB list handlers. Noticed by Linus. Signed-off-by: David S. Miller --- include/linux/skbuff.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 827681540d6f..09901fdd73ae 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1783,7 +1783,7 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) #define skb_queue_walk(queue, skb) \ for (skb = (queue)->next; \ - prefetch(skb->next), (skb != (struct sk_buff *)(queue)); \ + (skb != (struct sk_buff *)(queue)); \ skb = skb->next) #define skb_queue_walk_safe(queue, skb, tmp) \ @@ -1792,7 +1792,7 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) skb = tmp, tmp = skb->next) #define skb_queue_walk_from(queue, skb) \ - for (; prefetch(skb->next), (skb != (struct sk_buff *)(queue)); \ + for (; (skb != (struct sk_buff *)(queue)); \ skb = skb->next) #define skb_queue_walk_from_safe(queue, skb, tmp) \ @@ -1802,7 +1802,7 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) #define skb_queue_reverse_walk(queue, skb) \ for (skb = (queue)->prev; \ - prefetch(skb->prev), (skb != (struct sk_buff *)(queue)); \ + (skb != (struct sk_buff *)(queue)); \ skb = skb->prev) #define skb_queue_reverse_walk_safe(queue, skb, tmp) \ -- cgit v1.2.3-71-gd317 From 67f11f4deda0818640decb19a28c537dbe5d429e Mon Sep 17 00:00:00 2001 From: "David S. 
Miller" Date: Sun, 22 May 2011 20:54:11 -0400 Subject: net: Remove linux/prefetch.h include from linux/skbuff.h No longer needed. Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 09901fdd73ae..8cac356b77b2 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -28,7 +28,6 @@ #include #include #include -#include #include /* Don't change this without changing skb_csum_unnecessary! */ -- cgit v1.2.3-71-gd317 From 8d8fc29d02a33e4bd5f4fa47823c1fd386346093 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Thu, 19 May 2011 21:39:10 +0000 Subject: netpoll: disable netpoll when enslave a device V3: rename NETDEV_ENSLAVE to NETDEV_JOIN Currently we do nothing when we enslave a net device which is running netconsole. Neil pointed out that we may get weird results in such case, so let's disable netpoll on the device being enslaved. I think it is too harsh to prevent the device being ensalved if it is running netconsole. By the way, this patch also removes the NETDEV_GOING_DOWN from netconsole netdev notifier, because netpoll will check if the device is running or not and we don't handle NETDEV_PRE_UP neither. This patch is based on net-next-2.6. Signed-off-by: WANG Cong Cc: Neil Horman Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 2 ++ drivers/net/netconsole.c | 26 +++++++++++++++++--------- include/linux/notifier.h | 1 + 3 files changed, 20 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 088fd845ffdf..f4960f516c39 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1640,6 +1640,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) } } + call_netdevice_notifiers(NETDEV_JOIN, slave_dev); + /* If this is the first slave, then we need to set the master's hardware * address to be the same as the slave's. */ if (is_zero_ether_addr(bond->dev->dev_addr)) diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index a83e101440fd..4190786de403 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -621,11 +621,10 @@ static int netconsole_netdev_event(struct notifier_block *this, bool stopped = false; if (!(event == NETDEV_CHANGENAME || event == NETDEV_UNREGISTER || - event == NETDEV_BONDING_DESLAVE || event == NETDEV_GOING_DOWN)) + event == NETDEV_BONDING_DESLAVE || event == NETDEV_JOIN)) goto done; spin_lock_irqsave(&target_list_lock, flags); -restart: list_for_each_entry(nt, &target_list, list) { netconsole_target_get(nt); if (nt->np.dev == dev) { @@ -633,6 +632,8 @@ restart: case NETDEV_CHANGENAME: strlcpy(nt->np.dev_name, dev->name, IFNAMSIZ); break; + case NETDEV_BONDING_DESLAVE: + case NETDEV_JOIN: case NETDEV_UNREGISTER: /* * rtnl_lock already held @@ -647,11 +648,7 @@ restart: dev_put(nt->np.dev); nt->np.dev = NULL; netconsole_target_put(nt); - goto restart; } - /* Fall through */ - case NETDEV_GOING_DOWN: - case NETDEV_BONDING_DESLAVE: nt->enabled = 0; stopped = true; break; @@ -660,10 +657,21 @@ restart: netconsole_target_put(nt); } spin_unlock_irqrestore(&target_list_lock, flags); - if (stopped && (event == NETDEV_UNREGISTER || event == NETDEV_BONDING_DESLAVE)) + if (stopped) { printk(KERN_INFO "netconsole: network logging stopped on " - "interface %s as it %s\n", dev->name, - event == NETDEV_UNREGISTER ? 
"unregistered" : "released slaves"); + "interface %s as it ", dev->name); + switch (event) { + case NETDEV_UNREGISTER: + printk(KERN_CONT "unregistered\n"); + break; + case NETDEV_BONDING_DESLAVE: + printk(KERN_CONT "released slaves\n"); + break; + case NETDEV_JOIN: + printk(KERN_CONT "is joining a master device\n"); + break; + } + } done: return NOTIFY_DONE; diff --git a/include/linux/notifier.h b/include/linux/notifier.h index 621dfa16acc0..a577762afbe7 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -211,6 +211,7 @@ static inline int notifier_to_errno(int ret) #define NETDEV_UNREGISTER_BATCH 0x0011 #define NETDEV_BONDING_DESLAVE 0x0012 #define NETDEV_NOTIFY_PEERS 0x0013 +#define NETDEV_JOIN 0x0014 #define SYS_DOWN 0x0001 /* Notify of system down */ #define SYS_RESTART SYS_DOWN -- cgit v1.2.3-71-gd317 From daf9209bb2c8b07ca025eac82e3d175534086c77 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Thu, 19 May 2011 21:39:12 +0000 Subject: net: rename NETDEV_BONDING_DESLAVE to NETDEV_RELEASE s/NETDEV_BONDING_DESLAVE/NETDEV_RELEASE/ as Andy suggested. Signed-off-by: WANG Cong Cc: Andy Gospodarek Cc: Neil Horman Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 2 +- drivers/net/netconsole.c | 6 +++--- include/linux/notifier.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index f4960f516c39..6dc428461541 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1974,7 +1974,7 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev) } block_netpoll_tx(); - netdev_bonding_change(bond_dev, NETDEV_BONDING_DESLAVE); + netdev_bonding_change(bond_dev, NETDEV_RELEASE); write_lock_bh(&bond->lock); slave = bond_get_slave_by_dev(bond, slave_dev); diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index 4190786de403..dfc82720065a 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -621,7 +621,7 @@ static int netconsole_netdev_event(struct notifier_block *this, bool stopped = false; if (!(event == NETDEV_CHANGENAME || event == NETDEV_UNREGISTER || - event == NETDEV_BONDING_DESLAVE || event == NETDEV_JOIN)) + event == NETDEV_RELEASE || event == NETDEV_JOIN)) goto done; spin_lock_irqsave(&target_list_lock, flags); @@ -632,7 +632,7 @@ static int netconsole_netdev_event(struct notifier_block *this, case NETDEV_CHANGENAME: strlcpy(nt->np.dev_name, dev->name, IFNAMSIZ); break; - case NETDEV_BONDING_DESLAVE: + case NETDEV_RELEASE: case NETDEV_JOIN: case NETDEV_UNREGISTER: /* @@ -664,7 +664,7 @@ static int netconsole_netdev_event(struct notifier_block *this, case NETDEV_UNREGISTER: printk(KERN_CONT "unregistered\n"); break; - case NETDEV_BONDING_DESLAVE: + case NETDEV_RELEASE: printk(KERN_CONT "released slaves\n"); break; case NETDEV_JOIN: diff --git a/include/linux/notifier.h b/include/linux/notifier.h index a577762afbe7..c0688b0168b3 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -209,7 +209,7 @@ static inline int notifier_to_errno(int ret) #define NETDEV_POST_TYPE_CHANGE 0x000F #define NETDEV_POST_INIT 0x0010 #define NETDEV_UNREGISTER_BATCH 0x0011 -#define NETDEV_BONDING_DESLAVE 0x0012 +#define NETDEV_RELEASE 0x0012 #define NETDEV_NOTIFY_PEERS 0x0013 #define NETDEV_JOIN 0x0014 -- cgit v1.2.3-71-gd317 From c4264f27e83968ddfe3f0cfe7a33adfb320e1e42 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sat, 21 May 2011 19:46:09 +0000 Subject: net: 
skb_trim explicitely check the linearity instead of data_len The purpose of the check on data_len is to check linearity, so use the inline helper for this. No overhead and more explicit. Signed-off-by: Emmanuel Grumbach Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 8cac356b77b2..aeaad97e6815 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1442,7 +1442,7 @@ extern int ___pskb_trim(struct sk_buff *skb, unsigned int len); static inline void __skb_trim(struct sk_buff *skb, unsigned int len) { - if (unlikely(skb->data_len)) { + if (unlikely(skb_is_nonlinear(skb))) { WARN_ON(1); return; } -- cgit v1.2.3-71-gd317 From 792d4b5cb16b684958c2590f77688667ddec1f61 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 22 May 2011 07:08:11 +0000 Subject: net: filter: move forward declarations to avoid compile warnings Get rid of this compile warning: In file included from arch/s390/kernel/compat_linux.c:37:0: include/linux/filter.h:139:23: warning: 'struct sk_buff' declared inside parameter list Signed-off-by: Heiko Carstens Signed-off-by: David S. Miller --- include/linux/filter.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 4609b85e559d..9ee3f9fb0b4a 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -131,6 +131,10 @@ struct sock_fprog { /* Required for SO_ATTACH_FILTER. */ #define SKF_LL_OFF (-0x200000) #ifdef __KERNEL__ + +struct sk_buff; +struct sock; + struct sk_filter { atomic_t refcnt; @@ -146,9 +150,6 @@ static inline unsigned int sk_filter_len(const struct sk_filter *fp) return fp->len * sizeof(struct sock_filter) + sizeof(*fp); } -struct sk_buff; -struct sock; - extern int sk_filter(struct sock *sk, struct sk_buff *skb); extern unsigned int sk_run_filter(const struct sk_buff *skb, const struct sock_filter *filter); -- cgit v1.2.3-71-gd317 From a1e4891fd48d298870b704c6eb48cba0da5ed6b1 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 22 May 2011 16:51:43 -0700 Subject: Remove prefetch() from and "netlabel_addrlist.h" Commit e66eed651fd1 ("list: remove prefetching from regular list iterators") removed the include of prefetch.h from list.h. The skbuff list traversal still had them. Quoth David Miller: "Please just remove the prefetches. Those are modelled after list.h as I intend to eventually convert SKB list handling to "struct list_head" but we're not there yet. Therefore if we kill prefetches from list.h we should kill it from these things in skbuff.h too." Requested-by: David Miller Signed-off-by: Linus Torvalds --- include/linux/skbuff.h | 7 +++---- net/netlabel/netlabel_addrlist.h | 8 ++++---- 2 files changed, 7 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 827681540d6f..16c9c091555d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -28,7 +28,6 @@ #include #include #include -#include #include /* Don't change this without changing skb_csum_unnecessary! 
*/ @@ -1783,7 +1782,7 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) #define skb_queue_walk(queue, skb) \ for (skb = (queue)->next; \ - prefetch(skb->next), (skb != (struct sk_buff *)(queue)); \ + skb != (struct sk_buff *)(queue); \ skb = skb->next) #define skb_queue_walk_safe(queue, skb, tmp) \ @@ -1792,7 +1791,7 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) skb = tmp, tmp = skb->next) #define skb_queue_walk_from(queue, skb) \ - for (; prefetch(skb->next), (skb != (struct sk_buff *)(queue)); \ + for (; skb != (struct sk_buff *)(queue); \ skb = skb->next) #define skb_queue_walk_from_safe(queue, skb, tmp) \ @@ -1802,7 +1801,7 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) #define skb_queue_reverse_walk(queue, skb) \ for (skb = (queue)->prev; \ - prefetch(skb->prev), (skb != (struct sk_buff *)(queue)); \ + skb != (struct sk_buff *)(queue); \ skb = skb->prev) #define skb_queue_reverse_walk_safe(queue, skb, tmp) \ diff --git a/net/netlabel/netlabel_addrlist.h b/net/netlabel/netlabel_addrlist.h index 1c1c093cf279..2b9644e19de0 100644 --- a/net/netlabel/netlabel_addrlist.h +++ b/net/netlabel/netlabel_addrlist.h @@ -96,12 +96,12 @@ static inline struct netlbl_af4list *__af4list_valid_rcu(struct list_head *s, #define netlbl_af4list_foreach(iter, head) \ for (iter = __af4list_valid((head)->next, head); \ - prefetch(iter->list.next), &iter->list != (head); \ + &iter->list != (head); \ iter = __af4list_valid(iter->list.next, head)) #define netlbl_af4list_foreach_rcu(iter, head) \ for (iter = __af4list_valid_rcu((head)->next, head); \ - prefetch(iter->list.next), &iter->list != (head); \ + &iter->list != (head); \ iter = __af4list_valid_rcu(iter->list.next, head)) #define netlbl_af4list_foreach_safe(iter, tmp, head) \ @@ -163,12 +163,12 @@ static inline struct netlbl_af6list *__af6list_valid_rcu(struct list_head *s, #define netlbl_af6list_foreach(iter, head) \ for (iter = __af6list_valid((head)->next, head); \ - prefetch(iter->list.next), &iter->list != (head); \ + &iter->list != (head); \ iter = __af6list_valid(iter->list.next, head)) #define netlbl_af6list_foreach_rcu(iter, head) \ for (iter = __af6list_valid_rcu((head)->next, head); \ - prefetch(iter->list.next), &iter->list != (head); \ + &iter->list != (head); \ iter = __af6list_valid_rcu(iter->list.next, head)) #define netlbl_af6list_foreach_safe(iter, tmp, head) \ -- cgit v1.2.3-71-gd317 From 586692a5a5fc5740c8a46abc0f2365495c2d7c5f Mon Sep 17 00:00:00 2001 From: Mandeep Singh Baines Date: Sun, 22 May 2011 22:10:22 -0700 Subject: watchdog: Disable watchdog when thresh is zero This restores the previous behavior of softlock_thresh. Currently, setting watchdog_thresh to zero causes the watchdog kthreads to consume a lot of CPU. In addition, the logic of proc_dowatchdog_thresh and proc_dowatchdog_enabled has been factored into proc_dowatchdog. 
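The user-visible effect is that zeroing the threshold now actually stops the lockup detectors instead of leaving the watchdog kthreads spinning. A small illustrative userspace snippet (not part of the patch; needs root, path taken from the kern_table entries in the diff below):

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/kernel/watchdog_thresh", "w");

	if (!f)
		return 1;
	fputs("0\n", f);	/* 0 disables both the hard- and soft-lockup detectors */
	return fclose(f) ? 1 : 0;
}
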
Signed-off-by: Mandeep Singh Baines Cc: Marcin Slusarz Cc: Don Zickus Cc: Peter Zijlstra Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/1306127423-3347-3-git-send-email-msb@chromium.org Signed-off-by: Ingo Molnar LKML-Reference: <20110517071018.GE22305@elte.hu> --- include/linux/nmi.h | 5 +++-- include/linux/sched.h | 1 - kernel/sysctl.c | 12 ++++++++---- kernel/watchdog.c | 25 +++++++++---------------- 4 files changed, 20 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index c536f8545f74..5317b8b2198f 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -47,9 +47,10 @@ static inline bool trigger_all_cpu_backtrace(void) int hw_nmi_is_cpu_stuck(struct pt_regs *); u64 hw_nmi_get_sample_period(void); extern int watchdog_enabled; +extern int watchdog_thresh; struct ctl_table; -extern int proc_dowatchdog_enabled(struct ctl_table *, int , - void __user *, size_t *, loff_t *); +extern int proc_dowatchdog(struct ctl_table *, int , + void __user *, size_t *, loff_t *); #endif #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 12211e1666e2..d8b2d0bec0d8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -315,7 +315,6 @@ extern int proc_dowatchdog_thresh(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); extern unsigned int softlockup_panic; -extern int softlockup_thresh; void lockup_detector_init(void); #else static inline void touch_softlockup_watchdog(void) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index c0bb32414b17..3dd0c46fa3bb 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -730,14 +730,16 @@ static struct ctl_table kern_table[] = { .data = &watchdog_enabled, .maxlen = sizeof (int), .mode = 0644, - .proc_handler = proc_dowatchdog_enabled, + .proc_handler = proc_dowatchdog, + .extra1 = &zero, + .extra2 = &one, }, { .procname = "watchdog_thresh", - .data = &softlockup_thresh, + .data = &watchdog_thresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dowatchdog_thresh, + .proc_handler = proc_dowatchdog, .extra1 = &neg_one, .extra2 = &sixty, }, @@ -755,7 +757,9 @@ static struct ctl_table kern_table[] = { .data = &watchdog_enabled, .maxlen = sizeof (int), .mode = 0644, - .proc_handler = proc_dowatchdog_enabled, + .proc_handler = proc_dowatchdog, + .extra1 = &zero, + .extra2 = &one, }, #endif #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index cf0e09f452e7..60301916f62e 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -28,7 +28,7 @@ #include int watchdog_enabled = 1; -int __read_mostly softlockup_thresh = 60; +int __read_mostly watchdog_thresh = 60; static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog); @@ -105,12 +105,12 @@ static unsigned long get_timestamp(int this_cpu) static unsigned long get_sample_period(void) { /* - * convert softlockup_thresh from seconds to ns + * convert watchdog_thresh from seconds to ns * the divide by 5 is to give hrtimer 5 chances to * increment before the hardlockup detector generates * a warning */ - return softlockup_thresh * (NSEC_PER_SEC / 5); + return watchdog_thresh * (NSEC_PER_SEC / 5); } /* Commands for resetting the watchdog */ @@ -182,7 +182,7 @@ static int is_softlockup(unsigned long touch_ts) unsigned long now = get_timestamp(smp_processor_id()); /* Warn about unreasonable delays: */ - if (time_after(now, touch_ts + softlockup_thresh)) + if 
(time_after(now, touch_ts + watchdog_thresh)) return now - touch_ts; return 0; @@ -501,19 +501,19 @@ static void watchdog_disable_all_cpus(void) /* sysctl functions */ #ifdef CONFIG_SYSCTL /* - * proc handler for /proc/sys/kernel/nmi_watchdog + * proc handler for /proc/sys/kernel/nmi_watchdog,watchdog_thresh */ -int proc_dowatchdog_enabled(struct ctl_table *table, int write, - void __user *buffer, size_t *length, loff_t *ppos) +int proc_dowatchdog(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; - ret = proc_dointvec(table, write, buffer, length, ppos); + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (ret || !write) goto out; - if (watchdog_enabled) + if (watchdog_enabled && watchdog_thresh) watchdog_enable_all_cpus(); else watchdog_disable_all_cpus(); @@ -521,13 +521,6 @@ int proc_dowatchdog_enabled(struct ctl_table *table, int write, out: return ret; } - -int proc_dowatchdog_thresh(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos) -{ - return proc_dointvec_minmax(table, write, buffer, lenp, ppos); -} #endif /* CONFIG_SYSCTL */ -- cgit v1.2.3-71-gd317 From 4eec42f392043063d0f019640b4ccc2a45570002 Mon Sep 17 00:00:00 2001 From: Mandeep Singh Baines Date: Sun, 22 May 2011 22:10:23 -0700 Subject: watchdog: Change the default timeout and configure nmi watchdog period based on watchdog_thresh Before the conversion of the NMI watchdog to perf event, the watchdog timeout was 5 seconds. Now it is 60 seconds. For my particular application, netbooks, 5 seconds was a better timeout. With a short timeout, we catch faults earlier and are able to send back a panic. With a 60 second timeout, the user is unlikely to wait and will instead hit the power button, causing us to lose the panic info. This change configures the NMI period to watchdog_thresh and sets the softlockup_thresh to watchdog_thresh * 2. In addition, watchdog_thresh was reduced to 10 seconds as suggested by Ingo Molnar. 
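To make the new coupling concrete, here is a standalone sketch of the arithmetic; the 2 GHz cpu_khz value is made up for illustration, and the formulas follow hw_nmi_get_sample_period(), get_softlockup_thresh() and get_sample_period() in the patch below:

#include <stdio.h>

#define NSEC_PER_SEC	1000000000ULL

int main(void)
{
	unsigned long long cpu_khz = 2000000ULL;	/* 2 GHz, example value */
	int watchdog_thresh = 10;			/* hard lockup limit, seconds */
	int softlockup_thresh = watchdog_thresh * 2;	/* soft limit is twice the hard one */

	/* NMI watchdog: one perf period equals watchdog_thresh seconds of cycles */
	unsigned long long nmi_period = cpu_khz * 1000ULL * watchdog_thresh;

	/* softlockup hrtimer fires five times per soft threshold */
	unsigned long long sample_ns = softlockup_thresh * (NSEC_PER_SEC / 5);

	printf("hard lockup after %d s (%llu cycles per NMI period)\n",
	       watchdog_thresh, nmi_period);
	printf("soft lockup after %d s, hrtimer every %llu ns\n",
	       softlockup_thresh, sample_ns);
	return 0;
}

With the defaults this gives a 10 second hard lockup limit, a 20 second soft lockup limit and a 4 second softlockup sample period.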
Signed-off-by: Mandeep Singh Baines Cc: Marcin Slusarz Cc: Don Zickus Cc: Peter Zijlstra Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/1306127423-3347-4-git-send-email-msb@chromium.org Signed-off-by: Ingo Molnar LKML-Reference: <20110517071642.GF22305@elte.hu> --- arch/x86/kernel/apic/hw_nmi.c | 4 ++-- include/linux/nmi.h | 2 +- kernel/watchdog.c | 19 +++++++++++++++---- 3 files changed, 18 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index 5260fe91bcb6..d5e57db0f7be 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -19,9 +19,9 @@ #include #ifdef CONFIG_HARDLOCKUP_DETECTOR -u64 hw_nmi_get_sample_period(void) +u64 hw_nmi_get_sample_period(int watchdog_thresh) { - return (u64)(cpu_khz) * 1000 * 60; + return (u64)(cpu_khz) * 1000 * watchdog_thresh; } #endif diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 5317b8b2198f..2d304efc89df 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -45,7 +45,7 @@ static inline bool trigger_all_cpu_backtrace(void) #ifdef CONFIG_LOCKUP_DETECTOR int hw_nmi_is_cpu_stuck(struct pt_regs *); -u64 hw_nmi_get_sample_period(void); +u64 hw_nmi_get_sample_period(int watchdog_thresh); extern int watchdog_enabled; extern int watchdog_thresh; struct ctl_table; diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 60301916f62e..6e63097fa73a 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -28,7 +28,7 @@ #include int watchdog_enabled = 1; -int __read_mostly watchdog_thresh = 60; +int __read_mostly watchdog_thresh = 10; static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog); @@ -91,6 +91,17 @@ static int __init nosoftlockup_setup(char *str) __setup("nosoftlockup", nosoftlockup_setup); /* */ +/* + * Hard-lockup warnings should be triggered after just a few seconds. Soft- + * lockups can have false positives under extreme conditions. So we generally + * want a higher threshold for soft lockups than for hard lockups. So we couple + * the thresholds with a factor: we make the soft threshold twice the amount of + * time the hard threshold is. + */ +static int get_softlockup_thresh() +{ + return watchdog_thresh * 2; +} /* * Returns seconds, approximately. 
We don't need nanosecond @@ -110,7 +121,7 @@ static unsigned long get_sample_period(void) * increment before the hardlockup detector generates * a warning */ - return watchdog_thresh * (NSEC_PER_SEC / 5); + return get_softlockup_thresh() * (NSEC_PER_SEC / 5); } /* Commands for resetting the watchdog */ @@ -182,7 +193,7 @@ static int is_softlockup(unsigned long touch_ts) unsigned long now = get_timestamp(smp_processor_id()); /* Warn about unreasonable delays: */ - if (time_after(now, touch_ts + watchdog_thresh)) + if (time_after(now, touch_ts + get_softlockup_thresh())) return now - touch_ts; return 0; @@ -359,7 +370,7 @@ static int watchdog_nmi_enable(int cpu) /* Try to register using hardware perf events */ wd_attr = &wd_hw_attr; - wd_attr->sample_period = hw_nmi_get_sample_period(); + wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh); event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback); if (!IS_ERR(event)) { printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n"); -- cgit v1.2.3-71-gd317 From 9ec2690758a5467f24beb301cca5098078073bba Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 20 May 2011 16:18:50 +0200 Subject: timerfd: Manage cancelable timers in timerfd Peter is concerned about the extra scan of CLOCK_REALTIME_COS in the timer interrupt. Yes, I did not think about it, because the solution was so elegant. I didn't like the extra list in timerfd when it was proposed some time ago, but with a rcu based list the list walk it's less horrible than the original global lock, which was held over the list iteration. Requested-by: Peter Zijlstra Signed-off-by: Thomas Gleixner Reviewed-by: Peter Zijlstra --- fs/timerfd.c | 105 ++++++++++++++++++++++++++++++++++-------------- include/linux/hrtimer.h | 6 ++- include/linux/time.h | 6 --- include/linux/timerfd.h | 4 +- kernel/hrtimer.c | 94 +++++++++++++++---------------------------- 5 files changed, 113 insertions(+), 102 deletions(-) (limited to 'include/linux') diff --git a/fs/timerfd.c b/fs/timerfd.c index 7e14c9e7c4ee..f67acbdda5e8 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -22,6 +22,7 @@ #include #include #include +#include struct timerfd_ctx { struct hrtimer tmr; @@ -31,9 +32,14 @@ struct timerfd_ctx { u64 ticks; int expired; int clockid; + struct rcu_head rcu; + struct list_head clist; bool might_cancel; }; +static LIST_HEAD(cancel_list); +static DEFINE_SPINLOCK(cancel_lock); + /* * This gets called when the timer event triggers. We set the "expired" * flag, but we do not re-arm the timer (in case it's necessary, @@ -53,28 +59,69 @@ static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr) return HRTIMER_NORESTART; } -static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) +/* + * Called when the clock was set to cancel the timers in the cancel + * list. + */ +void timerfd_clock_was_set(void) { - ktime_t remaining; + ktime_t moffs = ktime_get_monotonic_offset(); + struct timerfd_ctx *ctx; + unsigned long flags; - remaining = hrtimer_expires_remaining(&ctx->tmr); - return remaining.tv64 < 0 ? 
ktime_set(0, 0): remaining; + rcu_read_lock(); + list_for_each_entry_rcu(ctx, &cancel_list, clist) { + if (!ctx->might_cancel) + continue; + spin_lock_irqsave(&ctx->wqh.lock, flags); + if (ctx->moffs.tv64 != moffs.tv64) { + ctx->moffs.tv64 = KTIME_MAX; + wake_up_locked(&ctx->wqh); + } + spin_unlock_irqrestore(&ctx->wqh.lock, flags); + } + rcu_read_unlock(); } -static bool timerfd_canceled(struct timerfd_ctx *ctx) +static void timerfd_remove_cancel(struct timerfd_ctx *ctx) { - ktime_t moffs; + if (ctx->might_cancel) { + ctx->might_cancel = false; + spin_lock(&cancel_lock); + list_del_rcu(&ctx->clist); + spin_unlock(&cancel_lock); + } +} - if (!ctx->might_cancel) +static bool timerfd_canceled(struct timerfd_ctx *ctx) +{ + if (!ctx->might_cancel || ctx->moffs.tv64 != KTIME_MAX) return false; + ctx->moffs = ktime_get_monotonic_offset(); + return true; +} - moffs = ktime_get_monotonic_offset(); +static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags) +{ + if (ctx->clockid == CLOCK_REALTIME && (flags & TFD_TIMER_ABSTIME) && + (flags & TFD_TIMER_CANCEL_ON_SET)) { + if (!ctx->might_cancel) { + ctx->might_cancel = true; + spin_lock(&cancel_lock); + list_add_rcu(&ctx->clist, &cancel_list); + spin_unlock(&cancel_lock); + } + } else if (ctx->might_cancel) { + timerfd_remove_cancel(ctx); + } +} - if (moffs.tv64 == ctx->moffs.tv64) - return false; +static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) +{ + ktime_t remaining; - ctx->moffs = moffs; - return true; + remaining = hrtimer_expires_remaining(&ctx->tmr); + return remaining.tv64 < 0 ? ktime_set(0, 0): remaining; } static int timerfd_setup(struct timerfd_ctx *ctx, int flags, @@ -87,13 +134,6 @@ static int timerfd_setup(struct timerfd_ctx *ctx, int flags, htmode = (flags & TFD_TIMER_ABSTIME) ? HRTIMER_MODE_ABS: HRTIMER_MODE_REL; - ctx->might_cancel = false; - if (htmode == HRTIMER_MODE_ABS && ctx->clockid == CLOCK_REALTIME && - (flags & TFD_TIMER_CANCELON_SET)) { - clockid = CLOCK_REALTIME_COS; - ctx->might_cancel = true; - } - texp = timespec_to_ktime(ktmr->it_value); ctx->expired = 0; ctx->ticks = 0; @@ -113,8 +153,9 @@ static int timerfd_release(struct inode *inode, struct file *file) { struct timerfd_ctx *ctx = file->private_data; + timerfd_remove_cancel(ctx); hrtimer_cancel(&ctx->tmr); - kfree(ctx); + kfree_rcu(ctx, rcu); return 0; } @@ -149,20 +190,20 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count, else res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks); + /* + * If clock has changed, we do not care about the + * ticks and we do not rearm the timer. Userspace must + * reevaluate anyway. + */ + if (timerfd_canceled(ctx)) { + ctx->ticks = 0; + ctx->expired = 0; + res = -ECANCELED; + } + if (ctx->ticks) { ticks = ctx->ticks; - /* - * If clock has changed, we do not care about the - * ticks and we do not rearm the timer. Userspace must - * reevaluate anyway. - */ - if (timerfd_canceled(ctx)) { - ticks = 0; - ctx->expired = 0; - res = -ECANCELED; - } - if (ctx->expired && ctx->tintv.tv64) { /* * If tintv.tv64 != 0, this is a periodic timer that @@ -258,6 +299,8 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, return PTR_ERR(file); ctx = file->private_data; + timerfd_setup_cancel(ctx, flags); + /* * We need to stop the existing timer before reprogramming * it to the new values. 
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index eda4ccde0730..925c8c01db7b 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -155,7 +155,6 @@ enum hrtimer_base_type { HRTIMER_BASE_REALTIME, HRTIMER_BASE_MONOTONIC, HRTIMER_BASE_BOOTTIME, - HRTIMER_BASE_REALTIME_COS, HRTIMER_MAX_CLOCK_BASES, }; @@ -306,6 +305,11 @@ static inline int hrtimer_is_hres_active(struct hrtimer *timer) #endif extern void clock_was_set(void); +#ifdef CONFIG_TIMERFD +extern void timerfd_clock_was_set(void); +#else +static inline void timerfd_clock_was_set(void) { } +#endif extern void hrtimers_resume(void); extern ktime_t ktime_get(void); diff --git a/include/linux/time.h b/include/linux/time.h index a9242773eb24..b3061782dec3 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -302,12 +302,6 @@ struct itimerval { * The IDs of various hardware clocks: */ #define CLOCK_SGI_CYCLE 10 - -#ifdef __KERNEL__ -/* This clock is not exposed to user space */ -#define CLOCK_REALTIME_COS 15 -#endif - #define MAX_CLOCKS 16 #define CLOCKS_MASK (CLOCK_REALTIME | CLOCK_MONOTONIC) #define CLOCKS_MONO CLOCK_MONOTONIC diff --git a/include/linux/timerfd.h b/include/linux/timerfd.h index e9571fc8f1a0..d3b57fa12225 100644 --- a/include/linux/timerfd.h +++ b/include/linux/timerfd.h @@ -19,7 +19,7 @@ * shared O_* flags. */ #define TFD_TIMER_ABSTIME (1 << 0) -#define TFD_TIMER_CANCELON_SET (1 << 1) +#define TFD_TIMER_CANCEL_ON_SET (1 << 1) #define TFD_CLOEXEC O_CLOEXEC #define TFD_NONBLOCK O_NONBLOCK @@ -27,6 +27,6 @@ /* Flags for timerfd_create. */ #define TFD_CREATE_FLAGS TFD_SHARED_FCNTL_FLAGS /* Flags for timerfd_settime. */ -#define TFD_SETTIME_FLAGS (TFD_TIMER_ABSTIME | TFD_TIMER_CANCELON_SET) +#define TFD_SETTIME_FLAGS (TFD_TIMER_ABSTIME | TFD_TIMER_CANCEL_ON_SET) #endif /* _LINUX_TIMERFD_H */ diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index eabcbd781433..26dd32f9f6b2 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -78,11 +78,6 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = .get_time = &ktime_get_boottime, .resolution = KTIME_LOW_RES, }, - { - .index = CLOCK_REALTIME_COS, - .get_time = &ktime_get_real, - .resolution = KTIME_LOW_RES, - }, } }; @@ -90,7 +85,6 @@ static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = { [CLOCK_REALTIME] = HRTIMER_BASE_REALTIME, [CLOCK_MONOTONIC] = HRTIMER_BASE_MONOTONIC, [CLOCK_BOOTTIME] = HRTIMER_BASE_BOOTTIME, - [CLOCK_REALTIME_COS] = HRTIMER_BASE_REALTIME_COS, }; static inline int hrtimer_clockid_to_base(clockid_t clock_id) @@ -116,7 +110,6 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base) base->clock_base[HRTIMER_BASE_REALTIME].softirq_time = xtim; base->clock_base[HRTIMER_BASE_MONOTONIC].softirq_time = mono; base->clock_base[HRTIMER_BASE_BOOTTIME].softirq_time = boot; - base->clock_base[HRTIMER_BASE_REALTIME_COS].softirq_time = xtim; } /* @@ -486,8 +479,6 @@ static inline void debug_deactivate(struct hrtimer *timer) trace_hrtimer_cancel(timer); } -static void hrtimer_expire_cancelable(struct hrtimer_cpu_base *cpu_base); - /* High resolution timer related functions */ #ifdef CONFIG_HIGH_RES_TIMERS @@ -663,7 +654,33 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, return 0; } -static void retrigger_next_event(void *arg); +/* + * Retrigger next event is called after clock was set + * + * Called with interrupts disabled via on_each_cpu() + */ +static void retrigger_next_event(void *arg) +{ + struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases); + struct timespec 
realtime_offset, xtim, wtm, sleep; + + if (!hrtimer_hres_active()) + return; + + /* Optimized out for !HIGH_RES */ + get_xtime_and_monotonic_and_sleep_offset(&xtim, &wtm, &sleep); + set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec); + + /* Adjust CLOCK_REALTIME offset */ + raw_spin_lock(&base->lock); + base->clock_base[HRTIMER_BASE_REALTIME].offset = + timespec_to_ktime(realtime_offset); + base->clock_base[HRTIMER_BASE_BOOTTIME].offset = + timespec_to_ktime(sleep); + + hrtimer_force_reprogram(base, 0); + raw_spin_unlock(&base->lock); +} /* * Switch to high resolution mode @@ -711,45 +728,10 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, return 0; } static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { } +static inline void retrigger_next_event(void *arg) { } #endif /* CONFIG_HIGH_RES_TIMERS */ -/* - * Retrigger next event is called after clock was set - * - * Called with interrupts disabled via on_each_cpu() - */ -static void retrigger_next_event(void *arg) -{ - struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases); - struct timespec realtime_offset, xtim, wtm, sleep; - - if (!hrtimer_hres_active()) { - raw_spin_lock(&base->lock); - hrtimer_expire_cancelable(base); - raw_spin_unlock(&base->lock); - return; - } - - /* Optimized out for !HIGH_RES */ - get_xtime_and_monotonic_and_sleep_offset(&xtim, &wtm, &sleep); - set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec); - - /* Adjust CLOCK_REALTIME offset */ - raw_spin_lock(&base->lock); - base->clock_base[HRTIMER_BASE_REALTIME].offset = - timespec_to_ktime(realtime_offset); - base->clock_base[HRTIMER_BASE_BOOTTIME].offset = - timespec_to_ktime(sleep); - base->clock_base[HRTIMER_BASE_REALTIME_COS].offset = - timespec_to_ktime(realtime_offset); - - hrtimer_expire_cancelable(base); - - hrtimer_force_reprogram(base, 0); - raw_spin_unlock(&base->lock); -} - /* * Clock realtime was set * @@ -763,8 +745,11 @@ static void retrigger_next_event(void *arg) */ void clock_was_set(void) { +#ifdef CONFIG_HIGHRES_TIMERS /* Retrigger the CPU local events everywhere */ on_each_cpu(retrigger_next_event, NULL, 1); +#endif + timerfd_clock_was_set(); } /* @@ -777,6 +762,7 @@ void hrtimers_resume(void) KERN_INFO "hrtimers_resume() called with IRQs enabled!"); retrigger_next_event(NULL); + timerfd_clock_was_set(); } static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer) @@ -1240,22 +1226,6 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now) timer->state &= ~HRTIMER_STATE_CALLBACK; } -static void hrtimer_expire_cancelable(struct hrtimer_cpu_base *cpu_base) -{ - struct timerqueue_node *node; - struct hrtimer_clock_base *base; - ktime_t now = ktime_get_real(); - - base = &cpu_base->clock_base[HRTIMER_BASE_REALTIME_COS]; - - while ((node = timerqueue_getnext(&base->active))) { - struct hrtimer *timer; - - timer = container_of(node, struct hrtimer, node); - __run_hrtimer(timer, &now); - } -} - #ifdef CONFIG_HIGH_RES_TIMERS /* -- cgit v1.2.3-71-gd317 From f24444b01bf6c51c300fd3ffc73423383d747882 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 20 May 2011 13:02:58 +0200 Subject: hrtimers: Make struct hrtimer_cpu_base layout less stupid In the HIGHRES=y case we access the members at the end of struct hrtimer_cpu_base first and then the one at the beginning. Move the hrtimer data to front, so we have linear progressing access. 
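As a toy illustration of the layout argument (simplified field names and sizes, not the real structure), moving the clock base array behind the HIGH_RES fields makes the hot-path offsets increase monotonically instead of jumping from the end of the structure back to its start:

#include <stdio.h>
#include <stddef.h>

struct clock_base { long pad[8]; };

struct cpu_base_old {
	int lock;
	struct clock_base bases[3];	/* touched last on the hot path  */
	long expires_next;		/* touched first on the hot path */
};

struct cpu_base_new {
	int lock;
	long expires_next;		/* HIGH_RES fields up front */
	struct clock_base bases[3];
};

int main(void)
{
	printf("old: expires_next at %zu, bases at %zu\n",
	       offsetof(struct cpu_base_old, expires_next),
	       offsetof(struct cpu_base_old, bases));
	printf("new: expires_next at %zu, bases at %zu\n",
	       offsetof(struct cpu_base_new, expires_next),
	       offsetof(struct cpu_base_new, bases));
	return 0;
}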
Signed-off-by: Thomas Gleixner Reviewed-by: Peter Zijlstra --- include/linux/hrtimer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 925c8c01db7b..cc5f5f51db10 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -174,7 +174,6 @@ enum hrtimer_base_type { */ struct hrtimer_cpu_base { raw_spinlock_t lock; - struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; #ifdef CONFIG_HIGH_RES_TIMERS ktime_t expires_next; int hres_active; @@ -184,6 +183,7 @@ struct hrtimer_cpu_base { unsigned long nr_hangs; ktime_t max_hang_time; #endif + struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; }; static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time) -- cgit v1.2.3-71-gd317 From ab8177bc53e8ae3a3ba6d200ce2c2dae263f7ee5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 20 May 2011 13:05:15 +0200 Subject: hrtimers: Avoid touching inactive timer bases Instead of iterating over all possible timer bases avoid it by marking the active bases in the cpu base. Signed-off-by: Thomas Gleixner Reviewed-by: Peter Zijlstra --- include/linux/hrtimer.h | 7 +++++-- include/linux/thread_info.h | 2 +- kernel/hrtimer.c | 29 ++++++++++++++++++----------- kernel/posix-cpu-timers.c | 4 ++-- kernel/posix-timers.c | 2 +- kernel/time/alarmtimer.c | 4 ++-- 6 files changed, 29 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index cc5f5f51db10..771c95802edc 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -143,7 +143,8 @@ struct hrtimer_sleeper { */ struct hrtimer_clock_base { struct hrtimer_cpu_base *cpu_base; - clockid_t index; + int index; + clockid_t clockid; struct timerqueue_head active; ktime_t resolution; ktime_t (*get_time)(void); @@ -162,7 +163,7 @@ enum hrtimer_base_type { * struct hrtimer_cpu_base - the per cpu clock bases * @lock: lock protecting the base and associated clock bases * and timers - * @clock_base: array of clock bases for this cpu + * @active_bases: Bitfield to mark bases with active timers * @expires_next: absolute time of the next event which was scheduled * via clock_set_next_event() * @hres_active: State of high resolution mode @@ -171,9 +172,11 @@ enum hrtimer_base_type { * @nr_retries: Total number of hrtimer interrupt retries * @nr_hangs: Total number of hrtimer interrupt hangs * @max_hang_time: Maximum time spent in hrtimer_interrupt + * @clock_base: array of clock bases for this cpu */ struct hrtimer_cpu_base { raw_spinlock_t lock; + unsigned long active_bases; #ifdef CONFIG_HIGH_RES_TIMERS ktime_t expires_next; int hres_active; diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 20fc303947d3..8d03f079688c 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -29,7 +29,7 @@ struct restart_block { } futex; /* For nanosleep */ struct { - clockid_t index; + clockid_t clockid; struct timespec __user *rmtp; #ifdef CONFIG_COMPAT struct compat_timespec __user *compat_rmtp; diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 26dd32f9f6b2..1b08f6d67f12 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -64,17 +64,20 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = .clock_base = { { - .index = CLOCK_REALTIME, + .index = HRTIMER_BASE_REALTIME, + .clockid = CLOCK_REALTIME, .get_time = &ktime_get_real, .resolution = KTIME_LOW_RES, }, { - .index = CLOCK_MONOTONIC, + .index = 
HRTIMER_BASE_MONOTONIC, + .clockid = CLOCK_MONOTONIC, .get_time = &ktime_get, .resolution = KTIME_LOW_RES, }, { - .index = CLOCK_BOOTTIME, + .index = HRTIMER_BASE_BOOTTIME, + .clockid = CLOCK_BOOTTIME, .get_time = &ktime_get_boottime, .resolution = KTIME_LOW_RES, }, @@ -196,7 +199,7 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base, struct hrtimer_cpu_base *new_cpu_base; int this_cpu = smp_processor_id(); int cpu = hrtimer_get_target(this_cpu, pinned); - int basenum = hrtimer_clockid_to_base(base->index); + int basenum = base->index; again: new_cpu_base = &per_cpu(hrtimer_bases, cpu); @@ -857,6 +860,7 @@ static int enqueue_hrtimer(struct hrtimer *timer, debug_activate(timer); timerqueue_add(&base->active, &timer->node); + base->cpu_base->active_bases |= 1 << base->index; /* * HRTIMER_STATE_ENQUEUED is or'ed to the current state to preserve the @@ -898,6 +902,8 @@ static void __remove_hrtimer(struct hrtimer *timer, #endif } timerqueue_del(&base->active, &timer->node); + if (!timerqueue_getnext(&base->active)) + base->cpu_base->active_bases &= ~(1 << base->index); out: timer->state = newstate; } @@ -1235,7 +1241,6 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now) void hrtimer_interrupt(struct clock_event_device *dev) { struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); - struct hrtimer_clock_base *base; ktime_t expires_next, now, entry_time, delta; int i, retries = 0; @@ -1257,12 +1262,15 @@ retry: */ cpu_base->expires_next.tv64 = KTIME_MAX; - base = cpu_base->clock_base; - for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { - ktime_t basenow; + struct hrtimer_clock_base *base; struct timerqueue_node *node; + ktime_t basenow; + + if (!(cpu_base->active_bases & (1 << i))) + continue; + base = cpu_base->clock_base + i; basenow = ktime_add(now, base->offset); while ((node = timerqueue_getnext(&base->active))) { @@ -1295,7 +1303,6 @@ retry: __run_hrtimer(timer, &basenow); } - base++; } /* @@ -1526,7 +1533,7 @@ long __sched hrtimer_nanosleep_restart(struct restart_block *restart) struct timespec __user *rmtp; int ret = 0; - hrtimer_init_on_stack(&t.timer, restart->nanosleep.index, + hrtimer_init_on_stack(&t.timer, restart->nanosleep.clockid, HRTIMER_MODE_ABS); hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires); @@ -1578,7 +1585,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, restart = ¤t_thread_info()->restart_block; restart->fn = hrtimer_nanosleep_restart; - restart->nanosleep.index = t.timer.base->index; + restart->nanosleep.clockid = t.timer.base->clockid; restart->nanosleep.rmtp = rmtp; restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer); diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 0791b13df7bf..58f405b581e7 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -1514,7 +1514,7 @@ static int posix_cpu_nsleep(const clockid_t which_clock, int flags, return -EFAULT; restart_block->fn = posix_cpu_nsleep_restart; - restart_block->nanosleep.index = which_clock; + restart_block->nanosleep.clockid = which_clock; restart_block->nanosleep.rmtp = rmtp; restart_block->nanosleep.expires = timespec_to_ns(rqtp); } @@ -1523,7 +1523,7 @@ static int posix_cpu_nsleep(const clockid_t which_clock, int flags, static long posix_cpu_nsleep_restart(struct restart_block *restart_block) { - clockid_t which_clock = restart_block->nanosleep.index; + clockid_t which_clock = restart_block->nanosleep.clockid; struct timespec t; struct itimerspec it; int error; diff 
--git a/kernel/posix-timers.c b/kernel/posix-timers.c index e5498d7405c3..a1b5edf1bf92 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -1056,7 +1056,7 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags, */ long clock_nanosleep_restart(struct restart_block *restart_block) { - clockid_t which_clock = restart_block->nanosleep.index; + clockid_t which_clock = restart_block->nanosleep.clockid; struct k_clock *kc = clockid_to_kclock(which_clock); if (WARN_ON_ONCE(!kc || !kc->nsleep_restart)) diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index c6027fe9a4e6..2d966244ea60 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -494,7 +494,7 @@ static int update_rmtp(ktime_t exp, enum alarmtimer_type type, */ static long __sched alarm_timer_nsleep_restart(struct restart_block *restart) { - enum alarmtimer_type type = restart->nanosleep.index; + enum alarmtimer_type type = restart->nanosleep.clockid; ktime_t exp; struct timespec __user *rmtp; struct alarm alarm; @@ -573,7 +573,7 @@ static int alarm_timer_nsleep(const clockid_t which_clock, int flags, restart = ¤t_thread_info()->restart_block; restart->fn = alarm_timer_nsleep_restart; - restart->nanosleep.index = type; + restart->nanosleep.clockid = type; restart->nanosleep.expires = exp.tv64; restart->nanosleep.rmtp = rmtp; ret = -ERESTART_RESTARTBLOCK; -- cgit v1.2.3-71-gd317 From 68fa61c026057a39d6ccb850aa8785043afbee02 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 20 May 2011 23:14:04 +0200 Subject: hrtimers: Reorder clock bases The ordering of the clock bases is historical due to the CLOCK_REALTIME and CLOCK_MONOTONIC constants. Now the hrtimer bases have their own enumeration due to the gap between CLOCK_MONOTONIC and CLOCK_BOOTTIME. So we can be more clever as most timers end up on the CLOCK_MONOTONIC base due to the virtue of POSIX declaring that relative CLOCK_REALTIME timers are not affected by time changes. In desktop environments this is slowly changing as applications switch to absolute timers, but I've observed empty CLOCK_REALTIME bases often enough. There is no performance penalty or overhead when CLOCK_REALTIME timers are active, but in case they are not we don't skip over a full cache line. 
Signed-off-by: Thomas Gleixner Reviewed-by: Peter Zijlstra --- include/linux/hrtimer.h | 2 +- kernel/hrtimer.c | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 771c95802edc..51932e5acf7c 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -153,8 +153,8 @@ struct hrtimer_clock_base { }; enum hrtimer_base_type { - HRTIMER_BASE_REALTIME, HRTIMER_BASE_MONOTONIC, + HRTIMER_BASE_REALTIME, HRTIMER_BASE_BOOTTIME, HRTIMER_MAX_CLOCK_BASES, }; diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 1b08f6d67f12..c541ee527ecb 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -63,18 +63,18 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = .clock_base = { - { - .index = HRTIMER_BASE_REALTIME, - .clockid = CLOCK_REALTIME, - .get_time = &ktime_get_real, - .resolution = KTIME_LOW_RES, - }, { .index = HRTIMER_BASE_MONOTONIC, .clockid = CLOCK_MONOTONIC, .get_time = &ktime_get, .resolution = KTIME_LOW_RES, }, + { + .index = HRTIMER_BASE_REALTIME, + .clockid = CLOCK_REALTIME, + .get_time = &ktime_get_real, + .resolution = KTIME_LOW_RES, + }, { .index = HRTIMER_BASE_BOOTTIME, .clockid = CLOCK_BOOTTIME, -- cgit v1.2.3-71-gd317 From 442c8176d2efa468577738e3a99a6e051f6e8e55 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 8 May 2011 14:06:52 +0100 Subject: clocksource: add common mmio clocksource Add a generic mmio clocksource, covering both 32-bit and 16-bit register access sizes, for up or down counters. This can be used to easily create clocksources for simple counter-based implementations. Cc: Alessandro Rubini Cc: Colin Cross Cc: Eric Miao Cc: Erik Gilling Acked-by: "Hans J. Koch" Cc: Imre Kaloz Cc: Krzysztof Halasa Cc: Kukjin Kim Cc: Lennert Buytenhek Cc: Linus Walleij Cc: linux-omap@vger.kernel.org Acked-by: Nicolas Pitre Cc: Olof Johansson Tested-by: Sascha Hauer Reviewed-by: Thomas Gleixner Tested-by: Tony Lindgren Reviewed-by: Viresh Kumar Cc: Wan ZongShun Signed-off-by: Russell King --- drivers/clocksource/Kconfig | 3 ++ drivers/clocksource/Makefile | 1 + drivers/clocksource/mmio.c | 73 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/clocksource.h | 8 +++++ 4 files changed, 85 insertions(+) create mode 100644 drivers/clocksource/mmio.c (limited to 'include/linux') diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index 110aeeb52f9a..96c921910469 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -1,2 +1,5 @@ config CLKSRC_I8253 bool + +config CLKSRC_MMIO + bool diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index cfb6383b543a..b995942a5060 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -7,3 +7,4 @@ obj-$(CONFIG_SH_TIMER_CMT) += sh_cmt.o obj-$(CONFIG_SH_TIMER_MTU2) += sh_mtu2.o obj-$(CONFIG_SH_TIMER_TMU) += sh_tmu.o obj-$(CONFIG_CLKSRC_I8253) += i8253.o +obj-$(CONFIG_CLKSRC_MMIO) += mmio.o diff --git a/drivers/clocksource/mmio.c b/drivers/clocksource/mmio.c new file mode 100644 index 000000000000..c0e25125a55e --- /dev/null +++ b/drivers/clocksource/mmio.c @@ -0,0 +1,73 @@ +/* + * Generic MMIO clocksource support + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include +#include +#include +#include + +struct clocksource_mmio { + void __iomem *reg; + struct clocksource clksrc; +}; + +static inline struct clocksource_mmio *to_mmio_clksrc(struct clocksource *c) +{ + return container_of(c, struct clocksource_mmio, clksrc); +} + +cycle_t clocksource_mmio_readl_up(struct clocksource *c) +{ + return readl_relaxed(to_mmio_clksrc(c)->reg); +} + +cycle_t clocksource_mmio_readl_down(struct clocksource *c) +{ + return ~readl_relaxed(to_mmio_clksrc(c)->reg); +} + +cycle_t clocksource_mmio_readw_up(struct clocksource *c) +{ + return readw_relaxed(to_mmio_clksrc(c)->reg); +} + +cycle_t clocksource_mmio_readw_down(struct clocksource *c) +{ + return ~(unsigned)readw_relaxed(to_mmio_clksrc(c)->reg); +} + +/** + * clocksource_mmio_init - Initialize a simple mmio based clocksource + * @base: Virtual address of the clock readout register + * @name: Name of the clocksource + * @hz: Frequency of the clocksource in Hz + * @rating: Rating of the clocksource + * @bits: Number of valid bits + * @read: One of clocksource_mmio_read*() above + */ +int __init clocksource_mmio_init(void __iomem *base, const char *name, + unsigned long hz, int rating, unsigned bits, + cycle_t (*read)(struct clocksource *)) +{ + struct clocksource_mmio *cs; + + if (bits > 32 || bits < 16) + return -EINVAL; + + cs = kzalloc(sizeof(struct clocksource_mmio), GFP_KERNEL); + if (!cs) + return -ENOMEM; + + cs->reg = base; + cs->clksrc.name = name; + cs->clksrc.rating = rating; + cs->clksrc.read = read; + cs->clksrc.mask = CLOCKSOURCE_MASK(bits); + cs->clksrc.flags = CLOCK_SOURCE_IS_CONTINUOUS; + + return clocksource_register_hz(&cs->clksrc, hz); +} diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index c918fbd33ee5..d4646b48dc4a 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -337,6 +337,14 @@ static inline void update_vsyscall_tz(void) extern void timekeeping_notify(struct clocksource *clock); +extern cycle_t clocksource_mmio_readl_up(struct clocksource *); +extern cycle_t clocksource_mmio_readl_down(struct clocksource *); +extern cycle_t clocksource_mmio_readw_up(struct clocksource *); +extern cycle_t clocksource_mmio_readw_down(struct clocksource *); + +extern int clocksource_mmio_init(void __iomem *, const char *, + unsigned long, int, unsigned, cycle_t (*)(struct clocksource *)); + extern int clocksource_i8253_init(void); #endif /* _LINUX_CLOCKSOURCE_H */ -- cgit v1.2.3-71-gd317
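For reference, a sketch of how a platform timer driver might register a simple free-running counter with the new helper; the physical address, register offset, clock rate and rating below are hypothetical, and only clocksource_mmio_init() and clocksource_mmio_readl_up() come from the patch above:

#include <linux/clocksource.h>
#include <linux/init.h>
#include <linux/io.h>

#define EXAMPLE_TIMER_PHYS	0x10011000	/* hypothetical base address   */
#define EXAMPLE_TIMER_COUNT	0x04		/* free-running 32-bit counter */

static int __init example_timer_clocksource_init(void)
{
	void __iomem *base = ioremap(EXAMPLE_TIMER_PHYS, 0x1000);

	if (!base)
		return -ENOMEM;

	/* 32-bit up counter clocked at 24 MHz, clocksource rating 200 */
	return clocksource_mmio_init(base + EXAMPLE_TIMER_COUNT,
				     "example-timer", 24000000, 200, 32,
				     clocksource_mmio_readl_up);
}

A down-counting or 16-bit counter would instead pass clocksource_mmio_readl_down, clocksource_mmio_readw_up or clocksource_mmio_readw_down together with the matching bit width.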