From 83a196773b8bc6702f49df1eddc848180e350340 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Tue, 10 Mar 2020 11:34:27 +0100 Subject: drm/bridge: analogix_dp: Split bind() into probe() and real bind() Analogix_dp driver acquires all its resources in the ->bind() callback, what is a bit against the component driver based approach, where the driver initialization is split into a probe(), where all resources are gathered, and a bind(), where all objects are created and a compound driver is initialized. Extract all the resource related operations to analogix_dp_probe() and analogix_dp_remove(), then call them before/after registration of the device components from the main Exynos DP and Rockchip DP drivers. Also move the plat_data initialization to the probe() to make it available for the analogix_dp_probe() function. This fixes the multiple calls to the bind() of the DRM compound driver when the DP PHY driver is not yet loaded/probed: [drm] Exynos DRM: using 14400000.fimd device for DMA mapping operations exynos-drm exynos-drm: bound 14400000.fimd (ops fimd_component_ops [exynosdrm]) exynos-drm exynos-drm: bound 14450000.mixer (ops mixer_component_ops [exynosdrm]) exynos-dp 145b0000.dp-controller: no DP phy configured exynos-drm exynos-drm: failed to bind 145b0000.dp-controller (ops exynos_dp_ops [exynosdrm]): -517 exynos-drm exynos-drm: master bind failed: -517 ... [drm] Exynos DRM: using 14400000.fimd device for DMA mapping operations exynos-drm exynos-drm: bound 14400000.fimd (ops hdmi_enable [exynosdrm]) exynos-drm exynos-drm: bound 14450000.mixer (ops hdmi_enable [exynosdrm]) exynos-drm exynos-drm: bound 145b0000.dp-controller (ops hdmi_enable [exynosdrm]) exynos-drm exynos-drm: bound 14530000.hdmi (ops hdmi_enable [exynosdrm]) [drm] Supports vblank timestamp caching Rev 2 (21.10.2013). Console: switching to colour frame buffer device 170x48 exynos-drm exynos-drm: fb0: exynosdrmfb frame buffer device [drm] Initialized exynos 1.1.0 20180330 for exynos-drm on minor 1 ... Signed-off-by: Marek Szyprowski Acked-by: Andy Yan Reviewed-by: Andrzej Hajda Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/20200310103427.26048-1-m.szyprowski@samsung.com --- include/drm/bridge/analogix_dp.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/drm/bridge/analogix_dp.h b/include/drm/bridge/analogix_dp.h index 7aa2f93da49c..b0dcc07334a1 100644 --- a/include/drm/bridge/analogix_dp.h +++ b/include/drm/bridge/analogix_dp.h @@ -42,9 +42,10 @@ int analogix_dp_resume(struct analogix_dp_device *dp); int analogix_dp_suspend(struct analogix_dp_device *dp); struct analogix_dp_device * -analogix_dp_bind(struct device *dev, struct drm_device *drm_dev, - struct analogix_dp_plat_data *plat_data); +analogix_dp_probe(struct device *dev, struct analogix_dp_plat_data *plat_data); +int analogix_dp_bind(struct analogix_dp_device *dp, struct drm_device *drm_dev); void analogix_dp_unbind(struct analogix_dp_device *dp); +void analogix_dp_remove(struct analogix_dp_device *dp); int analogix_dp_start_crc(struct drm_connector *connector); int analogix_dp_stop_crc(struct drm_connector *connector); -- cgit v1.2.3-71-gd317 From 595571cca4dec8ac48122a6d2733f790c9a2cade Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Thu, 9 Apr 2020 19:12:07 +0100 Subject: ASoC: dapm: Fix regression introducing multiple copies of DAI widgets Refactoring was done to factor out the linking of DAI widgets into a helper function, dapm_add_valid_dai_widget. However when this was done, a regression was introduced for CODEC to CODEC links. It was over looked that the playback and capture variables persisted across all CODEC DAIs being processed, which ensured that the special DAI widget that is added for CODEC to CODEC links was only created once. This bug causes kernel panics during DAPM shutdown. To stick with the spirit of the original refactoring whilst fixing the issue, variables to hold the DAI widgets are added to snd_soc_dai_link. Furthermore the dapm_add_valid_dai_widget function is renamed to dapm_connect_dai_pair, the function only adds DAI widgets in the CODEC to CODEC case and its primary job is to add routes connecting two DAI widgets, making the original name quite misleading. Fixes: 6c4b13b51aa3 ("ASoC: Add dapm_add_valid_dai_widget helper") Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20200409181209.30130-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/soc.h | 3 ++ sound/soc/soc-dapm.c | 91 +++++++++++++++++++++++++++------------------------- 2 files changed, 51 insertions(+), 43 deletions(-) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index 13458e4fbb13..946f88a6c63d 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -790,6 +790,9 @@ struct snd_soc_dai_link { const struct snd_soc_pcm_stream *params; unsigned int num_params; + struct snd_soc_dapm_widget *playback_widget; + struct snd_soc_dapm_widget *capture_widget; + unsigned int dai_fmt; /* format to set on init */ enum snd_soc_dpcm_trigger trigger[2]; /* trigger type for DPCM */ diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 679ed60d850e..fe907f0cc709 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -4283,52 +4283,63 @@ int snd_soc_dapm_link_dai_widgets(struct snd_soc_card *card) return 0; } -static void dapm_add_valid_dai_widget(struct snd_soc_card *card, - struct snd_soc_pcm_runtime *rtd, - struct snd_soc_dai *codec_dai, - struct snd_soc_dai *cpu_dai) +static void dapm_connect_dai_routes(struct snd_soc_dapm_context *dapm, + struct snd_soc_dai *src_dai, + struct snd_soc_dapm_widget *src, + struct snd_soc_dapm_widget *dai, + struct snd_soc_dai *sink_dai, + struct snd_soc_dapm_widget *sink) { - struct snd_soc_dapm_widget *playback = NULL, *capture = NULL; - struct snd_soc_dapm_widget *codec, *playback_cpu, *capture_cpu; + dev_dbg(dapm->dev, "connected DAI link %s:%s -> %s:%s\n", + src_dai->component->name, src->name, + sink_dai->component->name, sink->name); + + if (dai) { + snd_soc_dapm_add_path(dapm, src, dai, NULL, NULL); + src = dai; + } + + snd_soc_dapm_add_path(dapm, src, sink, NULL, NULL); +} + +static void dapm_connect_dai_pair(struct snd_soc_card *card, + struct snd_soc_pcm_runtime *rtd, + struct snd_soc_dai *codec_dai, + struct snd_soc_dai *cpu_dai) +{ + struct snd_soc_dai_link *dai_link = rtd->dai_link; + struct snd_soc_dapm_widget *dai, *codec, *playback_cpu, *capture_cpu; struct snd_pcm_substream *substream; struct snd_pcm_str *streams = rtd->pcm->streams; - if (rtd->dai_link->params) { + if (dai_link->params) { playback_cpu = cpu_dai->capture_widget; capture_cpu = cpu_dai->playback_widget; } else { - playback = cpu_dai->playback_widget; - capture = cpu_dai->capture_widget; - playback_cpu = playback; - capture_cpu = capture; + playback_cpu = cpu_dai->playback_widget; + capture_cpu = cpu_dai->capture_widget; } /* connect BE DAI playback if widgets are valid */ codec = codec_dai->playback_widget; if (playback_cpu && codec) { - if (!playback) { + if (dai_link->params && !dai_link->playback_widget) { substream = streams[SNDRV_PCM_STREAM_PLAYBACK].substream; - playback = snd_soc_dapm_new_dai(card, substream, - "playback"); - if (IS_ERR(playback)) { + dai = snd_soc_dapm_new_dai(card, substream, "playback"); + if (IS_ERR(dai)) { dev_err(rtd->dev, "ASoC: Failed to create DAI %s: %ld\n", codec_dai->name, - PTR_ERR(playback)); + PTR_ERR(dai)); goto capture; } - - snd_soc_dapm_add_path(&card->dapm, playback_cpu, - playback, NULL, NULL); + dai_link->playback_widget = dai; } - dev_dbg(rtd->dev, "connected DAI link %s:%s -> %s:%s\n", - cpu_dai->component->name, playback_cpu->name, - codec_dai->component->name, codec->name); - - snd_soc_dapm_add_path(&card->dapm, playback, codec, - NULL, NULL); + dapm_connect_dai_routes(&card->dapm, cpu_dai, playback_cpu, + dai_link->playback_widget, + codec_dai, codec); } capture: @@ -4336,28 +4347,22 @@ capture: codec = codec_dai->capture_widget; if (codec && capture_cpu) { - if (!capture) { + if (dai_link->params && !dai_link->capture_widget) { substream = streams[SNDRV_PCM_STREAM_CAPTURE].substream; - capture = snd_soc_dapm_new_dai(card, substream, - "capture"); - if (IS_ERR(capture)) { + dai = snd_soc_dapm_new_dai(card, substream, "capture"); + if (IS_ERR(dai)) { dev_err(rtd->dev, "ASoC: Failed to create DAI %s: %ld\n", codec_dai->name, - PTR_ERR(capture)); + PTR_ERR(dai)); return; } - - snd_soc_dapm_add_path(&card->dapm, capture, - capture_cpu, NULL, NULL); + dai_link->capture_widget = dai; } - dev_dbg(rtd->dev, "connected DAI link %s:%s -> %s:%s\n", - codec_dai->component->name, codec->name, - cpu_dai->component->name, capture_cpu->name); - - snd_soc_dapm_add_path(&card->dapm, codec, capture, - NULL, NULL); + dapm_connect_dai_routes(&card->dapm, codec_dai, codec, + dai_link->capture_widget, + cpu_dai, capture_cpu); } } @@ -4369,12 +4374,12 @@ static void dapm_connect_dai_link_widgets(struct snd_soc_card *card, if (rtd->num_cpus == 1) { for_each_rtd_codec_dais(rtd, i, codec_dai) - dapm_add_valid_dai_widget(card, rtd, codec_dai, - rtd->cpu_dais[0]); + dapm_connect_dai_pair(card, rtd, codec_dai, + rtd->cpu_dais[0]); } else if (rtd->num_codecs == rtd->num_cpus) { for_each_rtd_codec_dais(rtd, i, codec_dai) - dapm_add_valid_dai_widget(card, rtd, codec_dai, - rtd->cpu_dais[i]); + dapm_connect_dai_pair(card, rtd, codec_dai, + rtd->cpu_dais[i]); } else { dev_err(card->dev, "N cpus to M codecs link is not supported yet\n"); -- cgit v1.2.3-71-gd317 From 20d60f6364474a978ab2a2146fb4c2bd9b6bbe3f Mon Sep 17 00:00:00 2001 From: Maciej Grochowski Date: Tue, 14 Apr 2020 00:17:03 -0400 Subject: include/linux/dmaengine: Typos fixes in API documentation Signed-off-by: Maciej Grochowski Link: https://lore.kernel.org/r/20200414041703.6661-1-maciek.grochowski@gmail.com Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 21065c04c4ac..31e58ec9f741 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -83,9 +83,9 @@ enum dma_transfer_direction { /** * Interleaved Transfer Request * ---------------------------- - * A chunk is collection of contiguous bytes to be transfered. + * A chunk is collection of contiguous bytes to be transferred. * The gap(in bytes) between two chunks is called inter-chunk-gap(ICG). - * ICGs may or maynot change between chunks. + * ICGs may or may not change between chunks. * A FRAME is the smallest series of contiguous {chunk,icg} pairs, * that when repeated an integral number of times, specifies the transfer. * A transfer template is specification of a Frame, the number of times @@ -1069,7 +1069,7 @@ static inline int dmaengine_terminate_all(struct dma_chan *chan) * dmaengine_synchronize() needs to be called before it is safe to free * any memory that is accessed by previously submitted descriptors or before * freeing any resources accessed from within the completion callback of any - * perviously submitted descriptors. + * previously submitted descriptors. * * This function can be called from atomic context as well as from within a * complete callback of a descriptor submitted on the same channel. @@ -1091,7 +1091,7 @@ static inline int dmaengine_terminate_async(struct dma_chan *chan) * * Synchronizes to the DMA channel termination to the current context. When this * function returns it is guaranteed that all transfers for previously issued - * descriptors have stopped and and it is safe to free the memory assoicated + * descriptors have stopped and it is safe to free the memory associated * with them. Furthermore it is guaranteed that all complete callback functions * for a previously submitted descriptor have finished running and it is safe to * free resources accessed from within the complete callbacks. -- cgit v1.2.3-71-gd317 From 5bd70440cb0a6f5c6a84019bb2aa93ab8310a5cd Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Tue, 14 Apr 2020 22:04:37 -0500 Subject: ASoC: soc-dai: revert all changes to DAI startup/shutdown sequence On Baytrail/Cherrytrail, the Atom/SST driver fails miserably: [ 9.741953] intel_sst_acpi 80860F28:00: FW Version 01.0c.00.01 [ 9.832992] intel_sst_acpi 80860F28:00: FW sent error response 0x40034 [ 9.833019] intel_sst_acpi 80860F28:00: FW alloc failed ret -4 [ 9.833028] intel_sst_acpi 80860F28:00: sst_get_stream returned err -5 [ 9.833033] sst-mfld-platform sst-mfld-platform: ASoC: DAI prepare error: -5 [ 9.833037] Baytrail Audio Port: ASoC: prepare FE Baytrail Audio Port failed [ 9.853942] intel_sst_acpi 80860F28:00: FW sent error response 0x40034 [ 9.853974] intel_sst_acpi 80860F28:00: FW alloc failed ret -4 [ 9.853984] intel_sst_acpi 80860F28:00: sst_get_stream returned err -5 [ 9.853990] sst-mfld-platform sst-mfld-platform: ASoC: DAI prepare error: -5 [ 9.853994] Baytrail Audio Port: ASoC: prepare FE Baytrail Audio Port failed Commit b56be800f1292 ("ASoC: soc-pcm: call snd_soc_dai_startup()/shutdown() once") was the initial problematic commit. Commit 1ba616bd1a6d5e ("ASoC: soc-dai: fix DAI startup/shutdown sequence") was an attempt to fix things but it does not work on Baytrail, reverting all changes seems necessary for now. Fixes: 1ba616bd1a6d5e ("ASoC: soc-dai: fix DAI startup/shutdown sequence") Signed-off-by: Pierre-Louis Bossart Tested-by: Hans de Goede Link: https://lore.kernel.org/r/20200415030437.23803-1-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- include/sound/soc-dai.h | 1 - sound/soc/soc-dai.c | 11 ++--------- 2 files changed, 2 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h index d4825b82c7a3..b33abe93b905 100644 --- a/include/sound/soc-dai.h +++ b/include/sound/soc-dai.h @@ -351,7 +351,6 @@ struct snd_soc_dai { /* bit field */ unsigned int probed:1; - unsigned int started[SNDRV_PCM_STREAM_LAST + 1]; }; static inline struct snd_soc_pcm_stream * diff --git a/sound/soc/soc-dai.c b/sound/soc/soc-dai.c index 8f3cad8db89a..31c41559034b 100644 --- a/sound/soc/soc-dai.c +++ b/sound/soc/soc-dai.c @@ -295,24 +295,17 @@ int snd_soc_dai_startup(struct snd_soc_dai *dai, { int ret = 0; - if (!dai->started[substream->stream] && - dai->driver->ops->startup) + if (dai->driver->ops->startup) ret = dai->driver->ops->startup(substream, dai); - if (ret == 0) - dai->started[substream->stream] = 1; - return ret; } void snd_soc_dai_shutdown(struct snd_soc_dai *dai, struct snd_pcm_substream *substream) { - if (dai->started[substream->stream] && - dai->driver->ops->shutdown) + if (dai->driver->ops->shutdown) dai->driver->ops->shutdown(substream, dai); - - dai->started[substream->stream] = 0; } int snd_soc_dai_prepare(struct snd_soc_dai *dai, -- cgit v1.2.3-71-gd317 From 3302363a27fb38a3581921a74aff855f4dcbfe0a Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 1 Apr 2020 12:46:22 -0400 Subject: virtio/test: fix up after IOTLB changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow building vringh without IOTLB (that's the case for userspace builds, will be useful for CAIF/VOD down the road too). Update for API tweaks. Don't include vringh with userspace builds. Cc: Jason Wang Cc: Eugenio Pérez Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vhost/Kconfig | 2 ++ drivers/vhost/test.c | 4 ++-- drivers/vhost/vringh.c | 5 +++++ include/linux/vringh.h | 6 ++++++ tools/virtio/Makefile | 5 +++-- tools/virtio/generated/autoconf.h | 0 6 files changed, 18 insertions(+), 4 deletions(-) create mode 100644 tools/virtio/generated/autoconf.h (limited to 'include') diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig index 362b832f5338..f0404ce255d1 100644 --- a/drivers/vhost/Kconfig +++ b/drivers/vhost/Kconfig @@ -3,6 +3,8 @@ config VHOST_IOTLB tristate help Generic IOTLB implementation for vhost and vringh. + This option is selected by any driver which needs to support + an IOMMU in software. config VHOST_RING tristate diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c index 394e2e5c772d..9a3a09005e03 100644 --- a/drivers/vhost/test.c +++ b/drivers/vhost/test.c @@ -120,7 +120,7 @@ static int vhost_test_open(struct inode *inode, struct file *f) vqs[VHOST_TEST_VQ] = &n->vqs[VHOST_TEST_VQ]; n->vqs[VHOST_TEST_VQ].handle_kick = handle_vq_kick; vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV, - VHOST_TEST_PKT_WEIGHT, VHOST_TEST_WEIGHT); + VHOST_TEST_PKT_WEIGHT, VHOST_TEST_WEIGHT, NULL); f->private_data = n; @@ -225,7 +225,7 @@ static long vhost_test_reset_owner(struct vhost_test *n) { void *priv = NULL; long err; - struct vhost_umem *umem; + struct vhost_iotlb *umem; mutex_lock(&n->dev.mutex); err = vhost_dev_check_owner(&n->dev); diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c index ee0491f579ac..ba8e0d6cfd97 100644 --- a/drivers/vhost/vringh.c +++ b/drivers/vhost/vringh.c @@ -13,9 +13,11 @@ #include #include #include +#if IS_REACHABLE(CONFIG_VHOST_IOTLB) #include #include #include +#endif #include static __printf(1,2) __cold void vringh_bad(const char *fmt, ...) @@ -1059,6 +1061,8 @@ int vringh_need_notify_kern(struct vringh *vrh) } EXPORT_SYMBOL(vringh_need_notify_kern); +#if IS_REACHABLE(CONFIG_VHOST_IOTLB) + static int iotlb_translate(const struct vringh *vrh, u64 addr, u64 len, struct bio_vec iov[], int iov_size, u32 perm) @@ -1416,5 +1420,6 @@ int vringh_need_notify_iotlb(struct vringh *vrh) } EXPORT_SYMBOL(vringh_need_notify_iotlb); +#endif MODULE_LICENSE("GPL"); diff --git a/include/linux/vringh.h b/include/linux/vringh.h index bd0503ca6f8f..9e2763d7c159 100644 --- a/include/linux/vringh.h +++ b/include/linux/vringh.h @@ -14,8 +14,10 @@ #include #include #include +#if IS_REACHABLE(CONFIG_VHOST_IOTLB) #include #include +#endif #include /* virtio_ring with information needed for host access. */ @@ -254,6 +256,8 @@ static inline __virtio64 cpu_to_vringh64(const struct vringh *vrh, u64 val) return __cpu_to_virtio64(vringh_is_little_endian(vrh), val); } +#if IS_REACHABLE(CONFIG_VHOST_IOTLB) + void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb); int vringh_init_iotlb(struct vringh *vrh, u64 features, @@ -284,4 +288,6 @@ void vringh_notify_disable_iotlb(struct vringh *vrh); int vringh_need_notify_iotlb(struct vringh *vrh); +#endif /* CONFIG_VHOST_IOTLB */ + #endif /* _LINUX_VRINGH_H */ diff --git a/tools/virtio/Makefile b/tools/virtio/Makefile index f33f32f1d208..b587b9a7a124 100644 --- a/tools/virtio/Makefile +++ b/tools/virtio/Makefile @@ -4,7 +4,7 @@ test: virtio_test vringh_test virtio_test: virtio_ring.o virtio_test.o vringh_test: vringh_test.o vringh.o virtio_ring.o -CFLAGS += -g -O2 -Werror -Wall -I. -I../include/ -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE +CFLAGS += -g -O2 -Werror -Wall -I. -I../include/ -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -include ../../include/linux/kconfig.h vpath %.c ../../drivers/virtio ../../drivers/vhost mod: ${MAKE} -C `pwd`/../.. M=`pwd`/vhost_test V=${V} @@ -22,7 +22,8 @@ OOT_CONFIGS=\ CONFIG_VHOST=m \ CONFIG_VHOST_NET=n \ CONFIG_VHOST_SCSI=n \ - CONFIG_VHOST_VSOCK=n + CONFIG_VHOST_VSOCK=n \ + CONFIG_VHOST_RING=n OOT_BUILD=KCFLAGS="-I "${OOT_VHOST} ${MAKE} -C ${OOT_KSRC} V=${V} oot-build: echo "UNSUPPORTED! Don't use the resulting modules in production!" diff --git a/tools/virtio/generated/autoconf.h b/tools/virtio/generated/autoconf.h new file mode 100644 index 000000000000..e69de29bb2d1 -- cgit v1.2.3-71-gd317 From 425a5070239aac22ed8fa4732eca624293f88546 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 9 Apr 2020 16:26:21 -0400 Subject: vdpa: allow a 32 bit vq alignment get_vq_align returns u16 now, but that's not enough for systems/devices with 64K pages. All callers assign it to a u32 variable anyway, so let's just change the return value type to u32. Reported-by: Arnd Bergmann Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/ifcvf/ifcvf_main.c | 2 +- drivers/vdpa/vdpa_sim/vdpa_sim.c | 2 +- include/linux/vdpa.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c index 28d9e5de5675..abf6a061cab6 100644 --- a/drivers/vdpa/ifcvf/ifcvf_main.c +++ b/drivers/vdpa/ifcvf/ifcvf_main.c @@ -226,7 +226,7 @@ static u32 ifcvf_vdpa_get_vendor_id(struct vdpa_device *vdpa_dev) return IFCVF_SUBSYS_VENDOR_ID; } -static u16 ifcvf_vdpa_get_vq_align(struct vdpa_device *vdpa_dev) +static u32 ifcvf_vdpa_get_vq_align(struct vdpa_device *vdpa_dev) { return IFCVF_QUEUE_ALIGNMENT; } diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 72863d01a12a..7957d2d41fc4 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -435,7 +435,7 @@ static u64 vdpasim_get_vq_state(struct vdpa_device *vdpa, u16 idx) return vrh->last_avail_idx; } -static u16 vdpasim_get_vq_align(struct vdpa_device *vdpa) +static u32 vdpasim_get_vq_align(struct vdpa_device *vdpa) { return VDPASIM_QUEUE_ALIGN; } diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 733acfb7ef84..5453af87a33e 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -164,7 +164,7 @@ struct vdpa_config_ops { u64 (*get_vq_state)(struct vdpa_device *vdev, u16 idx); /* Device ops */ - u16 (*get_vq_align)(struct vdpa_device *vdev); + u32 (*get_vq_align)(struct vdpa_device *vdev); u64 (*get_features)(struct vdpa_device *vdev); int (*set_features)(struct vdpa_device *vdev, u64 features); void (*set_config_cb)(struct vdpa_device *vdev, -- cgit v1.2.3-71-gd317 From 31ba514b2fd0495796b506a309eec5f91d747cf1 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 15 Apr 2020 10:44:08 -0700 Subject: virtio-balloon: Avoid using the word 'report' when referring to free page hinting It can be confusing to have multiple features within the same driver that are using the same verbage. As such this patch is creating a union of free_page_report_cmd_id with free_page_hint_cmd_id so that we can clean-up the userspace code a bit in terms of readability while maintaining the functionality of legacy code. Signed-off-by: Alexander Duyck Link: https://lore.kernel.org/r/20200415174318.13597.99753.stgit@localhost.localdomain Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_balloon.c | 2 +- include/uapi/linux/virtio_balloon.h | 11 +++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 8c964b3eebdf..51086a5afdd4 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -580,7 +580,7 @@ static u32 virtio_balloon_cmd_id_received(struct virtio_balloon *vb) if (test_and_clear_bit(VIRTIO_BALLOON_CONFIG_READ_CMD_ID, &vb->config_read_bitmap)) virtio_cread(vb->vdev, struct virtio_balloon_config, - free_page_report_cmd_id, + free_page_hint_cmd_id, &vb->cmd_id_received_cache); return vb->cmd_id_received_cache; diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h index 19974392d324..dc3e656470dd 100644 --- a/include/uapi/linux/virtio_balloon.h +++ b/include/uapi/linux/virtio_balloon.h @@ -48,8 +48,15 @@ struct virtio_balloon_config { __u32 num_pages; /* Number of pages we've actually got in balloon. */ __u32 actual; - /* Free page report command id, readonly by guest */ - __u32 free_page_report_cmd_id; + /* + * Free page hint command id, readonly by guest. + * Was previously named free_page_report_cmd_id so we + * need to carry that name for legacy support. + */ + union { + __u32 free_page_hint_cmd_id; + __u32 free_page_report_cmd_id; /* deprecated */ + }; /* Stores PAGE_POISON if page poisoning is in use */ __u32 poison_val; }; -- cgit v1.2.3-71-gd317 From 15064e70909cd8cca092ed6dd888bdbd9fefaae3 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 17 Apr 2020 03:17:26 -0400 Subject: virtio: drop vringh.h dependency Most virtio drivers don't depend on vringh, let's not pull that dependency, include it directly as needed. Signed-off-by: Michael S. Tsirkin --- include/linux/virtio.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 15f906e4a748..a493eac08393 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -9,7 +9,6 @@ #include #include #include -#include /** * virtqueue - a queue to register buffers for sending or receiving. -- cgit v1.2.3-71-gd317 From 2b07021a940ce1cdec736ec0cacad6af77717afc Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 16 Apr 2020 16:54:48 +0200 Subject: debugfs: remove return value of debugfs_create_u32() No one checks the return value of debugfs_create_u32(), as it's not needed, so make the return value void, so that no one tries to do so in the future. Signed-off-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20200416145448.GA1380878@kroah.com Signed-off-by: Greg Kroah-Hartman --- Documentation/filesystems/debugfs.rst | 4 ++-- fs/debugfs/file.c | 15 +++------------ include/linux/debugfs.h | 12 ++++-------- 3 files changed, 9 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/Documentation/filesystems/debugfs.rst b/Documentation/filesystems/debugfs.rst index db9ea0854040..6c032db235a5 100644 --- a/Documentation/filesystems/debugfs.rst +++ b/Documentation/filesystems/debugfs.rst @@ -79,8 +79,8 @@ created with any of:: struct dentry *parent, u8 *value); void debugfs_create_u16(const char *name, umode_t mode, struct dentry *parent, u16 *value); - struct dentry *debugfs_create_u32(const char *name, umode_t mode, - struct dentry *parent, u32 *value); + void debugfs_create_u32(const char *name, umode_t mode, + struct dentry *parent, u32 *value); void debugfs_create_u64(const char *name, umode_t mode, struct dentry *parent, u64 *value); diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 2d357680094c..ae49a55bda00 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -506,20 +506,11 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_u32_wo, NULL, debugfs_u32_set, "%llu\n"); * This function creates a file in debugfs with the given name that * contains the value of the variable @value. If the @mode variable is so * set, it can be read from, and written to. - * - * This function will return a pointer to a dentry if it succeeds. This - * pointer must be passed to the debugfs_remove() function when the file is - * to be removed (no automatic cleanup happens if your module is unloaded, - * you are responsible here.) If an error occurs, ERR_PTR(-ERROR) will be - * returned. - * - * If debugfs is not enabled in the kernel, the value ERR_PTR(-ENODEV) will - * be returned. */ -struct dentry *debugfs_create_u32(const char *name, umode_t mode, - struct dentry *parent, u32 *value) +void debugfs_create_u32(const char *name, umode_t mode, struct dentry *parent, + u32 *value) { - return debugfs_create_mode_unsafe(name, mode, parent, value, &fops_u32, + debugfs_create_mode_unsafe(name, mode, parent, value, &fops_u32, &fops_u32_ro, &fops_u32_wo); } EXPORT_SYMBOL_GPL(debugfs_create_u32); diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index a274d95fa66e..63cb3606dea7 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -103,8 +103,8 @@ void debugfs_create_u8(const char *name, umode_t mode, struct dentry *parent, u8 *value); void debugfs_create_u16(const char *name, umode_t mode, struct dentry *parent, u16 *value); -struct dentry *debugfs_create_u32(const char *name, umode_t mode, - struct dentry *parent, u32 *value); +void debugfs_create_u32(const char *name, umode_t mode, struct dentry *parent, + u32 *value); void debugfs_create_u64(const char *name, umode_t mode, struct dentry *parent, u64 *value); struct dentry *debugfs_create_ulong(const char *name, umode_t mode, @@ -250,12 +250,8 @@ static inline void debugfs_create_u8(const char *name, umode_t mode, static inline void debugfs_create_u16(const char *name, umode_t mode, struct dentry *parent, u16 *value) { } -static inline struct dentry *debugfs_create_u32(const char *name, umode_t mode, - struct dentry *parent, - u32 *value) -{ - return ERR_PTR(-ENODEV); -} +static inline void debugfs_create_u32(const char *name, umode_t mode, + struct dentry *parent, u32 *value) { } static inline void debugfs_create_u64(const char *name, umode_t mode, struct dentry *parent, u64 *value) { } -- cgit v1.2.3-71-gd317 From b1018eb75c98a2703a38f6d85ea6bd3bcc5f5238 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 8 Apr 2020 21:01:15 +0200 Subject: soc: tegra: fix tegra_pmc_get_suspend_mode definition When CONFIG_PM_SLEEP is disabled, the function is not defined, causing a link failure: arm-linux-gnueabi-ld: drivers/cpuidle/cpuidle-tegra.o: in function `tegra_cpuidle_probe': cpuidle-tegra.c:(.text+0x24): undefined reference to `tegra_pmc_get_suspend_mode' Change the #ifdef check according to the definition. Fixes: 382ac8e22b90 ("cpuidle: tegra: Disable CC6 state if LP2 unavailable") Signed-off-by: Arnd Bergmann Reviewed-by: Dmitry Osipenko Acked-by: Thierry Reding Signed-off-by: Arnd Bergmann --- include/soc/tegra/pmc.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/soc/tegra/pmc.h b/include/soc/tegra/pmc.h index 0dd52b0a5c1b..361cb64246f7 100644 --- a/include/soc/tegra/pmc.h +++ b/include/soc/tegra/pmc.h @@ -168,7 +168,6 @@ int tegra_io_pad_power_disable(enum tegra_io_pad id); int tegra_io_rail_power_on(unsigned int id); int tegra_io_rail_power_off(unsigned int id); -enum tegra_suspend_mode tegra_pmc_get_suspend_mode(void); void tegra_pmc_set_suspend_mode(enum tegra_suspend_mode mode); void tegra_pmc_enter_suspend_mode(enum tegra_suspend_mode mode); @@ -220,11 +219,6 @@ static inline int tegra_io_rail_power_off(unsigned int id) return -ENOSYS; } -static inline enum tegra_suspend_mode tegra_pmc_get_suspend_mode(void) -{ - return TEGRA_SUSPEND_NONE; -} - static inline void tegra_pmc_set_suspend_mode(enum tegra_suspend_mode mode) { } @@ -235,4 +229,13 @@ static inline void tegra_pmc_enter_suspend_mode(enum tegra_suspend_mode mode) #endif /* CONFIG_SOC_TEGRA_PMC */ +#if defined(CONFIG_SOC_TEGRA_PMC) && defined(CONFIG_PM_SLEEP) +enum tegra_suspend_mode tegra_pmc_get_suspend_mode(void); +#else +static inline enum tegra_suspend_mode tegra_pmc_get_suspend_mode(void) +{ + return TEGRA_SUSPEND_NONE; +} +#endif + #endif /* __SOC_TEGRA_PMC_H__ */ -- cgit v1.2.3-71-gd317 From e28b4fc652c1830796a4d3e09565f30c20f9a2cf Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 30 Mar 2020 14:27:37 -0400 Subject: svcrdma: Fix trace point use-after-free race I hit this while testing nfsd-5.7 with kernel memory debugging enabled on my server: Mar 30 13:21:45 klimt kernel: BUG: unable to handle page fault for address: ffff8887e6c279a8 Mar 30 13:21:45 klimt kernel: #PF: supervisor read access in kernel mode Mar 30 13:21:45 klimt kernel: #PF: error_code(0x0000) - not-present page Mar 30 13:21:45 klimt kernel: PGD 3601067 P4D 3601067 PUD 87c519067 PMD 87c3e2067 PTE 800ffff8193d8060 Mar 30 13:21:45 klimt kernel: Oops: 0000 [#1] SMP DEBUG_PAGEALLOC PTI Mar 30 13:21:45 klimt kernel: CPU: 2 PID: 1933 Comm: nfsd Not tainted 5.6.0-rc6-00040-g881e87a3c6f9 #1591 Mar 30 13:21:45 klimt kernel: Hardware name: Supermicro Super Server/X10SRL-F, BIOS 1.0c 09/09/2015 Mar 30 13:21:45 klimt kernel: RIP: 0010:svc_rdma_post_chunk_ctxt+0xab/0x284 [rpcrdma] Mar 30 13:21:45 klimt kernel: Code: c1 83 34 02 00 00 29 d0 85 c0 7e 72 48 8b bb a0 02 00 00 48 8d 54 24 08 4c 89 e6 48 8b 07 48 8b 40 20 e8 5a 5c 2b e1 41 89 c6 <8b> 45 20 89 44 24 04 8b 05 02 e9 01 00 85 c0 7e 33 e9 5e 01 00 00 Mar 30 13:21:45 klimt kernel: RSP: 0018:ffffc90000dfbdd8 EFLAGS: 00010286 Mar 30 13:21:45 klimt kernel: RAX: 0000000000000000 RBX: ffff8887db8db400 RCX: 0000000000000030 Mar 30 13:21:45 klimt kernel: RDX: 0000000000000040 RSI: 0000000000000000 RDI: 0000000000000246 Mar 30 13:21:45 klimt kernel: RBP: ffff8887e6c27988 R08: 0000000000000000 R09: 0000000000000004 Mar 30 13:21:45 klimt kernel: R10: ffffc90000dfbdd8 R11: 00c068ef00000000 R12: ffff8887eb4e4a80 Mar 30 13:21:45 klimt kernel: R13: ffff8887db8db634 R14: 0000000000000000 R15: ffff8887fc931000 Mar 30 13:21:45 klimt kernel: FS: 0000000000000000(0000) GS:ffff88885bd00000(0000) knlGS:0000000000000000 Mar 30 13:21:45 klimt kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 Mar 30 13:21:45 klimt kernel: CR2: ffff8887e6c279a8 CR3: 000000081b72e002 CR4: 00000000001606e0 Mar 30 13:21:45 klimt kernel: Call Trace: Mar 30 13:21:45 klimt kernel: ? svc_rdma_vec_to_sg+0x7f/0x7f [rpcrdma] Mar 30 13:21:45 klimt kernel: svc_rdma_send_write_chunk+0x59/0xce [rpcrdma] Mar 30 13:21:45 klimt kernel: svc_rdma_sendto+0xf9/0x3ae [rpcrdma] Mar 30 13:21:45 klimt kernel: ? nfsd_destroy+0x51/0x51 [nfsd] Mar 30 13:21:45 klimt kernel: svc_send+0x105/0x1e3 [sunrpc] Mar 30 13:21:45 klimt kernel: nfsd+0xf2/0x149 [nfsd] Mar 30 13:21:45 klimt kernel: kthread+0xf6/0xfb Mar 30 13:21:45 klimt kernel: ? kthread_queue_delayed_work+0x74/0x74 Mar 30 13:21:45 klimt kernel: ret_from_fork+0x3a/0x50 Mar 30 13:21:45 klimt kernel: Modules linked in: ocfs2_dlmfs ocfs2_stack_o2cb ocfs2_dlm ocfs2_nodemanager ocfs2_stackglue ib_umad ib_ipoib mlx4_ib sb_edac x86_pkg_temp_thermal iTCO_wdt iTCO_vendor_support coretemp kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel glue_helper crypto_simd cryptd pcspkr rpcrdma i2c_i801 rdma_ucm lpc_ich mfd_core ib_iser rdma_cm iw_cm ib_cm mei_me raid0 libiscsi mei sg scsi_transport_iscsi ioatdma wmi ipmi_si ipmi_devintf ipmi_msghandler acpi_power_meter nfsd nfs_acl lockd auth_rpcgss grace sunrpc ip_tables xfs libcrc32c mlx4_en sd_mod sr_mod cdrom mlx4_core crc32c_intel igb nvme i2c_algo_bit ahci i2c_core libahci nvme_core dca libata t10_pi qedr dm_mirror dm_region_hash dm_log dm_mod dax qede qed crc8 ib_uverbs ib_core Mar 30 13:21:45 klimt kernel: CR2: ffff8887e6c279a8 Mar 30 13:21:45 klimt kernel: ---[ end trace 87971d2ad3429424 ]--- It's absolutely not safe to use resources pointed to by the @send_wr argument of ib_post_send() _after_ that function returns. Those resources are typically freed by the Send completion handler, which can run before ib_post_send() returns. Thus the trace points currently around ib_post_send() in the server's RPC/RDMA transport are a hazard, even when they are disabled. Rearrange them so that they touch the Work Request only _before_ ib_post_send() is invoked. Fixes: bd2abef33394 ("svcrdma: Trace key RDMA API events") Fixes: 4201c7464753 ("svcrdma: Introduce svc_rdma_send_ctxt") Signed-off-by: Chuck Lever --- include/trace/events/rpcrdma.h | 50 +++++++++++++++++++++++++---------- net/sunrpc/xprtrdma/svc_rdma_rw.c | 3 +-- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 16 ++++++----- 3 files changed, 46 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 051f26fedc4d..596e0a803477 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -1695,17 +1695,15 @@ DECLARE_EVENT_CLASS(svcrdma_sendcomp_event, TRACE_EVENT(svcrdma_post_send, TP_PROTO( - const struct ib_send_wr *wr, - int status + const struct ib_send_wr *wr ), - TP_ARGS(wr, status), + TP_ARGS(wr), TP_STRUCT__entry( __field(const void *, cqe) __field(unsigned int, num_sge) __field(u32, inv_rkey) - __field(int, status) ), TP_fast_assign( @@ -1713,12 +1711,11 @@ TRACE_EVENT(svcrdma_post_send, __entry->num_sge = wr->num_sge; __entry->inv_rkey = (wr->opcode == IB_WR_SEND_WITH_INV) ? wr->ex.invalidate_rkey : 0; - __entry->status = status; ), - TP_printk("cqe=%p num_sge=%u inv_rkey=0x%08x status=%d", + TP_printk("cqe=%p num_sge=%u inv_rkey=0x%08x", __entry->cqe, __entry->num_sge, - __entry->inv_rkey, __entry->status + __entry->inv_rkey ) ); @@ -1783,26 +1780,23 @@ TRACE_EVENT(svcrdma_wc_receive, TRACE_EVENT(svcrdma_post_rw, TP_PROTO( const void *cqe, - int sqecount, - int status + int sqecount ), - TP_ARGS(cqe, sqecount, status), + TP_ARGS(cqe, sqecount), TP_STRUCT__entry( __field(const void *, cqe) __field(int, sqecount) - __field(int, status) ), TP_fast_assign( __entry->cqe = cqe; __entry->sqecount = sqecount; - __entry->status = status; ), - TP_printk("cqe=%p sqecount=%d status=%d", - __entry->cqe, __entry->sqecount, __entry->status + TP_printk("cqe=%p sqecount=%d", + __entry->cqe, __entry->sqecount ) ); @@ -1870,6 +1864,34 @@ DECLARE_EVENT_CLASS(svcrdma_sendqueue_event, DEFINE_SQ_EVENT(full); DEFINE_SQ_EVENT(retry); +TRACE_EVENT(svcrdma_sq_post_err, + TP_PROTO( + const struct svcxprt_rdma *rdma, + int status + ), + + TP_ARGS(rdma, status), + + TP_STRUCT__entry( + __field(int, avail) + __field(int, depth) + __field(int, status) + __string(addr, rdma->sc_xprt.xpt_remotebuf) + ), + + TP_fast_assign( + __entry->avail = atomic_read(&rdma->sc_sq_avail); + __entry->depth = rdma->sc_sq_depth; + __entry->status = status; + __assign_str(addr, rdma->sc_xprt.xpt_remotebuf); + ), + + TP_printk("addr=%s sc_sq_avail=%d/%d status=%d", + __get_str(addr), __entry->avail, __entry->depth, + __entry->status + ) +); + #endif /* _TRACE_RPCRDMA_H */ #include diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index bd7c195d872e..23c2d3ce0dc9 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -323,8 +323,6 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc) if (atomic_sub_return(cc->cc_sqecount, &rdma->sc_sq_avail) > 0) { ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr); - trace_svcrdma_post_rw(&cc->cc_cqe, - cc->cc_sqecount, ret); if (ret) break; return 0; @@ -337,6 +335,7 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc) trace_svcrdma_sq_retry(rdma); } while (1); + trace_svcrdma_sq_post_err(rdma, ret); set_bit(XPT_CLOSE, &xprt->xpt_flags); /* If even one was posted, there will be a completion. */ diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 90cba3058f04..6a87a2379e91 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -322,15 +322,17 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct ib_send_wr *wr) } svc_xprt_get(&rdma->sc_xprt); + trace_svcrdma_post_send(wr); ret = ib_post_send(rdma->sc_qp, wr, NULL); - trace_svcrdma_post_send(wr, ret); - if (ret) { - set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); - svc_xprt_put(&rdma->sc_xprt); - wake_up(&rdma->sc_send_wait); - } - break; + if (ret) + break; + return 0; } + + trace_svcrdma_sq_post_err(rdma, ret); + set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); + svc_xprt_put(&rdma->sc_xprt); + wake_up(&rdma->sc_send_wait); return ret; } -- cgit v1.2.3-71-gd317 From 23cf1ee1f1869966b75518c59b5cbda4c6c92450 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 31 Mar 2020 17:02:33 -0400 Subject: svcrdma: Fix leak of svc_rdma_recv_ctxt objects Utilize the xpo_release_rqst transport method to ensure that each rqstp's svc_rdma_recv_ctxt object is released even when the server cannot return a Reply for that rqstp. Without this fix, each RPC whose Reply cannot be sent leaks one svc_rdma_recv_ctxt. This is a 2.5KB structure, a 4KB DMA-mapped Receive buffer, and any pages that might be part of the Reply message. The leak is infrequent unless the network fabric is unreliable or Kerberos is in use, as GSS sequence window overruns, which result in connection loss, are more common on fast transports. Fixes: 3a88092ee319 ("svcrdma: Preserve Receive buffer until svc_rdma_sendto") Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 1 + net/sunrpc/svc_xprt.c | 3 --- net/sunrpc/svcsock.c | 4 ++++ net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 22 ++++++++++++++++++++++ net/sunrpc/xprtrdma/svc_rdma_sendto.c | 13 +++---------- net/sunrpc/xprtrdma/svc_rdma_transport.c | 5 ----- 6 files changed, 30 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 78fe2ac6dc6c..cbcfbd0521e3 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -170,6 +170,7 @@ extern bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma); extern void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma, struct svc_rdma_recv_ctxt *ctxt); extern void svc_rdma_flush_recv_queues(struct svcxprt_rdma *rdma); +extern void svc_rdma_release_rqst(struct svc_rqst *rqstp); extern int svc_rdma_recvfrom(struct svc_rqst *); /* svc_rdma_rw.c */ diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 92f2c08c67a5..2284ff038dad 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -908,9 +908,6 @@ int svc_send(struct svc_rqst *rqstp) if (!xprt) goto out; - /* release the receive skb before sending the reply */ - xprt->xpt_ops->xpo_release_rqst(rqstp); - /* calculate over-all length */ xb = &rqstp->rq_res; xb->len = xb->head[0].iov_len + diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 519cf9c4f8fd..023514e392b3 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -527,6 +527,8 @@ static int svc_udp_sendto(struct svc_rqst *rqstp) unsigned int uninitialized_var(sent); int err; + svc_release_udp_skb(rqstp); + svc_set_cmsg_data(rqstp, cmh); err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent); @@ -1076,6 +1078,8 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp) unsigned int uninitialized_var(sent); int err; + svc_release_skb(rqstp); + err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, marker, &sent); xdr_free_bvec(xdr); if (err < 0 || sent != (xdr->len + sizeof(marker))) diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 54469b72b25f..efa5fcb5793f 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -223,6 +223,26 @@ void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma, svc_rdma_recv_ctxt_destroy(rdma, ctxt); } +/** + * svc_rdma_release_rqst - Release transport-specific per-rqst resources + * @rqstp: svc_rqst being released + * + * Ensure that the recv_ctxt is released whether or not a Reply + * was sent. For example, the client could close the connection, + * or svc_process could drop an RPC, before the Reply is sent. + */ +void svc_rdma_release_rqst(struct svc_rqst *rqstp) +{ + struct svc_rdma_recv_ctxt *ctxt = rqstp->rq_xprt_ctxt; + struct svc_xprt *xprt = rqstp->rq_xprt; + struct svcxprt_rdma *rdma = + container_of(xprt, struct svcxprt_rdma, sc_xprt); + + rqstp->rq_xprt_ctxt = NULL; + if (ctxt) + svc_rdma_recv_ctxt_put(rdma, ctxt); +} + static int __svc_rdma_post_recv(struct svcxprt_rdma *rdma, struct svc_rdma_recv_ctxt *ctxt) { @@ -820,6 +840,8 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) __be32 *p; int ret; + rqstp->rq_xprt_ctxt = NULL; + spin_lock(&rdma_xprt->sc_rq_dto_lock); ctxt = svc_rdma_next_recv_ctxt(&rdma_xprt->sc_read_complete_q); if (ctxt) { diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 6a87a2379e91..b6c8643867f2 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -926,12 +926,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) ret = svc_rdma_send_reply_msg(rdma, sctxt, rctxt, rqstp); if (ret < 0) goto err1; - ret = 0; - -out: - rqstp->rq_xprt_ctxt = NULL; - svc_rdma_recv_ctxt_put(rdma, rctxt); - return ret; + return 0; err2: if (ret != -E2BIG && ret != -EINVAL) @@ -940,16 +935,14 @@ out: ret = svc_rdma_send_error_msg(rdma, sctxt, rqstp); if (ret < 0) goto err1; - ret = 0; - goto out; + return 0; err1: svc_rdma_send_ctxt_put(rdma, sctxt); err0: trace_svcrdma_send_failed(rqstp, ret); set_bit(XPT_CLOSE, &xprt->xpt_flags); - ret = -ENOTCONN; - goto out; + return -ENOTCONN; } /** diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 8bb99980ae85..ea54785db4f8 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -71,7 +71,6 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, struct sockaddr *sa, int salen, int flags); static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt); -static void svc_rdma_release_rqst(struct svc_rqst *); static void svc_rdma_detach(struct svc_xprt *xprt); static void svc_rdma_free(struct svc_xprt *xprt); static int svc_rdma_has_wspace(struct svc_xprt *xprt); @@ -552,10 +551,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) return NULL; } -static void svc_rdma_release_rqst(struct svc_rqst *rqstp) -{ -} - /* * When connected, an svc_xprt has at least two references: * -- cgit v1.2.3-71-gd317 From c4b4c2a78a9fc0c532c58504e8cb5441224ff1d9 Mon Sep 17 00:00:00 2001 From: Zhiqiang Liu Date: Mon, 13 Apr 2020 13:12:10 +0800 Subject: buffer: remove useless comment and WB_REASON_FREE_MORE_MEM, reason. free_more_memory func has been completely removed in commit bc48f001de12 ("buffer: eliminate the need to call free_more_memory() in __getblk_slow()") So comment and `WB_REASON_FREE_MORE_MEM` reason about free_more_memory are no longer needed. Fixes: bc48f001de12 ("buffer: eliminate the need to call free_more_memory() in __getblk_slow()") Reviewed-by: Jan Kara Signed-off-by: Zhiqiang Liu Signed-off-by: Jens Axboe --- fs/buffer.c | 2 +- include/linux/backing-dev-defs.h | 1 - include/trace/events/writeback.h | 1 - 3 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/fs/buffer.c b/fs/buffer.c index f73276d746bb..763de99508de 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -967,7 +967,7 @@ grow_dev_page(struct block_device *bdev, sector_t block, struct page *page; struct buffer_head *bh; sector_t end_block; - int ret = 0; /* Will call free_more_memory() */ + int ret = 0; gfp_t gfp_mask; gfp_mask = mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS) | gfp; diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index 4fc87dee005a..ee577a83cfe6 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -54,7 +54,6 @@ enum wb_reason { WB_REASON_SYNC, WB_REASON_PERIODIC, WB_REASON_LAPTOP_TIMER, - WB_REASON_FREE_MORE_MEM, WB_REASON_FS_FREE_SPACE, /* * There is no bdi forker thread any more and works are done diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index d94def25e4dc..85a33bea76f1 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -36,7 +36,6 @@ EM( WB_REASON_SYNC, "sync") \ EM( WB_REASON_PERIODIC, "periodic") \ EM( WB_REASON_LAPTOP_TIMER, "laptop_timer") \ - EM( WB_REASON_FREE_MORE_MEM, "free_more_memory") \ EM( WB_REASON_FS_FREE_SPACE, "fs_free_space") \ EMe(WB_REASON_FORKER_THREAD, "forker_thread") -- cgit v1.2.3-71-gd317 From 9bacd256f1354883d3c1402655153367982bba49 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 17 Apr 2020 07:10:23 -0700 Subject: tcp: cache line align MAX_TCP_HEADER TCP stack is dumb in how it cooks its output packets. Depending on MAX_HEADER value, we might chose a bad ending point for the headers. If we align the end of TCP headers to cache line boundary, we make sure to always use the smallest number of cache lines, which always help. Signed-off-by: Eric Dumazet Cc: Soheil Hassas Yeganeh Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/net/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 5fa9eacd965a..dcf9a72eeaa6 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -51,7 +51,7 @@ extern struct inet_hashinfo tcp_hashinfo; extern struct percpu_counter tcp_orphan_count; void tcp_time_wait(struct sock *sk, int state, int timeo); -#define MAX_TCP_HEADER (128 + MAX_HEADER) +#define MAX_TCP_HEADER L1_CACHE_ALIGN(128 + MAX_HEADER) #define MAX_TCP_OPTION_SPACE 40 #define TCP_MIN_SND_MSS 48 #define TCP_MIN_GSO_SIZE (TCP_MIN_SND_MSS - MAX_TCP_OPTION_SPACE) -- cgit v1.2.3-71-gd317 From a07479147be03d2450376ebaff9ea1a0682f25d6 Mon Sep 17 00:00:00 2001 From: Lars Engebretsen Date: Wed, 15 Apr 2020 12:10:43 +0200 Subject: iio: core: remove extra semi-colon from devm_iio_device_register() macro This change removes the semi-colon from the devm_iio_device_register() macro which seems to have been added by accident. Fixes: 63b19547cc3d9 ("iio: Use macro magic to avoid manual assign of driver_module") Signed-off-by: Lars Engebretsen Cc: Reviewed-by: Alexandru Ardelean Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index eed58ed2f368..4e7848415c4f 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -600,7 +600,7 @@ void iio_device_unregister(struct iio_dev *indio_dev); * 0 on success, negative error number on failure. */ #define devm_iio_device_register(dev, indio_dev) \ - __devm_iio_device_register((dev), (indio_dev), THIS_MODULE); + __devm_iio_device_register((dev), (indio_dev), THIS_MODULE) int __devm_iio_device_register(struct device *dev, struct iio_dev *indio_dev, struct module *this_mod); void devm_iio_device_unregister(struct device *dev, struct iio_dev *indio_dev); -- cgit v1.2.3-71-gd317 From bdb2ce82818577ba6e57b7d68b698b8d17329281 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 19 Apr 2020 20:03:05 -0400 Subject: xprtrdma: Fix trace point use-after-free race It's not safe to use resources pointed to by the @send_wr of ib_post_send() _after_ that function returns. Those resources are typically freed by the Send completion handler, which can run before ib_post_send() returns. Thus the trace points currently around ib_post_send() in the client's RPC/RDMA transport are a hazard, even when they are disabled. Rearrange them so that they touch the Work Request only _before_ ib_post_send() is invoked. Fixes: ab03eff58eb5 ("xprtrdma: Add trace points in RPC Call transmit paths") Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 12 ++++-------- net/sunrpc/xprtrdma/verbs.c | 2 +- 2 files changed, 5 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 051f26fedc4d..72f043876019 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -692,11 +692,10 @@ TRACE_EVENT(xprtrdma_prepsend_failed, TRACE_EVENT(xprtrdma_post_send, TP_PROTO( - const struct rpcrdma_req *req, - int status + const struct rpcrdma_req *req ), - TP_ARGS(req, status), + TP_ARGS(req), TP_STRUCT__entry( __field(const void *, req) @@ -705,7 +704,6 @@ TRACE_EVENT(xprtrdma_post_send, __field(unsigned int, client_id) __field(int, num_sge) __field(int, signaled) - __field(int, status) ), TP_fast_assign( @@ -718,15 +716,13 @@ TRACE_EVENT(xprtrdma_post_send, __entry->sc = req->rl_sendctx; __entry->num_sge = req->rl_wr.num_sge; __entry->signaled = req->rl_wr.send_flags & IB_SEND_SIGNALED; - __entry->status = status; ), - TP_printk("task:%u@%u req=%p sc=%p (%d SGE%s) %sstatus=%d", + TP_printk("task:%u@%u req=%p sc=%p (%d SGE%s) %s", __entry->task_id, __entry->client_id, __entry->req, __entry->sc, __entry->num_sge, (__entry->num_sge == 1 ? "" : "s"), - (__entry->signaled ? "signaled " : ""), - __entry->status + (__entry->signaled ? "signaled" : "") ) ); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 29ae982d69cf..05c4d3a9cda2 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1356,8 +1356,8 @@ int rpcrdma_post_sends(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) --ep->re_send_count; } + trace_xprtrdma_post_send(req); rc = frwr_send(r_xprt, req); - trace_xprtrdma_post_send(req, rc); if (rc) return -ENOTCONN; return 0; -- cgit v1.2.3-71-gd317 From ac84bac4062e7fc24f5e2c61c6a414b2a00a29ad Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 20 Apr 2020 12:06:15 +0200 Subject: vdso/datapage: Use correct clock mode name in comment While the explanation for time namespace <-> vdso interactions is very helpful it uses the wrong name in the comment when describing the clock mode making grepping a bit annoying. This seems like an accidental oversight when moving from VCLOCK_TIMENS to VDSO_CLOCKMODE_TIMENS. It seems that 660fd04f9317 ("lib/vdso: Prepare for time namespace support") misspelled VCLOCK_TIMENS as VLOCK_TIMENS which explains why it got missed when VCLOCK_TIMENS became VDSO_CLOCKMODE_TIMENS in 2d6b01bd88cc ("lib/vdso: Move VCLOCK_TIMENS to vdso_clock_modes"). Update the comment to use VDSO_CLOCKMODE_TIMENS. Fixes: 660fd04f9317 ("lib/vdso: Prepare for time namespace support") Fixes: 2d6b01bd88cc ("lib/vdso: Move VCLOCK_TIMENS to vdso_clock_modes") Signed-off-by: Christian Brauner Signed-off-by: Thomas Gleixner Acked-by: Andrei Vagin Acked-by: Vincenzo Frascino Link: https://lkml.kernel.org/r/20200420100615.1549804-1-christian.brauner@ubuntu.com --- include/vdso/datapage.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h index 5cbc9fcbfd45..7955c56d6b3c 100644 --- a/include/vdso/datapage.h +++ b/include/vdso/datapage.h @@ -73,8 +73,8 @@ struct vdso_timestamp { * * @offset is used by the special time namespace VVAR pages which are * installed instead of the real VVAR page. These namespace pages must set - * @seq to 1 and @clock_mode to VLOCK_TIMENS to force the code into the - * time namespace slow path. The namespace aware functions retrieve the + * @seq to 1 and @clock_mode to VDSO_CLOCKMODE_TIMENS to force the code into + * the time namespace slow path. The namespace aware functions retrieve the * real system wide VVAR page, read host time and add the per clock offset. * For clocks which are not affected by time namespace adjustment the * offset must be zero. -- cgit v1.2.3-71-gd317 From d6c8e949a35d6906d6c03a50e9a9cdf4e494528a Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Tue, 21 Apr 2020 09:07:55 -0400 Subject: blk-iocost: Fix error on iocost_ioc_vrate_adj MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Systemtap 4.2 is unable to correctly interpret the "u32 (*missed_ppm)[2]" argument of the iocost_ioc_vrate_adj trace entry defined in include/trace/events/iocost.h leading to the following error: /tmp/stapAcz0G0/stap_c89c58b83cea1724e26395efa9ed4939_6321_aux_6.c:78:8: error: expected ‘;’, ‘,’ or ‘)’ before ‘*’ token , u32[]* __tracepoint_arg_missed_ppm That argument type is indeed rather complex and hard to read. Looking at block/blk-iocost.c. It is just a 2-entry u32 array. By simplifying the argument to a simple "u32 *missed_ppm" and adjusting the trace entry accordingly, the compilation error was gone. Fixes: 7caa47151ab2 ("blkcg: implement blk-iocost") Acked-by: Steven Rostedt (VMware) Acked-by: Tejun Heo Signed-off-by: Waiman Long Signed-off-by: Jens Axboe --- block/blk-iocost.c | 4 ++-- include/trace/events/iocost.h | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/block/blk-iocost.c b/block/blk-iocost.c index db35ee682294..3ab0c1c704b6 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -1591,7 +1591,7 @@ skip_surplus_transfers: vrate_min, vrate_max); } - trace_iocost_ioc_vrate_adj(ioc, vrate, &missed_ppm, rq_wait_pct, + trace_iocost_ioc_vrate_adj(ioc, vrate, missed_ppm, rq_wait_pct, nr_lagging, nr_shortages, nr_surpluses); @@ -1600,7 +1600,7 @@ skip_surplus_transfers: ioc->period_us * vrate * INUSE_MARGIN_PCT, 100); } else if (ioc->busy_level != prev_busy_level || nr_lagging) { trace_iocost_ioc_vrate_adj(ioc, atomic64_read(&ioc->vtime_rate), - &missed_ppm, rq_wait_pct, nr_lagging, + missed_ppm, rq_wait_pct, nr_lagging, nr_shortages, nr_surpluses); } diff --git a/include/trace/events/iocost.h b/include/trace/events/iocost.h index 7ecaa65b7106..c2f580fd371b 100644 --- a/include/trace/events/iocost.h +++ b/include/trace/events/iocost.h @@ -130,7 +130,7 @@ DEFINE_EVENT(iocg_inuse_update, iocost_inuse_reset, TRACE_EVENT(iocost_ioc_vrate_adj, - TP_PROTO(struct ioc *ioc, u64 new_vrate, u32 (*missed_ppm)[2], + TP_PROTO(struct ioc *ioc, u64 new_vrate, u32 *missed_ppm, u32 rq_wait_pct, int nr_lagging, int nr_shortages, int nr_surpluses), @@ -155,8 +155,8 @@ TRACE_EVENT(iocost_ioc_vrate_adj, __entry->old_vrate = atomic64_read(&ioc->vtime_rate);; __entry->new_vrate = new_vrate; __entry->busy_level = ioc->busy_level; - __entry->read_missed_ppm = (*missed_ppm)[READ]; - __entry->write_missed_ppm = (*missed_ppm)[WRITE]; + __entry->read_missed_ppm = missed_ppm[READ]; + __entry->write_missed_ppm = missed_ppm[WRITE]; __entry->rq_wait_pct = rq_wait_pct; __entry->nr_lagging = nr_lagging; __entry->nr_shortages = nr_shortages; -- cgit v1.2.3-71-gd317 From bdebd6a2831b6fab69eb85cee74a8ba77f1a1cc2 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 20 Apr 2020 18:14:11 -0700 Subject: vmalloc: fix remap_vmalloc_range() bounds checks remap_vmalloc_range() has had various issues with the bounds checks it promises to perform ("This function checks that addr is a valid vmalloc'ed area, and that it is big enough to cover the vma") over time, e.g.: - not detecting pgoff< Signed-off-by: Andrew Morton Cc: stable@vger.kernel.org Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Martin KaFai Lau Cc: Song Liu Cc: Yonghong Song Cc: Andrii Nakryiko Cc: John Fastabend Cc: KP Singh Link: http://lkml.kernel.org/r/20200415222312.236431-1-jannh@google.com Signed-off-by: Linus Torvalds --- fs/proc/vmcore.c | 5 +++-- include/linux/vmalloc.h | 2 +- mm/vmalloc.c | 16 +++++++++++++--- samples/vfio-mdev/mdpy.c | 2 +- 4 files changed, 18 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 7dc800cce354..c663202da8de 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -266,7 +266,8 @@ static int vmcoredd_mmap_dumps(struct vm_area_struct *vma, unsigned long dst, if (start < offset + dump->size) { tsz = min(offset + (u64)dump->size - start, (u64)size); buf = dump->buf + start - offset; - if (remap_vmalloc_range_partial(vma, dst, buf, tsz)) { + if (remap_vmalloc_range_partial(vma, dst, buf, 0, + tsz)) { ret = -EFAULT; goto out_unlock; } @@ -624,7 +625,7 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)start, size); kaddr = elfnotes_buf + start - elfcorebuf_sz - vmcoredd_orig_sz; if (remap_vmalloc_range_partial(vma, vma->vm_start + len, - kaddr, tsz)) + kaddr, 0, tsz)) goto fail; size -= tsz; diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 0507a162ccd0..a95d3cc74d79 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -137,7 +137,7 @@ extern void vunmap(const void *addr); extern int remap_vmalloc_range_partial(struct vm_area_struct *vma, unsigned long uaddr, void *kaddr, - unsigned long size); + unsigned long pgoff, unsigned long size); extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, unsigned long pgoff); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 399f219544f7..9a8227afa073 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -3054,6 +3055,7 @@ finished: * @vma: vma to cover * @uaddr: target user address to start at * @kaddr: virtual address of vmalloc kernel memory + * @pgoff: offset from @kaddr to start at * @size: size of map area * * Returns: 0 for success, -Exxx on failure @@ -3066,9 +3068,15 @@ finished: * Similar to remap_pfn_range() (see mm/memory.c) */ int remap_vmalloc_range_partial(struct vm_area_struct *vma, unsigned long uaddr, - void *kaddr, unsigned long size) + void *kaddr, unsigned long pgoff, + unsigned long size) { struct vm_struct *area; + unsigned long off; + unsigned long end_index; + + if (check_shl_overflow(pgoff, PAGE_SHIFT, &off)) + return -EINVAL; size = PAGE_ALIGN(size); @@ -3082,8 +3090,10 @@ int remap_vmalloc_range_partial(struct vm_area_struct *vma, unsigned long uaddr, if (!(area->flags & (VM_USERMAP | VM_DMA_COHERENT))) return -EINVAL; - if (kaddr + size > area->addr + get_vm_area_size(area)) + if (check_add_overflow(size, off, &end_index) || + end_index > get_vm_area_size(area)) return -EINVAL; + kaddr += off; do { struct page *page = vmalloc_to_page(kaddr); @@ -3122,7 +3132,7 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, unsigned long pgoff) { return remap_vmalloc_range_partial(vma, vma->vm_start, - addr + (pgoff << PAGE_SHIFT), + addr, pgoff, vma->vm_end - vma->vm_start); } EXPORT_SYMBOL(remap_vmalloc_range); diff --git a/samples/vfio-mdev/mdpy.c b/samples/vfio-mdev/mdpy.c index cc86bf6566e4..9894693f3be1 100644 --- a/samples/vfio-mdev/mdpy.c +++ b/samples/vfio-mdev/mdpy.c @@ -418,7 +418,7 @@ static int mdpy_mmap(struct mdev_device *mdev, struct vm_area_struct *vma) return -EINVAL; return remap_vmalloc_range_partial(vma, vma->vm_start, - mdev_state->memblk, + mdev_state->memblk, 0, vma->vm_end - vma->vm_start); } -- cgit v1.2.3-71-gd317 From b9663b7ca6ff780555108394c9c1b409f63b99a7 Mon Sep 17 00:00:00 2001 From: Voon Weifeng Date: Mon, 20 Apr 2020 23:42:52 +0800 Subject: net: stmmac: Enable SERDES power up/down sequence This patch is to enable Intel SERDES power up/down sequence. The SERDES converts 8/10 bits data to SGMII signal. Below is an example of HW configuration for SGMII mode. The SERDES is located in the PHY IF in the diagram below. <-----------------GBE Controller---------->|<--External PHY chip--> +----------+ +----+ +---+ +----------+ | EQoS | <-GMII->| DW | < ------ > |PHY| <-SGMII-> | External | | MAC | |xPCS| |IF | | PHY | +----------+ +----+ +---+ +----------+ ^ ^ ^ ^ | | | | +---------------------MDIO-------------------------+ PHY IF configuration and status registers are accessible through mdio address 0x15 which is defined as mdio_adhoc_addr. During D0, The driver will need to power up PHY IF by changing the power state to P0. Likewise, for D3, the driver sets PHY IF power state to P3. Signed-off-by: Voon Weifeng Signed-off-by: Ong Boon Leong Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 189 ++++++++++++++++++++++ drivers/net/ethernet/stmicro/stmmac/dwmac-intel.h | 23 +++ drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 23 +++ include/linux/stmmac.h | 2 + 4 files changed, 237 insertions(+) create mode 100644 drivers/net/ethernet/stmicro/stmmac/dwmac-intel.h (limited to 'include') diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index 5419d4e478c0..2e4aaedb93f5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -5,8 +5,13 @@ #include #include #include +#include "dwmac-intel.h" #include "stmmac.h" +struct intel_priv_data { + int mdio_adhoc_addr; /* mdio address for serdes & etc */ +}; + /* This struct is used to associate PCI Function of MAC controller on a board, * discovered via DMI, with the address of PHY connected to the MAC. The * negative value of the address means that MAC controller is not connected @@ -49,6 +54,172 @@ static int stmmac_pci_find_phy_addr(struct pci_dev *pdev, return -ENODEV; } +static int serdes_status_poll(struct stmmac_priv *priv, int phyaddr, + int phyreg, u32 mask, u32 val) +{ + unsigned int retries = 10; + int val_rd; + + do { + val_rd = mdiobus_read(priv->mii, phyaddr, phyreg); + if ((val_rd & mask) == (val & mask)) + return 0; + udelay(POLL_DELAY_US); + } while (--retries); + + return -ETIMEDOUT; +} + +static int intel_serdes_powerup(struct net_device *ndev, void *priv_data) +{ + struct intel_priv_data *intel_priv = priv_data; + struct stmmac_priv *priv = netdev_priv(ndev); + int serdes_phy_addr = 0; + u32 data = 0; + + if (!intel_priv->mdio_adhoc_addr) + return 0; + + serdes_phy_addr = intel_priv->mdio_adhoc_addr; + + /* assert clk_req */ + data = mdiobus_read(priv->mii, serdes_phy_addr, + SERDES_GCR0); + + data |= SERDES_PLL_CLK; + + mdiobus_write(priv->mii, serdes_phy_addr, + SERDES_GCR0, data); + + /* check for clk_ack assertion */ + data = serdes_status_poll(priv, serdes_phy_addr, + SERDES_GSR0, + SERDES_PLL_CLK, + SERDES_PLL_CLK); + + if (data) { + dev_err(priv->device, "Serdes PLL clk request timeout\n"); + return data; + } + + /* assert lane reset */ + data = mdiobus_read(priv->mii, serdes_phy_addr, + SERDES_GCR0); + + data |= SERDES_RST; + + mdiobus_write(priv->mii, serdes_phy_addr, + SERDES_GCR0, data); + + /* check for assert lane reset reflection */ + data = serdes_status_poll(priv, serdes_phy_addr, + SERDES_GSR0, + SERDES_RST, + SERDES_RST); + + if (data) { + dev_err(priv->device, "Serdes assert lane reset timeout\n"); + return data; + } + + /* move power state to P0 */ + data = mdiobus_read(priv->mii, serdes_phy_addr, + SERDES_GCR0); + + data &= ~SERDES_PWR_ST_MASK; + data |= SERDES_PWR_ST_P0 << SERDES_PWR_ST_SHIFT; + + mdiobus_write(priv->mii, serdes_phy_addr, + SERDES_GCR0, data); + + /* Check for P0 state */ + data = serdes_status_poll(priv, serdes_phy_addr, + SERDES_GSR0, + SERDES_PWR_ST_MASK, + SERDES_PWR_ST_P0 << SERDES_PWR_ST_SHIFT); + + if (data) { + dev_err(priv->device, "Serdes power state P0 timeout.\n"); + return data; + } + + return 0; +} + +static void intel_serdes_powerdown(struct net_device *ndev, void *intel_data) +{ + struct intel_priv_data *intel_priv = intel_data; + struct stmmac_priv *priv = netdev_priv(ndev); + int serdes_phy_addr = 0; + u32 data = 0; + + if (!intel_priv->mdio_adhoc_addr) + return; + + serdes_phy_addr = intel_priv->mdio_adhoc_addr; + + /* move power state to P3 */ + data = mdiobus_read(priv->mii, serdes_phy_addr, + SERDES_GCR0); + + data &= ~SERDES_PWR_ST_MASK; + data |= SERDES_PWR_ST_P3 << SERDES_PWR_ST_SHIFT; + + mdiobus_write(priv->mii, serdes_phy_addr, + SERDES_GCR0, data); + + /* Check for P3 state */ + data = serdes_status_poll(priv, serdes_phy_addr, + SERDES_GSR0, + SERDES_PWR_ST_MASK, + SERDES_PWR_ST_P3 << SERDES_PWR_ST_SHIFT); + + if (data) { + dev_err(priv->device, "Serdes power state P3 timeout\n"); + return; + } + + /* de-assert clk_req */ + data = mdiobus_read(priv->mii, serdes_phy_addr, + SERDES_GCR0); + + data &= ~SERDES_PLL_CLK; + + mdiobus_write(priv->mii, serdes_phy_addr, + SERDES_GCR0, data); + + /* check for clk_ack de-assert */ + data = serdes_status_poll(priv, serdes_phy_addr, + SERDES_GSR0, + SERDES_PLL_CLK, + (u32)~SERDES_PLL_CLK); + + if (data) { + dev_err(priv->device, "Serdes PLL clk de-assert timeout\n"); + return; + } + + /* de-assert lane reset */ + data = mdiobus_read(priv->mii, serdes_phy_addr, + SERDES_GCR0); + + data &= ~SERDES_RST; + + mdiobus_write(priv->mii, serdes_phy_addr, + SERDES_GCR0, data); + + /* check for de-assert lane reset reflection */ + data = serdes_status_poll(priv, serdes_phy_addr, + SERDES_GSR0, + SERDES_RST, + (u32)~SERDES_RST); + + if (data) { + dev_err(priv->device, "Serdes de-assert lane reset timeout\n"); + return; + } +} + static void common_default_data(struct plat_stmmacenet_data *plat) { plat->clk_csr = 2; /* clk_csr_i = 20-35MHz & MDC = clk_csr_i/16 */ @@ -189,6 +360,9 @@ static int ehl_sgmii_data(struct pci_dev *pdev, plat->phy_addr = 0; plat->phy_interface = PHY_INTERFACE_MODE_SGMII; + plat->serdes_powerup = intel_serdes_powerup; + plat->serdes_powerdown = intel_serdes_powerdown; + return ehl_common_data(pdev, plat); } @@ -233,6 +407,8 @@ static int ehl_pse0_sgmii1g_data(struct pci_dev *pdev, struct plat_stmmacenet_data *plat) { plat->phy_interface = PHY_INTERFACE_MODE_SGMII; + plat->serdes_powerup = intel_serdes_powerup; + plat->serdes_powerdown = intel_serdes_powerdown; return ehl_pse0_common_data(pdev, plat); } @@ -263,6 +439,8 @@ static int ehl_pse1_sgmii1g_data(struct pci_dev *pdev, struct plat_stmmacenet_data *plat) { plat->phy_interface = PHY_INTERFACE_MODE_SGMII; + plat->serdes_powerup = intel_serdes_powerup; + plat->serdes_powerdown = intel_serdes_powerdown; return ehl_pse1_common_data(pdev, plat); } @@ -291,6 +469,8 @@ static int tgl_sgmii_data(struct pci_dev *pdev, plat->bus_id = 1; plat->phy_addr = 0; plat->phy_interface = PHY_INTERFACE_MODE_SGMII; + plat->serdes_powerup = intel_serdes_powerup; + plat->serdes_powerdown = intel_serdes_powerdown; return tgl_common_data(pdev, plat); } @@ -417,11 +597,17 @@ static int intel_eth_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct stmmac_pci_info *info = (struct stmmac_pci_info *)id->driver_data; + struct intel_priv_data *intel_priv; struct plat_stmmacenet_data *plat; struct stmmac_resources res; int i; int ret; + intel_priv = devm_kzalloc(&pdev->dev, sizeof(*intel_priv), + GFP_KERNEL); + if (!intel_priv) + return -ENOMEM; + plat = devm_kzalloc(&pdev->dev, sizeof(*plat), GFP_KERNEL); if (!plat) return -ENOMEM; @@ -457,6 +643,9 @@ static int intel_eth_pci_probe(struct pci_dev *pdev, pci_set_master(pdev); + plat->bsp_priv = intel_priv; + intel_priv->mdio_adhoc_addr = 0x15; + ret = info->setup(pdev, plat); if (ret) return ret; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.h b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.h new file mode 100644 index 000000000000..e723096c0b15 --- /dev/null +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2020, Intel Corporation + * DWMAC Intel header file + */ + +#ifndef __DWMAC_INTEL_H__ +#define __DWMAC_INTEL_H__ + +#define POLL_DELAY_US 8 + +/* SERDES Register */ +#define SERDES_GSR0 0x5 /* Global Status Reg0 */ +#define SERDES_GCR0 0xb /* Global Configuration Reg0 */ + +/* SERDES defines */ +#define SERDES_PLL_CLK BIT(0) /* PLL clk valid signal */ +#define SERDES_RST BIT(2) /* Serdes Reset */ +#define SERDES_PWR_ST_MASK GENMASK(6, 4) /* Serdes Power state*/ +#define SERDES_PWR_ST_SHIFT 4 +#define SERDES_PWR_ST_P0 0x0 +#define SERDES_PWR_ST_P3 0x3 + +#endif /* __DWMAC_INTEL_H__ */ diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index e6898fd5223f..565da6498c84 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4986,6 +4986,14 @@ int stmmac_dvr_probe(struct device *device, goto error_netdev_register; } + if (priv->plat->serdes_powerup) { + ret = priv->plat->serdes_powerup(ndev, + priv->plat->bsp_priv); + + if (ret < 0) + return ret; + } + #ifdef CONFIG_DEBUG_FS stmmac_init_fs(ndev); #endif @@ -5029,6 +5037,9 @@ int stmmac_dvr_remove(struct device *dev) stmmac_stop_all_dma(priv); + if (priv->plat->serdes_powerdown) + priv->plat->serdes_powerdown(ndev, priv->plat->bsp_priv); + stmmac_mac_set(priv, priv->ioaddr, false); netif_carrier_off(ndev); unregister_netdev(ndev); @@ -5081,6 +5092,9 @@ int stmmac_suspend(struct device *dev) /* Stop TX/RX DMA */ stmmac_stop_all_dma(priv); + if (priv->plat->serdes_powerdown) + priv->plat->serdes_powerdown(ndev, priv->plat->bsp_priv); + /* Enable Power down mode by programming the PMT regs */ if (device_may_wakeup(priv->device)) { stmmac_pmt(priv, priv->hw, priv->wolopts); @@ -5143,6 +5157,7 @@ int stmmac_resume(struct device *dev) { struct net_device *ndev = dev_get_drvdata(dev); struct stmmac_priv *priv = netdev_priv(ndev); + int ret; if (!netif_running(ndev)) return 0; @@ -5170,6 +5185,14 @@ int stmmac_resume(struct device *dev) stmmac_mdio_reset(priv->mii); } + if (priv->plat->serdes_powerup) { + ret = priv->plat->serdes_powerup(ndev, + priv->plat->bsp_priv); + + if (ret < 0) + return ret; + } + netif_device_attach(ndev); mutex_lock(&priv->lock); diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index fbafb353e9be..bd964c31d333 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -177,6 +177,8 @@ struct plat_stmmacenet_data { struct stmmac_rxq_cfg rx_queues_cfg[MTL_MAX_RX_QUEUES]; struct stmmac_txq_cfg tx_queues_cfg[MTL_MAX_TX_QUEUES]; void (*fix_mac_speed)(void *priv, unsigned int speed); + int (*serdes_powerup)(struct net_device *ndev, void *priv); + void (*serdes_powerdown)(struct net_device *ndev, void *priv); int (*init)(struct platform_device *pdev, void *priv); void (*exit)(struct platform_device *pdev, void *priv); struct mac_device_info *(*setup)(void *priv); -- cgit v1.2.3-71-gd317 From 01b2bafe57b19d9119413f138765ef57990921ce Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 14 Apr 2020 12:10:50 -0300 Subject: pnp: Use list_for_each_entry() instead of open coding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Aside from good practice, this avoids a warning from gcc 10: ./include/linux/kernel.h:997:3: warning: array subscript -31 is outside array bounds of ‘struct list_head[1]’ [-Warray-bounds] 997 | ((type *)(__mptr - offsetof(type, member))); }) | ~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ./include/linux/list.h:493:2: note: in expansion of macro ‘container_of’ 493 | container_of(ptr, type, member) | ^~~~~~~~~~~~ ./include/linux/pnp.h:275:30: note: in expansion of macro ‘list_entry’ 275 | #define global_to_pnp_dev(n) list_entry(n, struct pnp_dev, global_list) | ^~~~~~~~~~ ./include/linux/pnp.h:281:11: note: in expansion of macro ‘global_to_pnp_dev’ 281 | (dev) != global_to_pnp_dev(&pnp_global); \ | ^~~~~~~~~~~~~~~~~ arch/x86/kernel/rtc.c:189:2: note: in expansion of macro ‘pnp_for_each_dev’ 189 | pnp_for_each_dev(dev) { Because the common code doesn't cast the starting list_head to the containing struct. Signed-off-by: Jason Gunthorpe [ rjw: Whitespace adjustments ] Signed-off-by: Rafael J. Wysocki --- include/linux/pnp.h | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/pnp.h b/include/linux/pnp.h index b18dca67253d..c2a7cfbca713 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -220,10 +220,8 @@ struct pnp_card { #define global_to_pnp_card(n) list_entry(n, struct pnp_card, global_list) #define protocol_to_pnp_card(n) list_entry(n, struct pnp_card, protocol_list) #define to_pnp_card(n) container_of(n, struct pnp_card, dev) -#define pnp_for_each_card(card) \ - for((card) = global_to_pnp_card(pnp_cards.next); \ - (card) != global_to_pnp_card(&pnp_cards); \ - (card) = global_to_pnp_card((card)->global_list.next)) +#define pnp_for_each_card(card) \ + list_for_each_entry(card, &pnp_cards, global_list) struct pnp_card_link { struct pnp_card *card; @@ -276,14 +274,9 @@ struct pnp_dev { #define card_to_pnp_dev(n) list_entry(n, struct pnp_dev, card_list) #define protocol_to_pnp_dev(n) list_entry(n, struct pnp_dev, protocol_list) #define to_pnp_dev(n) container_of(n, struct pnp_dev, dev) -#define pnp_for_each_dev(dev) \ - for((dev) = global_to_pnp_dev(pnp_global.next); \ - (dev) != global_to_pnp_dev(&pnp_global); \ - (dev) = global_to_pnp_dev((dev)->global_list.next)) -#define card_for_each_dev(card,dev) \ - for((dev) = card_to_pnp_dev((card)->devices.next); \ - (dev) != card_to_pnp_dev(&(card)->devices); \ - (dev) = card_to_pnp_dev((dev)->card_list.next)) +#define pnp_for_each_dev(dev) list_for_each_entry(dev, &pnp_global, global_list) +#define card_for_each_dev(card, dev) \ + list_for_each_entry(dev, &(card)->devices, card_list) #define pnp_dev_name(dev) (dev)->name static inline void *pnp_get_drvdata(struct pnp_dev *pdev) @@ -437,14 +430,10 @@ struct pnp_protocol { }; #define to_pnp_protocol(n) list_entry(n, struct pnp_protocol, protocol_list) -#define protocol_for_each_card(protocol,card) \ - for((card) = protocol_to_pnp_card((protocol)->cards.next); \ - (card) != protocol_to_pnp_card(&(protocol)->cards); \ - (card) = protocol_to_pnp_card((card)->protocol_list.next)) -#define protocol_for_each_dev(protocol,dev) \ - for((dev) = protocol_to_pnp_dev((protocol)->devices.next); \ - (dev) != protocol_to_pnp_dev(&(protocol)->devices); \ - (dev) = protocol_to_pnp_dev((dev)->protocol_list.next)) +#define protocol_for_each_card(protocol, card) \ + list_for_each_entry(card, &(protocol)->cards, protocol_list) +#define protocol_for_each_dev(protocol, dev) \ + list_for_each_entry(dev, &(protocol)->devices, protocol_list) extern struct bus_type pnp_bus_type; -- cgit v1.2.3-71-gd317 From 9175d3f38816835b0801bacbf4f6aff1a1672b71 Mon Sep 17 00:00:00 2001 From: Maciej Żenczykowski Date: Mon, 20 Apr 2020 11:25:07 -0700 Subject: ipv6: ndisc: RFC-ietf-6man-ra-pref64-09 is now published as RFC8781 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See: https://www.rfc-editor.org/authors/rfc8781.txt Cc: Erik Kline Cc: Jen Linkova Cc: Lorenzo Colitti Cc: Michael Haro Signed-off-by: Maciej Żenczykowski Fixes: c24a77edc9a7 ("ipv6: ndisc: add support for 'PREF64' dns64 prefix identifier") Signed-off-by: David S. Miller --- include/net/ndisc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 7d107113f988..9205a76d967a 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -41,7 +41,7 @@ enum { ND_OPT_DNSSL = 31, /* RFC6106 */ ND_OPT_6CO = 34, /* RFC6775 */ ND_OPT_CAPTIVE_PORTAL = 37, /* RFC7710 */ - ND_OPT_PREF64 = 38, /* RFC-ietf-6man-ra-pref64-09 */ + ND_OPT_PREF64 = 38, /* RFC8781 */ __ND_OPT_MAX }; -- cgit v1.2.3-71-gd317 From f081bbb3fd03f949bcdc5aed95a827d7c65e0f30 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 22 Apr 2020 16:18:18 +0300 Subject: hyper-v: Remove internal types from UAPI header The uuid_le mistakenly comes to be an UAPI type. Since it's luckily not used by Hyper-V APIs, we may replace with POD types, i.e. __u8 array. Note, previously shared uuid_be had been removed from UAPI few releases ago. This is a continuation of that process towards removing uuid_le one. Note, there is no ABI change! Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20200422131818.23088-1-andriy.shevchenko@linux.intel.com Signed-off-by: Wei Liu --- include/uapi/linux/hyperv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/hyperv.h b/include/uapi/linux/hyperv.h index 991b2b7ada7a..8f24404ad04f 100644 --- a/include/uapi/linux/hyperv.h +++ b/include/uapi/linux/hyperv.h @@ -119,8 +119,8 @@ enum hv_fcopy_op { struct hv_fcopy_hdr { __u32 operation; - uuid_le service_id0; /* currently unused */ - uuid_le service_id1; /* currently unused */ + __u8 service_id0[16]; /* currently unused */ + __u8 service_id1[16]; /* currently unused */ } __attribute__((packed)); #define OVER_WRITE 0x1 -- cgit v1.2.3-71-gd317 From 62d0fd591db1f9dcf68fb963b3a94af085a6b166 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 22 Apr 2020 01:13:55 +0900 Subject: arch: split MODULE_ARCH_VERMAGIC definitions out to As the bug report [1] pointed out, must be included after . I believe we should not impose any include order restriction. We often sort include directives alphabetically, but it is just coding style convention. Technically, we can include header files in any order by making every header self-contained. Currently, arch-specific MODULE_ARCH_VERMAGIC is defined in , which is not included from . Hence, the straight-forward fix-up would be as follows: |--- a/include/linux/vermagic.h |+++ b/include/linux/vermagic.h |@@ -1,5 +1,6 @@ | /* SPDX-License-Identifier: GPL-2.0 */ | #include |+#include | | /* Simply sanity version stamp for modules. */ | #ifdef CONFIG_SMP This works enough, but for further cleanups, I split MODULE_ARCH_VERMAGIC definitions into . With this, and will be orthogonal, and the location of MODULE_ARCH_VERMAGIC definitions will be consistent. For arc and ia64, MODULE_PROC_FAMILY is only used for defining MODULE_ARCH_VERMAGIC. I squashed it. For hexagon, nds32, and xtensa, I removed entirely because they contained nothing but MODULE_ARCH_VERMAGIC definition. Kbuild will automatically generate at build-time, wrapping . [1] https://lore.kernel.org/lkml/20200411155623.GA22175@zn.tnic Reported-by: Borislav Petkov Signed-off-by: Masahiro Yamada Acked-by: Jessica Yu --- arch/arc/include/asm/module.h | 5 --- arch/arc/include/asm/vermagic.h | 8 +++++ arch/arm/include/asm/module.h | 24 ------------- arch/arm/include/asm/vermagic.h | 31 +++++++++++++++++ arch/arm64/include/asm/module.h | 2 -- arch/arm64/include/asm/vermagic.h | 10 ++++++ arch/hexagon/include/asm/module.h | 13 ------- arch/hexagon/include/asm/vermagic.h | 13 +++++++ arch/ia64/include/asm/module.h | 4 --- arch/ia64/include/asm/vermagic.h | 15 ++++++++ arch/mips/include/asm/module.h | 61 --------------------------------- arch/mips/include/asm/vermagic.h | 66 +++++++++++++++++++++++++++++++++++ arch/nds32/include/asm/module.h | 11 ------ arch/nds32/include/asm/vermagic.h | 9 +++++ arch/powerpc/include/asm/module.h | 18 ---------- arch/powerpc/include/asm/vermagic.h | 20 +++++++++++ arch/riscv/include/asm/module.h | 2 -- arch/riscv/include/asm/vermagic.h | 9 +++++ arch/sh/include/asm/module.h | 28 --------------- arch/sh/include/asm/vermagic.h | 34 +++++++++++++++++++ arch/x86/include/asm/module.h | 60 -------------------------------- arch/x86/include/asm/vermagic.h | 68 +++++++++++++++++++++++++++++++++++++ arch/xtensa/include/asm/module.h | 20 ----------- arch/xtensa/include/asm/vermagic.h | 17 ++++++++++ include/asm-generic/Kbuild | 1 + include/asm-generic/vermagic.h | 7 ++++ include/linux/vermagic.h | 8 +++-- 27 files changed, 313 insertions(+), 251 deletions(-) create mode 100644 arch/arc/include/asm/vermagic.h create mode 100644 arch/arm/include/asm/vermagic.h create mode 100644 arch/arm64/include/asm/vermagic.h delete mode 100644 arch/hexagon/include/asm/module.h create mode 100644 arch/hexagon/include/asm/vermagic.h create mode 100644 arch/ia64/include/asm/vermagic.h create mode 100644 arch/mips/include/asm/vermagic.h delete mode 100644 arch/nds32/include/asm/module.h create mode 100644 arch/nds32/include/asm/vermagic.h create mode 100644 arch/powerpc/include/asm/vermagic.h create mode 100644 arch/riscv/include/asm/vermagic.h create mode 100644 arch/sh/include/asm/vermagic.h create mode 100644 arch/x86/include/asm/vermagic.h delete mode 100644 arch/xtensa/include/asm/module.h create mode 100644 arch/xtensa/include/asm/vermagic.h create mode 100644 include/asm-generic/vermagic.h (limited to 'include') diff --git a/arch/arc/include/asm/module.h b/arch/arc/include/asm/module.h index 48f13a4ace4b..f534a1fef070 100644 --- a/arch/arc/include/asm/module.h +++ b/arch/arc/include/asm/module.h @@ -3,7 +3,6 @@ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) * * Amit Bhor, Sameer Dhavale: Codito Technologies 2004 - */ #ifndef _ASM_ARC_MODULE_H @@ -19,8 +18,4 @@ struct mod_arch_specific { const char *secstr; }; -#define MODULE_PROC_FAMILY "ARC700" - -#define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY - #endif /* _ASM_ARC_MODULE_H */ diff --git a/arch/arc/include/asm/vermagic.h b/arch/arc/include/asm/vermagic.h new file mode 100644 index 000000000000..a10257d2c62c --- /dev/null +++ b/arch/arc/include/asm/vermagic.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ASM_VERMAGIC_H +#define _ASM_VERMAGIC_H + +#define MODULE_ARCH_VERMAGIC "ARC700" + +#endif /* _ASM_VERMAGIC_H */ diff --git a/arch/arm/include/asm/module.h b/arch/arm/include/asm/module.h index 182163b55546..4b0df09cbe67 100644 --- a/arch/arm/include/asm/module.h +++ b/arch/arm/include/asm/module.h @@ -37,30 +37,6 @@ struct mod_arch_specific { struct module; u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val); -/* - * Add the ARM architecture version to the version magic string - */ -#define MODULE_ARCH_VERMAGIC_ARMVSN "ARMv" __stringify(__LINUX_ARM_ARCH__) " " - -/* Add __virt_to_phys patching state as well */ -#ifdef CONFIG_ARM_PATCH_PHYS_VIRT -#define MODULE_ARCH_VERMAGIC_P2V "p2v8 " -#else -#define MODULE_ARCH_VERMAGIC_P2V "" -#endif - -/* Add instruction set architecture tag to distinguish ARM/Thumb kernels */ -#ifdef CONFIG_THUMB2_KERNEL -#define MODULE_ARCH_VERMAGIC_ARMTHUMB "thumb2 " -#else -#define MODULE_ARCH_VERMAGIC_ARMTHUMB "" -#endif - -#define MODULE_ARCH_VERMAGIC \ - MODULE_ARCH_VERMAGIC_ARMVSN \ - MODULE_ARCH_VERMAGIC_ARMTHUMB \ - MODULE_ARCH_VERMAGIC_P2V - #ifdef CONFIG_THUMB2_KERNEL #define HAVE_ARCH_KALLSYMS_SYMBOL_VALUE static inline unsigned long kallsyms_symbol_value(const Elf_Sym *sym) diff --git a/arch/arm/include/asm/vermagic.h b/arch/arm/include/asm/vermagic.h new file mode 100644 index 000000000000..62ce94e26a63 --- /dev/null +++ b/arch/arm/include/asm/vermagic.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_VERMAGIC_H +#define _ASM_VERMAGIC_H + +#include + +/* + * Add the ARM architecture version to the version magic string + */ +#define MODULE_ARCH_VERMAGIC_ARMVSN "ARMv" __stringify(__LINUX_ARM_ARCH__) " " + +/* Add __virt_to_phys patching state as well */ +#ifdef CONFIG_ARM_PATCH_PHYS_VIRT +#define MODULE_ARCH_VERMAGIC_P2V "p2v8 " +#else +#define MODULE_ARCH_VERMAGIC_P2V "" +#endif + +/* Add instruction set architecture tag to distinguish ARM/Thumb kernels */ +#ifdef CONFIG_THUMB2_KERNEL +#define MODULE_ARCH_VERMAGIC_ARMTHUMB "thumb2 " +#else +#define MODULE_ARCH_VERMAGIC_ARMTHUMB "" +#endif + +#define MODULE_ARCH_VERMAGIC \ + MODULE_ARCH_VERMAGIC_ARMVSN \ + MODULE_ARCH_VERMAGIC_ARMTHUMB \ + MODULE_ARCH_VERMAGIC_P2V + +#endif /* _ASM_VERMAGIC_H */ diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h index 1e93de68c044..4e7fa2623896 100644 --- a/arch/arm64/include/asm/module.h +++ b/arch/arm64/include/asm/module.h @@ -7,8 +7,6 @@ #include -#define MODULE_ARCH_VERMAGIC "aarch64" - #ifdef CONFIG_ARM64_MODULE_PLTS struct mod_plt_sec { int plt_shndx; diff --git a/arch/arm64/include/asm/vermagic.h b/arch/arm64/include/asm/vermagic.h new file mode 100644 index 000000000000..a1eec6a000f1 --- /dev/null +++ b/arch/arm64/include/asm/vermagic.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2012 ARM Ltd. + */ +#ifndef _ASM_VERMAGIC_H +#define _ASM_VERMAGIC_H + +#define MODULE_ARCH_VERMAGIC "aarch64" + +#endif /* _ASM_VERMAGIC_H */ diff --git a/arch/hexagon/include/asm/module.h b/arch/hexagon/include/asm/module.h deleted file mode 100644 index e8de4fe03543..000000000000 --- a/arch/hexagon/include/asm/module.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved. - */ - -#ifndef _ASM_MODULE_H -#define _ASM_MODULE_H - -#include - -#define MODULE_ARCH_VERMAGIC __stringify(PROCESSOR_MODEL_NAME) " " - -#endif diff --git a/arch/hexagon/include/asm/vermagic.h b/arch/hexagon/include/asm/vermagic.h new file mode 100644 index 000000000000..0e8dedc8c486 --- /dev/null +++ b/arch/hexagon/include/asm/vermagic.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved. + */ + +#ifndef _ASM_VERMAGIC_H +#define _ASM_VERMAGIC_H + +#include + +#define MODULE_ARCH_VERMAGIC __stringify(PROCESSOR_MODEL_NAME) " " + +#endif /* _ASM_VERMAGIC_H */ diff --git a/arch/ia64/include/asm/module.h b/arch/ia64/include/asm/module.h index f319144260ce..5a29652e6def 100644 --- a/arch/ia64/include/asm/module.h +++ b/arch/ia64/include/asm/module.h @@ -26,10 +26,6 @@ struct mod_arch_specific { unsigned int next_got_entry; /* index of next available got entry */ }; -#define MODULE_PROC_FAMILY "ia64" -#define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY \ - "gcc-" __stringify(__GNUC__) "." __stringify(__GNUC_MINOR__) - #define ARCH_SHF_SMALL SHF_IA_64_SHORT #endif /* _ASM_IA64_MODULE_H */ diff --git a/arch/ia64/include/asm/vermagic.h b/arch/ia64/include/asm/vermagic.h new file mode 100644 index 000000000000..29c7424f4c25 --- /dev/null +++ b/arch/ia64/include/asm/vermagic.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2003 Hewlett-Packard Co + * David Mosberger-Tang + */ + +#ifndef _ASM_VERMAGIC_H +#define _ASM_VERMAGIC_H + +#include + +#define MODULE_ARCH_VERMAGIC "ia64" \ + "gcc-" __stringify(__GNUC__) "." __stringify(__GNUC_MINOR__) + +#endif /* _ASM_VERMAGIC_H */ diff --git a/arch/mips/include/asm/module.h b/arch/mips/include/asm/module.h index 9846047b3d3d..724a0882576b 100644 --- a/arch/mips/include/asm/module.h +++ b/arch/mips/include/asm/module.h @@ -83,65 +83,4 @@ search_module_dbetables(unsigned long addr) } #endif -#ifdef CONFIG_CPU_BMIPS -#define MODULE_PROC_FAMILY "BMIPS " -#elif defined CONFIG_CPU_MIPS32_R1 -#define MODULE_PROC_FAMILY "MIPS32_R1 " -#elif defined CONFIG_CPU_MIPS32_R2 -#define MODULE_PROC_FAMILY "MIPS32_R2 " -#elif defined CONFIG_CPU_MIPS32_R6 -#define MODULE_PROC_FAMILY "MIPS32_R6 " -#elif defined CONFIG_CPU_MIPS64_R1 -#define MODULE_PROC_FAMILY "MIPS64_R1 " -#elif defined CONFIG_CPU_MIPS64_R2 -#define MODULE_PROC_FAMILY "MIPS64_R2 " -#elif defined CONFIG_CPU_MIPS64_R6 -#define MODULE_PROC_FAMILY "MIPS64_R6 " -#elif defined CONFIG_CPU_R3000 -#define MODULE_PROC_FAMILY "R3000 " -#elif defined CONFIG_CPU_TX39XX -#define MODULE_PROC_FAMILY "TX39XX " -#elif defined CONFIG_CPU_VR41XX -#define MODULE_PROC_FAMILY "VR41XX " -#elif defined CONFIG_CPU_R4X00 -#define MODULE_PROC_FAMILY "R4X00 " -#elif defined CONFIG_CPU_TX49XX -#define MODULE_PROC_FAMILY "TX49XX " -#elif defined CONFIG_CPU_R5000 -#define MODULE_PROC_FAMILY "R5000 " -#elif defined CONFIG_CPU_R5500 -#define MODULE_PROC_FAMILY "R5500 " -#elif defined CONFIG_CPU_NEVADA -#define MODULE_PROC_FAMILY "NEVADA " -#elif defined CONFIG_CPU_R10000 -#define MODULE_PROC_FAMILY "R10000 " -#elif defined CONFIG_CPU_RM7000 -#define MODULE_PROC_FAMILY "RM7000 " -#elif defined CONFIG_CPU_SB1 -#define MODULE_PROC_FAMILY "SB1 " -#elif defined CONFIG_CPU_LOONGSON32 -#define MODULE_PROC_FAMILY "LOONGSON32 " -#elif defined CONFIG_CPU_LOONGSON2EF -#define MODULE_PROC_FAMILY "LOONGSON2EF " -#elif defined CONFIG_CPU_LOONGSON64 -#define MODULE_PROC_FAMILY "LOONGSON64 " -#elif defined CONFIG_CPU_CAVIUM_OCTEON -#define MODULE_PROC_FAMILY "OCTEON " -#elif defined CONFIG_CPU_XLR -#define MODULE_PROC_FAMILY "XLR " -#elif defined CONFIG_CPU_XLP -#define MODULE_PROC_FAMILY "XLP " -#else -#error MODULE_PROC_FAMILY undefined for your processor configuration -#endif - -#ifdef CONFIG_32BIT -#define MODULE_KERNEL_TYPE "32BIT " -#elif defined CONFIG_64BIT -#define MODULE_KERNEL_TYPE "64BIT " -#endif - -#define MODULE_ARCH_VERMAGIC \ - MODULE_PROC_FAMILY MODULE_KERNEL_TYPE - #endif /* _ASM_MODULE_H */ diff --git a/arch/mips/include/asm/vermagic.h b/arch/mips/include/asm/vermagic.h new file mode 100644 index 000000000000..24dc3d35161c --- /dev/null +++ b/arch/mips/include/asm/vermagic.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_VERMAGIC_H +#define _ASM_VERMAGIC_H + +#ifdef CONFIG_CPU_BMIPS +#define MODULE_PROC_FAMILY "BMIPS " +#elif defined CONFIG_CPU_MIPS32_R1 +#define MODULE_PROC_FAMILY "MIPS32_R1 " +#elif defined CONFIG_CPU_MIPS32_R2 +#define MODULE_PROC_FAMILY "MIPS32_R2 " +#elif defined CONFIG_CPU_MIPS32_R6 +#define MODULE_PROC_FAMILY "MIPS32_R6 " +#elif defined CONFIG_CPU_MIPS64_R1 +#define MODULE_PROC_FAMILY "MIPS64_R1 " +#elif defined CONFIG_CPU_MIPS64_R2 +#define MODULE_PROC_FAMILY "MIPS64_R2 " +#elif defined CONFIG_CPU_MIPS64_R6 +#define MODULE_PROC_FAMILY "MIPS64_R6 " +#elif defined CONFIG_CPU_R3000 +#define MODULE_PROC_FAMILY "R3000 " +#elif defined CONFIG_CPU_TX39XX +#define MODULE_PROC_FAMILY "TX39XX " +#elif defined CONFIG_CPU_VR41XX +#define MODULE_PROC_FAMILY "VR41XX " +#elif defined CONFIG_CPU_R4X00 +#define MODULE_PROC_FAMILY "R4X00 " +#elif defined CONFIG_CPU_TX49XX +#define MODULE_PROC_FAMILY "TX49XX " +#elif defined CONFIG_CPU_R5000 +#define MODULE_PROC_FAMILY "R5000 " +#elif defined CONFIG_CPU_R5500 +#define MODULE_PROC_FAMILY "R5500 " +#elif defined CONFIG_CPU_NEVADA +#define MODULE_PROC_FAMILY "NEVADA " +#elif defined CONFIG_CPU_R10000 +#define MODULE_PROC_FAMILY "R10000 " +#elif defined CONFIG_CPU_RM7000 +#define MODULE_PROC_FAMILY "RM7000 " +#elif defined CONFIG_CPU_SB1 +#define MODULE_PROC_FAMILY "SB1 " +#elif defined CONFIG_CPU_LOONGSON32 +#define MODULE_PROC_FAMILY "LOONGSON32 " +#elif defined CONFIG_CPU_LOONGSON2EF +#define MODULE_PROC_FAMILY "LOONGSON2EF " +#elif defined CONFIG_CPU_LOONGSON64 +#define MODULE_PROC_FAMILY "LOONGSON64 " +#elif defined CONFIG_CPU_CAVIUM_OCTEON +#define MODULE_PROC_FAMILY "OCTEON " +#elif defined CONFIG_CPU_XLR +#define MODULE_PROC_FAMILY "XLR " +#elif defined CONFIG_CPU_XLP +#define MODULE_PROC_FAMILY "XLP " +#else +#error MODULE_PROC_FAMILY undefined for your processor configuration +#endif + +#ifdef CONFIG_32BIT +#define MODULE_KERNEL_TYPE "32BIT " +#elif defined CONFIG_64BIT +#define MODULE_KERNEL_TYPE "64BIT " +#endif + +#define MODULE_ARCH_VERMAGIC \ + MODULE_PROC_FAMILY MODULE_KERNEL_TYPE + +#endif /* _ASM_VERMAGIC_H */ diff --git a/arch/nds32/include/asm/module.h b/arch/nds32/include/asm/module.h deleted file mode 100644 index a3a08e993c65..000000000000 --- a/arch/nds32/include/asm/module.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -// Copyright (C) 2005-2017 Andes Technology Corporation - -#ifndef _ASM_NDS32_MODULE_H -#define _ASM_NDS32_MODULE_H - -#include - -#define MODULE_ARCH_VERMAGIC "NDS32v3" - -#endif /* _ASM_NDS32_MODULE_H */ diff --git a/arch/nds32/include/asm/vermagic.h b/arch/nds32/include/asm/vermagic.h new file mode 100644 index 000000000000..f772e7ba33f1 --- /dev/null +++ b/arch/nds32/include/asm/vermagic.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +// Copyright (C) 2005-2017 Andes Technology Corporation + +#ifndef _ASM_VERMAGIC_H +#define _ASM_VERMAGIC_H + +#define MODULE_ARCH_VERMAGIC "NDS32v3" + +#endif /* _ASM_VERMAGIC_H */ diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h index 356658711a86..5398bfc465b4 100644 --- a/arch/powerpc/include/asm/module.h +++ b/arch/powerpc/include/asm/module.h @@ -3,28 +3,10 @@ #define _ASM_POWERPC_MODULE_H #ifdef __KERNEL__ -/* - */ - #include #include #include - -#ifdef CONFIG_MPROFILE_KERNEL -#define MODULE_ARCH_VERMAGIC_FTRACE "mprofile-kernel " -#else -#define MODULE_ARCH_VERMAGIC_FTRACE "" -#endif - -#ifdef CONFIG_RELOCATABLE -#define MODULE_ARCH_VERMAGIC_RELOCATABLE "relocatable " -#else -#define MODULE_ARCH_VERMAGIC_RELOCATABLE "" -#endif - -#define MODULE_ARCH_VERMAGIC MODULE_ARCH_VERMAGIC_FTRACE MODULE_ARCH_VERMAGIC_RELOCATABLE - #ifndef __powerpc64__ /* * Thanks to Paul M for explaining this. diff --git a/arch/powerpc/include/asm/vermagic.h b/arch/powerpc/include/asm/vermagic.h new file mode 100644 index 000000000000..b054a8576e5d --- /dev/null +++ b/arch/powerpc/include/asm/vermagic.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_VERMAGIC_H +#define _ASM_VERMAGIC_H + +#ifdef CONFIG_MPROFILE_KERNEL +#define MODULE_ARCH_VERMAGIC_FTRACE "mprofile-kernel " +#else +#define MODULE_ARCH_VERMAGIC_FTRACE "" +#endif + +#ifdef CONFIG_RELOCATABLE +#define MODULE_ARCH_VERMAGIC_RELOCATABLE "relocatable " +#else +#define MODULE_ARCH_VERMAGIC_RELOCATABLE "" +#endif + +#define MODULE_ARCH_VERMAGIC \ + MODULE_ARCH_VERMAGIC_FTRACE MODULE_ARCH_VERMAGIC_RELOCATABLE + +#endif /* _ASM_VERMAGIC_H */ diff --git a/arch/riscv/include/asm/module.h b/arch/riscv/include/asm/module.h index 46202dad365d..76aa96a9fc08 100644 --- a/arch/riscv/include/asm/module.h +++ b/arch/riscv/include/asm/module.h @@ -6,8 +6,6 @@ #include -#define MODULE_ARCH_VERMAGIC "riscv" - struct module; unsigned long module_emit_got_entry(struct module *mod, unsigned long val); unsigned long module_emit_plt_entry(struct module *mod, unsigned long val); diff --git a/arch/riscv/include/asm/vermagic.h b/arch/riscv/include/asm/vermagic.h new file mode 100644 index 000000000000..7b9441a57466 --- /dev/null +++ b/arch/riscv/include/asm/vermagic.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2017 Andes Technology Corporation */ + +#ifndef _ASM_VERMAGIC_H +#define _ASM_VERMAGIC_H + +#define MODULE_ARCH_VERMAGIC "riscv" + +#endif /* _ASM_VERMAGIC_H */ diff --git a/arch/sh/include/asm/module.h b/arch/sh/include/asm/module.h index 9f38fb35fe96..337663a028db 100644 --- a/arch/sh/include/asm/module.h +++ b/arch/sh/include/asm/module.h @@ -11,32 +11,4 @@ struct mod_arch_specific { }; #endif -#ifdef CONFIG_CPU_LITTLE_ENDIAN -# ifdef CONFIG_CPU_SH2 -# define MODULE_PROC_FAMILY "SH2LE " -# elif defined CONFIG_CPU_SH3 -# define MODULE_PROC_FAMILY "SH3LE " -# elif defined CONFIG_CPU_SH4 -# define MODULE_PROC_FAMILY "SH4LE " -# elif defined CONFIG_CPU_SH5 -# define MODULE_PROC_FAMILY "SH5LE " -# else -# error unknown processor family -# endif -#else -# ifdef CONFIG_CPU_SH2 -# define MODULE_PROC_FAMILY "SH2BE " -# elif defined CONFIG_CPU_SH3 -# define MODULE_PROC_FAMILY "SH3BE " -# elif defined CONFIG_CPU_SH4 -# define MODULE_PROC_FAMILY "SH4BE " -# elif defined CONFIG_CPU_SH5 -# define MODULE_PROC_FAMILY "SH5BE " -# else -# error unknown processor family -# endif -#endif - -#define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY - #endif /* _ASM_SH_MODULE_H */ diff --git a/arch/sh/include/asm/vermagic.h b/arch/sh/include/asm/vermagic.h new file mode 100644 index 000000000000..13d8eaa9188e --- /dev/null +++ b/arch/sh/include/asm/vermagic.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ASM_VERMAGIC_H +#define _ASM_VERMAGIC_H + +#ifdef CONFIG_CPU_LITTLE_ENDIAN +# ifdef CONFIG_CPU_SH2 +# define MODULE_PROC_FAMILY "SH2LE " +# elif defined CONFIG_CPU_SH3 +# define MODULE_PROC_FAMILY "SH3LE " +# elif defined CONFIG_CPU_SH4 +# define MODULE_PROC_FAMILY "SH4LE " +# elif defined CONFIG_CPU_SH5 +# define MODULE_PROC_FAMILY "SH5LE " +# else +# error unknown processor family +# endif +#else +# ifdef CONFIG_CPU_SH2 +# define MODULE_PROC_FAMILY "SH2BE " +# elif defined CONFIG_CPU_SH3 +# define MODULE_PROC_FAMILY "SH3BE " +# elif defined CONFIG_CPU_SH4 +# define MODULE_PROC_FAMILY "SH4BE " +# elif defined CONFIG_CPU_SH5 +# define MODULE_PROC_FAMILY "SH5BE " +# else +# error unknown processor family +# endif +#endif + +#define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY + +#endif /* _ASM_VERMAGIC_H */ diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h index c215d2762488..e988bac0a4a1 100644 --- a/arch/x86/include/asm/module.h +++ b/arch/x86/include/asm/module.h @@ -13,64 +13,4 @@ struct mod_arch_specific { #endif }; -#ifdef CONFIG_X86_64 -/* X86_64 does not define MODULE_PROC_FAMILY */ -#elif defined CONFIG_M486SX -#define MODULE_PROC_FAMILY "486SX " -#elif defined CONFIG_M486 -#define MODULE_PROC_FAMILY "486 " -#elif defined CONFIG_M586 -#define MODULE_PROC_FAMILY "586 " -#elif defined CONFIG_M586TSC -#define MODULE_PROC_FAMILY "586TSC " -#elif defined CONFIG_M586MMX -#define MODULE_PROC_FAMILY "586MMX " -#elif defined CONFIG_MCORE2 -#define MODULE_PROC_FAMILY "CORE2 " -#elif defined CONFIG_MATOM -#define MODULE_PROC_FAMILY "ATOM " -#elif defined CONFIG_M686 -#define MODULE_PROC_FAMILY "686 " -#elif defined CONFIG_MPENTIUMII -#define MODULE_PROC_FAMILY "PENTIUMII " -#elif defined CONFIG_MPENTIUMIII -#define MODULE_PROC_FAMILY "PENTIUMIII " -#elif defined CONFIG_MPENTIUMM -#define MODULE_PROC_FAMILY "PENTIUMM " -#elif defined CONFIG_MPENTIUM4 -#define MODULE_PROC_FAMILY "PENTIUM4 " -#elif defined CONFIG_MK6 -#define MODULE_PROC_FAMILY "K6 " -#elif defined CONFIG_MK7 -#define MODULE_PROC_FAMILY "K7 " -#elif defined CONFIG_MK8 -#define MODULE_PROC_FAMILY "K8 " -#elif defined CONFIG_MELAN -#define MODULE_PROC_FAMILY "ELAN " -#elif defined CONFIG_MCRUSOE -#define MODULE_PROC_FAMILY "CRUSOE " -#elif defined CONFIG_MEFFICEON -#define MODULE_PROC_FAMILY "EFFICEON " -#elif defined CONFIG_MWINCHIPC6 -#define MODULE_PROC_FAMILY "WINCHIPC6 " -#elif defined CONFIG_MWINCHIP3D -#define MODULE_PROC_FAMILY "WINCHIP3D " -#elif defined CONFIG_MCYRIXIII -#define MODULE_PROC_FAMILY "CYRIXIII " -#elif defined CONFIG_MVIAC3_2 -#define MODULE_PROC_FAMILY "VIAC3-2 " -#elif defined CONFIG_MVIAC7 -#define MODULE_PROC_FAMILY "VIAC7 " -#elif defined CONFIG_MGEODEGX1 -#define MODULE_PROC_FAMILY "GEODEGX1 " -#elif defined CONFIG_MGEODE_LX -#define MODULE_PROC_FAMILY "GEODE " -#else -#error unknown processor family -#endif - -#ifdef CONFIG_X86_32 -# define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY -#endif - #endif /* _ASM_X86_MODULE_H */ diff --git a/arch/x86/include/asm/vermagic.h b/arch/x86/include/asm/vermagic.h new file mode 100644 index 000000000000..75884d2cdec3 --- /dev/null +++ b/arch/x86/include/asm/vermagic.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ASM_VERMAGIC_H +#define _ASM_VERMAGIC_H + +#ifdef CONFIG_X86_64 +/* X86_64 does not define MODULE_PROC_FAMILY */ +#elif defined CONFIG_M486SX +#define MODULE_PROC_FAMILY "486SX " +#elif defined CONFIG_M486 +#define MODULE_PROC_FAMILY "486 " +#elif defined CONFIG_M586 +#define MODULE_PROC_FAMILY "586 " +#elif defined CONFIG_M586TSC +#define MODULE_PROC_FAMILY "586TSC " +#elif defined CONFIG_M586MMX +#define MODULE_PROC_FAMILY "586MMX " +#elif defined CONFIG_MCORE2 +#define MODULE_PROC_FAMILY "CORE2 " +#elif defined CONFIG_MATOM +#define MODULE_PROC_FAMILY "ATOM " +#elif defined CONFIG_M686 +#define MODULE_PROC_FAMILY "686 " +#elif defined CONFIG_MPENTIUMII +#define MODULE_PROC_FAMILY "PENTIUMII " +#elif defined CONFIG_MPENTIUMIII +#define MODULE_PROC_FAMILY "PENTIUMIII " +#elif defined CONFIG_MPENTIUMM +#define MODULE_PROC_FAMILY "PENTIUMM " +#elif defined CONFIG_MPENTIUM4 +#define MODULE_PROC_FAMILY "PENTIUM4 " +#elif defined CONFIG_MK6 +#define MODULE_PROC_FAMILY "K6 " +#elif defined CONFIG_MK7 +#define MODULE_PROC_FAMILY "K7 " +#elif defined CONFIG_MK8 +#define MODULE_PROC_FAMILY "K8 " +#elif defined CONFIG_MELAN +#define MODULE_PROC_FAMILY "ELAN " +#elif defined CONFIG_MCRUSOE +#define MODULE_PROC_FAMILY "CRUSOE " +#elif defined CONFIG_MEFFICEON +#define MODULE_PROC_FAMILY "EFFICEON " +#elif defined CONFIG_MWINCHIPC6 +#define MODULE_PROC_FAMILY "WINCHIPC6 " +#elif defined CONFIG_MWINCHIP3D +#define MODULE_PROC_FAMILY "WINCHIP3D " +#elif defined CONFIG_MCYRIXIII +#define MODULE_PROC_FAMILY "CYRIXIII " +#elif defined CONFIG_MVIAC3_2 +#define MODULE_PROC_FAMILY "VIAC3-2 " +#elif defined CONFIG_MVIAC7 +#define MODULE_PROC_FAMILY "VIAC7 " +#elif defined CONFIG_MGEODEGX1 +#define MODULE_PROC_FAMILY "GEODEGX1 " +#elif defined CONFIG_MGEODE_LX +#define MODULE_PROC_FAMILY "GEODE " +#else +#error unknown processor family +#endif + +#ifdef CONFIG_X86_32 +# define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY +#else +# define MODULE_ARCH_VERMAGIC "" +#endif + +#endif /* _ASM_VERMAGIC_H */ diff --git a/arch/xtensa/include/asm/module.h b/arch/xtensa/include/asm/module.h deleted file mode 100644 index 488b40c6f9b9..000000000000 --- a/arch/xtensa/include/asm/module.h +++ /dev/null @@ -1,20 +0,0 @@ -/* - * include/asm-xtensa/module.h - * - * This file contains the module code specific to the Xtensa architecture. - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2001 - 2005 Tensilica Inc. - */ - -#ifndef _XTENSA_MODULE_H -#define _XTENSA_MODULE_H - -#define MODULE_ARCH_VERMAGIC "xtensa-" __stringify(XCHAL_CORE_ID) " " - -#include - -#endif /* _XTENSA_MODULE_H */ diff --git a/arch/xtensa/include/asm/vermagic.h b/arch/xtensa/include/asm/vermagic.h new file mode 100644 index 000000000000..6d9c670e4ba9 --- /dev/null +++ b/arch/xtensa/include/asm/vermagic.h @@ -0,0 +1,17 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2001 - 2005 Tensilica Inc. + */ + +#ifndef _ASM_VERMAGIC_H +#define _ASM_VERMAGIC_H + +#include +#include + +#define MODULE_ARCH_VERMAGIC "xtensa-" __stringify(XCHAL_CORE_ID) " " + +#endif /* _ASM_VERMAGIC_H */ diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild index 36341dfded70..44ec80e70518 100644 --- a/include/asm-generic/Kbuild +++ b/include/asm-generic/Kbuild @@ -56,6 +56,7 @@ mandatory-y += topology.h mandatory-y += trace_clock.h mandatory-y += uaccess.h mandatory-y += unaligned.h +mandatory-y += vermagic.h mandatory-y += vga.h mandatory-y += word-at-a-time.h mandatory-y += xor.h diff --git a/include/asm-generic/vermagic.h b/include/asm-generic/vermagic.h new file mode 100644 index 000000000000..084274a1219e --- /dev/null +++ b/include/asm-generic/vermagic.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _ASM_GENERIC_VERMAGIC_H +#define _ASM_GENERIC_VERMAGIC_H + +#define MODULE_ARCH_VERMAGIC "" + +#endif /* _ASM_GENERIC_VERMAGIC_H */ diff --git a/include/linux/vermagic.h b/include/linux/vermagic.h index 9aced11e9000..dc236577b92f 100644 --- a/include/linux/vermagic.h +++ b/include/linux/vermagic.h @@ -1,5 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_VERMAGIC_H +#define _LINUX_VERMAGIC_H + #include +#include /* Simply sanity version stamp for modules. */ #ifdef CONFIG_SMP @@ -24,9 +28,6 @@ #else #define MODULE_VERMAGIC_MODVERSIONS "" #endif -#ifndef MODULE_ARCH_VERMAGIC -#define MODULE_ARCH_VERMAGIC "" -#endif #ifdef RANDSTRUCT_PLUGIN #include #define MODULE_RANDSTRUCT_PLUGIN "RANDSTRUCT_PLUGIN_" RANDSTRUCT_HASHED_SEED @@ -41,3 +42,4 @@ MODULE_ARCH_VERMAGIC \ MODULE_RANDSTRUCT_PLUGIN +#endif /* _LINUX_VERMAGIC_H */ -- cgit v1.2.3-71-gd317 From a2806ef77ff9a965a70d6c194bb3a4801bbdb07d Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 13 Apr 2020 18:32:46 +0300 Subject: tracing: Remove DECLARE_TRACE_NOARGS This macro was intentionally broken so that the kernel code is not poluted with such noargs macro used simply as markers. This use case can be satisfied by using dummy no inline functions. Just remove it. Link: http://lkml.kernel.org/r/20200413153246.8511-1-nborisov@suse.com Signed-off-by: Nikolay Borisov Signed-off-by: Steven Rostedt (VMware) --- include/linux/tracepoint.h | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 1fb11daa5c53..a1fecf311621 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -156,8 +156,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) * Note, the proto and args passed in includes "__data" as the first parameter. * The reason for this is to handle the "void" prototype. If a tracepoint * has a "void" prototype, then it is invalid to declare a function - * as "(void *, void)". The DECLARE_TRACE_NOARGS() will pass in just - * "void *data", where as the DECLARE_TRACE() will pass in "void *data, proto". + * as "(void *, void)". */ #define __DO_TRACE(tp, proto, args, cond, rcuidle) \ do { \ @@ -373,25 +372,6 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) # define __tracepoint_string #endif -/* - * The need for the DECLARE_TRACE_NOARGS() is to handle the prototype - * (void). "void" is a special value in a function prototype and can - * not be combined with other arguments. Since the DECLARE_TRACE() - * macro adds a data element at the beginning of the prototype, - * we need a way to differentiate "(void *data, proto)" from - * "(void *data, void)". The second prototype is invalid. - * - * DECLARE_TRACE_NOARGS() passes "void" as the tracepoint prototype - * and "void *__data" as the callback prototype. - * - * DECLARE_TRACE() passes "proto" as the tracepoint protoype and - * "void *__data, proto" as the callback prototype. - */ -#define DECLARE_TRACE_NOARGS(name) \ - __DECLARE_TRACE(name, void, , \ - cpu_online(raw_smp_processor_id()), \ - void *__data, __data) - #define DECLARE_TRACE(name, proto, args) \ __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), \ cpu_online(raw_smp_processor_id()), \ -- cgit v1.2.3-71-gd317 From 6cb5f3ea4654faf8c28b901266e960b1a4787b26 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 23 Apr 2020 11:13:49 +0200 Subject: mac80211: populate debugfs only after cfg80211 init When fixing the initialization race, we neglected to account for the fact that debugfs is initialized in wiphy_register(), and some debugfs things went missing (or rather were rerooted to the global debugfs root). Fix this by adding debugfs entries only after wiphy_register(). This requires some changes in the rate control code since it currently adds debugfs at alloc time, which can no longer be done after the reordering. Reported-by: Jouni Malinen Reported-by: kernel test robot Reported-by: Hauke Mehrtens Reported-by: Felix Fietkau Cc: stable@vger.kernel.org Fixes: 52e04b4ce5d0 ("mac80211: fix race in ieee80211_register_hw()") Signed-off-by: Johannes Berg Acked-by: Sumit Garg Link: https://lore.kernel.org/r/20200423111344.0e00d3346f12.Iadc76a03a55093d94391fc672e996a458702875d@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlegacy/3945-rs.c | 2 +- drivers/net/wireless/intel/iwlegacy/4965-rs.c | 2 +- drivers/net/wireless/intel/iwlwifi/dvm/rs.c | 2 +- drivers/net/wireless/intel/iwlwifi/mvm/rs.c | 2 +- drivers/net/wireless/realtek/rtlwifi/rc.c | 2 +- include/net/mac80211.h | 4 +++- net/mac80211/main.c | 5 +++-- net/mac80211/rate.c | 15 ++++----------- net/mac80211/rate.h | 23 +++++++++++++++++++++++ net/mac80211/rc80211_minstrel_ht.c | 19 +++++++++++++------ 10 files changed, 51 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/intel/iwlegacy/3945-rs.c b/drivers/net/wireless/intel/iwlegacy/3945-rs.c index 6209f85a71dd..0af9e997c9f6 100644 --- a/drivers/net/wireless/intel/iwlegacy/3945-rs.c +++ b/drivers/net/wireless/intel/iwlegacy/3945-rs.c @@ -374,7 +374,7 @@ out: } static void * -il3945_rs_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir) +il3945_rs_alloc(struct ieee80211_hw *hw) { return hw->priv; } diff --git a/drivers/net/wireless/intel/iwlegacy/4965-rs.c b/drivers/net/wireless/intel/iwlegacy/4965-rs.c index 7c6e2c863497..0a02d8aca320 100644 --- a/drivers/net/wireless/intel/iwlegacy/4965-rs.c +++ b/drivers/net/wireless/intel/iwlegacy/4965-rs.c @@ -2474,7 +2474,7 @@ il4965_rs_fill_link_cmd(struct il_priv *il, struct il_lq_sta *lq_sta, } static void * -il4965_rs_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir) +il4965_rs_alloc(struct ieee80211_hw *hw) { return hw->priv; } diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/rs.c b/drivers/net/wireless/intel/iwlwifi/dvm/rs.c index 226165db7dfd..dac809df7f1d 100644 --- a/drivers/net/wireless/intel/iwlwifi/dvm/rs.c +++ b/drivers/net/wireless/intel/iwlwifi/dvm/rs.c @@ -3019,7 +3019,7 @@ static void rs_fill_link_cmd(struct iwl_priv *priv, cpu_to_le16(priv->lib->bt_params->agg_time_limit); } -static void *rs_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir) +static void *rs_alloc(struct ieee80211_hw *hw) { return hw->priv; } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c index c1aba2bf73cf..00e7fdbaeb7f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c @@ -3665,7 +3665,7 @@ static void rs_fill_lq_cmd(struct iwl_mvm *mvm, cpu_to_le16(iwl_mvm_coex_agg_time_limit(mvm, sta)); } -static void *rs_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir) +static void *rs_alloc(struct ieee80211_hw *hw) { return hw->priv; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rc.c b/drivers/net/wireless/realtek/rtlwifi/rc.c index 0c7d74902d33..4b5ea0ec9109 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rc.c +++ b/drivers/net/wireless/realtek/rtlwifi/rc.c @@ -261,7 +261,7 @@ static void rtl_rate_update(void *ppriv, { } -static void *rtl_rate_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir) +static void *rtl_rate_alloc(struct ieee80211_hw *hw) { struct rtl_priv *rtlpriv = rtl_priv(hw); return rtlpriv; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index b6b4de0e4b5e..97fec4d310ac 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -6007,7 +6007,9 @@ enum rate_control_capabilities { struct rate_control_ops { unsigned long capa; const char *name; - void *(*alloc)(struct ieee80211_hw *hw, struct dentry *debugfsdir); + void *(*alloc)(struct ieee80211_hw *hw); + void (*add_debugfs)(struct ieee80211_hw *hw, void *priv, + struct dentry *debugfsdir); void (*free)(void *priv); void *(*alloc_sta)(void *priv, struct ieee80211_sta *sta, gfp_t gfp); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 0e9ad60fb2b3..6423173bb87e 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1183,8 +1183,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) local->tx_headroom = max_t(unsigned int , local->hw.extra_tx_headroom, IEEE80211_TX_STATUS_HEADROOM); - debugfs_hw_add(local); - /* * if the driver doesn't specify a max listen interval we * use 5 which should be a safe default @@ -1273,6 +1271,9 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) if (result < 0) goto fail_wiphy_register; + debugfs_hw_add(local); + rate_control_add_debugfs(local); + rtnl_lock(); /* add one default STA interface if supported */ diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index a1e9fc7878aa..b051f125d3af 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -214,17 +214,16 @@ static ssize_t rcname_read(struct file *file, char __user *userbuf, ref->ops->name, len); } -static const struct file_operations rcname_ops = { +const struct file_operations rcname_ops = { .read = rcname_read, .open = simple_open, .llseek = default_llseek, }; #endif -static struct rate_control_ref *rate_control_alloc(const char *name, - struct ieee80211_local *local) +static struct rate_control_ref * +rate_control_alloc(const char *name, struct ieee80211_local *local) { - struct dentry *debugfsdir = NULL; struct rate_control_ref *ref; ref = kmalloc(sizeof(struct rate_control_ref), GFP_KERNEL); @@ -234,13 +233,7 @@ static struct rate_control_ref *rate_control_alloc(const char *name, if (!ref->ops) goto free; -#ifdef CONFIG_MAC80211_DEBUGFS - debugfsdir = debugfs_create_dir("rc", local->hw.wiphy->debugfsdir); - local->debugfs.rcdir = debugfsdir; - debugfs_create_file("name", 0400, debugfsdir, ref, &rcname_ops); -#endif - - ref->priv = ref->ops->alloc(&local->hw, debugfsdir); + ref->priv = ref->ops->alloc(&local->hw); if (!ref->priv) goto free; return ref; diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index 5397c6dad056..79b44d3db171 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -60,6 +60,29 @@ static inline void rate_control_add_sta_debugfs(struct sta_info *sta) #endif } +extern const struct file_operations rcname_ops; + +static inline void rate_control_add_debugfs(struct ieee80211_local *local) +{ +#ifdef CONFIG_MAC80211_DEBUGFS + struct dentry *debugfsdir; + + if (!local->rate_ctrl) + return; + + if (!local->rate_ctrl->ops->add_debugfs) + return; + + debugfsdir = debugfs_create_dir("rc", local->hw.wiphy->debugfsdir); + local->debugfs.rcdir = debugfsdir; + debugfs_create_file("name", 0400, debugfsdir, + local->rate_ctrl, &rcname_ops); + + local->rate_ctrl->ops->add_debugfs(&local->hw, local->rate_ctrl->priv, + debugfsdir); +#endif +} + void ieee80211_check_rate_mask(struct ieee80211_sub_if_data *sdata); /* Get a reference to the rate control algorithm. If `name' is NULL, get the diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 694a31978a04..5dc3e5bc4e64 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -1635,7 +1635,7 @@ minstrel_ht_init_cck_rates(struct minstrel_priv *mp) } static void * -minstrel_ht_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir) +minstrel_ht_alloc(struct ieee80211_hw *hw) { struct minstrel_priv *mp; @@ -1673,7 +1673,17 @@ minstrel_ht_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir) mp->update_interval = HZ / 10; mp->new_avg = true; + minstrel_ht_init_cck_rates(mp); + + return mp; +} + #ifdef CONFIG_MAC80211_DEBUGFS +static void minstrel_ht_add_debugfs(struct ieee80211_hw *hw, void *priv, + struct dentry *debugfsdir) +{ + struct minstrel_priv *mp = priv; + mp->fixed_rate_idx = (u32) -1; debugfs_create_u32("fixed_rate_idx", S_IRUGO | S_IWUGO, debugfsdir, &mp->fixed_rate_idx); @@ -1681,12 +1691,8 @@ minstrel_ht_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir) &mp->sample_switch); debugfs_create_bool("new_avg", S_IRUGO | S_IWUSR, debugfsdir, &mp->new_avg); -#endif - - minstrel_ht_init_cck_rates(mp); - - return mp; } +#endif static void minstrel_ht_free(void *priv) @@ -1725,6 +1731,7 @@ static const struct rate_control_ops mac80211_minstrel_ht = { .alloc = minstrel_ht_alloc, .free = minstrel_ht_free, #ifdef CONFIG_MAC80211_DEBUGFS + .add_debugfs = minstrel_ht_add_debugfs, .add_sta_debugfs = minstrel_ht_add_sta_debugfs, #endif .get_expected_throughput = minstrel_ht_get_expected_throughput, -- cgit v1.2.3-71-gd317 From a33d3147945543f9ded67a052f358a75595f1ecb Mon Sep 17 00:00:00 2001 From: Jakub Wilk Date: Wed, 22 Apr 2020 10:23:24 +0200 Subject: bpf: Fix reStructuredText markup The patch fixes: $ scripts/bpf_helpers_doc.py > bpf-helpers.rst $ rst2man bpf-helpers.rst > bpf-helpers.7 bpf-helpers.rst:1105: (WARNING/2) Inline strong start-string without end-string. Signed-off-by: Jakub Wilk Signed-off-by: Alexei Starovoitov Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20200422082324.2030-1-jwilk@jwilk.net --- include/uapi/linux/bpf.h | 2 +- tools/include/uapi/linux/bpf.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2e29a671d67e..7bbf1b65be10 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1642,7 +1642,7 @@ union bpf_attr { * ifindex, but doesn't require a map to do so. * Return * **XDP_REDIRECT** on success, or the value of the two lower bits - * of the **flags* argument on error. + * of the *flags* argument on error. * * int bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags) * Description diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 2e29a671d67e..7bbf1b65be10 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1642,7 +1642,7 @@ union bpf_attr { * ifindex, but doesn't require a map to do so. * Return * **XDP_REDIRECT** on success, or the value of the two lower bits - * of the **flags* argument on error. + * of the *flags* argument on error. * * int bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags) * Description -- cgit v1.2.3-71-gd317 From a5bff92eaac45bdf6221badf9505c26792fdf99e Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 7 Apr 2020 15:30:02 +0200 Subject: dma-buf: Fix SET_NAME ioctl uapi The uapi is the same on 32 and 64 bit, but the number isn't. Everyone who botched this please re-read: https://www.kernel.org/doc/html/v5.4-preprc-cpu/ioctl/botching-up-ioctls.html Also, the type argument for the ioctl macros is for the type the void __user *arg pointer points at, which in this case would be the variable-sized char[] of a 0 terminated string. So this was botched in more than just the usual ways. Cc: Sumit Semwal Cc: Chenbo Feng Cc: Greg Hackmann Cc: Daniel Vetter Cc: linux-media@vger.kernel.org Cc: linaro-mm-sig@lists.linaro.org Cc: minchan@kernel.org Cc: surenb@google.com Cc: jenhaochen@google.com Cc: Martin Liu Signed-off-by: Daniel Vetter Tested-by: Martin Liu Reviewed-by: Martin Liu Signed-off-by: Sumit Semwal [sumits: updated some checkpatch fixes, corrected author email] Link: https://patchwork.freedesktop.org/patch/msgid/20200407133002.3486387-1-daniel.vetter@ffwll.ch --- drivers/dma-buf/dma-buf.c | 3 ++- include/uapi/linux/dma-buf.h | 6 ++++++ 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index ccc9eda1bc28..de155d41d274 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -388,7 +388,8 @@ static long dma_buf_ioctl(struct file *file, return ret; - case DMA_BUF_SET_NAME: + case DMA_BUF_SET_NAME_A: + case DMA_BUF_SET_NAME_B: return dma_buf_set_name(dmabuf, (const char __user *)arg); default: diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h index dbc7092e04b5..7f30393b92c3 100644 --- a/include/uapi/linux/dma-buf.h +++ b/include/uapi/linux/dma-buf.h @@ -39,6 +39,12 @@ struct dma_buf_sync { #define DMA_BUF_BASE 'b' #define DMA_BUF_IOCTL_SYNC _IOW(DMA_BUF_BASE, 0, struct dma_buf_sync) + +/* 32/64bitness of this uapi was botched in android, there's no difference + * between them in actual uapi, they're just different numbers. + */ #define DMA_BUF_SET_NAME _IOW(DMA_BUF_BASE, 1, const char *) +#define DMA_BUF_SET_NAME_A _IOW(DMA_BUF_BASE, 1, u32) +#define DMA_BUF_SET_NAME_B _IOW(DMA_BUF_BASE, 1, u64) #endif -- cgit v1.2.3-71-gd317 From 0821009445a8261ac4d32a6df4b83938e007c765 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 13 Apr 2020 10:40:12 -0700 Subject: dmaengine: fix channel index enumeration When the channel register code was changed to allow hotplug operations, dynamic indexing wasn't taken into account. When channels are randomly plugged and unplugged out of order, the serial indexing breaks. Convert channel indexing to using IDA tracking in order to allow dynamic assignment. The previous code does not cause any regression bug for existing channel allocation besides idxd driver since the hotplug usage case is only used by idxd at this point. With this change, the chan->idr_ref is also not needed any longer. We can have a device with no channels registered due to hot plug. The channel device release code no longer should attempt to free the dma device id on the last channel release. Fixes: e81274cd6b52 ("dmaengine: add support to dynamic register/unregister of channels") Reported-by: Yixin Zhang Signed-off-by: Dave Jiang Tested-by: Yixin Zhang Link: https://lore.kernel.org/r/158679961260.7674.8485924270472851852.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- drivers/dma/dmaengine.c | 60 ++++++++++++++++++++--------------------------- include/linux/dmaengine.h | 4 ++-- 2 files changed, 28 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 4830ba658ce1..d31076d9ef25 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -232,10 +232,6 @@ static void chan_dev_release(struct device *dev) struct dma_chan_dev *chan_dev; chan_dev = container_of(dev, typeof(*chan_dev), device); - if (atomic_dec_and_test(chan_dev->idr_ref)) { - ida_free(&dma_ida, chan_dev->dev_id); - kfree(chan_dev->idr_ref); - } kfree(chan_dev); } @@ -1043,27 +1039,9 @@ static int get_dma_id(struct dma_device *device) } static int __dma_async_device_channel_register(struct dma_device *device, - struct dma_chan *chan, - int chan_id) + struct dma_chan *chan) { int rc = 0; - int chancnt = device->chancnt; - atomic_t *idr_ref; - struct dma_chan *tchan; - - tchan = list_first_entry_or_null(&device->channels, - struct dma_chan, device_node); - if (!tchan) - return -ENODEV; - - if (tchan->dev) { - idr_ref = tchan->dev->idr_ref; - } else { - idr_ref = kmalloc(sizeof(*idr_ref), GFP_KERNEL); - if (!idr_ref) - return -ENOMEM; - atomic_set(idr_ref, 0); - } chan->local = alloc_percpu(typeof(*chan->local)); if (!chan->local) @@ -1079,29 +1057,36 @@ static int __dma_async_device_channel_register(struct dma_device *device, * When the chan_id is a negative value, we are dynamically adding * the channel. Otherwise we are static enumerating. */ - chan->chan_id = chan_id < 0 ? chancnt : chan_id; + mutex_lock(&device->chan_mutex); + chan->chan_id = ida_alloc(&device->chan_ida, GFP_KERNEL); + mutex_unlock(&device->chan_mutex); + if (chan->chan_id < 0) { + pr_err("%s: unable to alloc ida for chan: %d\n", + __func__, chan->chan_id); + goto err_out; + } + chan->dev->device.class = &dma_devclass; chan->dev->device.parent = device->dev; chan->dev->chan = chan; - chan->dev->idr_ref = idr_ref; chan->dev->dev_id = device->dev_id; - atomic_inc(idr_ref); dev_set_name(&chan->dev->device, "dma%dchan%d", device->dev_id, chan->chan_id); - rc = device_register(&chan->dev->device); if (rc) - goto err_out; + goto err_out_ida; chan->client_count = 0; - device->chancnt = chan->chan_id + 1; + device->chancnt++; return 0; + err_out_ida: + mutex_lock(&device->chan_mutex); + ida_free(&device->chan_ida, chan->chan_id); + mutex_unlock(&device->chan_mutex); err_out: free_percpu(chan->local); kfree(chan->dev); - if (atomic_dec_return(idr_ref) == 0) - kfree(idr_ref); return rc; } @@ -1110,7 +1095,7 @@ int dma_async_device_channel_register(struct dma_device *device, { int rc; - rc = __dma_async_device_channel_register(device, chan, -1); + rc = __dma_async_device_channel_register(device, chan); if (rc < 0) return rc; @@ -1130,6 +1115,9 @@ static void __dma_async_device_channel_unregister(struct dma_device *device, device->chancnt--; chan->dev->chan = NULL; mutex_unlock(&dma_list_mutex); + mutex_lock(&device->chan_mutex); + ida_free(&device->chan_ida, chan->chan_id); + mutex_unlock(&device->chan_mutex); device_unregister(&chan->dev->device); free_percpu(chan->local); } @@ -1152,7 +1140,7 @@ EXPORT_SYMBOL_GPL(dma_async_device_channel_unregister); */ int dma_async_device_register(struct dma_device *device) { - int rc, i = 0; + int rc; struct dma_chan* chan; if (!device) @@ -1257,9 +1245,12 @@ int dma_async_device_register(struct dma_device *device) if (rc != 0) return rc; + mutex_init(&device->chan_mutex); + ida_init(&device->chan_ida); + /* represent channels in sysfs. Probably want devs too */ list_for_each_entry(chan, &device->channels, device_node) { - rc = __dma_async_device_channel_register(device, chan, i++); + rc = __dma_async_device_channel_register(device, chan); if (rc < 0) goto err_out; } @@ -1334,6 +1325,7 @@ void dma_async_device_unregister(struct dma_device *device) */ dma_cap_set(DMA_PRIVATE, device->cap_mask); dma_channel_rebalance(); + ida_free(&dma_ida, device->dev_id); dma_device_put(device); mutex_unlock(&dma_list_mutex); } diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 31e58ec9f741..e1c03339918f 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -341,13 +341,11 @@ struct dma_chan { * @chan: driver channel device * @device: sysfs device * @dev_id: parent dma_device dev_id - * @idr_ref: reference count to gate release of dma_device dev_id */ struct dma_chan_dev { struct dma_chan *chan; struct device device; int dev_id; - atomic_t *idr_ref; }; /** @@ -835,6 +833,8 @@ struct dma_device { int dev_id; struct device *dev; struct module *owner; + struct ida chan_ida; + struct mutex chan_mutex; /* to protect chan_ida */ u32 src_addr_widths; u32 dst_addr_widths; -- cgit v1.2.3-71-gd317 From a78d163978567adc2733465289293dad479d842a Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Fri, 24 Apr 2020 17:08:30 +0200 Subject: vsock/virtio: fix multiple packet delivery to monitoring devices In virtio_transport.c, if the virtqueue is full, the transmitting packet is queued up and it will be sent in the next iteration. This causes the same packet to be delivered multiple times to monitoring devices. We want to continue to deliver packets to monitoring devices before it is put in the virtqueue, to avoid that replies can appear in the packet capture before the transmitted packet. This patch fixes the issue, adding a new flag (tap_delivered) in struct virtio_vsock_pkt, to check if the packet is already delivered to monitoring devices. In vhost/vsock.c, we are splitting packets, so we must set 'tap_delivered' to false when we queue up the same virtio_vsock_pkt to handle the remaining bytes. Signed-off-by: Stefano Garzarella Signed-off-by: David S. Miller --- drivers/vhost/vsock.c | 6 ++++++ include/linux/virtio_vsock.h | 1 + net/vmw_vsock/virtio_transport_common.c | 4 ++++ 3 files changed, 11 insertions(+) (limited to 'include') diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 4f50dcb89ac8..31a98c74f678 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -196,6 +196,12 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, * to send it with the next available buffer. */ if (pkt->off < pkt->len) { + /* We are queueing the same virtio_vsock_pkt to handle + * the remaining bytes, and we want to deliver it + * to monitoring devices in the next iteration. + */ + pkt->tap_delivered = false; + spin_lock_bh(&vsock->send_pkt_list_lock); list_add(&pkt->list, &vsock->send_pkt_list); spin_unlock_bh(&vsock->send_pkt_list_lock); diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index 71c81e0dc8f2..dc636b727179 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -48,6 +48,7 @@ struct virtio_vsock_pkt { u32 len; u32 off; bool reply; + bool tap_delivered; }; struct virtio_vsock_pkt_info { diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 709038a4783e..69efc891885f 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -157,7 +157,11 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque) void virtio_transport_deliver_tap_pkt(struct virtio_vsock_pkt *pkt) { + if (pkt->tap_delivered) + return; + vsock_deliver_tap(virtio_transport_build_skb, pkt); + pkt->tap_delivered = true; } EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt); -- cgit v1.2.3-71-gd317 From 9495b7e92f716ab2bd6814fab5e97ab4a39adfdd Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 22 Apr 2020 12:09:54 +0200 Subject: driver core: platform: Initialize dma_parms for platform devices It's currently the platform driver's responsibility to initialize the pointer, dma_parms, for its corresponding struct device. The benefit with this approach allows us to avoid the initialization and to not waste memory for the struct device_dma_parameters, as this can be decided on a case by case basis. However, it has turned out that this approach is not very practical. Not only does it lead to open coding, but also to real errors. In principle callers of dma_set_max_seg_size() doesn't check the error code, but just assumes it succeeds. For these reasons, let's do the initialization from the common platform bus at the device registration point. This also follows the way the PCI devices are being managed, see pci_device_add(). Suggested-by: Christoph Hellwig Cc: Tested-by: Haibo Chen Reviewed-by: Arnd Bergmann Signed-off-by: Ulf Hansson Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20200422100954.31211-1-ulf.hansson@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/base/platform.c | 2 ++ include/linux/platform_device.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 5255550b7c34..b27d0f6c18c9 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -380,6 +380,8 @@ struct platform_object { */ static void setup_pdev_dma_masks(struct platform_device *pdev) { + pdev->dev.dma_parms = &pdev->dma_parms; + if (!pdev->dev.coherent_dma_mask) pdev->dev.coherent_dma_mask = DMA_BIT_MASK(32); if (!pdev->dev.dma_mask) { diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index bdc35753ef7c..77a2aada106d 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -25,6 +25,7 @@ struct platform_device { bool id_auto; struct device dev; u64 platform_dma_mask; + struct device_dma_parameters dma_parms; u32 num_resources; struct resource *resource; -- cgit v1.2.3-71-gd317 From f458488425f1cc9a396aa1d09bb00c48783936da Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 22 Apr 2020 12:10:13 +0200 Subject: amba: Initialize dma_parms for amba devices It's currently the amba driver's responsibility to initialize the pointer, dma_parms, for its corresponding struct device. The benefit with this approach allows us to avoid the initialization and to not waste memory for the struct device_dma_parameters, as this can be decided on a case by case basis. However, it has turned out that this approach is not very practical. Not only does it lead to open coding, but also to real errors. In principle callers of dma_set_max_seg_size() doesn't check the error code, but just assumes it succeeds. For these reasons, let's do the initialization from the common amba bus at the device registration point. This also follows the way the PCI devices are being managed, see pci_device_add(). Suggested-by: Christoph Hellwig Cc: Russell King Cc: Tested-by: Haibo Chen Reviewed-by: Arnd Bergmann Signed-off-by: Ulf Hansson Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20200422101013.31267-1-ulf.hansson@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/amba/bus.c | 1 + include/linux/amba/bus.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c index fe1523664816..8558b629880b 100644 --- a/drivers/amba/bus.c +++ b/drivers/amba/bus.c @@ -645,6 +645,7 @@ static void amba_device_initialize(struct amba_device *dev, const char *name) dev->dev.release = amba_device_release; dev->dev.bus = &amba_bustype; dev->dev.dma_mask = &dev->dev.coherent_dma_mask; + dev->dev.dma_parms = &dev->dma_parms; dev->res.name = dev_name(&dev->dev); } diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h index 26f0ecf401ea..0bbfd647f5c6 100644 --- a/include/linux/amba/bus.h +++ b/include/linux/amba/bus.h @@ -65,6 +65,7 @@ struct amba_device { struct device dev; struct resource res; struct clk *pclk; + struct device_dma_parameters dma_parms; unsigned int periphid; unsigned int cid; struct amba_cs_uci_id uci; -- cgit v1.2.3-71-gd317 From c938628c4478bd193bf608cf1e91a3390950a15d Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Fri, 17 Apr 2020 20:50:30 -0400 Subject: drm/amdgpu: add tiling flags from Mesa MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DCC_INDEPENDENT_128B is needed for displayble DCC on gfx10. SCANOUT is not needed by the kernel, but Mesa uses it. Signed-off-by: Marek Olšák Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 65f69723cbdc..d28b4ce744d5 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -346,6 +346,10 @@ struct drm_amdgpu_gem_userptr { #define AMDGPU_TILING_DCC_PITCH_MAX_MASK 0x3FFF #define AMDGPU_TILING_DCC_INDEPENDENT_64B_SHIFT 43 #define AMDGPU_TILING_DCC_INDEPENDENT_64B_MASK 0x1 +#define AMDGPU_TILING_DCC_INDEPENDENT_128B_SHIFT 44 +#define AMDGPU_TILING_DCC_INDEPENDENT_128B_MASK 0x1 +#define AMDGPU_TILING_SCANOUT_SHIFT 63 +#define AMDGPU_TILING_SCANOUT_MASK 0x1 /* Set/Get helpers for tiling flags. */ #define AMDGPU_TILING_SET(field, value) \ -- cgit v1.2.3-71-gd317 From 7c4310ff56422ea43418305d22bbc5fe19150ec4 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 3 Apr 2020 14:33:41 +1100 Subject: SUNRPC: defer slow parts of rpc_free_client() to a workqueue. The rpciod workqueue is on the write-out path for freeing dirty memory, so it is important that it never block waiting for memory to be allocated - this can lead to a deadlock. rpc_execute() - which is often called by an rpciod work item - calls rcp_task_release_client() which can lead to rpc_free_client(). rpc_free_client() makes two calls which could potentially block wating for memory allocation. rpc_clnt_debugfs_unregister() calls into debugfs and will block while any of the debugfs files are being accessed. In particular it can block while any of the 'open' methods are being called and all of these use malloc for one thing or another. So this can deadlock if the memory allocation waits for NFS to complete some writes via rpciod. rpc_clnt_remove_pipedir() can take the inode_lock() and while it isn't obvious that memory allocations can happen while the lock it held, it is safer to assume they might and to not let rpciod call rpc_clnt_remove_pipedir(). So this patch moves these two calls (together with the final kfree() and rpciod_down()) into a work-item to be run from the system work-queue. rpciod can continue its important work, and the final stages of the free can happen whenever they happen. I have seen this deadlock on a 4.12 based kernel where debugfs used synchronize_srcu() when removing objects. synchronize_srcu() requires a workqueue and there were no free workther threads and none could be allocated. While debugsfs no longer uses SRCU, I believe the deadlock is still possible. Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 8 +++++++- net/sunrpc/clnt.c | 21 +++++++++++++++++---- 2 files changed, 24 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index ca7e108248e2..7bd124e06b36 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -71,7 +71,13 @@ struct rpc_clnt { #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) struct dentry *cl_debugfs; /* debugfs directory */ #endif - struct rpc_xprt_iter cl_xpi; + /* cl_work is only needed after cl_xpi is no longer used, + * and that are of similar size + */ + union { + struct rpc_xprt_iter cl_xpi; + struct work_struct cl_work; + }; const struct cred *cl_cred; }; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 05404bb05607..8350d3a2e9a7 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -880,6 +880,20 @@ EXPORT_SYMBOL_GPL(rpc_shutdown_client); /* * Free an RPC client */ +static void rpc_free_client_work(struct work_struct *work) +{ + struct rpc_clnt *clnt = container_of(work, struct rpc_clnt, cl_work); + + /* These might block on processes that might allocate memory, + * so they cannot be called in rpciod, so they are handled separately + * here. + */ + rpc_clnt_debugfs_unregister(clnt); + rpc_clnt_remove_pipedir(clnt); + + kfree(clnt); + rpciod_down(); +} static struct rpc_clnt * rpc_free_client(struct rpc_clnt *clnt) { @@ -890,17 +904,16 @@ rpc_free_client(struct rpc_clnt *clnt) rcu_dereference(clnt->cl_xprt)->servername); if (clnt->cl_parent != clnt) parent = clnt->cl_parent; - rpc_clnt_debugfs_unregister(clnt); - rpc_clnt_remove_pipedir(clnt); rpc_unregister_client(clnt); rpc_free_iostats(clnt->cl_metrics); clnt->cl_metrics = NULL; xprt_put(rcu_dereference_raw(clnt->cl_xprt)); xprt_iter_destroy(&clnt->cl_xpi); - rpciod_down(); put_cred(clnt->cl_cred); rpc_free_clid(clnt); - kfree(clnt); + + INIT_WORK(&clnt->cl_work, rpc_free_client_work); + schedule_work(&clnt->cl_work); return parent; } -- cgit v1.2.3-71-gd317 From dff58530c4ca8ce7ee5a74db431c6e35362cf682 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Fri, 24 Apr 2020 17:45:50 -0400 Subject: NFSv4.1: fix handling of backchannel binding in BIND_CONN_TO_SESSION Currently, if the client sends BIND_CONN_TO_SESSION with NFS4_CDFC4_FORE_OR_BOTH but only gets NFS4_CDFS4_FORE back it ignores that it wasn't able to enable a backchannel. To make sure, the client sends BIND_CONN_TO_SESSION as the first operation on the connections (ie., no other session compounds haven't been sent before), and if the client's request to bind the backchannel is not satisfied, then reset the connection and retry. Cc: stable@vger.kernel.org Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 8 ++++++++ include/linux/nfs_xdr.h | 2 ++ include/linux/sunrpc/clnt.h | 5 +++++ 3 files changed, 15 insertions(+) (limited to 'include') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1c710a7834c2..a0c1e653a935 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7891,6 +7891,7 @@ static void nfs4_bind_one_conn_to_session_done(struct rpc_task *task, void *calldata) { struct nfs41_bind_conn_to_session_args *args = task->tk_msg.rpc_argp; + struct nfs41_bind_conn_to_session_res *res = task->tk_msg.rpc_resp; struct nfs_client *clp = args->client; switch (task->tk_status) { @@ -7899,6 +7900,12 @@ nfs4_bind_one_conn_to_session_done(struct rpc_task *task, void *calldata) nfs4_schedule_session_recovery(clp->cl_session, task->tk_status); } + if (args->dir == NFS4_CDFC4_FORE_OR_BOTH && + res->dir != NFS4_CDFS4_BOTH) { + rpc_task_close_connection(task); + if (args->retries++ < MAX_BIND_CONN_TO_SESSION_RETRIES) + rpc_restart_call(task); + } } static const struct rpc_call_ops nfs4_bind_one_conn_to_session_ops = { @@ -7921,6 +7928,7 @@ int nfs4_proc_bind_one_conn_to_session(struct rpc_clnt *clnt, struct nfs41_bind_conn_to_session_args args = { .client = clp, .dir = NFS4_CDFC4_FORE_OR_BOTH, + .retries = 0, }; struct nfs41_bind_conn_to_session_res res; struct rpc_message msg = { diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 440230488025..e5f3e7d8d3d5 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1317,11 +1317,13 @@ struct nfs41_impl_id { struct nfstime4 date; }; +#define MAX_BIND_CONN_TO_SESSION_RETRIES 3 struct nfs41_bind_conn_to_session_args { struct nfs_client *client; struct nfs4_sessionid sessionid; u32 dir; bool use_conn_in_rdma_mode; + int retries; }; struct nfs41_bind_conn_to_session_res { diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 7bd124e06b36..02e7a5863d28 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -242,4 +242,9 @@ static inline int rpc_reply_expected(struct rpc_task *task) (task->tk_msg.rpc_proc->p_decode != NULL); } +static inline void rpc_task_close_connection(struct rpc_task *task) +{ + if (task->tk_xprt) + xprt_force_disconnect(task->tk_xprt); +} #endif /* _LINUX_SUNRPC_CLNT_H */ -- cgit v1.2.3-71-gd317 From b31d1d2b1c3a8452f425b09ebd374ecd3ddd5179 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Mon, 27 Apr 2020 15:59:02 -0700 Subject: platform/chrome: cros_ec_sensorhub: Allocate sensorhub resource before claiming sensors Allocate callbacks array before enumerating the sensors: The probe routine for these sensors (for instance cros_ec_sensors_probe) can be called within the sensorhub probe routine (cros_ec_sensors_probe()) Fixes: 145d59baff594 ("platform/chrome: cros_ec_sensorhub: Add FIFO support") Signed-off-by: Gwendal Grignou Reported-by: Douglas Anderson Tested-by: Douglas Anderson Signed-off-by: Enric Balletbo i Serra --- drivers/platform/chrome/cros_ec_sensorhub.c | 80 ++++++++++++++---------- drivers/platform/chrome/cros_ec_sensorhub_ring.c | 73 +++++++++++++-------- include/linux/platform_data/cros_ec_sensorhub.h | 1 + 3 files changed, 93 insertions(+), 61 deletions(-) (limited to 'include') diff --git a/drivers/platform/chrome/cros_ec_sensorhub.c b/drivers/platform/chrome/cros_ec_sensorhub.c index b7f2c00db5e1..9c4af76a9956 100644 --- a/drivers/platform/chrome/cros_ec_sensorhub.c +++ b/drivers/platform/chrome/cros_ec_sensorhub.c @@ -52,28 +52,15 @@ static int cros_ec_sensorhub_register(struct device *dev, int sensor_type[MOTIONSENSE_TYPE_MAX] = { 0 }; struct cros_ec_command *msg = sensorhub->msg; struct cros_ec_dev *ec = sensorhub->ec; - int ret, i, sensor_num; + int ret, i; char *name; - sensor_num = cros_ec_get_sensor_count(ec); - if (sensor_num < 0) { - dev_err(dev, - "Unable to retrieve sensor information (err:%d)\n", - sensor_num); - return sensor_num; - } - - sensorhub->sensor_num = sensor_num; - if (sensor_num == 0) { - dev_err(dev, "Zero sensors reported.\n"); - return -EINVAL; - } msg->version = 1; msg->insize = sizeof(struct ec_response_motion_sense); msg->outsize = sizeof(struct ec_params_motion_sense); - for (i = 0; i < sensor_num; i++) { + for (i = 0; i < sensorhub->sensor_num; i++) { sensorhub->params->cmd = MOTIONSENSE_CMD_INFO; sensorhub->params->info.sensor_num = i; @@ -140,8 +127,7 @@ static int cros_ec_sensorhub_probe(struct platform_device *pdev) struct cros_ec_dev *ec = dev_get_drvdata(dev->parent); struct cros_ec_sensorhub *data; struct cros_ec_command *msg; - int ret; - int i; + int ret, i, sensor_num; msg = devm_kzalloc(dev, sizeof(struct cros_ec_command) + max((u16)sizeof(struct ec_params_motion_sense), @@ -166,10 +152,52 @@ static int cros_ec_sensorhub_probe(struct platform_device *pdev) dev_set_drvdata(dev, data); /* Check whether this EC is a sensor hub. */ - if (cros_ec_check_features(data->ec, EC_FEATURE_MOTION_SENSE)) { + if (cros_ec_check_features(ec, EC_FEATURE_MOTION_SENSE)) { + sensor_num = cros_ec_get_sensor_count(ec); + if (sensor_num < 0) { + dev_err(dev, + "Unable to retrieve sensor information (err:%d)\n", + sensor_num); + return sensor_num; + } + if (sensor_num == 0) { + dev_err(dev, "Zero sensors reported.\n"); + return -EINVAL; + } + data->sensor_num = sensor_num; + + /* + * Prepare the ring handler before enumering the + * sensors. + */ + if (cros_ec_check_features(ec, EC_FEATURE_MOTION_SENSE_FIFO)) { + ret = cros_ec_sensorhub_ring_allocate(data); + if (ret) + return ret; + } + + /* Enumerate the sensors.*/ ret = cros_ec_sensorhub_register(dev, data); if (ret) return ret; + + /* + * When the EC does not have a FIFO, the sensors will query + * their data themselves via sysfs or a software trigger. + */ + if (cros_ec_check_features(ec, EC_FEATURE_MOTION_SENSE_FIFO)) { + ret = cros_ec_sensorhub_ring_add(data); + if (ret) + return ret; + /* + * The msg and its data is not under the control of the + * ring handler. + */ + return devm_add_action_or_reset(dev, + cros_ec_sensorhub_ring_remove, + data); + } + } else { /* * If the device has sensors but does not claim to @@ -184,22 +212,6 @@ static int cros_ec_sensorhub_probe(struct platform_device *pdev) } } - /* - * If the EC does not have a FIFO, the sensors will query their data - * themselves via sysfs or a software trigger. - */ - if (cros_ec_check_features(ec, EC_FEATURE_MOTION_SENSE_FIFO)) { - ret = cros_ec_sensorhub_ring_add(data); - if (ret) - return ret; - /* - * The msg and its data is not under the control of the ring - * handler. - */ - return devm_add_action_or_reset(dev, - cros_ec_sensorhub_ring_remove, - data); - } return 0; } diff --git a/drivers/platform/chrome/cros_ec_sensorhub_ring.c b/drivers/platform/chrome/cros_ec_sensorhub_ring.c index c48e5b38a441..24e48d96ed76 100644 --- a/drivers/platform/chrome/cros_ec_sensorhub_ring.c +++ b/drivers/platform/chrome/cros_ec_sensorhub_ring.c @@ -957,17 +957,15 @@ static int cros_ec_sensorhub_event(struct notifier_block *nb, } /** - * cros_ec_sensorhub_ring_add() - Add the FIFO functionality if the EC - * supports it. + * cros_ec_sensorhub_ring_allocate() - Prepare the FIFO functionality if the EC + * supports it. * * @sensorhub : Sensor Hub object. * * Return: 0 on success. */ -int cros_ec_sensorhub_ring_add(struct cros_ec_sensorhub *sensorhub) +int cros_ec_sensorhub_ring_allocate(struct cros_ec_sensorhub *sensorhub) { - struct cros_ec_dev *ec = sensorhub->ec; - int ret; int fifo_info_length = sizeof(struct ec_response_motion_sense_fifo_info) + sizeof(u16) * sensorhub->sensor_num; @@ -978,6 +976,49 @@ int cros_ec_sensorhub_ring_add(struct cros_ec_sensorhub *sensorhub) if (!sensorhub->fifo_info) return -ENOMEM; + /* + * Allocate the callback area based on the number of sensors. + * Add one for the sensor ring. + */ + sensorhub->push_data = devm_kcalloc(sensorhub->dev, + sensorhub->sensor_num, + sizeof(*sensorhub->push_data), + GFP_KERNEL); + if (!sensorhub->push_data) + return -ENOMEM; + + sensorhub->tight_timestamps = cros_ec_check_features( + sensorhub->ec, + EC_FEATURE_MOTION_SENSE_TIGHT_TIMESTAMPS); + + if (sensorhub->tight_timestamps) { + sensorhub->batch_state = devm_kcalloc(sensorhub->dev, + sensorhub->sensor_num, + sizeof(*sensorhub->batch_state), + GFP_KERNEL); + if (!sensorhub->batch_state) + return -ENOMEM; + } + + return 0; +} + +/** + * cros_ec_sensorhub_ring_add() - Add the FIFO functionality if the EC + * supports it. + * + * @sensorhub : Sensor Hub object. + * + * Return: 0 on success. + */ +int cros_ec_sensorhub_ring_add(struct cros_ec_sensorhub *sensorhub) +{ + struct cros_ec_dev *ec = sensorhub->ec; + int ret; + int fifo_info_length = + sizeof(struct ec_response_motion_sense_fifo_info) + + sizeof(u16) * sensorhub->sensor_num; + /* Retrieve FIFO information */ sensorhub->msg->version = 2; sensorhub->params->cmd = MOTIONSENSE_CMD_FIFO_INFO; @@ -998,31 +1039,9 @@ int cros_ec_sensorhub_ring_add(struct cros_ec_sensorhub *sensorhub) if (!sensorhub->ring) return -ENOMEM; - /* - * Allocate the callback area based on the number of sensors. - */ - sensorhub->push_data = devm_kcalloc( - sensorhub->dev, sensorhub->sensor_num, - sizeof(*sensorhub->push_data), - GFP_KERNEL); - if (!sensorhub->push_data) - return -ENOMEM; - sensorhub->fifo_timestamp[CROS_EC_SENSOR_LAST_TS] = cros_ec_get_time_ns(); - sensorhub->tight_timestamps = cros_ec_check_features( - ec, EC_FEATURE_MOTION_SENSE_TIGHT_TIMESTAMPS); - - if (sensorhub->tight_timestamps) { - sensorhub->batch_state = devm_kcalloc(sensorhub->dev, - sensorhub->sensor_num, - sizeof(*sensorhub->batch_state), - GFP_KERNEL); - if (!sensorhub->batch_state) - return -ENOMEM; - } - /* Register the notifier that will act as a top half interrupt. */ sensorhub->notifier.notifier_call = cros_ec_sensorhub_event; ret = blocking_notifier_chain_register(&ec->ec_dev->event_notifier, diff --git a/include/linux/platform_data/cros_ec_sensorhub.h b/include/linux/platform_data/cros_ec_sensorhub.h index c588be843f61..0ecce6aa69d5 100644 --- a/include/linux/platform_data/cros_ec_sensorhub.h +++ b/include/linux/platform_data/cros_ec_sensorhub.h @@ -185,6 +185,7 @@ int cros_ec_sensorhub_register_push_data(struct cros_ec_sensorhub *sensorhub, void cros_ec_sensorhub_unregister_push_data(struct cros_ec_sensorhub *sensorhub, u8 sensor_num); +int cros_ec_sensorhub_ring_allocate(struct cros_ec_sensorhub *sensorhub); int cros_ec_sensorhub_ring_add(struct cros_ec_sensorhub *sensorhub); void cros_ec_sensorhub_ring_remove(void *arg); int cros_ec_sensorhub_ring_fifo_enable(struct cros_ec_sensorhub *sensorhub, -- cgit v1.2.3-71-gd317 From 6f49c2515e2258f08f2b905c9772dbf729610415 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 7 Apr 2020 21:20:34 -0700 Subject: dma-buf: fix documentation build warnings Fix documentation warnings in dma-buf.[hc]: ../drivers/dma-buf/dma-buf.c:678: warning: Function parameter or member 'importer_ops' not described in 'dma_buf_dynamic_attach' ../drivers/dma-buf/dma-buf.c:678: warning: Function parameter or member 'importer_priv' not described in 'dma_buf_dynamic_attach' ../include/linux/dma-buf.h:339: warning: Incorrect use of kernel-doc format: * @move_notify Signed-off-by: Randy Dunlap Cc: Sumit Semwal Cc: linux-media@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Cc: linaro-mm-sig@lists.linaro.org Signed-off-by: Sumit Semwal Link: https://patchwork.freedesktop.org/patch/msgid/7bcbe6fe-0b4b-87da-d003-b68a26eb4cf0@infradead.org --- drivers/dma-buf/dma-buf.c | 4 ++-- include/linux/dma-buf.h | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index de155d41d274..07df88f2e305 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -656,8 +656,8 @@ EXPORT_SYMBOL_GPL(dma_buf_put); * calls attach() of dma_buf_ops to allow device-specific attach functionality * @dmabuf: [in] buffer to attach device to. * @dev: [in] device to be attached. - * @importer_ops [in] importer operations for the attachment - * @importer_priv [in] importer private pointer for the attachment + * @importer_ops: [in] importer operations for the attachment + * @importer_priv: [in] importer private pointer for the attachment * * Returns struct dma_buf_attachment pointer for this attachment. Attachments * must be cleaned up by calling dma_buf_detach(). diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 1ade486fc2bb..57bcef6f988a 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -329,13 +329,12 @@ struct dma_buf { /** * struct dma_buf_attach_ops - importer operations for an attachment - * @move_notify: [optional] notification that the DMA-buf is moving * * Attachment operations implemented by the importer. */ struct dma_buf_attach_ops { /** - * @move_notify + * @move_notify: [optional] notification that the DMA-buf is moving * * If this callback is provided the framework can avoid pinning the * backing store while mappings exists. -- cgit v1.2.3-71-gd317 From 263e1201a2c324b60b15ecda5de9ebf1e7293e31 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 30 Apr 2020 15:01:51 +0200 Subject: mptcp: consolidate synack processing. Currently the MPTCP code uses 2 hooks to process syn-ack packets, mptcp_rcv_synsent() and the sk_rx_dst_set() callback. We can drop the first, moving the relevant code into the latter, reducing the hooking into the TCP code. This is also needed by the next patch. v1 -> v2: - use local tcp sock ptr instead of casting the sk variable several times - DaveM Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/net/mptcp.h | 1 - net/ipv4/tcp_input.c | 3 --- net/mptcp/options.c | 22 ---------------------- net/mptcp/subflow.c | 27 ++++++++++++++++++++++++--- 4 files changed, 24 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/include/net/mptcp.h b/include/net/mptcp.h index 0e7c5471010b..4ecfa7d5e0c7 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -72,7 +72,6 @@ void mptcp_parse_option(const struct sk_buff *skb, const unsigned char *ptr, int opsize, struct tcp_options_received *opt_rx); bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb, unsigned int *size, struct mptcp_out_options *opts); -void mptcp_rcv_synsent(struct sock *sk); bool mptcp_synack_options(const struct request_sock *req, unsigned int *size, struct mptcp_out_options *opts); bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index bf4ced9273e8..81425542da44 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5990,9 +5990,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); tcp_initialize_rcv_mss(sk); - if (sk_is_mptcp(sk)) - mptcp_rcv_synsent(sk); - /* Remember, tcp_poll() does not lock socket! * Change state from SYN-SENT only after copied_seq * is initialized. */ diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 4a7c467b99db..8fea686a5562 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -344,28 +344,6 @@ bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb, return false; } -void mptcp_rcv_synsent(struct sock *sk) -{ - struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); - struct tcp_sock *tp = tcp_sk(sk); - - if (subflow->request_mptcp && tp->rx_opt.mptcp.mp_capable) { - subflow->mp_capable = 1; - subflow->can_ack = 1; - subflow->remote_key = tp->rx_opt.mptcp.sndr_key; - pr_debug("subflow=%p, remote_key=%llu", subflow, - subflow->remote_key); - } else if (subflow->request_join && tp->rx_opt.mptcp.mp_join) { - subflow->mp_join = 1; - subflow->thmac = tp->rx_opt.mptcp.thmac; - subflow->remote_nonce = tp->rx_opt.mptcp.nonce; - pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow, - subflow->thmac, subflow->remote_nonce); - } else if (subflow->request_mptcp) { - tcp_sk(sk)->is_mptcp = 0; - } -} - /* MP_JOIN client subflow must wait for 4th ack before sending any data: * TCP can't schedule delack timer before the subflow is fully established. * MPTCP uses the delack timer to do 3rd ack retransmissions diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 71256f03707f..84f6408594c9 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -222,6 +222,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct sock *parent = subflow->conn; + struct tcp_sock *tp = tcp_sk(sk); subflow->icsk_af_ops->sk_rx_dst_set(sk, skb); @@ -230,14 +231,35 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) parent->sk_state_change(parent); } - if (subflow->conn_finished || !tcp_sk(sk)->is_mptcp) + /* be sure no special action on any packet other than syn-ack */ + if (subflow->conn_finished) + return; + + subflow->conn_finished = 1; + + if (subflow->request_mptcp && tp->rx_opt.mptcp.mp_capable) { + subflow->mp_capable = 1; + subflow->can_ack = 1; + subflow->remote_key = tp->rx_opt.mptcp.sndr_key; + pr_debug("subflow=%p, remote_key=%llu", subflow, + subflow->remote_key); + } else if (subflow->request_join && tp->rx_opt.mptcp.mp_join) { + subflow->mp_join = 1; + subflow->thmac = tp->rx_opt.mptcp.thmac; + subflow->remote_nonce = tp->rx_opt.mptcp.nonce; + pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow, + subflow->thmac, subflow->remote_nonce); + } else if (subflow->request_mptcp) { + tp->is_mptcp = 0; + } + + if (!tp->is_mptcp) return; if (subflow->mp_capable) { pr_debug("subflow=%p, remote_key=%llu", mptcp_subflow_ctx(sk), subflow->remote_key); mptcp_finish_connect(sk); - subflow->conn_finished = 1; if (skb) { pr_debug("synack seq=%u", TCP_SKB_CB(skb)->seq); @@ -264,7 +286,6 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) if (!mptcp_finish_join(sk)) goto do_reset; - subflow->conn_finished = 1; MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX); } else { do_reset: -- cgit v1.2.3-71-gd317 From cfde141ea3faa30e362bbdb5c28001bbbdb0b8e0 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 30 Apr 2020 15:01:52 +0200 Subject: mptcp: move option parsing into mptcp_incoming_options() The mptcp_options_received structure carries several per packet flags (mp_capable, mp_join, etc.). Such fields must be cleared on each packet, even on dropped ones or packet not carrying any MPTCP options, but the current mptcp code clears them only on TCP option reset. On several races/corner cases we end-up with stray bits in incoming options, leading to WARN_ON splats. e.g.: [ 171.164906] Bad mapping: ssn=32714 map_seq=1 map_data_len=32713 [ 171.165006] WARNING: CPU: 1 PID: 5026 at net/mptcp/subflow.c:533 warn_bad_map (linux-mptcp/net/mptcp/subflow.c:533 linux-mptcp/net/mptcp/subflow.c:531) [ 171.167632] Modules linked in: ip6_vti ip_vti ip_gre ipip sit tunnel4 ip_tunnel geneve ip6_udp_tunnel udp_tunnel macsec macvtap tap ipvlan macvlan 8021q garp mrp xfrm_interface veth netdevsim nlmon dummy team bonding vcan bridge stp llc ip6_gre gre ip6_tunnel tunnel6 tun binfmt_misc intel_rapl_msr intel_rapl_common rfkill kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel joydev virtio_balloon pcspkr i2c_piix4 sunrpc ip_tables xfs libcrc32c crc32c_intel serio_raw virtio_console ata_generic virtio_blk virtio_net net_failover failover ata_piix libata [ 171.199464] CPU: 1 PID: 5026 Comm: repro Not tainted 5.7.0-rc1.mptcp_f227fdf5d388+ #95 [ 171.200886] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-2.fc30 04/01/2014 [ 171.202546] RIP: 0010:warn_bad_map (linux-mptcp/net/mptcp/subflow.c:533 linux-mptcp/net/mptcp/subflow.c:531) [ 171.206537] Code: c1 ea 03 0f b6 14 02 48 89 f8 83 e0 07 83 c0 03 38 d0 7c 04 84 d2 75 1d 8b 55 3c 44 89 e6 48 c7 c7 20 51 13 95 e8 37 8b 22 fe <0f> 0b 48 83 c4 08 5b 5d 41 5c c3 89 4c 24 04 e8 db d6 94 fe 8b 4c [ 171.220473] RSP: 0018:ffffc90000150560 EFLAGS: 00010282 [ 171.221639] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 [ 171.223108] RDX: 0000000000000000 RSI: 0000000000000008 RDI: fffff5200002a09e [ 171.224388] RBP: ffff8880aa6e3c00 R08: 0000000000000001 R09: fffffbfff2ec9955 [ 171.225706] R10: ffffffff9764caa7 R11: fffffbfff2ec9954 R12: 0000000000007fca [ 171.227211] R13: ffff8881066f4a7f R14: ffff8880aa6e3c00 R15: 0000000000000020 [ 171.228460] FS: 00007f8623719740(0000) GS:ffff88810be00000(0000) knlGS:0000000000000000 [ 171.230065] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 171.231303] CR2: 00007ffdab190a50 CR3: 00000001038ea006 CR4: 0000000000160ee0 [ 171.232586] Call Trace: [ 171.233109] [ 171.233531] get_mapping_status (linux-mptcp/net/mptcp/subflow.c:691) [ 171.234371] mptcp_subflow_data_available (linux-mptcp/net/mptcp/subflow.c:736 linux-mptcp/net/mptcp/subflow.c:832) [ 171.238181] subflow_state_change (linux-mptcp/net/mptcp/subflow.c:1085 (discriminator 1)) [ 171.239066] tcp_fin (linux-mptcp/net/ipv4/tcp_input.c:4217) [ 171.240123] tcp_data_queue (linux-mptcp/./include/linux/compiler.h:199 linux-mptcp/net/ipv4/tcp_input.c:4822) [ 171.245083] tcp_rcv_established (linux-mptcp/./include/linux/skbuff.h:1785 linux-mptcp/./include/net/tcp.h:1774 linux-mptcp/./include/net/tcp.h:1847 linux-mptcp/net/ipv4/tcp_input.c:5238 linux-mptcp/net/ipv4/tcp_input.c:5730) [ 171.254089] tcp_v4_rcv (linux-mptcp/./include/linux/spinlock.h:393 linux-mptcp/net/ipv4/tcp_ipv4.c:2009) [ 171.258969] ip_protocol_deliver_rcu (linux-mptcp/net/ipv4/ip_input.c:204 (discriminator 1)) [ 171.260214] ip_local_deliver_finish (linux-mptcp/./include/linux/rcupdate.h:651 linux-mptcp/net/ipv4/ip_input.c:232) [ 171.261389] ip_local_deliver (linux-mptcp/./include/linux/netfilter.h:307 linux-mptcp/./include/linux/netfilter.h:301 linux-mptcp/net/ipv4/ip_input.c:252) [ 171.265884] ip_rcv (linux-mptcp/./include/linux/netfilter.h:307 linux-mptcp/./include/linux/netfilter.h:301 linux-mptcp/net/ipv4/ip_input.c:539) [ 171.273666] process_backlog (linux-mptcp/./include/linux/rcupdate.h:651 linux-mptcp/net/core/dev.c:6135) [ 171.275328] net_rx_action (linux-mptcp/net/core/dev.c:6572 linux-mptcp/net/core/dev.c:6640) [ 171.280472] __do_softirq (linux-mptcp/./arch/x86/include/asm/jump_label.h:25 linux-mptcp/./include/linux/jump_label.h:200 linux-mptcp/./include/trace/events/irq.h:142 linux-mptcp/kernel/softirq.c:293) [ 171.281379] do_softirq_own_stack (linux-mptcp/arch/x86/entry/entry_64.S:1083) [ 171.282358] We could address the issue clearing explicitly the relevant fields in several places - tcp_parse_option, tcp_fast_parse_options, possibly others. Instead we move the MPTCP option parsing into the already existing mptcp ingress hook, so that we need to clear the fields in a single place. This allows us dropping an MPTCP hook from the TCP code and removing the quite large mptcp_options_received from the tcp_sock struct. On the flip side, the MPTCP sockets will traverse the option space twice (in tcp_parse_option() and in mptcp_incoming_options(). That looks acceptable: we already do that for syn and 3rd ack packets, plain TCP socket will benefit from it, and even MPTCP sockets will experience better code locality, reducing the jumps between TCP and MPTCP code. v1 -> v2: - rebased on current '-net' tree Fixes: 648ef4b88673 ("mptcp: Implement MPTCP receive path") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/linux/tcp.h | 51 ---------------------------------------- include/net/mptcp.h | 2 -- net/ipv4/tcp_input.c | 4 ---- net/mptcp/options.c | 66 +++++++++++++++++++++++++++++----------------------- net/mptcp/protocol.c | 6 ++--- net/mptcp/protocol.h | 43 ++++++++++++++++++++++++++++++++-- net/mptcp/subflow.c | 65 +++++++++++++++++++++++++++------------------------ 7 files changed, 115 insertions(+), 122 deletions(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 421c99c12291..4f8159e90ce1 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -78,47 +78,6 @@ struct tcp_sack_block { #define TCP_SACK_SEEN (1 << 0) /*1 = peer is SACK capable, */ #define TCP_DSACK_SEEN (1 << 2) /*1 = DSACK was received from peer*/ -#if IS_ENABLED(CONFIG_MPTCP) -struct mptcp_options_received { - u64 sndr_key; - u64 rcvr_key; - u64 data_ack; - u64 data_seq; - u32 subflow_seq; - u16 data_len; - u16 mp_capable : 1, - mp_join : 1, - dss : 1, - add_addr : 1, - rm_addr : 1, - family : 4, - echo : 1, - backup : 1; - u32 token; - u32 nonce; - u64 thmac; - u8 hmac[20]; - u8 join_id; - u8 use_map:1, - dsn64:1, - data_fin:1, - use_ack:1, - ack64:1, - mpc_map:1, - __unused:2; - u8 addr_id; - u8 rm_id; - union { - struct in_addr addr; -#if IS_ENABLED(CONFIG_MPTCP_IPV6) - struct in6_addr addr6; -#endif - }; - u64 ahmac; - u16 port; -}; -#endif - struct tcp_options_received { /* PAWS/RTTM data */ int ts_recent_stamp;/* Time we stored ts_recent (for aging) */ @@ -136,9 +95,6 @@ struct tcp_options_received { u8 num_sacks; /* Number of SACK blocks */ u16 user_mss; /* mss requested by user in ioctl */ u16 mss_clamp; /* Maximal mss, negotiated at connection setup */ -#if IS_ENABLED(CONFIG_MPTCP) - struct mptcp_options_received mptcp; -#endif }; static inline void tcp_clear_options(struct tcp_options_received *rx_opt) @@ -148,13 +104,6 @@ static inline void tcp_clear_options(struct tcp_options_received *rx_opt) #if IS_ENABLED(CONFIG_SMC) rx_opt->smc_ok = 0; #endif -#if IS_ENABLED(CONFIG_MPTCP) - rx_opt->mptcp.mp_capable = 0; - rx_opt->mptcp.mp_join = 0; - rx_opt->mptcp.add_addr = 0; - rx_opt->mptcp.rm_addr = 0; - rx_opt->mptcp.dss = 0; -#endif } /* This is the max number of SACKS that we'll generate and process. It's safe diff --git a/include/net/mptcp.h b/include/net/mptcp.h index 4ecfa7d5e0c7..3bce2019e4da 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -68,8 +68,6 @@ static inline bool rsk_is_mptcp(const struct request_sock *req) return tcp_rsk(req)->is_mptcp; } -void mptcp_parse_option(const struct sk_buff *skb, const unsigned char *ptr, - int opsize, struct tcp_options_received *opt_rx); bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb, unsigned int *size, struct mptcp_out_options *opts); bool mptcp_synack_options(const struct request_sock *req, unsigned int *size, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 81425542da44..b996dc1069c5 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3926,10 +3926,6 @@ void tcp_parse_options(const struct net *net, */ break; #endif - case TCPOPT_MPTCP: - mptcp_parse_option(skb, ptr, opsize, opt_rx); - break; - case TCPOPT_FASTOPEN: tcp_parse_fastopen_option( opsize - TCPOLEN_FASTOPEN_BASE, diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 8fea686a5562..eadbd59586e4 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -16,10 +16,10 @@ static bool mptcp_cap_flag_sha256(u8 flags) return (flags & MPTCP_CAP_FLAG_MASK) == MPTCP_CAP_HMAC_SHA256; } -void mptcp_parse_option(const struct sk_buff *skb, const unsigned char *ptr, - int opsize, struct tcp_options_received *opt_rx) +static void mptcp_parse_option(const struct sk_buff *skb, + const unsigned char *ptr, int opsize, + struct mptcp_options_received *mp_opt) { - struct mptcp_options_received *mp_opt = &opt_rx->mptcp; u8 subtype = *ptr >> 4; int expected_opsize; u8 version; @@ -283,12 +283,20 @@ void mptcp_parse_option(const struct sk_buff *skb, const unsigned char *ptr, } void mptcp_get_options(const struct sk_buff *skb, - struct tcp_options_received *opt_rx) + struct mptcp_options_received *mp_opt) { - const unsigned char *ptr; const struct tcphdr *th = tcp_hdr(skb); - int length = (th->doff * 4) - sizeof(struct tcphdr); + const unsigned char *ptr; + int length; + /* initialize option status */ + mp_opt->mp_capable = 0; + mp_opt->mp_join = 0; + mp_opt->add_addr = 0; + mp_opt->rm_addr = 0; + mp_opt->dss = 0; + + length = (th->doff * 4) - sizeof(struct tcphdr); ptr = (const unsigned char *)(th + 1); while (length > 0) { @@ -308,7 +316,7 @@ void mptcp_get_options(const struct sk_buff *skb, if (opsize > length) return; /* don't parse partial options */ if (opcode == TCPOPT_MPTCP) - mptcp_parse_option(skb, ptr, opsize, opt_rx); + mptcp_parse_option(skb, ptr, opsize, mp_opt); ptr += opsize - 2; length -= opsize; } @@ -797,41 +805,41 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb, { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_sock *msk = mptcp_sk(subflow->conn); - struct mptcp_options_received *mp_opt; + struct mptcp_options_received mp_opt; struct mptcp_ext *mpext; - mp_opt = &opt_rx->mptcp; - if (!check_fully_established(msk, sk, subflow, skb, mp_opt)) + mptcp_get_options(skb, &mp_opt); + if (!check_fully_established(msk, sk, subflow, skb, &mp_opt)) return; - if (mp_opt->add_addr && add_addr_hmac_valid(msk, mp_opt)) { + if (mp_opt.add_addr && add_addr_hmac_valid(msk, &mp_opt)) { struct mptcp_addr_info addr; - addr.port = htons(mp_opt->port); - addr.id = mp_opt->addr_id; - if (mp_opt->family == MPTCP_ADDR_IPVERSION_4) { + addr.port = htons(mp_opt.port); + addr.id = mp_opt.addr_id; + if (mp_opt.family == MPTCP_ADDR_IPVERSION_4) { addr.family = AF_INET; - addr.addr = mp_opt->addr; + addr.addr = mp_opt.addr; } #if IS_ENABLED(CONFIG_MPTCP_IPV6) - else if (mp_opt->family == MPTCP_ADDR_IPVERSION_6) { + else if (mp_opt.family == MPTCP_ADDR_IPVERSION_6) { addr.family = AF_INET6; - addr.addr6 = mp_opt->addr6; + addr.addr6 = mp_opt.addr6; } #endif - if (!mp_opt->echo) + if (!mp_opt.echo) mptcp_pm_add_addr_received(msk, &addr); - mp_opt->add_addr = 0; + mp_opt.add_addr = 0; } - if (!mp_opt->dss) + if (!mp_opt.dss) return; /* we can't wait for recvmsg() to update the ack_seq, otherwise * monodirectional flows will stuck */ - if (mp_opt->use_ack) - update_una(msk, mp_opt); + if (mp_opt.use_ack) + update_una(msk, &mp_opt); mpext = skb_ext_add(skb, SKB_EXT_MPTCP); if (!mpext) @@ -839,8 +847,8 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb, memset(mpext, 0, sizeof(*mpext)); - if (mp_opt->use_map) { - if (mp_opt->mpc_map) { + if (mp_opt.use_map) { + if (mp_opt.mpc_map) { /* this is an MP_CAPABLE carrying MPTCP data * we know this map the first chunk of data */ @@ -851,12 +859,12 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb, mpext->dsn64 = 1; mpext->mpc_map = 1; } else { - mpext->data_seq = mp_opt->data_seq; - mpext->subflow_seq = mp_opt->subflow_seq; - mpext->dsn64 = mp_opt->dsn64; - mpext->data_fin = mp_opt->data_fin; + mpext->data_seq = mp_opt.data_seq; + mpext->subflow_seq = mp_opt.subflow_seq; + mpext->dsn64 = mp_opt.dsn64; + mpext->data_fin = mp_opt.data_fin; } - mpext->data_len = mp_opt->data_len; + mpext->data_len = mp_opt.data_len; mpext->use_map = 1; } } diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 6e0188f5d3f3..e1f23016ed3f 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1334,7 +1334,7 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk) #endif struct sock *mptcp_sk_clone(const struct sock *sk, - const struct tcp_options_received *opt_rx, + const struct mptcp_options_received *mp_opt, struct request_sock *req) { struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); @@ -1373,9 +1373,9 @@ struct sock *mptcp_sk_clone(const struct sock *sk, msk->write_seq = subflow_req->idsn + 1; atomic64_set(&msk->snd_una, msk->write_seq); - if (opt_rx->mptcp.mp_capable) { + if (mp_opt->mp_capable) { msk->can_ack = true; - msk->remote_key = opt_rx->mptcp.sndr_key; + msk->remote_key = mp_opt->sndr_key; mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq); ack_seq++; msk->ack_seq = ack_seq; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index a2b3048037d0..e4ca6320ce76 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -91,6 +91,45 @@ #define MPTCP_WORK_RTX 2 #define MPTCP_WORK_EOF 3 +struct mptcp_options_received { + u64 sndr_key; + u64 rcvr_key; + u64 data_ack; + u64 data_seq; + u32 subflow_seq; + u16 data_len; + u16 mp_capable : 1, + mp_join : 1, + dss : 1, + add_addr : 1, + rm_addr : 1, + family : 4, + echo : 1, + backup : 1; + u32 token; + u32 nonce; + u64 thmac; + u8 hmac[20]; + u8 join_id; + u8 use_map:1, + dsn64:1, + data_fin:1, + use_ack:1, + ack64:1, + mpc_map:1, + __unused:2; + u8 addr_id; + u8 rm_id; + union { + struct in_addr addr; +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + struct in6_addr addr6; +#endif + }; + u64 ahmac; + u16 port; +}; + static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field) { return htonl((TCPOPT_MPTCP << 24) | (len << 16) | (subopt << 12) | @@ -331,10 +370,10 @@ int mptcp_proto_v6_init(void); #endif struct sock *mptcp_sk_clone(const struct sock *sk, - const struct tcp_options_received *opt_rx, + const struct mptcp_options_received *mp_opt, struct request_sock *req); void mptcp_get_options(const struct sk_buff *skb, - struct tcp_options_received *opt_rx); + struct mptcp_options_received *mp_opt); void mptcp_finish_connect(struct sock *sk); void mptcp_data_ready(struct sock *sk, struct sock *ssk); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 84f6408594c9..bad998529767 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -124,12 +124,11 @@ static void subflow_init_req(struct request_sock *req, { struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener); struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); - struct tcp_options_received rx_opt; + struct mptcp_options_received mp_opt; pr_debug("subflow_req=%p, listener=%p", subflow_req, listener); - memset(&rx_opt.mptcp, 0, sizeof(rx_opt.mptcp)); - mptcp_get_options(skb, &rx_opt); + mptcp_get_options(skb, &mp_opt); subflow_req->mp_capable = 0; subflow_req->mp_join = 0; @@ -142,16 +141,16 @@ static void subflow_init_req(struct request_sock *req, return; #endif - if (rx_opt.mptcp.mp_capable) { + if (mp_opt.mp_capable) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE); - if (rx_opt.mptcp.mp_join) + if (mp_opt.mp_join) return; - } else if (rx_opt.mptcp.mp_join) { + } else if (mp_opt.mp_join) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNRX); } - if (rx_opt.mptcp.mp_capable && listener->request_mptcp) { + if (mp_opt.mp_capable && listener->request_mptcp) { int err; err = mptcp_token_new_request(req); @@ -159,13 +158,13 @@ static void subflow_init_req(struct request_sock *req, subflow_req->mp_capable = 1; subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq; - } else if (rx_opt.mptcp.mp_join && listener->request_mptcp) { + } else if (mp_opt.mp_join && listener->request_mptcp) { subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq; subflow_req->mp_join = 1; - subflow_req->backup = rx_opt.mptcp.backup; - subflow_req->remote_id = rx_opt.mptcp.join_id; - subflow_req->token = rx_opt.mptcp.token; - subflow_req->remote_nonce = rx_opt.mptcp.nonce; + subflow_req->backup = mp_opt.backup; + subflow_req->remote_id = mp_opt.join_id; + subflow_req->token = mp_opt.token; + subflow_req->remote_nonce = mp_opt.nonce; pr_debug("token=%u, remote_nonce=%u", subflow_req->token, subflow_req->remote_nonce); if (!subflow_token_join_request(req, skb)) { @@ -221,6 +220,7 @@ static bool subflow_thmac_valid(struct mptcp_subflow_context *subflow) static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + struct mptcp_options_received mp_opt; struct sock *parent = subflow->conn; struct tcp_sock *tp = tcp_sk(sk); @@ -237,16 +237,17 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow->conn_finished = 1; - if (subflow->request_mptcp && tp->rx_opt.mptcp.mp_capable) { + mptcp_get_options(skb, &mp_opt); + if (subflow->request_mptcp && mp_opt.mp_capable) { subflow->mp_capable = 1; subflow->can_ack = 1; - subflow->remote_key = tp->rx_opt.mptcp.sndr_key; + subflow->remote_key = mp_opt.sndr_key; pr_debug("subflow=%p, remote_key=%llu", subflow, subflow->remote_key); - } else if (subflow->request_join && tp->rx_opt.mptcp.mp_join) { + } else if (subflow->request_join && mp_opt.mp_join) { subflow->mp_join = 1; - subflow->thmac = tp->rx_opt.mptcp.thmac; - subflow->remote_nonce = tp->rx_opt.mptcp.nonce; + subflow->thmac = mp_opt.thmac; + subflow->remote_nonce = mp_opt.nonce; pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow, subflow->thmac, subflow->remote_nonce); } else if (subflow->request_mptcp) { @@ -343,7 +344,7 @@ drop: /* validate hmac received in third ACK */ static bool subflow_hmac_valid(const struct request_sock *req, - const struct tcp_options_received *rx_opt) + const struct mptcp_options_received *mp_opt) { const struct mptcp_subflow_request_sock *subflow_req; u8 hmac[MPTCPOPT_HMAC_LEN]; @@ -360,7 +361,7 @@ static bool subflow_hmac_valid(const struct request_sock *req, subflow_req->local_nonce, hmac); ret = true; - if (crypto_memneq(hmac, rx_opt->mptcp.hmac, sizeof(hmac))) + if (crypto_memneq(hmac, mp_opt->hmac, sizeof(hmac))) ret = false; sock_put((struct sock *)msk); @@ -416,7 +417,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, { struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk); struct mptcp_subflow_request_sock *subflow_req; - struct tcp_options_received opt_rx; + struct mptcp_options_received mp_opt; bool fallback_is_fatal = false; struct sock *new_msk = NULL; bool fallback = false; @@ -424,7 +425,10 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn); - opt_rx.mptcp.mp_capable = 0; + /* we need later a valid 'mp_capable' value even when options are not + * parsed + */ + mp_opt.mp_capable = 0; if (tcp_rsk(req)->is_mptcp == 0) goto create_child; @@ -439,22 +443,21 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, goto create_msk; } - mptcp_get_options(skb, &opt_rx); - if (!opt_rx.mptcp.mp_capable) { + mptcp_get_options(skb, &mp_opt); + if (!mp_opt.mp_capable) { fallback = true; goto create_child; } create_msk: - new_msk = mptcp_sk_clone(listener->conn, &opt_rx, req); + new_msk = mptcp_sk_clone(listener->conn, &mp_opt, req); if (!new_msk) fallback = true; } else if (subflow_req->mp_join) { fallback_is_fatal = true; - opt_rx.mptcp.mp_join = 0; - mptcp_get_options(skb, &opt_rx); - if (!opt_rx.mptcp.mp_join || - !subflow_hmac_valid(req, &opt_rx)) { + mptcp_get_options(skb, &mp_opt); + if (!mp_opt.mp_join || + !subflow_hmac_valid(req, &mp_opt)) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC); return NULL; } @@ -494,9 +497,9 @@ create_child: /* with OoO packets we can reach here without ingress * mpc option */ - ctx->remote_key = opt_rx.mptcp.sndr_key; - ctx->fully_established = opt_rx.mptcp.mp_capable; - ctx->can_ack = opt_rx.mptcp.mp_capable; + ctx->remote_key = mp_opt.sndr_key; + ctx->fully_established = mp_opt.mp_capable; + ctx->can_ack = mp_opt.mp_capable; } else if (ctx->mp_join) { struct mptcp_sock *owner; -- cgit v1.2.3-71-gd317 From 54261af473be4c5481f6196064445d2945f2bdab Mon Sep 17 00:00:00 2001 From: KP Singh Date: Thu, 30 Apr 2020 17:52:40 +0200 Subject: security: Fix the default value of fs_context_parse_param hook security_fs_context_parse_param is called by vfs_parse_fs_param and a succussful return value (i.e 0) implies that a parameter will be consumed by the LSM framework. This stops all further parsing of the parmeter by VFS. Furthermore, if an LSM hook returns a success, the remaining LSM hooks are not invoked for the parameter. The current default behavior of returning success means that all the parameters are expected to be parsed by the LSM hook and none of them end up being populated by vfs in fs_context This was noticed when lsm=bpf is supplied on the command line before any other LSM. As the bpf lsm uses this default value to implement a default hook, this resulted in a failure to parse any fs_context parameters and a failure to mount the root filesystem. Fixes: 98e828a0650f ("security: Refactor declaration of LSM hooks") Reported-by: Mikko Ylinen Signed-off-by: KP Singh Signed-off-by: James Morris --- include/linux/lsm_hook_defs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 9cd4455528e5..1bdd027766d4 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -55,7 +55,7 @@ LSM_HOOK(void, LSM_RET_VOID, bprm_committing_creds, struct linux_binprm *bprm) LSM_HOOK(void, LSM_RET_VOID, bprm_committed_creds, struct linux_binprm *bprm) LSM_HOOK(int, 0, fs_context_dup, struct fs_context *fc, struct fs_context *src_sc) -LSM_HOOK(int, 0, fs_context_parse_param, struct fs_context *fc, +LSM_HOOK(int, -ENOPARAM, fs_context_parse_param, struct fs_context *fc, struct fs_parameter *param) LSM_HOOK(int, 0, sb_alloc_security, struct super_block *sb) LSM_HOOK(void, LSM_RET_VOID, sb_free_security, struct super_block *sb) -- cgit v1.2.3-71-gd317 From b723748750ece7d844cdf2f52c01d37f83387208 Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Mon, 27 Apr 2020 16:11:05 +0200 Subject: tunnel: Propagate ECT(1) when decapsulating as recommended by RFC6040 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RFC 6040 recommends propagating an ECT(1) mark from an outer tunnel header to the inner header if that inner header is already marked as ECT(0). When RFC 6040 decapsulation was implemented, this case of propagation was not added. This simply appears to be an oversight, so let's fix that. Fixes: eccc1bb8d4b4 ("tunnel: drop packet if ECN present with not-ECT") Reported-by: Bob Briscoe Reported-by: Olivier Tilmans Cc: Dave Taht Cc: Stephen Hemminger Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: David S. Miller --- include/net/inet_ecn.h | 57 ++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 55 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h index c8e2bebd8d93..0f0d1efe06dd 100644 --- a/include/net/inet_ecn.h +++ b/include/net/inet_ecn.h @@ -99,6 +99,20 @@ static inline int IP_ECN_set_ce(struct iphdr *iph) return 1; } +static inline int IP_ECN_set_ect1(struct iphdr *iph) +{ + u32 check = (__force u32)iph->check; + + if ((iph->tos & INET_ECN_MASK) != INET_ECN_ECT_0) + return 0; + + check += (__force u16)htons(0x100); + + iph->check = (__force __sum16)(check + (check>=0xFFFF)); + iph->tos ^= INET_ECN_MASK; + return 1; +} + static inline void IP_ECN_clear(struct iphdr *iph) { iph->tos &= ~INET_ECN_MASK; @@ -134,6 +148,22 @@ static inline int IP6_ECN_set_ce(struct sk_buff *skb, struct ipv6hdr *iph) return 1; } +static inline int IP6_ECN_set_ect1(struct sk_buff *skb, struct ipv6hdr *iph) +{ + __be32 from, to; + + if ((ipv6_get_dsfield(iph) & INET_ECN_MASK) != INET_ECN_ECT_0) + return 0; + + from = *(__be32 *)iph; + to = from ^ htonl(INET_ECN_MASK << 20); + *(__be32 *)iph = to; + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->csum = csum_add(csum_sub(skb->csum, (__force __wsum)from), + (__force __wsum)to); + return 1; +} + static inline void ipv6_copy_dscp(unsigned int dscp, struct ipv6hdr *inner) { dscp &= ~INET_ECN_MASK; @@ -159,6 +189,25 @@ static inline int INET_ECN_set_ce(struct sk_buff *skb) return 0; } +static inline int INET_ECN_set_ect1(struct sk_buff *skb) +{ + switch (skb->protocol) { + case cpu_to_be16(ETH_P_IP): + if (skb_network_header(skb) + sizeof(struct iphdr) <= + skb_tail_pointer(skb)) + return IP_ECN_set_ect1(ip_hdr(skb)); + break; + + case cpu_to_be16(ETH_P_IPV6): + if (skb_network_header(skb) + sizeof(struct ipv6hdr) <= + skb_tail_pointer(skb)) + return IP6_ECN_set_ect1(skb, ipv6_hdr(skb)); + break; + } + + return 0; +} + /* * RFC 6040 4.2 * To decapsulate the inner header at the tunnel egress, a compliant @@ -208,8 +257,12 @@ static inline int INET_ECN_decapsulate(struct sk_buff *skb, int rc; rc = __INET_ECN_decapsulate(outer, inner, &set_ce); - if (!rc && set_ce) - INET_ECN_set_ce(skb); + if (!rc) { + if (set_ce) + INET_ECN_set_ce(skb); + else if ((outer & INET_ECN_MASK) == INET_ECN_ECT_1) + INET_ECN_set_ect1(skb); + } return rc; } -- cgit v1.2.3-71-gd317 From 8f34e53b60b337e559f1ea19e2780ff95ab2fa65 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 1 May 2020 08:53:08 -0600 Subject: ipv6: Use global sernum for dst validation with nexthop objects Nik reported a bug with pcpu dst cache when nexthop objects are used illustrated by the following: $ ip netns add foo $ ip -netns foo li set lo up $ ip -netns foo addr add 2001:db8:11::1/128 dev lo $ ip netns exec foo sysctl net.ipv6.conf.all.forwarding=1 $ ip li add veth1 type veth peer name veth2 $ ip li set veth1 up $ ip addr add 2001:db8:10::1/64 dev veth1 $ ip li set dev veth2 netns foo $ ip -netns foo li set veth2 up $ ip -netns foo addr add 2001:db8:10::2/64 dev veth2 $ ip -6 nexthop add id 100 via 2001:db8:10::2 dev veth1 $ ip -6 route add 2001:db8:11::1/128 nhid 100 Create a pcpu entry on cpu 0: $ taskset -a -c 0 ip -6 route get 2001:db8:11::1 Re-add the route entry: $ ip -6 ro del 2001:db8:11::1 $ ip -6 route add 2001:db8:11::1/128 nhid 100 Route get on cpu 0 returns the stale pcpu: $ taskset -a -c 0 ip -6 route get 2001:db8:11::1 RTNETLINK answers: Network is unreachable While cpu 1 works: $ taskset -a -c 1 ip -6 route get 2001:db8:11::1 2001:db8:11::1 from :: via 2001:db8:10::2 dev veth1 src 2001:db8:10::1 metric 1024 pref medium Conversion of FIB entries to work with external nexthop objects missed an important difference between IPv4 and IPv6 - how dst entries are invalidated when the FIB changes. IPv4 has a per-network namespace generation id (rt_genid) that is bumped on changes to the FIB. Checking if a dst_entry is still valid means comparing rt_genid in the rtable to the current value of rt_genid for the namespace. IPv6 also has a per network namespace counter, fib6_sernum, but the count is saved per fib6_node. With the per-node counter only dst_entries based on fib entries under the node are invalidated when changes are made to the routes - limiting the scope of invalidations. IPv6 uses a reference in the rt6_info, 'from', to track the corresponding fib entry used to create the dst_entry. When validating a dst_entry, the 'from' is used to backtrack to the fib6_node and check the sernum of it to the cookie passed to the dst_check operation. With the inline format (nexthop definition inline with the fib6_info), dst_entries cached in the fib6_nh have a 1:1 correlation between fib entries, nexthop data and dst_entries. With external nexthops, IPv6 looks more like IPv4 which means multiple fib entries across disparate fib6_nodes can all reference the same fib6_nh. That means validation of dst_entries based on external nexthops needs to use the IPv4 format - the per-network namespace counter. Add sernum to rt6_info and set it when creating a pcpu dst entry. Update rt6_get_cookie to return sernum if it is set and update dst_check for IPv6 to look for sernum set and based the check on it if so. Finally, rt6_get_pcpu_route needs to validate the cached entry before returning a pcpu entry (similar to the rt_cache_valid calls in __mkroute_input and __mkroute_output for IPv4). This problem only affects routes using the new, external nexthops. Thanks to the kbuild test robot for catching the IS_ENABLED needed around rt_genid_ipv6 before I sent this out. Fixes: 5b98324ebe29 ("ipv6: Allow routes to use nexthop objects") Reported-by: Nikolay Aleksandrov Signed-off-by: David Ahern Reviewed-by: Nikolay Aleksandrov Tested-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 4 ++++ include/net/net_namespace.h | 7 +++++++ net/ipv6/route.c | 25 +++++++++++++++++++++++++ 3 files changed, 36 insertions(+) (limited to 'include') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 80262d2980f5..1d98828c6649 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -203,6 +203,7 @@ struct fib6_info { struct rt6_info { struct dst_entry dst; struct fib6_info __rcu *from; + int sernum; struct rt6key rt6i_dst; struct rt6key rt6i_src; @@ -291,6 +292,9 @@ static inline u32 rt6_get_cookie(const struct rt6_info *rt) struct fib6_info *from; u32 cookie = 0; + if (rt->sernum) + return rt->sernum; + rcu_read_lock(); from = rcu_dereference(rt->from); diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index ab96fb59131c..8e001e049497 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -437,6 +437,13 @@ static inline int rt_genid_ipv4(const struct net *net) return atomic_read(&net->ipv4.rt_genid); } +#if IS_ENABLED(CONFIG_IPV6) +static inline int rt_genid_ipv6(const struct net *net) +{ + return atomic_read(&net->ipv6.fib6_sernum); +} +#endif + static inline void rt_genid_bump_ipv4(struct net *net) { atomic_inc(&net->ipv4.rt_genid); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 310cbddaa533..8d418038fe32 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1385,9 +1385,18 @@ static struct rt6_info *ip6_rt_pcpu_alloc(const struct fib6_result *res) } ip6_rt_copy_init(pcpu_rt, res); pcpu_rt->rt6i_flags |= RTF_PCPU; + + if (f6i->nh) + pcpu_rt->sernum = rt_genid_ipv6(dev_net(dev)); + return pcpu_rt; } +static bool rt6_is_valid(const struct rt6_info *rt6) +{ + return rt6->sernum == rt_genid_ipv6(dev_net(rt6->dst.dev)); +} + /* It should be called with rcu_read_lock() acquired */ static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res) { @@ -1395,6 +1404,19 @@ static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res) pcpu_rt = this_cpu_read(*res->nh->rt6i_pcpu); + if (pcpu_rt && pcpu_rt->sernum && !rt6_is_valid(pcpu_rt)) { + struct rt6_info *prev, **p; + + p = this_cpu_ptr(res->nh->rt6i_pcpu); + prev = xchg(p, NULL); + if (prev) { + dst_dev_put(&prev->dst); + dst_release(&prev->dst); + } + + pcpu_rt = NULL; + } + return pcpu_rt; } @@ -2593,6 +2615,9 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) rt = container_of(dst, struct rt6_info, dst); + if (rt->sernum) + return rt6_is_valid(rt) ? dst : NULL; + rcu_read_lock(); /* All IPV6 dsts are created with ->obsolete set to the value -- cgit v1.2.3-71-gd317 From 9d82973e032e246ff5663c9805fbb5407ae932e3 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 4 May 2020 09:16:37 -0700 Subject: gcc-10 warnings: fix low-hanging fruit Due to a bug-report that was compiler-dependent, I updated one of my machines to gcc-10. That shows a lot of new warnings. Happily they seem to be mostly the valid kind, but it's going to cause a round of churn for getting rid of them.. This is the really low-hanging fruit of removing a couple of zero-sized arrays in some core code. We have had a round of these patches before, and we'll have many more coming, and there is nothing special about these except that they were particularly trivial, and triggered more warnings than most. Signed-off-by: Linus Torvalds --- include/linux/fs.h | 2 +- include/linux/tty.h | 2 +- scripts/kallsyms.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 4f6f59b4f22a..45cc10cdf6dd 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -983,7 +983,7 @@ struct file_handle { __u32 handle_bytes; int handle_type; /* file identifier */ - unsigned char f_handle[0]; + unsigned char f_handle[]; }; static inline struct file *get_file(struct file *f) diff --git a/include/linux/tty.h b/include/linux/tty.h index bd5fe0e907e8..a99e9b8e4e31 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -66,7 +66,7 @@ struct tty_buffer { int read; int flags; /* Data points here */ - unsigned long data[0]; + unsigned long data[]; }; /* Values for .flags field of tty_buffer */ diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c index 3e8dea6e0a95..6dc3078649fa 100644 --- a/scripts/kallsyms.c +++ b/scripts/kallsyms.c @@ -34,7 +34,7 @@ struct sym_entry { unsigned int len; unsigned int start_pos; unsigned int percpu_absolute; - unsigned char sym[0]; + unsigned char sym[]; }; struct addr_range { -- cgit v1.2.3-71-gd317 From 1e6e9d0f4859ec698d55381ea26f4136eff3afe1 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 24 Apr 2020 10:50:00 -0500 Subject: uapi: revert flexible-array conversions These structures can get embedded in other structures in user-space and cause all sorts of warnings and problems. So, we better don't take any chances and keep the zero-length arrays in place for now. Signed-off-by: Gustavo A. R. Silva --- include/uapi/linux/bpf.h | 2 +- include/uapi/linux/dlm_device.h | 4 ++-- include/uapi/linux/fiemap.h | 2 +- include/uapi/linux/if_arcnet.h | 6 +++--- include/uapi/linux/mmc/ioctl.h | 2 +- include/uapi/linux/net_dropmon.h | 4 ++-- include/uapi/linux/netfilter_bridge/ebt_among.h | 2 +- include/uapi/scsi/scsi_bsg_fc.h | 2 +- 8 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7bbf1b65be10..f9b7fdd951e4 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -73,7 +73,7 @@ struct bpf_insn { /* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */ struct bpf_lpm_trie_key { __u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */ - __u8 data[]; /* Arbitrary size */ + __u8 data[0]; /* Arbitrary size */ }; struct bpf_cgroup_storage_key { diff --git a/include/uapi/linux/dlm_device.h b/include/uapi/linux/dlm_device.h index e83954c69fff..f880d2831160 100644 --- a/include/uapi/linux/dlm_device.h +++ b/include/uapi/linux/dlm_device.h @@ -45,13 +45,13 @@ struct dlm_lock_params { void __user *bastaddr; struct dlm_lksb __user *lksb; char lvb[DLM_USER_LVB_LEN]; - char name[]; + char name[0]; }; struct dlm_lspace_params { __u32 flags; __u32 minor; - char name[]; + char name[0]; }; struct dlm_purge_params { diff --git a/include/uapi/linux/fiemap.h b/include/uapi/linux/fiemap.h index 7a900b2377b6..8c0bc24d5d95 100644 --- a/include/uapi/linux/fiemap.h +++ b/include/uapi/linux/fiemap.h @@ -34,7 +34,7 @@ struct fiemap { __u32 fm_mapped_extents;/* number of extents that were mapped (out) */ __u32 fm_extent_count; /* size of fm_extents array (in) */ __u32 fm_reserved; - struct fiemap_extent fm_extents[]; /* array of mapped extents (out) */ + struct fiemap_extent fm_extents[0]; /* array of mapped extents (out) */ }; #define FIEMAP_MAX_OFFSET (~0ULL) diff --git a/include/uapi/linux/if_arcnet.h b/include/uapi/linux/if_arcnet.h index b122cfac7128..683878036d76 100644 --- a/include/uapi/linux/if_arcnet.h +++ b/include/uapi/linux/if_arcnet.h @@ -60,7 +60,7 @@ struct arc_rfc1201 { __u8 proto; /* protocol ID field - varies */ __u8 split_flag; /* for use with split packets */ __be16 sequence; /* sequence number */ - __u8 payload[]; /* space remaining in packet (504 bytes)*/ + __u8 payload[0]; /* space remaining in packet (504 bytes)*/ }; #define RFC1201_HDR_SIZE 4 @@ -69,7 +69,7 @@ struct arc_rfc1201 { */ struct arc_rfc1051 { __u8 proto; /* ARC_P_RFC1051_ARP/RFC1051_IP */ - __u8 payload[]; /* 507 bytes */ + __u8 payload[0]; /* 507 bytes */ }; #define RFC1051_HDR_SIZE 1 @@ -80,7 +80,7 @@ struct arc_rfc1051 { struct arc_eth_encap { __u8 proto; /* Always ARC_P_ETHER */ struct ethhdr eth; /* standard ethernet header (yuck!) */ - __u8 payload[]; /* 493 bytes */ + __u8 payload[0]; /* 493 bytes */ }; #define ETH_ENCAP_HDR_SIZE 14 diff --git a/include/uapi/linux/mmc/ioctl.h b/include/uapi/linux/mmc/ioctl.h index 98e29e7f54ac..00c08120f3ba 100644 --- a/include/uapi/linux/mmc/ioctl.h +++ b/include/uapi/linux/mmc/ioctl.h @@ -57,7 +57,7 @@ struct mmc_ioc_cmd { */ struct mmc_ioc_multi_cmd { __u64 num_of_cmds; - struct mmc_ioc_cmd cmds[]; + struct mmc_ioc_cmd cmds[0]; }; #define MMC_IOC_CMD _IOWR(MMC_BLOCK_MAJOR, 0, struct mmc_ioc_cmd) diff --git a/include/uapi/linux/net_dropmon.h b/include/uapi/linux/net_dropmon.h index 67e31f329190..66048cc5d7b3 100644 --- a/include/uapi/linux/net_dropmon.h +++ b/include/uapi/linux/net_dropmon.h @@ -29,12 +29,12 @@ struct net_dm_config_entry { struct net_dm_config_msg { __u32 entries; - struct net_dm_config_entry options[]; + struct net_dm_config_entry options[0]; }; struct net_dm_alert_msg { __u32 entries; - struct net_dm_drop_point points[]; + struct net_dm_drop_point points[0]; }; struct net_dm_user_msg { diff --git a/include/uapi/linux/netfilter_bridge/ebt_among.h b/include/uapi/linux/netfilter_bridge/ebt_among.h index 73b26a280c4f..9acf757bc1f7 100644 --- a/include/uapi/linux/netfilter_bridge/ebt_among.h +++ b/include/uapi/linux/netfilter_bridge/ebt_among.h @@ -40,7 +40,7 @@ struct ebt_mac_wormhash_tuple { struct ebt_mac_wormhash { int table[257]; int poolsize; - struct ebt_mac_wormhash_tuple pool[]; + struct ebt_mac_wormhash_tuple pool[0]; }; #define ebt_mac_wormhash_size(x) ((x) ? sizeof(struct ebt_mac_wormhash) \ diff --git a/include/uapi/scsi/scsi_bsg_fc.h b/include/uapi/scsi/scsi_bsg_fc.h index 7f5930801f72..3ae65e93235c 100644 --- a/include/uapi/scsi/scsi_bsg_fc.h +++ b/include/uapi/scsi/scsi_bsg_fc.h @@ -209,7 +209,7 @@ struct fc_bsg_host_vendor { __u64 vendor_id; /* start of vendor command area */ - __u32 vendor_cmd[]; + __u32 vendor_cmd[0]; }; /* Response: -- cgit v1.2.3-71-gd317 From a7df4870d79b00742da6cc93ca2f336a71db77f7 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 30 Apr 2020 20:53:49 -0700 Subject: net_sched: fix tcm_parent in tc filter dump When we tell kernel to dump filters from root (ffff:ffff), those filters on ingress (ffff:0000) are matched, but their true parents must be dumped as they are. However, kernel dumps just whatever we tell it, that is either ffff:ffff or ffff:0000: $ nl-cls-list --dev=dummy0 --parent=root cls basic dev dummy0 id none parent root prio 49152 protocol ip match-all cls basic dev dummy0 id :1 parent root prio 49152 protocol ip match-all $ nl-cls-list --dev=dummy0 --parent=ffff: cls basic dev dummy0 id none parent ffff: prio 49152 protocol ip match-all cls basic dev dummy0 id :1 parent ffff: prio 49152 protocol ip match-all This is confusing and misleading, more importantly this is a regression since 4.15, so the old behavior must be restored. And, when tc filters are installed on a tc class, the parent should be the classid, rather than the qdisc handle. Commit edf6711c9840 ("net: sched: remove classid and q fields from tcf_proto") removed the classid we save for filters, we can just restore this classid in tcf_block. Steps to reproduce this: ip li set dev dummy0 up tc qd add dev dummy0 ingress tc filter add dev dummy0 parent ffff: protocol arp basic action pass tc filter show dev dummy0 root Before this patch: filter protocol arp pref 49152 basic filter protocol arp pref 49152 basic handle 0x1 action order 1: gact action pass random type none pass val 0 index 1 ref 1 bind 1 After this patch: filter parent ffff: protocol arp pref 49152 basic filter parent ffff: protocol arp pref 49152 basic handle 0x1 action order 1: gact action pass random type none pass val 0 index 1 ref 1 bind 1 Fixes: a10fa20101ae ("net: sched: propagate q and parent from caller down to tcf_fill_node") Fixes: edf6711c9840 ("net: sched: remove classid and q fields from tcf_proto") Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/sch_generic.h | 1 + net/sched/cls_api.c | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 25d2ec4c8f00..8428aa614265 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -407,6 +407,7 @@ struct tcf_block { struct mutex lock; struct list_head chain_list; u32 index; /* block index for shared blocks */ + u32 classid; /* which class this block belongs to */ refcount_t refcnt; struct net *net; struct Qdisc *q; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 55bd1429678f..c0e5b64b3caf 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -2070,6 +2070,7 @@ replay: err = PTR_ERR(block); goto errout; } + block->classid = parent; chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0; if (chain_index > TC_ACT_EXT_VAL_MASK) { @@ -2612,12 +2613,10 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; parent = tcm->tcm_parent; - if (!parent) { + if (!parent) q = dev->qdisc; - parent = q->handle; - } else { + else q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent)); - } if (!q) goto out; cops = q->ops->cl_ops; @@ -2633,6 +2632,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) block = cops->tcf_block(q, cl, NULL); if (!block) goto out; + parent = block->classid; if (tcf_block_shared(block)) q = NULL; } -- cgit v1.2.3-71-gd317 From 115f32512f13c0280161908e9de45a97a87673bb Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Fri, 1 May 2020 00:35:50 +0530 Subject: bus: mhi: Fix parsing of mhi_flags With the current parsing of mhi_flags, the following statement always return false: eob = !!(flags & MHI_EOB); This is due to the fact that 'enum mhi_flags' starts with index 0 and we are using direct AND operation to extract each bit. Fix this by using BIT() macros for defining the flags so that the reset of the code need not be touched. Fixes: 189ff97cca53 ("bus: mhi: core: Add support for data transfer") Reported-by: Dan Carpenter Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20200430190555.32741-2-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mhi.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index ad1996001965..5642806360f3 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -53,9 +53,9 @@ enum mhi_callback { * @MHI_CHAIN: Linked transfer */ enum mhi_flags { - MHI_EOB, - MHI_EOT, - MHI_CHAIN, + MHI_EOB = BIT(0), + MHI_EOT = BIT(1), + MHI_CHAIN = BIT(2), }; /** -- cgit v1.2.3-71-gd317 From 85a087df4a719ebab940efa3c79625e68161f57b Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Fri, 1 May 2020 00:35:52 +0530 Subject: bus: mhi: core: Remove link_status() callback If the MHI core detects invalid data due to a PCI read, it calls into the controller via link_status() to double check that the link is infact down. All in all, this is pretty pointless, and racy. There are no good reasons for this, and only drawbacks. Its pointless because chances are, the controller is going to do the same thing to determine if the link is down - attempt a PCI access and compare the result. This does not make the link status decision any smarter. Its racy because its possible that the link was down at the time of the MHI core access, but then recovered before the controller access. In this case, the controller will indicate the link is not down, and the MHI core will precede to use a bad value as the MHI core does not attempt to retry the access. Retrying the access in the MHI core is a bad idea because again, it is racy - what if the link is down again? Furthermore, there may be some higher level state associated with the link status, that is now invalid because the link went down. The only reason why the MHI core could see "invalid" data when doing a PCI access, that is actually valid, is if the register actually contained the PCI spec defined sentinel for an invalid access. In this case, it is arguable that the MHI implementation broken, and should be fixed, not worked around. Therefore, remove the link_status() callback before anyone attempts to implement it. Signed-off-by: Jeffrey Hugo Reviewed-by: Manivannan Sadhasivam Reviewed-by: Hemant Kumar Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20200430190555.32741-4-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/bus/mhi/core/init.c | 6 ++---- drivers/bus/mhi/core/main.c | 5 ++--- include/linux/mhi.h | 2 -- 3 files changed, 4 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/bus/mhi/core/init.c b/drivers/bus/mhi/core/init.c index b38359c480ea..2af08d57ec28 100644 --- a/drivers/bus/mhi/core/init.c +++ b/drivers/bus/mhi/core/init.c @@ -812,10 +812,8 @@ int mhi_register_controller(struct mhi_controller *mhi_cntrl, if (!mhi_cntrl) return -EINVAL; - if (!mhi_cntrl->runtime_get || !mhi_cntrl->runtime_put) - return -EINVAL; - - if (!mhi_cntrl->status_cb || !mhi_cntrl->link_status) + if (!mhi_cntrl->runtime_get || !mhi_cntrl->runtime_put || + !mhi_cntrl->status_cb) return -EINVAL; ret = parse_config(mhi_cntrl, config); diff --git a/drivers/bus/mhi/core/main.c b/drivers/bus/mhi/core/main.c index 55928feea0c9..f8401535e61a 100644 --- a/drivers/bus/mhi/core/main.c +++ b/drivers/bus/mhi/core/main.c @@ -20,9 +20,8 @@ int __must_check mhi_read_reg(struct mhi_controller *mhi_cntrl, { u32 tmp = readl(base + offset); - /* If there is any unexpected value, query the link status */ - if (PCI_INVALID_READ(tmp) && - mhi_cntrl->link_status(mhi_cntrl)) + /* If the value is invalid, the link is down */ + if (PCI_INVALID_READ(tmp)) return -EIO; *out = tmp; diff --git a/include/linux/mhi.h b/include/linux/mhi.h index 5642806360f3..c80ba559face 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -335,7 +335,6 @@ struct mhi_controller_config { * @syserr_worker: System error worker * @state_event: State change event * @status_cb: CB function to notify power states of the device (required) - * @link_status: CB function to query link status of the device (required) * @wake_get: CB function to assert device wake (optional) * @wake_put: CB function to de-assert device wake (optional) * @wake_toggle: CB function to assert and de-assert device wake (optional) @@ -417,7 +416,6 @@ struct mhi_controller { void (*status_cb)(struct mhi_controller *mhi_cntrl, enum mhi_callback cb); - int (*link_status)(struct mhi_controller *mhi_cntrl); void (*wake_get)(struct mhi_controller *mhi_cntrl, bool override); void (*wake_put)(struct mhi_controller *mhi_cntrl, bool override); void (*wake_toggle)(struct mhi_controller *mhi_cntrl); -- cgit v1.2.3-71-gd317 From 45723a44845c90c8e859fd0e2b0bb492322b5d0b Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Fri, 1 May 2020 00:35:53 +0530 Subject: bus: mhi: core: Offload register accesses to the controller When reading or writing MHI registers, the core assumes that the physical link is a memory mapped PCI link. This assumption may not hold for all MHI devices. The controller knows what is the physical link (ie PCI, I2C, SPI, etc), and therefore knows the proper methods to access that link. The controller can also handle link specific error scenarios, such as reading -1 when the PCI link went down. Therefore, it is appropriate that the MHI core requests the controller to make register accesses on behalf of the core, which abstracts the core from link specifics, and end up removing an unnecessary assumption. Signed-off-by: Jeffrey Hugo Reviewed-by: Hemant Kumar Reviewed-by: Manivannan Sadhasivam Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20200430190555.32741-5-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/bus/mhi/core/init.c | 3 ++- drivers/bus/mhi/core/internal.h | 3 --- drivers/bus/mhi/core/main.c | 12 ++---------- include/linux/mhi.h | 6 ++++++ 4 files changed, 10 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/drivers/bus/mhi/core/init.c b/drivers/bus/mhi/core/init.c index 2af08d57ec28..eb2ab058a01d 100644 --- a/drivers/bus/mhi/core/init.c +++ b/drivers/bus/mhi/core/init.c @@ -813,7 +813,8 @@ int mhi_register_controller(struct mhi_controller *mhi_cntrl, return -EINVAL; if (!mhi_cntrl->runtime_get || !mhi_cntrl->runtime_put || - !mhi_cntrl->status_cb) + !mhi_cntrl->status_cb || !mhi_cntrl->read_reg || + !mhi_cntrl->write_reg) return -EINVAL; ret = parse_config(mhi_cntrl, config); diff --git a/drivers/bus/mhi/core/internal.h b/drivers/bus/mhi/core/internal.h index 5deadfaa053a..095d95bc0e37 100644 --- a/drivers/bus/mhi/core/internal.h +++ b/drivers/bus/mhi/core/internal.h @@ -11,9 +11,6 @@ extern struct bus_type mhi_bus_type; -/* MHI MMIO register mapping */ -#define PCI_INVALID_READ(val) (val == U32_MAX) - #define MHIREGLEN (0x0) #define MHIREGLEN_MHIREGLEN_MASK (0xFFFFFFFF) #define MHIREGLEN_MHIREGLEN_SHIFT (0) diff --git a/drivers/bus/mhi/core/main.c b/drivers/bus/mhi/core/main.c index f8401535e61a..2aceb69f6ce8 100644 --- a/drivers/bus/mhi/core/main.c +++ b/drivers/bus/mhi/core/main.c @@ -18,15 +18,7 @@ int __must_check mhi_read_reg(struct mhi_controller *mhi_cntrl, void __iomem *base, u32 offset, u32 *out) { - u32 tmp = readl(base + offset); - - /* If the value is invalid, the link is down */ - if (PCI_INVALID_READ(tmp)) - return -EIO; - - *out = tmp; - - return 0; + return mhi_cntrl->read_reg(mhi_cntrl, base + offset, out); } int __must_check mhi_read_reg_field(struct mhi_controller *mhi_cntrl, @@ -48,7 +40,7 @@ int __must_check mhi_read_reg_field(struct mhi_controller *mhi_cntrl, void mhi_write_reg(struct mhi_controller *mhi_cntrl, void __iomem *base, u32 offset, u32 val) { - writel(val, base + offset); + mhi_cntrl->write_reg(mhi_cntrl, base + offset, val); } void mhi_write_reg_field(struct mhi_controller *mhi_cntrl, void __iomem *base, diff --git a/include/linux/mhi.h b/include/linux/mhi.h index c80ba559face..84a6c9e72f52 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -342,6 +342,8 @@ struct mhi_controller_config { * @runtimet_put: CB function to decrement pm usage (required) * @map_single: CB function to create TRE buffer * @unmap_single: CB function to destroy TRE buffer + * @read_reg: Read a MHI register via the physical link (required) + * @write_reg: Write a MHI register via the physical link (required) * @buffer_len: Bounce buffer length * @bounce_buf: Use of bounce buffer * @fbc_download: MHI host needs to do complete image transfer (optional) @@ -425,6 +427,10 @@ struct mhi_controller { struct mhi_buf_info *buf); void (*unmap_single)(struct mhi_controller *mhi_cntrl, struct mhi_buf_info *buf); + int (*read_reg)(struct mhi_controller *mhi_cntrl, void __iomem *addr, + u32 *out); + void (*write_reg)(struct mhi_controller *mhi_cntrl, void __iomem *addr, + u32 val); size_t buffer_len; bool bounce_buf; -- cgit v1.2.3-71-gd317 From af2e58818082ac0db29539444ca17eb1e77f6000 Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Fri, 1 May 2020 00:35:54 +0530 Subject: bus: mhi: core: Fix typo in comment There is a typo - "runtimet" should be "runtime". Fix it. Signed-off-by: Jeffrey Hugo Reviewed-by: Hemant Kumar Reviewed-by: Manivannan Sadhasivam Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20200430190555.32741-6-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mhi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index 84a6c9e72f52..3d7c3c26eeb9 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -339,7 +339,7 @@ struct mhi_controller_config { * @wake_put: CB function to de-assert device wake (optional) * @wake_toggle: CB function to assert and de-assert device wake (optional) * @runtime_get: CB function to controller runtime resume (required) - * @runtimet_put: CB function to decrement pm usage (required) + * @runtime_put: CB function to decrement pm usage (required) * @map_single: CB function to create TRE buffer * @unmap_single: CB function to destroy TRE buffer * @read_reg: Read a MHI register via the physical link (required) -- cgit v1.2.3-71-gd317 From 21ce7f3e16fbf89faaf149cfe0f730edfc553914 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 4 May 2020 01:20:26 +0300 Subject: net: dsa: ocelot: the MAC table on Felix is twice as large When running 'bridge fdb dump' on Felix, sometimes learnt and static MAC addresses would appear, sometimes they wouldn't. Turns out, the MAC table has 4096 entries on VSC7514 (Ocelot) and 8192 entries on VSC9959 (Felix), so the existing code from the Ocelot common library only dumped half of Felix's MAC table. They are both organized as a 4-way set-associative TCAM, so we just need a single variable indicating the correct number of rows. Fixes: 56051948773e ("net: dsa: ocelot: add driver for Felix switch family") Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/ocelot/felix.c | 1 + drivers/net/dsa/ocelot/felix.h | 1 + drivers/net/dsa/ocelot/felix_vsc9959.c | 1 + drivers/net/ethernet/mscc/ocelot.c | 6 ++---- drivers/net/ethernet/mscc/ocelot_regs.c | 1 + include/soc/mscc/ocelot.h | 1 + 6 files changed, 7 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index d0a3764ff0cf..e2c6bf0e430e 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -400,6 +400,7 @@ static int felix_init_structs(struct felix *felix, int num_phys_ports) ocelot->stats_layout = felix->info->stats_layout; ocelot->num_stats = felix->info->num_stats; ocelot->shared_queue_sz = felix->info->shared_queue_sz; + ocelot->num_mact_rows = felix->info->num_mact_rows; ocelot->vcap_is2_keys = felix->info->vcap_is2_keys; ocelot->vcap_is2_actions= felix->info->vcap_is2_actions; ocelot->vcap = felix->info->vcap; diff --git a/drivers/net/dsa/ocelot/felix.h b/drivers/net/dsa/ocelot/felix.h index 82d46f260041..9af106513e53 100644 --- a/drivers/net/dsa/ocelot/felix.h +++ b/drivers/net/dsa/ocelot/felix.h @@ -15,6 +15,7 @@ struct felix_info { const u32 *const *map; const struct ocelot_ops *ops; int shared_queue_sz; + int num_mact_rows; const struct ocelot_stat_layout *stats_layout; unsigned int num_stats; int num_ports; diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index b4078f3c5c38..8bf395f12b47 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -1220,6 +1220,7 @@ struct felix_info felix_info_vsc9959 = { .vcap_is2_actions = vsc9959_vcap_is2_actions, .vcap = vsc9959_vcap_props, .shared_queue_sz = 128 * 1024, + .num_mact_rows = 2048, .num_ports = 6, .switch_pci_bar = 4, .imdio_pci_bar = 0, diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index a8c48a4a708f..887b3cc88354 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -1031,10 +1031,8 @@ int ocelot_fdb_dump(struct ocelot *ocelot, int port, { int i, j; - /* Loop through all the mac tables entries. There are 1024 rows of 4 - * entries. - */ - for (i = 0; i < 1024; i++) { + /* Loop through all the mac tables entries. */ + for (i = 0; i < ocelot->num_mact_rows; i++) { for (j = 0; j < 4; j++) { struct ocelot_mact_entry entry; bool is_static; diff --git a/drivers/net/ethernet/mscc/ocelot_regs.c b/drivers/net/ethernet/mscc/ocelot_regs.c index b88b5899b227..7d4fd1b6adda 100644 --- a/drivers/net/ethernet/mscc/ocelot_regs.c +++ b/drivers/net/ethernet/mscc/ocelot_regs.c @@ -431,6 +431,7 @@ int ocelot_chip_init(struct ocelot *ocelot, const struct ocelot_ops *ops) ocelot->stats_layout = ocelot_stats_layout; ocelot->num_stats = ARRAY_SIZE(ocelot_stats_layout); ocelot->shared_queue_sz = 224 * 1024; + ocelot->num_mact_rows = 1024; ocelot->ops = ops; ret = ocelot_regfields_init(ocelot, ocelot_regfields); diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 6d6a3947c8b7..efc8b613d486 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -502,6 +502,7 @@ struct ocelot { unsigned int num_stats; int shared_queue_sz; + int num_mact_rows; struct net_device *hw_bridge_dev; u16 bridge_mask; -- cgit v1.2.3-71-gd317 From 9274124f023b5c56dc4326637d4f787968b03607 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 4 May 2020 12:48:54 -0400 Subject: net: stricter validation of untrusted gso packets Syzkaller again found a path to a kernel crash through bad gso input: a packet with transport header extending beyond skb_headlen(skb). Tighten validation at kernel entry: - Verify that the transport header lies within the linear section. To avoid pulling linux/tcp.h, verify just sizeof tcphdr. tcp_gso_segment will call pskb_may_pull (th->doff * 4) before use. - Match the gso_type against the ip_proto found by the flow dissector. Fixes: bfd5f4a3d605 ("packet: Add GSO/csum offload support.") Reported-by: syzbot Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/virtio_net.h | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 0d1fe9297ac6..6f6ade63b04c 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -3,6 +3,8 @@ #define _LINUX_VIRTIO_NET_H #include +#include +#include #include static inline int virtio_net_hdr_set_proto(struct sk_buff *skb, @@ -28,17 +30,25 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, bool little_endian) { unsigned int gso_type = 0; + unsigned int thlen = 0; + unsigned int ip_proto; if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { case VIRTIO_NET_HDR_GSO_TCPV4: gso_type = SKB_GSO_TCPV4; + ip_proto = IPPROTO_TCP; + thlen = sizeof(struct tcphdr); break; case VIRTIO_NET_HDR_GSO_TCPV6: gso_type = SKB_GSO_TCPV6; + ip_proto = IPPROTO_TCP; + thlen = sizeof(struct tcphdr); break; case VIRTIO_NET_HDR_GSO_UDP: gso_type = SKB_GSO_UDP; + ip_proto = IPPROTO_UDP; + thlen = sizeof(struct udphdr); break; default: return -EINVAL; @@ -57,16 +67,22 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, if (!skb_partial_csum_set(skb, start, off)) return -EINVAL; + + if (skb_transport_offset(skb) + thlen > skb_headlen(skb)) + return -EINVAL; } else { /* gso packets without NEEDS_CSUM do not set transport_offset. * probe and drop if does not match one of the above types. */ if (gso_type && skb->network_header) { + struct flow_keys_basic keys; + if (!skb->protocol) virtio_net_hdr_set_proto(skb, hdr); retry: - skb_probe_transport_header(skb); - if (!skb_transport_header_was_set(skb)) { + if (!skb_flow_dissect_flow_keys_basic(NULL, skb, &keys, + NULL, 0, 0, 0, + 0)) { /* UFO does not specify ipv4 or 6: try both */ if (gso_type & SKB_GSO_UDP && skb->protocol == htons(ETH_P_IP)) { @@ -75,6 +91,12 @@ retry: } return -EINVAL; } + + if (keys.control.thoff + thlen > skb_headlen(skb) || + keys.basic.ip_proto != ip_proto) + return -EINVAL; + + skb_set_transport_header(skb, keys.control.thoff); } } -- cgit v1.2.3-71-gd317 From 16f8036086a929694c3c62f577bb5925fe4fd607 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 6 May 2020 20:34:50 +0200 Subject: net: flow_offload: skip hw stats check for FLOW_ACTION_HW_STATS_DONT_CARE This patch adds FLOW_ACTION_HW_STATS_DONT_CARE which tells the driver that the frontend does not need counters, this hw stats type request never fails. The FLOW_ACTION_HW_STATS_DISABLED type explicitly requests the driver to disable the stats, however, if the driver cannot disable counters, it bails out. TCA_ACT_HW_STATS_* maintains the 1:1 mapping with FLOW_ACTION_HW_STATS_* except by disabled which is mapped to FLOW_ACTION_HW_STATS_DISABLED (this is 0 in tc). Add tc_act_hw_stats() to perform the mapping between TCA_ACT_HW_STATS_* and FLOW_ACTION_HW_STATS_*. Fixes: 319a1d19471e ("flow_offload: check for basic action hw stats type") Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c | 3 ++- include/net/flow_offload.h | 9 ++++++++- net/sched/cls_api.c | 14 ++++++++++++-- 3 files changed, 22 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index 51117a5a6bbf..890b078851c9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -36,7 +36,8 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, err = mlxsw_sp_acl_rulei_act_count(mlxsw_sp, rulei, extack); if (err) return err; - } else if (act->hw_stats != FLOW_ACTION_HW_STATS_DISABLED) { + } else if (act->hw_stats != FLOW_ACTION_HW_STATS_DISABLED && + act->hw_stats != FLOW_ACTION_HW_STATS_DONT_CARE) { NL_SET_ERR_MSG_MOD(extack, "Unsupported action HW stats type"); return -EOPNOTSUPP; } diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 3619c6acf60f..efc8350b42fb 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -166,15 +166,18 @@ enum flow_action_mangle_base { enum flow_action_hw_stats_bit { FLOW_ACTION_HW_STATS_IMMEDIATE_BIT, FLOW_ACTION_HW_STATS_DELAYED_BIT, + FLOW_ACTION_HW_STATS_DISABLED_BIT, }; enum flow_action_hw_stats { - FLOW_ACTION_HW_STATS_DISABLED = 0, + FLOW_ACTION_HW_STATS_DONT_CARE = 0, FLOW_ACTION_HW_STATS_IMMEDIATE = BIT(FLOW_ACTION_HW_STATS_IMMEDIATE_BIT), FLOW_ACTION_HW_STATS_DELAYED = BIT(FLOW_ACTION_HW_STATS_DELAYED_BIT), FLOW_ACTION_HW_STATS_ANY = FLOW_ACTION_HW_STATS_IMMEDIATE | FLOW_ACTION_HW_STATS_DELAYED, + FLOW_ACTION_HW_STATS_DISABLED = + BIT(FLOW_ACTION_HW_STATS_DISABLED_BIT), }; typedef void (*action_destr)(void *priv); @@ -325,7 +328,11 @@ __flow_action_hw_stats_check(const struct flow_action *action, return true; if (!flow_action_mixed_hw_stats_check(action, extack)) return false; + action_entry = flow_action_first_entry_get(action); + if (action_entry->hw_stats == FLOW_ACTION_HW_STATS_DONT_CARE) + return true; + if (!check_allow_bit && action_entry->hw_stats != FLOW_ACTION_HW_STATS_ANY) { NL_SET_ERR_MSG_MOD(extack, "Driver supports only default HW stats type \"any\""); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index c0e5b64b3caf..0a7ecc292bd3 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -3523,6 +3523,16 @@ static void tcf_sample_get_group(struct flow_action_entry *entry, #endif } +static enum flow_action_hw_stats tc_act_hw_stats(u8 hw_stats) +{ + if (WARN_ON_ONCE(hw_stats > TCA_ACT_HW_STATS_ANY)) + return FLOW_ACTION_HW_STATS_DONT_CARE; + else if (!hw_stats) + return FLOW_ACTION_HW_STATS_DISABLED; + + return hw_stats; +} + int tc_setup_flow_action(struct flow_action *flow_action, const struct tcf_exts *exts) { @@ -3546,7 +3556,7 @@ int tc_setup_flow_action(struct flow_action *flow_action, if (err) goto err_out_locked; - entry->hw_stats = act->hw_stats; + entry->hw_stats = tc_act_hw_stats(act->hw_stats); if (is_tcf_gact_ok(act)) { entry->id = FLOW_ACTION_ACCEPT; @@ -3614,7 +3624,7 @@ int tc_setup_flow_action(struct flow_action *flow_action, entry->mangle.mask = tcf_pedit_mask(act, k); entry->mangle.val = tcf_pedit_val(act, k); entry->mangle.offset = tcf_pedit_offset(act, k); - entry->hw_stats = act->hw_stats; + entry->hw_stats = tc_act_hw_stats(act->hw_stats); entry = &flow_action->entries[++j]; } } else if (is_tcf_csum(act)) { -- cgit v1.2.3-71-gd317 From eb7ae5e06bb6e6ac6bb86872d27c43ebab92f6b2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 4 May 2020 14:47:54 +0200 Subject: bdi: move bdi_dev_name out of line bdi_dev_name is not a fast path function, move it out of line. This prepares for using it from modular callers without having to export an implementation detail like bdi_unknown_name. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Greg Kroah-Hartman Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 9 +-------- mm/backing-dev.c | 10 +++++++++- 2 files changed, 10 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index f88197c1ffc2..c9ad5c3b7b4b 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -505,13 +505,6 @@ static inline int bdi_rw_congested(struct backing_dev_info *bdi) (1 << WB_async_congested)); } -extern const char *bdi_unknown_name; - -static inline const char *bdi_dev_name(struct backing_dev_info *bdi) -{ - if (!bdi || !bdi->dev) - return bdi_unknown_name; - return dev_name(bdi->dev); -} +const char *bdi_dev_name(struct backing_dev_info *bdi); #endif /* _LINUX_BACKING_DEV_H */ diff --git a/mm/backing-dev.c b/mm/backing-dev.c index c81b4f3a7268..c2c44c89ee5d 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -21,7 +21,7 @@ struct backing_dev_info noop_backing_dev_info = { EXPORT_SYMBOL_GPL(noop_backing_dev_info); static struct class *bdi_class; -const char *bdi_unknown_name = "(unknown)"; +static const char *bdi_unknown_name = "(unknown)"; /* * bdi_lock protects bdi_tree and updates to bdi_list. bdi_list has RCU @@ -1043,6 +1043,14 @@ void bdi_put(struct backing_dev_info *bdi) } EXPORT_SYMBOL(bdi_put); +const char *bdi_dev_name(struct backing_dev_info *bdi) +{ + if (!bdi || !bdi->dev) + return bdi_unknown_name; + return dev_name(bdi->dev); +} +EXPORT_SYMBOL_GPL(bdi_dev_name); + static wait_queue_head_t congestion_wqh[2] = { __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]), __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1]) -- cgit v1.2.3-71-gd317 From 386c82a70319d42dba4f1b30e5e7076f2b4d8c2f Mon Sep 17 00:00:00 2001 From: Yiwei Zhang Date: Tue, 28 Apr 2020 15:08:25 -0700 Subject: gpu/trace: Minor comment updates for gpu_mem_total tracepoint This change updates the improper comment for the 'size' attribute in the tracepoint definition. Most gfx drivers pre-fault in physical pages instead of making virtual allocations. So we drop the 'Virtual' keyword here and leave this to the implementations. Link: http://lkml.kernel.org/r/20200428220825.169606-1-zzyiwei@google.com Signed-off-by: Yiwei Zhang Signed-off-by: Steven Rostedt (VMware) --- include/trace/events/gpu_mem.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/gpu_mem.h b/include/trace/events/gpu_mem.h index 1897822a9150..26d871f96e94 100644 --- a/include/trace/events/gpu_mem.h +++ b/include/trace/events/gpu_mem.h @@ -24,7 +24,7 @@ * * @pid: Put 0 for global total, while positive pid for process total. * - * @size: Virtual size of the allocation in bytes. + * @size: Size of the allocation in bytes. * */ TRACE_EVENT(gpu_mem_total, -- cgit v1.2.3-71-gd317 From d51cfc53ade3189455a1b88ec7a2ff0c24597cf8 Mon Sep 17 00:00:00 2001 From: Yufen Yu Date: Mon, 4 May 2020 14:47:55 +0200 Subject: bdi: use bdi_dev_name() to get device name Use the common interface bdi_dev_name() to get device name. Signed-off-by: Yufen Yu Signed-off-by: Christoph Hellwig Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jan Kara Reviewed-by: Bart Van Assche Add missing include BFQ Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 6 ++++-- block/blk-cgroup.c | 2 +- fs/ceph/debugfs.c | 2 +- include/trace/events/wbt.h | 8 ++++---- 4 files changed, 10 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 78ba57efd16b..3d411716d7ee 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -123,6 +123,7 @@ #include #include #include +#include #include "blk.h" #include "blk-mq.h" @@ -4976,8 +4977,9 @@ bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic) ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio); switch (ioprio_class) { default: - dev_err(bfqq->bfqd->queue->backing_dev_info->dev, - "bfq: bad prio class %d\n", ioprio_class); + pr_err("bdi %s: bfq: bad prio class %d\n", + bdi_dev_name(bfqq->bfqd->queue->backing_dev_info), + ioprio_class); /* fall through */ case IOPRIO_CLASS_NONE: /* diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index c5dc833212e1..930212c1a512 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -496,7 +496,7 @@ const char *blkg_dev_name(struct blkcg_gq *blkg) { /* some drivers (floppy) instantiate a queue w/o disk registered */ if (blkg->q->backing_dev_info->dev) - return dev_name(blkg->q->backing_dev_info->dev); + return bdi_dev_name(blkg->q->backing_dev_info); return NULL; } diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 481ac97b4d25..dcaed75de9e6 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -271,7 +271,7 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc) &congestion_kb_fops); snprintf(name, sizeof(name), "../../bdi/%s", - dev_name(fsc->sb->s_bdi->dev)); + bdi_dev_name(fsc->sb->s_bdi)); fsc->debugfs_bdi = debugfs_create_symlink("bdi", fsc->client->debugfs_dir, diff --git a/include/trace/events/wbt.h b/include/trace/events/wbt.h index 784814160197..9c66e59d859c 100644 --- a/include/trace/events/wbt.h +++ b/include/trace/events/wbt.h @@ -33,7 +33,7 @@ TRACE_EVENT(wbt_stat, ), TP_fast_assign( - strlcpy(__entry->name, dev_name(bdi->dev), + strlcpy(__entry->name, bdi_dev_name(bdi), ARRAY_SIZE(__entry->name)); __entry->rmean = stat[0].mean; __entry->rmin = stat[0].min; @@ -68,7 +68,7 @@ TRACE_EVENT(wbt_lat, ), TP_fast_assign( - strlcpy(__entry->name, dev_name(bdi->dev), + strlcpy(__entry->name, bdi_dev_name(bdi), ARRAY_SIZE(__entry->name)); __entry->lat = div_u64(lat, 1000); ), @@ -105,7 +105,7 @@ TRACE_EVENT(wbt_step, ), TP_fast_assign( - strlcpy(__entry->name, dev_name(bdi->dev), + strlcpy(__entry->name, bdi_dev_name(bdi), ARRAY_SIZE(__entry->name)); __entry->msg = msg; __entry->step = step; @@ -141,7 +141,7 @@ TRACE_EVENT(wbt_timer, ), TP_fast_assign( - strlcpy(__entry->name, dev_name(bdi->dev), + strlcpy(__entry->name, bdi_dev_name(bdi), ARRAY_SIZE(__entry->name)); __entry->status = status; __entry->step = step; -- cgit v1.2.3-71-gd317 From 6bd87eec23cbc9ed222bed0f5b5b02bf300e9a8d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 4 May 2020 14:47:56 +0200 Subject: bdi: add a ->dev_name field to struct backing_dev_info Cache a copy of the name for the life time of the backing_dev_info structure so that we can reference it even after unregistering. Fixes: 68f23b89067f ("memcg: fix a crash in wb_workfn when a device disappears") Reported-by: Yufen Yu Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- include/linux/backing-dev-defs.h | 1 + mm/backing-dev.c | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index ee577a83cfe6..7367150f962a 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -219,6 +219,7 @@ struct backing_dev_info { wait_queue_head_t wb_waitq; struct device *dev; + char dev_name[64]; struct device *owner; struct timer_list laptop_mode_wb_timer; diff --git a/mm/backing-dev.c b/mm/backing-dev.c index c2c44c89ee5d..efc5b83acd2d 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -938,7 +938,8 @@ int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args) if (bdi->dev) /* The driver needs to use separate queues per device */ return 0; - dev = device_create_vargs(bdi_class, NULL, MKDEV(0, 0), bdi, fmt, args); + vsnprintf(bdi->dev_name, sizeof(bdi->dev_name), fmt, args); + dev = device_create(bdi_class, NULL, MKDEV(0, 0), bdi, bdi->dev_name); if (IS_ERR(dev)) return PTR_ERR(dev); @@ -1047,7 +1048,7 @@ const char *bdi_dev_name(struct backing_dev_info *bdi) { if (!bdi || !bdi->dev) return bdi_unknown_name; - return dev_name(bdi->dev); + return bdi->dev_name; } EXPORT_SYMBOL_GPL(bdi_dev_name); -- cgit v1.2.3-71-gd317 From 6553896666433e7efec589838b400a2a652b3ffa Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 9 Mar 2020 22:47:17 +0100 Subject: vmlinux.lds.h: Create section for protection against instrumentation Some code pathes, especially the low level entry code, must be protected against instrumentation for various reasons: - Low level entry code can be a fragile beast, especially on x86. - With NO_HZ_FULL RCU state needs to be established before using it. Having a dedicated section for such code allows to validate with tooling that no unsafe functions are invoked. Add the .noinstr.text section and the noinstr attribute to mark functions. noinstr implies notrace. Kprobes will gain a section check later. Provide also a set of markers: instrumentation_begin()/end() These are used to mark code inside a noinstr function which calls into regular instrumentable text section as safe. The instrumentation markers are only active when CONFIG_DEBUG_ENTRY is enabled as the end marker emits a NOP to prevent the compiler from merging the annotation points. This means the objtool verification requires a kernel compiled with this option. Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Link: https://lkml.kernel.org/r/20200505134100.075416272@linutronix.de --- arch/powerpc/kernel/vmlinux.lds.S | 1 + include/asm-generic/sections.h | 3 +++ include/asm-generic/vmlinux.lds.h | 10 ++++++++ include/linux/compiler.h | 53 +++++++++++++++++++++++++++++++++++++++ include/linux/compiler_types.h | 4 +++ scripts/mod/modpost.c | 2 +- 6 files changed, 72 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 31a0f201fb6f..a1706b63b82d 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -90,6 +90,7 @@ SECTIONS #ifdef CONFIG_PPC64 *(.tramp.ftrace.text); #endif + NOINSTR_TEXT SCHED_TEXT CPUIDLE_TEXT LOCK_TEXT diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index d1779d442aa5..66397ed10acb 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -53,6 +53,9 @@ extern char __ctors_start[], __ctors_end[]; /* Start and end of .opd section - used for function descriptors. */ extern char __start_opd[], __end_opd[]; +/* Start and end of instrumentation protected text section */ +extern char __noinstr_text_start[], __noinstr_text_end[]; + extern __visible const void __nosave_begin, __nosave_end; /* Function descriptor handling (if any). Override in asm/sections.h */ diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 71e387a5fe90..db600ef218d7 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -540,6 +540,15 @@ . = ALIGN((align)); \ __end_rodata = .; +/* + * Non-instrumentable text section + */ +#define NOINSTR_TEXT \ + ALIGN_FUNCTION(); \ + __noinstr_text_start = .; \ + *(.noinstr.text) \ + __noinstr_text_end = .; + /* * .text section. Map to function alignment to avoid address changes * during second ld run in second ld pass when generating System.map @@ -551,6 +560,7 @@ #define TEXT_TEXT \ ALIGN_FUNCTION(); \ *(.text.hot TEXT_MAIN .text.fixup .text.unlikely) \ + NOINSTR_TEXT \ *(.text..refcount) \ *(.ref.text) \ MEM_KEEP(init.text*) \ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 034b0a644efc..e9ead0505671 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -120,12 +120,65 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, /* Annotate a C jump table to allow objtool to follow the code flow */ #define __annotate_jump_table __section(.rodata..c_jump_table) +#ifdef CONFIG_DEBUG_ENTRY +/* Begin/end of an instrumentation safe region */ +#define instrumentation_begin() ({ \ + asm volatile("%c0:\n\t" \ + ".pushsection .discard.instr_begin\n\t" \ + ".long %c0b - .\n\t" \ + ".popsection\n\t" : : "i" (__COUNTER__)); \ +}) + +/* + * Because instrumentation_{begin,end}() can nest, objtool validation considers + * _begin() a +1 and _end() a -1 and computes a sum over the instructions. + * When the value is greater than 0, we consider instrumentation allowed. + * + * There is a problem with code like: + * + * noinstr void foo() + * { + * instrumentation_begin(); + * ... + * if (cond) { + * instrumentation_begin(); + * ... + * instrumentation_end(); + * } + * bar(); + * instrumentation_end(); + * } + * + * If instrumentation_end() would be an empty label, like all the other + * annotations, the inner _end(), which is at the end of a conditional block, + * would land on the instruction after the block. + * + * If we then consider the sum of the !cond path, we'll see that the call to + * bar() is with a 0-value, even though, we meant it to happen with a positive + * value. + * + * To avoid this, have _end() be a NOP instruction, this ensures it will be + * part of the condition block and does not escape. + */ +#define instrumentation_end() ({ \ + asm volatile("%c0: nop\n\t" \ + ".pushsection .discard.instr_end\n\t" \ + ".long %c0b - .\n\t" \ + ".popsection\n\t" : : "i" (__COUNTER__)); \ +}) +#endif /* CONFIG_DEBUG_ENTRY */ + #else #define annotate_reachable() #define annotate_unreachable() #define __annotate_jump_table #endif +#ifndef instrumentation_begin +#define instrumentation_begin() do { } while(0) +#define instrumentation_end() do { } while(0) +#endif + #ifndef ASM_UNREACHABLE # define ASM_UNREACHABLE #endif diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index e970f97a7fcb..5da257cbebf1 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -118,6 +118,10 @@ struct ftrace_likely_data { #define notrace __attribute__((__no_instrument_function__)) #endif +/* Section for code which can't be instrumented at all */ +#define noinstr \ + noinline notrace __attribute((__section__(".noinstr.text"))) + /* * it doesn't make sense on ARM (currently the only user of __naked) * to trace naked functions because then mcount is called without diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 5c3c50c5ec52..0053d4fea847 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -948,7 +948,7 @@ static void check_section(const char *modname, struct elf_info *elf, #define DATA_SECTIONS ".data", ".data.rel" #define TEXT_SECTIONS ".text", ".text.unlikely", ".sched.text", \ - ".kprobes.text", ".cpuidle.text" + ".kprobes.text", ".cpuidle.text", ".noinstr.text" #define OTHER_TEXT_SECTIONS ".ref.text", ".head.text", ".spinlock.text", \ ".fixup", ".entry.text", ".exception.text", ".text.*", \ ".coldtext" -- cgit v1.2.3-71-gd317 From 0995a5dfbe49badff78e78761fb66f46579f2f9a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 4 Mar 2020 13:09:50 +0100 Subject: tracing: Provide lockdep less trace_hardirqs_on/off() variants trace_hardirqs_on/off() is only partially safe vs. RCU idle. The tracer core itself is safe, but the resulting tracepoints can be utilized by e.g. BPF which is unsafe. Provide variants which do not contain the lockdep invocation so the lockdep and tracer invocations can be split at the call site and placed properly. This is required because lockdep needs to be aware of the state before switching away from RCU idle and after switching to RCU idle because these transitions can take locks. As these code pathes are going to be non-instrumentable the tracer can be invoked after RCU is turned on and before the switch to RCU idle. So for these new variants there is no need to invoke the rcuidle aware tracer functions. Name them so they match the lockdep counterparts. Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Link: https://lkml.kernel.org/r/20200505134100.270771162@linutronix.de --- include/linux/irqflags.h | 4 ++++ kernel/trace/trace_preemptirq.c | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) (limited to 'include') diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 61a9ced3aa50..f150e69ab81d 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -29,6 +29,8 @@ #endif #ifdef CONFIG_TRACE_IRQFLAGS + extern void trace_hardirqs_on_prepare(void); + extern void trace_hardirqs_off_prepare(void); extern void trace_hardirqs_on(void); extern void trace_hardirqs_off(void); # define lockdep_hardirq_context(p) ((p)->hardirq_context) @@ -96,6 +98,8 @@ do { \ } while (0) #else +# define trace_hardirqs_on_prepare() do { } while (0) +# define trace_hardirqs_off_prepare() do { } while (0) # define trace_hardirqs_on() do { } while (0) # define trace_hardirqs_off() do { } while (0) # define lockdep_hardirq_context(p) 0 diff --git a/kernel/trace/trace_preemptirq.c b/kernel/trace/trace_preemptirq.c index 4d8e99fdbbbe..c00880162b06 100644 --- a/kernel/trace/trace_preemptirq.c +++ b/kernel/trace/trace_preemptirq.c @@ -19,6 +19,24 @@ /* Per-cpu variable to prevent redundant calls when IRQs already off */ static DEFINE_PER_CPU(int, tracing_irq_cpu); +/* + * Like trace_hardirqs_on() but without the lockdep invocation. This is + * used in the low level entry code where the ordering vs. RCU is important + * and lockdep uses a staged approach which splits the lockdep hardirq + * tracking into a RCU on and a RCU off section. + */ +void trace_hardirqs_on_prepare(void) +{ + if (this_cpu_read(tracing_irq_cpu)) { + if (!in_nmi()) + trace_irq_enable(CALLER_ADDR0, CALLER_ADDR1); + tracer_hardirqs_on(CALLER_ADDR0, CALLER_ADDR1); + this_cpu_write(tracing_irq_cpu, 0); + } +} +EXPORT_SYMBOL(trace_hardirqs_on_prepare); +NOKPROBE_SYMBOL(trace_hardirqs_on_prepare); + void trace_hardirqs_on(void) { if (this_cpu_read(tracing_irq_cpu)) { @@ -33,6 +51,25 @@ void trace_hardirqs_on(void) EXPORT_SYMBOL(trace_hardirqs_on); NOKPROBE_SYMBOL(trace_hardirqs_on); +/* + * Like trace_hardirqs_off() but without the lockdep invocation. This is + * used in the low level entry code where the ordering vs. RCU is important + * and lockdep uses a staged approach which splits the lockdep hardirq + * tracking into a RCU on and a RCU off section. + */ +void trace_hardirqs_off_prepare(void) +{ + if (!this_cpu_read(tracing_irq_cpu)) { + this_cpu_write(tracing_irq_cpu, 1); + tracer_hardirqs_off(CALLER_ADDR0, CALLER_ADDR1); + if (!in_nmi()) + trace_irq_disable(CALLER_ADDR0, CALLER_ADDR1); + } + +} +EXPORT_SYMBOL(trace_hardirqs_off_prepare); +NOKPROBE_SYMBOL(trace_hardirqs_off_prepare); + void trace_hardirqs_off(void) { if (!this_cpu_read(tracing_irq_cpu)) { -- cgit v1.2.3-71-gd317 From c86e9b987cea3dd0209203e714553a47f5d7c6dd Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 18 Mar 2020 14:22:03 +0100 Subject: lockdep: Prepare for noinstr sections Force inlining and prevent instrumentation of all sorts by marking the functions which are invoked from low level entry code with 'noinstr'. Split the irqflags tracking into two parts. One which does the heavy lifting while RCU is watching and the final one which can be invoked after RCU is turned off. Signed-off-by: Peter Zijlstra Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Link: https://lkml.kernel.org/r/20200505134100.484532537@linutronix.de --- include/linux/irqflags.h | 2 + include/linux/sched.h | 1 + kernel/locking/lockdep.c | 86 +++++++++++++++++++++++++++++++---------- kernel/trace/trace_preemptirq.c | 2 + lib/debug_locks.c | 2 +- 5 files changed, 71 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index f150e69ab81d..d7f7e436c3af 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -19,11 +19,13 @@ #ifdef CONFIG_PROVE_LOCKING extern void lockdep_softirqs_on(unsigned long ip); extern void lockdep_softirqs_off(unsigned long ip); + extern void lockdep_hardirqs_on_prepare(unsigned long ip); extern void lockdep_hardirqs_on(unsigned long ip); extern void lockdep_hardirqs_off(unsigned long ip); #else static inline void lockdep_softirqs_on(unsigned long ip) { } static inline void lockdep_softirqs_off(unsigned long ip) { } + static inline void lockdep_hardirqs_on_prepare(unsigned long ip) { } static inline void lockdep_hardirqs_on(unsigned long ip) { } static inline void lockdep_hardirqs_off(unsigned long ip) { } #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 4418f5cb8324..658de6164853 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -983,6 +983,7 @@ struct task_struct { unsigned int hardirq_disable_event; int hardirqs_enabled; int hardirq_context; + u64 hardirq_chain_key; unsigned long softirq_disable_ip; unsigned long softirq_enable_ip; unsigned int softirq_disable_event; diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index ac10db66cc63..9ccd675a8b5a 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -3635,13 +3635,10 @@ mark_held_locks(struct task_struct *curr, enum lock_usage_bit base_bit) /* * Hardirqs will be enabled: */ -static void __trace_hardirqs_on_caller(unsigned long ip) +static void __trace_hardirqs_on_caller(void) { struct task_struct *curr = current; - /* we'll do an OFF -> ON transition: */ - curr->hardirqs_enabled = 1; - /* * We are going to turn hardirqs on, so set the * usage bit for all held locks: @@ -3654,15 +3651,19 @@ static void __trace_hardirqs_on_caller(unsigned long ip) * this bit from being set before) */ if (curr->softirqs_enabled) - if (!mark_held_locks(curr, LOCK_ENABLED_SOFTIRQ)) - return; - - curr->hardirq_enable_ip = ip; - curr->hardirq_enable_event = ++curr->irq_events; - debug_atomic_inc(hardirqs_on_events); + mark_held_locks(curr, LOCK_ENABLED_SOFTIRQ); } -void lockdep_hardirqs_on(unsigned long ip) +/** + * lockdep_hardirqs_on_prepare - Prepare for enabling interrupts + * @ip: Caller address + * + * Invoked before a possible transition to RCU idle from exit to user or + * guest mode. This ensures that all RCU operations are done before RCU + * stops watching. After the RCU transition lockdep_hardirqs_on() has to be + * invoked to set the final state. + */ +void lockdep_hardirqs_on_prepare(unsigned long ip) { if (unlikely(!debug_locks || current->lockdep_recursion)) return; @@ -3698,20 +3699,62 @@ void lockdep_hardirqs_on(unsigned long ip) if (DEBUG_LOCKS_WARN_ON(current->hardirq_context)) return; + current->hardirq_chain_key = current->curr_chain_key; + current->lockdep_recursion++; - __trace_hardirqs_on_caller(ip); + __trace_hardirqs_on_caller(); lockdep_recursion_finish(); } -NOKPROBE_SYMBOL(lockdep_hardirqs_on); +EXPORT_SYMBOL_GPL(lockdep_hardirqs_on_prepare); + +void noinstr lockdep_hardirqs_on(unsigned long ip) +{ + struct task_struct *curr = current; + + if (unlikely(!debug_locks || curr->lockdep_recursion)) + return; + + if (curr->hardirqs_enabled) { + /* + * Neither irq nor preemption are disabled here + * so this is racy by nature but losing one hit + * in a stat is not a big deal. + */ + __debug_atomic_inc(redundant_hardirqs_on); + return; + } + + /* + * We're enabling irqs and according to our state above irqs weren't + * already enabled, yet we find the hardware thinks they are in fact + * enabled.. someone messed up their IRQ state tracing. + */ + if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) + return; + + /* + * Ensure the lock stack remained unchanged between + * lockdep_hardirqs_on_prepare() and lockdep_hardirqs_on(). + */ + DEBUG_LOCKS_WARN_ON(current->hardirq_chain_key != + current->curr_chain_key); + + /* we'll do an OFF -> ON transition: */ + curr->hardirqs_enabled = 1; + curr->hardirq_enable_ip = ip; + curr->hardirq_enable_event = ++curr->irq_events; + debug_atomic_inc(hardirqs_on_events); +} +EXPORT_SYMBOL_GPL(lockdep_hardirqs_on); /* * Hardirqs were disabled: */ -void lockdep_hardirqs_off(unsigned long ip) +void noinstr lockdep_hardirqs_off(unsigned long ip) { struct task_struct *curr = current; - if (unlikely(!debug_locks || current->lockdep_recursion)) + if (unlikely(!debug_locks || curr->lockdep_recursion)) return; /* @@ -3729,10 +3772,11 @@ void lockdep_hardirqs_off(unsigned long ip) curr->hardirq_disable_ip = ip; curr->hardirq_disable_event = ++curr->irq_events; debug_atomic_inc(hardirqs_off_events); - } else + } else { debug_atomic_inc(redundant_hardirqs_off); + } } -NOKPROBE_SYMBOL(lockdep_hardirqs_off); +EXPORT_SYMBOL_GPL(lockdep_hardirqs_off); /* * Softirqs will be enabled: @@ -4408,8 +4452,8 @@ static void print_unlock_imbalance_bug(struct task_struct *curr, dump_stack(); } -static int match_held_lock(const struct held_lock *hlock, - const struct lockdep_map *lock) +static noinstr int match_held_lock(const struct held_lock *hlock, + const struct lockdep_map *lock) { if (hlock->instance == lock) return 1; @@ -4696,7 +4740,7 @@ __lock_release(struct lockdep_map *lock, unsigned long ip) return 0; } -static nokprobe_inline +static __always_inline int __lock_is_held(const struct lockdep_map *lock, int read) { struct task_struct *curr = current; @@ -4956,7 +5000,7 @@ void lock_release(struct lockdep_map *lock, unsigned long ip) } EXPORT_SYMBOL_GPL(lock_release); -int lock_is_held_type(const struct lockdep_map *lock, int read) +noinstr int lock_is_held_type(const struct lockdep_map *lock, int read) { unsigned long flags; int ret = 0; diff --git a/kernel/trace/trace_preemptirq.c b/kernel/trace/trace_preemptirq.c index c00880162b06..fb0691b8a88d 100644 --- a/kernel/trace/trace_preemptirq.c +++ b/kernel/trace/trace_preemptirq.c @@ -46,6 +46,7 @@ void trace_hardirqs_on(void) this_cpu_write(tracing_irq_cpu, 0); } + lockdep_hardirqs_on_prepare(CALLER_ADDR0); lockdep_hardirqs_on(CALLER_ADDR0); } EXPORT_SYMBOL(trace_hardirqs_on); @@ -93,6 +94,7 @@ __visible void trace_hardirqs_on_caller(unsigned long caller_addr) this_cpu_write(tracing_irq_cpu, 0); } + lockdep_hardirqs_on_prepare(CALLER_ADDR0); lockdep_hardirqs_on(CALLER_ADDR0); } EXPORT_SYMBOL(trace_hardirqs_on_caller); diff --git a/lib/debug_locks.c b/lib/debug_locks.c index a75ee30b77cb..06d3135bd184 100644 --- a/lib/debug_locks.c +++ b/lib/debug_locks.c @@ -36,7 +36,7 @@ EXPORT_SYMBOL_GPL(debug_locks_silent); /* * Generic 'turn off all lock debugging' function: */ -int debug_locks_off(void) +noinstr int debug_locks_off(void) { if (debug_locks && __debug_locks_off()) { if (!debug_locks_silent) { -- cgit v1.2.3-71-gd317 From af1e56b78534c38bb0e0c712ca70e59f816b74e9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 19 Mar 2020 14:53:56 +0100 Subject: context_tracking: Make guest_enter/exit() .noinstr ready Force inlining of the helpers and mark the instrumentable parts accordingly. Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Link: https://lkml.kernel.org/r/20200505134341.672545766@linutronix.de --- include/linux/context_tracking.h | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 8150f5ac176c..8cac62ee6add 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -101,12 +101,14 @@ static inline void context_tracking_init(void) { } #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN /* must be called with irqs disabled */ -static inline void guest_enter_irqoff(void) +static __always_inline void guest_enter_irqoff(void) { + instrumentation_begin(); if (vtime_accounting_enabled_this_cpu()) vtime_guest_enter(current); else current->flags |= PF_VCPU; + instrumentation_end(); if (context_tracking_enabled()) __context_tracking_enter(CONTEXT_GUEST); @@ -118,39 +120,48 @@ static inline void guest_enter_irqoff(void) * one time slice). Lets treat guest mode as quiescent state, just like * we do with user-mode execution. */ - if (!context_tracking_enabled_this_cpu()) + if (!context_tracking_enabled_this_cpu()) { + instrumentation_begin(); rcu_virt_note_context_switch(smp_processor_id()); + instrumentation_end(); + } } -static inline void guest_exit_irqoff(void) +static __always_inline void guest_exit_irqoff(void) { if (context_tracking_enabled()) __context_tracking_exit(CONTEXT_GUEST); + instrumentation_begin(); if (vtime_accounting_enabled_this_cpu()) vtime_guest_exit(current); else current->flags &= ~PF_VCPU; + instrumentation_end(); } #else -static inline void guest_enter_irqoff(void) +static __always_inline void guest_enter_irqoff(void) { /* * This is running in ioctl context so its safe * to assume that it's the stime pending cputime * to flush. */ + instrumentation_begin(); vtime_account_kernel(current); current->flags |= PF_VCPU; rcu_virt_note_context_switch(smp_processor_id()); + instrumentation_end(); } -static inline void guest_exit_irqoff(void) +static __always_inline void guest_exit_irqoff(void) { + instrumentation_begin(); /* Flush the guest cputime we spent on the guest */ vtime_account_kernel(current); current->flags &= ~PF_VCPU; + instrumentation_end(); } #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */ -- cgit v1.2.3-71-gd317