From 77ae9957897df86e627089688265e0db029dd0df Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 27 Jan 2017 09:40:07 +0000 Subject: drm/i915: Enable userspace to opt-out of implicit fencing Userspace is faced with a dilemma. The kernel requires implicit fencing to manage resource usage (we always must wait for the GPU to finish before releasing its PTE) and for third parties. However, userspace may wish to avoid this serialisation if it is either using explicit fencing between parties and wants more fine-grained access to buffers (e.g. it may partition the buffer between uses and track fences on ranges rather than the implicit fences tracking the whole object). It follows that userspace needs a mechanism to avoid the kernel's serialisation on its implicit fences before execbuf execution. The next question is whether this is an object, execbuf or context flag. Hybrid users (such as using explicit EGL_ANDROID_native_sync fencing on shared winsys buffers, but implicit fencing on internal surfaces) require a per-object level flag. Given that this flag needs to be set only once for the lifetime of the object, this reduces the convenience of having an execbuf or context level flag (and avoids having multiple pieces of uABI controlling the same feature). Incorrect use of this flag will result in rendering corruption and GPU hangs - but will not result in use-after-free or similar resource tracking issues. Serious caveat: write ordering is not strictly correct after setting this flag on a render target on multiple engines. This affects all subsequent GEM operations (execbuf, set-domain, pread) and shared dma-buf operations. A fix is possible - but costly (both in terms of further ABI changes and runtime overhead). 
Testcase: igt/gem_exec_async Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Acked-by: Chad Versace Link: http://patchwork.freedesktop.org/patch/msgid/20170127094008.27489-1-chris@chris-wilson.co.uk --- include/uapi/drm/i915_drm.h | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 57093b455db6..6620b6ad76ed 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -397,6 +397,12 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_HAS_SCHEDULER 41 #define I915_PARAM_HUC_STATUS 42 +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to opt-out of + * synchronisation with implicit fencing on individual objects. + * See EXEC_OBJECT_ASYNC. + */ +#define I915_PARAM_HAS_EXEC_ASYNC 43 + typedef struct drm_i915_getparam { __s32 param; /* @@ -737,8 +743,29 @@ struct drm_i915_gem_exec_object2 { #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3) #define EXEC_OBJECT_PINNED (1<<4) #define EXEC_OBJECT_PAD_TO_SIZE (1<<5) +/* The kernel implicitly tracks GPU activity on all GEM objects, and + * synchronises operations with outstanding rendering. This includes + * rendering on other devices if exported via dma-buf. However, sometimes + * this tracking is too coarse and the user knows better. For example, + * if the object is split into non-overlapping ranges shared between different + * clients or engines (i.e. suballocating objects), the implicit tracking + * by kernel assumes that each operation affects the whole object rather + * than an individual range, causing needless synchronisation between clients. + * The kernel will also forgo any CPU cache flushes prior to rendering from + * the object as the client is expected to be also handling such domain + * tracking. 
+ * + * The kernel maintains the implicit tracking in order to manage resources + * used by the GPU - this flag only disables the synchronisation prior to + * rendering with this object in this execbuf. + * + * Opting out of implicit synchronisation requires the user to do its own + * explicit tracking to avoid rendering corruption. See, for example, + * I915_PARAM_HAS_EXEC_FENCE to order execbufs and execute them asynchronously. + */ +#define EXEC_OBJECT_ASYNC (1<<6) /* All remaining bits are MBZ and RESERVED FOR FUTURE USE */ -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PAD_TO_SIZE<<1) +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_ASYNC<<1) __u64 flags; union { -- cgit v1.2.3-71-gd317 From fec0445caa273209d2809760ac7c63e743d6f512 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 27 Jan 2017 09:40:08 +0000 Subject: drm/i915: Support explicit fencing for execbuf Now that the user can opt-out of implicit fencing, we need to give them back control over the fencing. We employ sync_file to wrap our drm_i915_gem_request and provide an fd that userspace can merge with other sync_file fds and pass back to the kernel to wait upon before future execution. 
Testcase: igt/gem_exec_fence Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Acked-by: Chad Versace Link: http://patchwork.freedesktop.org/patch/msgid/20170127094008.27489-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Kconfig | 1 + drivers/gpu/drm/i915/i915_drv.c | 3 +- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 54 +++++++++++++++++++++++++++--- include/uapi/drm/i915_drm.h | 36 +++++++++++++++++++- 4 files changed, 87 insertions(+), 7 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index 183f5dc1c3f2..1ae0bb91ee60 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -19,6 +19,7 @@ config DRM_I915 select INPUT if ACPI select ACPI_VIDEO if ACPI select ACPI_BUTTON if ACPI + select SYNC_FILE help Choose this option if you have a system that has "Intel Graphics Media Accelerator" or "HD Graphics" integrated graphics, diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 6c9c2037fadf..3aa5bf58cf32 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -350,6 +350,7 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_COHERENT_PHYS_GTT: case I915_PARAM_HAS_EXEC_SOFTPIN: case I915_PARAM_HAS_EXEC_ASYNC: + case I915_PARAM_HAS_EXEC_FENCE: /* For the time being all of these are always true; * if some supported hardware does not have one of these * features this value needs to be provided from @@ -2550,7 +2551,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF_DRV(I915_HWS_ADDR, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_INIT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER, i915_gem_execbuffer, DRM_AUTH), - DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2, i915_gem_execbuffer2, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2_WR, i915_gem_execbuffer2, DRM_AUTH|DRM_RENDER_ALLOW), 
DRM_IOCTL_DEF_DRV(I915_GEM_PIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_UNPIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 6fd60682bf93..91c2393199a3 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -28,6 +28,7 @@ #include #include +#include #include #include @@ -1595,6 +1596,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, struct i915_execbuffer_params *params = &params_master; const u32 ctx_id = i915_execbuffer2_get_context_id(*args); u32 dispatch_flags; + struct dma_fence *in_fence = NULL; + struct sync_file *out_fence = NULL; + int out_fence_fd = -1; int ret; bool need_relocs; @@ -1638,6 +1642,23 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, dispatch_flags |= I915_DISPATCH_RS; } + if (args->flags & I915_EXEC_FENCE_IN) { + in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2)); + if (!in_fence) { + ret = -EINVAL; + goto pre_mutex_err; + } + } + + if (args->flags & I915_EXEC_FENCE_OUT) { + out_fence_fd = get_unused_fd_flags(O_CLOEXEC); + if (out_fence_fd < 0) { + ret = out_fence_fd; + out_fence_fd = -1; + goto pre_mutex_err; + } + } + /* Take a local wakeref for preparing to dispatch the execbuf as * we expect to access the hardware fairly frequently in the * process. 
Upon first dispatch, we acquire another prolonged @@ -1782,6 +1803,21 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, goto err_batch_unpin; } + if (in_fence) { + ret = i915_gem_request_await_dma_fence(params->request, + in_fence); + if (ret < 0) + goto err_request; + } + + if (out_fence_fd != -1) { + out_fence = sync_file_create(&params->request->fence); + if (!out_fence) { + ret = -ENOMEM; + goto err_request; + } + } + /* Whilst this request exists, batch_obj will be on the * active_list, and so will hold the active reference. Only when this * request is retired will the the batch_obj be moved onto the @@ -1809,6 +1845,16 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, ret = execbuf_submit(params, args, &eb->vmas); err_request: __i915_add_request(params->request, ret == 0); + if (out_fence) { + if (ret == 0) { + fd_install(out_fence_fd, out_fence->file); + args->rsvd2 &= GENMASK_ULL(0, 31); /* keep in-fence */ + args->rsvd2 |= (u64)out_fence_fd << 32; + out_fence_fd = -1; + } else { + fput(out_fence->file); + } + } err_batch_unpin: /* @@ -1830,6 +1876,9 @@ pre_mutex_err: /* intel_gpu_busy should also get a ref, so it will free when the device * is really idle. 
*/ intel_runtime_pm_put(dev_priv); + if (out_fence_fd != -1) + put_unused_fd(out_fence_fd); + dma_fence_put(in_fence); return ret; } @@ -1937,11 +1986,6 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data, return -EINVAL; } - if (args->rsvd2 != 0) { - DRM_DEBUG("dirty rvsd2 field\n"); - return -EINVAL; - } - exec2_list = drm_malloc_gfp(args->buffer_count, sizeof(*exec2_list), GFP_TEMPORARY); diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 6620b6ad76ed..3554495bef13 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -246,6 +246,7 @@ typedef struct _drm_i915_sarea { #define DRM_I915_OVERLAY_PUT_IMAGE 0x27 #define DRM_I915_OVERLAY_ATTRS 0x28 #define DRM_I915_GEM_EXECBUFFER2 0x29 +#define DRM_I915_GEM_EXECBUFFER2_WR DRM_I915_GEM_EXECBUFFER2 #define DRM_I915_GET_SPRITE_COLORKEY 0x2a #define DRM_I915_SET_SPRITE_COLORKEY 0x2b #define DRM_I915_GEM_WAIT 0x2c @@ -280,6 +281,7 @@ typedef struct _drm_i915_sarea { #define DRM_IOCTL_I915_GEM_INIT DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_INIT, struct drm_i915_gem_init) #define DRM_IOCTL_I915_GEM_EXECBUFFER DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer) #define DRM_IOCTL_I915_GEM_EXECBUFFER2 DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2) +#define DRM_IOCTL_I915_GEM_EXECBUFFER2_WR DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2_WR, struct drm_i915_gem_execbuffer2) #define DRM_IOCTL_I915_GEM_PIN DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_PIN, struct drm_i915_gem_pin) #define DRM_IOCTL_I915_GEM_UNPIN DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_UNPIN, struct drm_i915_gem_unpin) #define DRM_IOCTL_I915_GEM_BUSY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy) @@ -403,6 +405,13 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_HAS_EXEC_ASYNC 43 +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports explicit fence support - + * both being able to pass in a sync_file fd to 
wait upon before executing, + * and being able to return a new sync_file fd that is signaled when the + * current request is complete. See I915_EXEC_FENCE_IN and I915_EXEC_FENCE_OUT. + */ +#define I915_PARAM_HAS_EXEC_FENCE 44 + typedef struct drm_i915_getparam { __s32 param; /* @@ -855,7 +864,32 @@ struct drm_i915_gem_execbuffer2 { */ #define I915_EXEC_RESOURCE_STREAMER (1<<15) -#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_RESOURCE_STREAMER<<1) +/* Setting I915_EXEC_FENCE_IN implies that lower_32_bits(rsvd2) represent + * a sync_file fd to wait upon (in a nonblocking manner) prior to executing + * the batch. + * + * Returns -EINVAL if the sync_file fd cannot be found. + */ +#define I915_EXEC_FENCE_IN (1<<16) + +/* Setting I915_EXEC_FENCE_OUT causes the ioctl to return a sync_file fd + * in the upper_32_bits(rsvd2) upon success. Ownership of the fd is given + * to the caller, and it should be close() after use. (The fd is a regular + * file descriptor and will be cleaned up on process termination. It holds + * a reference to the request, but nothing else.) + * + * The sync_file fd can be combined with other sync_file and passed either + * to execbuf using I915_EXEC_FENCE_IN, to atomic KMS ioctls (so that a flip + * will only occur after this request completes), or to other devices. + * + * Using I915_EXEC_FENCE_OUT requires use of + * DRM_IOCTL_I915_GEM_EXECBUFFER2_WR ioctl so that the result is written + * back to userspace. Failure to do so will cause the out-fence to always + * be reported as zero, and the real fence fd to be leaked. 
+ */ +#define I915_EXEC_FENCE_OUT (1<<17) + +#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_OUT<<1)) #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) #define i915_execbuffer2_set_context_id(eb2, context) \ -- cgit v1.2.3-71-gd317 From 40ee6fbef75fe6452dc9e69e6f9f1a2c7808ed67 Mon Sep 17 00:00:00 2001 From: Manasi Navare Date: Fri, 16 Dec 2016 12:29:06 +0200 Subject: drm: Add a new connector atomic property for link status At the time userspace does setcrtc, we've already promised the mode would work. The promise is based on the theoretical capabilities of the link, but it's possible we can't reach this in practice. The DP spec describes how the link should be reduced, but we can't reduce the link below the requirements of the mode. Black screen follows. One idea would be to have setcrtc return a failure. However, it already should not fail as the atomic checks have passed. It would also conflict with the idea of making setcrtc asynchronous in the future, returning before the actual mode setting and link training. Another idea is to train the link "upfront" at hotplug time, before pruning the mode list, so that we can do the pruning based on practical not theoretical capabilities. However, the changes for link training are pretty drastic, all for the sake of error handling and DP compliance, when the most common happy day scenario is the current approach of link training at mode setting time, using the optimal parameters for the mode. It is also not certain all hardware could do this without the pipe on; not even all our hardware can do this. Some of this can be solved, but not trivially. Both of the above ideas also fail to address link degradation *during* operation. 
The solution is to add a new "link-status" connector property in order to address link training failure in a way that: a) changes the current happy day scenario as little as possible, to avoid regressions, b) can be implemented the same way by all drm drivers, c) is still opt-in for the drivers and userspace, and opting out doesn't regress the user experience, d) doesn't prevent drivers from implementing better or alternate approaches, possibly without userspace involvement. And, of course, handles all the issues presented. In the usual happy day scenario, this is always "good". If something fails during or after a mode set, the kernel driver can set the link status to "bad" and issue a hotplug uevent for userspace to have it re-check the valid modes through GET_CONNECTOR IOCTL, and try modeset again. If the theoretical capabilities of the link can't be reached, the mode list is trimmed based on that. v7 by Jani: * Rebase, simplify set property while at it, checkpatch fix v6: * Fix a typo in kernel doc (Sean Paul) v5: * Clarify doc for silent rejection of atomic properties by driver (Daniel Vetter) v4: * Add comments in kernel-doc format (Daniel Vetter) * Update the kernel-doc for link-status (Sean Paul) v3: * Fixed a build error (Jani Saarinen) v2: * Removed connector->link_status (Daniel Vetter) * Set connector->state->link_status in drm_mode_connector_set_link_status_property (Daniel Vetter) * Set the connector_changed flag to true if connector->state->link_status changed. 
* Reset link_status to GOOD in update_output_state (Daniel Vetter) * Never allow userspace to set link status from Good To Bad (Daniel Vetter) Reviewed-by: Sean Paul Reviewed-by: Daniel Vetter Reviewed-by: Jani Nikula Acked-by: Tony Cheng Acked-by: Harry Wentland Cc: Jani Nikula Cc: Daniel Vetter Cc: Ville Syrjala Cc: Chris Wilson Cc: Sean Paul Signed-off-by: Manasi Navare Signed-off-by: Jani Nikula Acked-by: Eric Anholt (for the -modesetting patch) Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/0182487051aa9f1594820e35a4853de2f8747b4e.1481883920.git.jani.nikula@intel.com --- drivers/gpu/drm/drm_atomic.c | 16 ++++++++++++ drivers/gpu/drm/drm_atomic_helper.c | 15 +++++++++++ drivers/gpu/drm/drm_connector.c | 52 +++++++++++++++++++++++++++++++++++++ include/drm/drm_connector.h | 19 ++++++++++++++ include/drm/drm_mode_config.h | 5 ++++ include/uapi/drm/drm_mode.h | 4 +++ 6 files changed, 111 insertions(+) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index afec53832145..3b6e6e924e3c 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -1109,6 +1109,20 @@ int drm_atomic_connector_set_property(struct drm_connector *connector, state->tv.saturation = val; } else if (property == config->tv_hue_property) { state->tv.hue = val; + } else if (property == config->link_status_property) { + /* Never downgrade from GOOD to BAD on userspace's request here, + * only hw issues can do that. + * + * For an atomic property the userspace doesn't need to be able + * to understand all the properties, but needs to be able to + * restore the state it wants on VT switch. So if the userspace + * tries to change the link_status from GOOD to BAD, driver + * silently rejects it and returns a 0. This prevents userspace + * from accidentally breaking the display when it restores the + * state. 
+ */ + if (state->link_status != DRM_LINK_STATUS_GOOD) + state->link_status = val; } else if (connector->funcs->atomic_set_property) { return connector->funcs->atomic_set_property(connector, state, property, val); @@ -1183,6 +1197,8 @@ drm_atomic_connector_get_property(struct drm_connector *connector, *val = state->tv.saturation; } else if (property == config->tv_hue_property) { *val = state->tv.hue; + } else if (property == config->link_status_property) { + *val = state->link_status; } else if (connector->funcs->atomic_get_property) { return connector->funcs->atomic_get_property(connector, state, property, val); diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 9203f3e933f7..2e62b332ae8b 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -530,6 +530,13 @@ drm_atomic_helper_check_modeset(struct drm_device *dev, connector_state); if (ret) return ret; + if (connector->state->crtc) { + crtc_state = drm_atomic_get_existing_crtc_state(state, + connector->state->crtc); + if (connector->state->link_status != + connector_state->link_status) + crtc_state->connectors_changed = true; + } } /* @@ -2241,6 +2248,8 @@ static int update_output_state(struct drm_atomic_state *state, NULL); if (ret) return ret; + /* Make sure legacy setCrtc always re-trains */ + conn_state->link_status = DRM_LINK_STATUS_GOOD; } } @@ -2284,6 +2293,12 @@ static int update_output_state(struct drm_atomic_state *state, * * Provides a default crtc set_config handler using the atomic driver interface. * + * NOTE: For backwards compatibility with old userspace this automatically + * resets the "link-status" property to GOOD, to force any link + * re-training. The SETCRTC ioctl does not define whether an update does + * need a full modeset or just a plane update, hence we're allowed to do + * that. See also drm_mode_connector_set_link_status_property(). 
+ * * Returns: * Returns 0 on success, negative errno numbers on failure. */ diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index 45464c8b797d..ab73e86ffdf1 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -244,6 +244,10 @@ int drm_connector_init(struct drm_device *dev, drm_object_attach_property(&connector->base, config->dpms_property, 0); + drm_object_attach_property(&connector->base, + config->link_status_property, + 0); + if (drm_core_check_feature(dev, DRIVER_ATOMIC)) { drm_object_attach_property(&connector->base, config->prop_crtc_id, 0); } @@ -599,6 +603,12 @@ static const struct drm_prop_enum_list drm_dpms_enum_list[] = { }; DRM_ENUM_NAME_FN(drm_get_dpms_name, drm_dpms_enum_list) +static const struct drm_prop_enum_list drm_link_status_enum_list[] = { + { DRM_MODE_LINK_STATUS_GOOD, "Good" }, + { DRM_MODE_LINK_STATUS_BAD, "Bad" }, +}; +DRM_ENUM_NAME_FN(drm_get_link_status_name, drm_link_status_enum_list) + /** * drm_display_info_set_bus_formats - set the supported bus formats * @info: display info to store bus formats in @@ -718,6 +728,11 @@ DRM_ENUM_NAME_FN(drm_get_tv_subconnector_name, * tiling and virtualize both &drm_crtc and &drm_plane if needed. Drivers * should update this value using drm_mode_connector_set_tile_property(). * Userspace cannot change this property. + * link-status: + * Connector link-status property to indicate the status of link. The default + * value of link-status is "GOOD". If something fails during or after modeset, + * the kernel driver may set this to "BAD" and issue a hotplug uevent. Drivers + * should update this value using drm_mode_connector_set_link_status_property(). 
* * Connectors also have one standardized atomic property: * @@ -759,6 +774,13 @@ int drm_connector_create_standard_properties(struct drm_device *dev) return -ENOMEM; dev->mode_config.tile_property = prop; + prop = drm_property_create_enum(dev, 0, "link-status", + drm_link_status_enum_list, + ARRAY_SIZE(drm_link_status_enum_list)); + if (!prop) + return -ENOMEM; + dev->mode_config.link_status_property = prop; + return 0; } @@ -1088,6 +1110,36 @@ int drm_mode_connector_update_edid_property(struct drm_connector *connector, } EXPORT_SYMBOL(drm_mode_connector_update_edid_property); +/** + * drm_mode_connector_set_link_status_property - Set link status property of a connector + * @connector: drm connector + * @link_status: new value of link status property (0: Good, 1: Bad) + * + * In usual working scenario, this link status property will always be set to + * "GOOD". If something fails during or after a mode set, the kernel driver + * may set this link status property to "BAD". The caller then needs to send a + * hotplug uevent for userspace to re-check the valid modes through + * GET_CONNECTOR_IOCTL and retry modeset. + * + * Note: Drivers cannot rely on userspace to support this property and + * issue a modeset. As such, they may choose to handle issues (like + * re-training a link) without userspace's intervention. + * + * The reason for adding this property is to handle link training failures, but + * it is not limited to DP or link training. For example, if we implement + * asynchronous setcrtc, this property can be used to report any failures in that. 
+ */ +void drm_mode_connector_set_link_status_property(struct drm_connector *connector, + uint64_t link_status) +{ + struct drm_device *dev = connector->dev; + + drm_modeset_lock(&dev->mode_config.connection_mutex, NULL); + connector->state->link_status = link_status; + drm_modeset_unlock(&dev->mode_config.connection_mutex); +} +EXPORT_SYMBOL(drm_mode_connector_set_link_status_property); + int drm_mode_connector_set_obj_prop(struct drm_mode_object *obj, struct drm_property *property, uint64_t value) diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index e5e1eddd19fb..f08aa5dfc9d7 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -89,6 +89,17 @@ enum subpixel_order { SubPixelNone, }; +/** + * enum drm_link_status - connector's link_status property value + * + * This enum is used as the connector's link status property value. + * It is set to the values defined in uapi. + */ +enum drm_link_status { + DRM_LINK_STATUS_GOOD = DRM_MODE_LINK_STATUS_GOOD, + DRM_LINK_STATUS_BAD = DRM_MODE_LINK_STATUS_BAD, +}; + /** * struct drm_display_info - runtime data about the connected sink * @@ -243,6 +254,12 @@ struct drm_connector_state { struct drm_encoder *best_encoder; + /** + * @link_status: Connector link_status to keep track of whether link is + * GOOD or BAD to notify userspace if retraining is necessary. 
+ */ + enum drm_link_status link_status; + struct drm_atomic_state *state; struct drm_tv_connector_state tv; @@ -837,6 +854,8 @@ int drm_mode_connector_set_path_property(struct drm_connector *connector, int drm_mode_connector_set_tile_property(struct drm_connector *connector); int drm_mode_connector_update_edid_property(struct drm_connector *connector, const struct edid *edid); +void drm_mode_connector_set_link_status_property(struct drm_connector *connector, + uint64_t link_status); /** * struct drm_tile_group - Tile group metadata diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h index f220ee909bdb..ea169a90b3c4 100644 --- a/include/drm/drm_mode_config.h +++ b/include/drm/drm_mode_config.h @@ -438,6 +438,11 @@ struct drm_mode_config { * multiple CRTCs. */ struct drm_property *tile_property; + /** + * @link_status_property: Default connector property for link status + * of a connector + */ + struct drm_property *link_status_property; /** * @plane_type_property: Default plane property to differentiate * CURSOR, PRIMARY and OVERLAY legacy uses of planes. 
diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index ce7efe2e8a5e..8c67fc03d53d 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -123,6 +123,10 @@ extern "C" { #define DRM_MODE_DIRTY_ON 1 #define DRM_MODE_DIRTY_ANNOTATE 2 +/* Link Status options */ +#define DRM_MODE_LINK_STATUS_GOOD 0 +#define DRM_MODE_LINK_STATUS_BAD 1 + struct drm_mode_modeinfo { __u32 clock; __u16 hdisplay; -- cgit v1.2.3-71-gd317 From 6e9b73c695022b2c083517aaed455671ed0cdb2b Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Thu, 9 Feb 2017 13:14:13 -0200 Subject: [media] cec.h: small typo fix ad -> as It won't bring about world peace, but every little bit helps :-) Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/cec.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/cec.h b/include/uapi/linux/cec.h index 14b6f24b189e..a0dfe27bc6c7 100644 --- a/include/uapi/linux/cec.h +++ b/include/uapi/linux/cec.h @@ -223,7 +223,7 @@ static inline int cec_msg_status_is_ok(const struct cec_msg *msg) #define CEC_LOG_ADDR_BACKUP_2 13 #define CEC_LOG_ADDR_SPECIFIC 14 #define CEC_LOG_ADDR_UNREGISTERED 15 /* as initiator address */ -#define CEC_LOG_ADDR_BROADCAST 15 /* ad destination address */ +#define CEC_LOG_ADDR_BROADCAST 15 /* as destination address */ /* The logical address types that the CEC device wants to claim */ #define CEC_LOG_ADDR_TYPE_TV 0 -- cgit v1.2.3-71-gd317 From 4a4b8169501b18c3450ac735a7e277b24886a651 Mon Sep 17 00:00:00 2001 From: Andrew Zaborowski Date: Fri, 10 Feb 2017 10:02:31 +0100 Subject: cfg80211: Accept multiple RSSI thresholds for CQM Change the SET CQM command's RSSI threshold attribute to accept any number of thresholds as a sorted array. The API should be backwards compatible so that if one s32 threshold value is passed, the old mechanism is enabled. The netlink event generated is the same in both cases. 
cfg80211 handles an arbitrary number of RSSI thresholds but drivers have to provide a method (set_cqm_rssi_range_config) that configures a range set by a high and a low value. Drivers have to call back when the RSSI goes out of that range and there's no additional event for each time the range is reconfigured as there was with the current one-threshold API. This method doesn't have a hysteresis parameter because there's no benefit to the cfg80211 code from having the hysteresis be handled by hardware/driver in terms of the number of wakeups. At the same time it would likely be less consistent between drivers if offloaded or done in the drivers. Signed-off-by: Andrew Zaborowski Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 13 ++++ include/uapi/linux/nl80211.h | 9 ++- net/wireless/core.c | 9 +++ net/wireless/core.h | 9 +++ net/wireless/nl80211.c | 138 +++++++++++++++++++++++++++++++++++++++---- net/wireless/rdev-ops.h | 12 ++++ net/wireless/trace.h | 22 +++++++ 7 files changed, 198 insertions(+), 14 deletions(-) (limited to 'include/uapi') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index ead1aa6d003e..ffc08687b31d 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2712,6 +2712,11 @@ struct cfg80211_nan_func { * the current level is above/below the configured threshold; this may * need some care when the configuration is changed (without first being * disabled.) + * @set_cqm_rssi_range_config: Configure two RSSI thresholds in the + * connection quality monitor. An event is to be sent only when the + * signal level is found to be outside the two values. The driver should + * set %NL80211_EXT_FEATURE_CQM_RSSI_LIST if this method is implemented. + * If it is provided then there's no point providing @set_cqm_rssi_config. * @set_cqm_txe_config: Configure connection quality monitor TX error * thresholds. * @sched_scan_start: Tell the driver to start a scheduled scan. 
@@ -3001,6 +3006,10 @@ struct cfg80211_ops { struct net_device *dev, s32 rssi_thold, u32 rssi_hyst); + int (*set_cqm_rssi_range_config)(struct wiphy *wiphy, + struct net_device *dev, + s32 rssi_low, s32 rssi_high); + int (*set_cqm_txe_config)(struct wiphy *wiphy, struct net_device *dev, u32 rate, u32 pkts, u32 intvl); @@ -3871,6 +3880,7 @@ void wiphy_free(struct wiphy *wiphy); struct cfg80211_conn; struct cfg80211_internal_bss; struct cfg80211_cached_keys; +struct cfg80211_cqm_config; /** * struct wireless_dev - wireless device state @@ -3934,6 +3944,7 @@ struct cfg80211_cached_keys; * @event_list: (private) list for internal event processing * @event_lock: (private) lock for event list * @owner_nlportid: (private) owner socket port ID + * @cqm_config: (private) nl80211 RSSI monitor state */ struct wireless_dev { struct wiphy *wiphy; @@ -4002,6 +4013,8 @@ struct wireless_dev { bool prev_bssid_valid; } wext; #endif + + struct cfg80211_cqm_config *cqm_config; }; static inline u8 *wdev_address(struct wireless_dev *wdev) diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 5ed257c4cd4e..9a499b15cfbc 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3942,7 +3942,10 @@ enum nl80211_ps_state { * @__NL80211_ATTR_CQM_INVALID: invalid * @NL80211_ATTR_CQM_RSSI_THOLD: RSSI threshold in dBm. This value specifies * the threshold for the RSSI level at which an event will be sent. Zero - * to disable. + * to disable. Alternatively, if %NL80211_EXT_FEATURE_CQM_RSSI_LIST is + * set, multiple values can be supplied as a low-to-high sorted array of + * threshold values in dBm. Events will be sent when the RSSI value + * crosses any of the thresholds. * @NL80211_ATTR_CQM_RSSI_HYST: RSSI hysteresis in dBm. This value specifies * the minimum amount the RSSI level must change after an event before a * new event may be issued (to reduce effects of RSSI oscillation). 
@@ -4753,6 +4756,9 @@ enum nl80211_feature_flags { * @NL80211_EXT_FEATURE_SCHED_SCAN_RELATIVE_RSSI: The driver supports sched_scan * for reporting BSSs with better RSSI than the current connected BSS * (%NL80211_ATTR_SCHED_SCAN_RELATIVE_RSSI). + * @NL80211_EXT_FEATURE_CQM_RSSI_LIST: With this driver the + * %NL80211_ATTR_CQM_RSSI_THOLD attribute accepts a list of zero or more + * RSSI threshold values to monitor rather than exactly one threshold. * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. @@ -4771,6 +4777,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_MGMT_TX_RANDOM_TA, NL80211_EXT_FEATURE_MGMT_TX_RANDOM_TA_CONNECTED, NL80211_EXT_FEATURE_SCHED_SCAN_RELATIVE_RSSI, + NL80211_EXT_FEATURE_CQM_RSSI_LIST, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, diff --git a/net/wireless/core.c b/net/wireless/core.c index e55e05bc4805..04143df20f7f 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -954,6 +954,12 @@ void wiphy_rfkill_set_hw_state(struct wiphy *wiphy, bool blocked) } EXPORT_SYMBOL(wiphy_rfkill_set_hw_state); +void cfg80211_cqm_config_free(struct wireless_dev *wdev) +{ + kfree(wdev->cqm_config); + wdev->cqm_config = NULL; +} + void cfg80211_unregister_wdev(struct wireless_dev *wdev) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); @@ -980,6 +986,8 @@ void cfg80211_unregister_wdev(struct wireless_dev *wdev) WARN_ON_ONCE(1); break; } + + cfg80211_cqm_config_free(wdev); } EXPORT_SYMBOL(cfg80211_unregister_wdev); @@ -1234,6 +1242,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, kzfree(wdev->wext.keys); #endif flush_work(&wdev->disconnect_wk); + cfg80211_cqm_config_free(wdev); } /* * synchronise (so that we won't find this netdev diff --git a/net/wireless/core.h b/net/wireless/core.h index 58ca206982fe..efa690a7ef8d 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -272,6 +272,13 @@ 
struct cfg80211_iface_destroy { u32 nlportid; }; +struct cfg80211_cqm_config { + u32 rssi_hyst; + s32 last_rssi_event_value; + int n_rssi_thresholds; + s32 rssi_thresholds[0]; +}; + void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev); /* free object */ @@ -512,4 +519,6 @@ void cfg80211_stop_nan(struct cfg80211_registered_device *rdev, #define CFG80211_DEV_WARN_ON(cond) ({bool __r = (cond); __r; }) #endif +void cfg80211_cqm_config_free(struct wireless_dev *wdev); + #endif /* __NET_WIRELESS_CORE_H */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d7f8be4e321a..d516527fcb8e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -9473,7 +9473,7 @@ static int nl80211_get_power_save(struct sk_buff *skb, struct genl_info *info) static const struct nla_policy nl80211_attr_cqm_policy[NL80211_ATTR_CQM_MAX + 1] = { - [NL80211_ATTR_CQM_RSSI_THOLD] = { .type = NLA_U32 }, + [NL80211_ATTR_CQM_RSSI_THOLD] = { .type = NLA_BINARY }, [NL80211_ATTR_CQM_RSSI_HYST] = { .type = NLA_U32 }, [NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT] = { .type = NLA_U32 }, [NL80211_ATTR_CQM_TXE_RATE] = { .type = NLA_U32 }, @@ -9502,28 +9502,123 @@ static int nl80211_set_cqm_txe(struct genl_info *info, return rdev_set_cqm_txe_config(rdev, dev, rate, pkts, intvl); } +static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev, + struct net_device *dev) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + s32 last, low, high; + u32 hyst; + int i, n; + int err; + + /* RSSI reporting disabled? */ + if (!wdev->cqm_config) + return rdev_set_cqm_rssi_range_config(rdev, dev, 0, 0); + + /* + * Obtain current RSSI value if possible, if not and no RSSI threshold + * event has been received yet, we should receive an event after a + * connection is established and enough beacons received to calculate + * the average. 
+ */ + if (!wdev->cqm_config->last_rssi_event_value && wdev->current_bss && + rdev->ops->get_station) { + struct station_info sinfo; + u8 *mac_addr; + + mac_addr = wdev->current_bss->pub.bssid; + + err = rdev_get_station(rdev, dev, mac_addr, &sinfo); + if (err) + return err; + + if (sinfo.filled & BIT(NL80211_STA_INFO_BEACON_SIGNAL_AVG)) + wdev->cqm_config->last_rssi_event_value = + (s8) sinfo.rx_beacon_signal_avg; + } + + last = wdev->cqm_config->last_rssi_event_value; + hyst = wdev->cqm_config->rssi_hyst; + n = wdev->cqm_config->n_rssi_thresholds; + + for (i = 0; i < n; i++) + if (last < wdev->cqm_config->rssi_thresholds[i]) + break; + + low = i > 0 ? + (wdev->cqm_config->rssi_thresholds[i - 1] - hyst) : S32_MIN; + high = i < n ? + (wdev->cqm_config->rssi_thresholds[i] + hyst - 1) : S32_MAX; + + return rdev_set_cqm_rssi_range_config(rdev, dev, low, high); +} + static int nl80211_set_cqm_rssi(struct genl_info *info, - s32 threshold, u32 hysteresis) + const s32 *thresholds, int n_thresholds, + u32 hysteresis) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; + int i, err; + s32 prev = S32_MIN; - if (threshold > 0) - return -EINVAL; - - /* disabling - hysteresis should also be zero then */ - if (threshold == 0) - hysteresis = 0; + /* Check all values negative and sorted */ + for (i = 0; i < n_thresholds; i++) { + if (thresholds[i] > 0 || thresholds[i] <= prev) + return -EINVAL; - if (!rdev->ops->set_cqm_rssi_config) - return -EOPNOTSUPP; + prev = thresholds[i]; + } if (wdev->iftype != NL80211_IFTYPE_STATION && wdev->iftype != NL80211_IFTYPE_P2P_CLIENT) return -EOPNOTSUPP; - return rdev_set_cqm_rssi_config(rdev, dev, threshold, hysteresis); + wdev_lock(wdev); + cfg80211_cqm_config_free(wdev); + wdev_unlock(wdev); + + if (n_thresholds <= 1 && rdev->ops->set_cqm_rssi_config) { + if (n_thresholds == 0 || thresholds[0] == 0) /* Disabling */ + return 
rdev_set_cqm_rssi_config(rdev, dev, 0, 0); + + return rdev_set_cqm_rssi_config(rdev, dev, + thresholds[0], hysteresis); + } + + if (!wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_CQM_RSSI_LIST)) + return -EOPNOTSUPP; + + if (n_thresholds == 1 && thresholds[0] == 0) /* Disabling */ + n_thresholds = 0; + + wdev_lock(wdev); + if (n_thresholds) { + struct cfg80211_cqm_config *cqm_config; + + cqm_config = kzalloc(sizeof(struct cfg80211_cqm_config) + + n_thresholds * sizeof(s32), GFP_KERNEL); + if (!cqm_config) { + err = -ENOMEM; + goto unlock; + } + + cqm_config->rssi_hyst = hysteresis; + cqm_config->n_rssi_thresholds = n_thresholds; + memcpy(cqm_config->rssi_thresholds, thresholds, + n_thresholds * sizeof(s32)); + + wdev->cqm_config = cqm_config; + } + + err = cfg80211_cqm_rssi_update(rdev, dev); + +unlock: + wdev_unlock(wdev); + + return err; } static int nl80211_set_cqm(struct sk_buff *skb, struct genl_info *info) @@ -9543,10 +9638,16 @@ static int nl80211_set_cqm(struct sk_buff *skb, struct genl_info *info) if (attrs[NL80211_ATTR_CQM_RSSI_THOLD] && attrs[NL80211_ATTR_CQM_RSSI_HYST]) { - s32 threshold = nla_get_s32(attrs[NL80211_ATTR_CQM_RSSI_THOLD]); + const s32 *thresholds = + nla_data(attrs[NL80211_ATTR_CQM_RSSI_THOLD]); + int len = nla_len(attrs[NL80211_ATTR_CQM_RSSI_THOLD]); u32 hysteresis = nla_get_u32(attrs[NL80211_ATTR_CQM_RSSI_HYST]); - return nl80211_set_cqm_rssi(info, threshold, hysteresis); + if (len % 4) + return -EINVAL; + + return nl80211_set_cqm_rssi(info, thresholds, len / 4, + hysteresis); } if (attrs[NL80211_ATTR_CQM_TXE_RATE] && @@ -13983,6 +14084,8 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev, s32 rssi_level, gfp_t gfp) { struct sk_buff *msg; + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); trace_cfg80211_cqm_rssi_notify(dev, rssi_event, rssi_level); @@ -13990,6 +14093,15 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev, rssi_event != 
NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH)) return; + if (wdev->cqm_config) { + wdev->cqm_config->last_rssi_event_value = rssi_level; + + cfg80211_cqm_rssi_update(rdev, dev); + + if (rssi_level == 0) + rssi_level = wdev->cqm_config->last_rssi_event_value; + } + msg = cfg80211_prepare_cqm(dev, NULL, gfp); if (!msg) return; diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 2f425075ada8..f2baf5921091 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -749,6 +749,18 @@ rdev_set_cqm_rssi_config(struct cfg80211_registered_device *rdev, return ret; } +static inline int +rdev_set_cqm_rssi_range_config(struct cfg80211_registered_device *rdev, + struct net_device *dev, s32 low, s32 high) +{ + int ret; + trace_rdev_set_cqm_rssi_range_config(&rdev->wiphy, dev, low, high); + ret = rdev->ops->set_cqm_rssi_range_config(&rdev->wiphy, dev, + low, high); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + static inline int rdev_set_cqm_txe_config(struct cfg80211_registered_device *rdev, struct net_device *dev, u32 rate, u32 pkts, u32 intvl) diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 776e80cef9b4..fd55786f0462 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1322,6 +1322,28 @@ TRACE_EVENT(rdev_set_cqm_rssi_config, __entry->rssi_thold, __entry->rssi_hyst) ); +TRACE_EVENT(rdev_set_cqm_rssi_range_config, + TP_PROTO(struct wiphy *wiphy, + struct net_device *netdev, s32 low, s32 high), + TP_ARGS(wiphy, netdev, low, high), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + __field(s32, rssi_low) + __field(s32, rssi_high) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + __entry->rssi_low = low; + __entry->rssi_high = high; + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT + ", range: %d - %d ", + WIPHY_PR_ARG, NETDEV_PR_ARG, + __entry->rssi_low, __entry->rssi_high) +); + TRACE_EVENT(rdev_set_cqm_txe_config, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u32 rate, u32 pkts, u32 intvl), -- cgit 
v1.2.3-71-gd317 From b35a51c7dd25a823767969e3089542d7478777e9 Mon Sep 17 00:00:00 2001 From: Vasanthakumar Thiagarajan Date: Mon, 27 Feb 2017 17:04:33 +0530 Subject: cfg80211: Make pre-CAC results valid only for ETSI domain DFS requirement for ETSI domain (section 4.7.1.4 in ETSI EN 301 893 V1.8.1) is the only one which explicitly states that once DFS channel is marked as available after the CAC, this channel will remain in available state even moving to a different operating channel. But the same is not explicitly stated in FCC DFS requirement. Also, Pre-CAC requirements are not explicitly mentioned in FCC requirement. Current implementation in keeping DFS channel in available state is same as described in ETSI domain. For non-ETSI DFS domain, this patch gives a grace period of 2 seconds since the completion of successful CAC before moving the channel's DFS state to 'usable' from 'available' state. The same grace period is checked against the channel's dfs_state_entered timestamp while deciding if a DFS channel is available for operation. There is a new radar event, NL80211_RADAR_PRE_CAC_EXPIRED, reported when DFS channel is moved from available to usable state after the grace period. Also make sure the DFS channel state is reset to usable once the beaconing operation on that channel is brought down (like stop_ap, leave_ibss and leave_mesh) in non-ETSI domain.
Signed-off-by: Vasanthakumar Thiagarajan Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 5 +++ net/wireless/ap.c | 5 +++ net/wireless/chan.c | 101 +++++++++++++++++++++++++++++++++++++++++++ net/wireless/core.h | 10 +++++ net/wireless/ibss.c | 1 + net/wireless/mesh.c | 1 + net/wireless/mlme.c | 40 +++++++++++++---- net/wireless/reg.c | 28 ++++++++++++ net/wireless/reg.h | 14 ++++++ 9 files changed, 196 insertions(+), 9 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 9a499b15cfbc..cd4dfef58fab 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4913,12 +4913,17 @@ enum nl80211_smps_mode { * change to the channel status. * @NL80211_RADAR_NOP_FINISHED: The Non-Occupancy Period for this channel is * over, channel becomes usable. + * @NL80211_RADAR_PRE_CAC_EXPIRED: Channel Availability Check done on this + * non-operating channel is expired and no longer valid. New CAC must + * be done on this channel before starting the operation. This is not + * applicable for ETSI dfs domain where pre-CAC is valid for ever. */ enum nl80211_radar_event { NL80211_RADAR_DETECTED, NL80211_RADAR_CAC_FINISHED, NL80211_RADAR_CAC_ABORTED, NL80211_RADAR_NOP_FINISHED, + NL80211_RADAR_PRE_CAC_EXPIRED, }; /** diff --git a/net/wireless/ap.c b/net/wireless/ap.c index bdad1f951561..25666d3009be 100644 --- a/net/wireless/ap.c +++ b/net/wireless/ap.c @@ -32,6 +32,11 @@ int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev, rdev_set_qos_map(rdev, dev, NULL); if (notify) nl80211_send_ap_stopped(wdev); + + /* Should we apply the grace period during beaconing interface + * shutdown also? 
+ */ + cfg80211_sched_dfs_chan_update(rdev); } return err; diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 5497d022fada..099f13c0c39e 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -456,6 +456,107 @@ bool cfg80211_chandef_dfs_usable(struct wiphy *wiphy, return (r1 + r2 > 0); } +/* + * Checks if center frequency of chan falls with in the bandwidth + * range of chandef. + */ +bool cfg80211_is_sub_chan(struct cfg80211_chan_def *chandef, + struct ieee80211_channel *chan) +{ + int width; + u32 cf_offset, freq; + + if (chandef->chan->center_freq == chan->center_freq) + return true; + + width = cfg80211_chandef_get_width(chandef); + if (width <= 20) + return false; + + cf_offset = width / 2 - 10; + + for (freq = chandef->center_freq1 - width / 2 + 10; + freq <= chandef->center_freq1 + width / 2 - 10; freq += 20) { + if (chan->center_freq == freq) + return true; + } + + if (!chandef->center_freq2) + return false; + + for (freq = chandef->center_freq2 - width / 2 + 10; + freq <= chandef->center_freq2 + width / 2 - 10; freq += 20) { + if (chan->center_freq == freq) + return true; + } + + return false; +} + +bool cfg80211_beaconing_iface_active(struct wireless_dev *wdev) +{ + bool active = false; + + ASSERT_WDEV_LOCK(wdev); + + if (!wdev->chandef.chan) + return false; + + switch (wdev->iftype) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_P2P_GO: + active = wdev->beacon_interval != 0; + break; + case NL80211_IFTYPE_ADHOC: + active = wdev->ssid_len != 0; + break; + case NL80211_IFTYPE_MESH_POINT: + active = wdev->mesh_id_len != 0; + break; + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_OCB: + case NL80211_IFTYPE_P2P_CLIENT: + case NL80211_IFTYPE_MONITOR: + case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_WDS: + case NL80211_IFTYPE_P2P_DEVICE: + /* Can NAN type be considered as beaconing interface? 
*/ + case NL80211_IFTYPE_NAN: + break; + case NL80211_IFTYPE_UNSPECIFIED: + case NUM_NL80211_IFTYPES: + WARN_ON(1); + } + + return active; +} + +bool cfg80211_any_wiphy_oper_chan(struct wiphy *wiphy, + struct ieee80211_channel *chan) +{ + struct wireless_dev *wdev; + + ASSERT_RTNL(); + + if (!(chan->flags & IEEE80211_CHAN_RADAR)) + return false; + + list_for_each_entry(wdev, &wiphy->wdev_list, list) { + wdev_lock(wdev); + if (!cfg80211_beaconing_iface_active(wdev)) { + wdev_unlock(wdev); + continue; + } + + if (cfg80211_is_sub_chan(&wdev->chandef, chan)) { + wdev_unlock(wdev); + return true; + } + wdev_unlock(wdev); + } + + return false; +} static bool cfg80211_get_chans_dfs_available(struct wiphy *wiphy, u32 center_freq, diff --git a/net/wireless/core.h b/net/wireless/core.h index efa690a7ef8d..519a29ebde5b 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -466,6 +466,16 @@ unsigned int cfg80211_chandef_dfs_cac_time(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef); +void cfg80211_sched_dfs_chan_update(struct cfg80211_registered_device *rdev); + +bool cfg80211_any_wiphy_oper_chan(struct wiphy *wiphy, + struct ieee80211_channel *chan); + +bool cfg80211_beaconing_iface_active(struct wireless_dev *wdev); + +bool cfg80211_is_sub_chan(struct cfg80211_chan_def *chandef, + struct ieee80211_channel *chan); + static inline unsigned int elapsed_jiffies_msecs(unsigned long start) { unsigned long end = jiffies; diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index 364f900a3dc4..10bf040a0982 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -190,6 +190,7 @@ static void __cfg80211_clear_ibss(struct net_device *dev, bool nowext) if (!nowext) wdev->wext.ibss.ssid_len = 0; #endif + cfg80211_sched_dfs_chan_update(rdev); } void cfg80211_clear_ibss(struct net_device *dev, bool nowext) diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 2d8518a37eab..ec0b1c20ac99 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -262,6 
+262,7 @@ int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, wdev->beacon_interval = 0; memset(&wdev->chandef, 0, sizeof(wdev->chandef)); rdev_set_qos_map(rdev, dev, NULL); + cfg80211_sched_dfs_chan_update(rdev); } return err; diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 22b3d9990065..cd29366a5206 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -745,6 +745,12 @@ bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_mbm, } EXPORT_SYMBOL(cfg80211_rx_mgmt); +void cfg80211_sched_dfs_chan_update(struct cfg80211_registered_device *rdev) +{ + cancel_delayed_work(&rdev->dfs_update_channels_wk); + queue_delayed_work(cfg80211_wq, &rdev->dfs_update_channels_wk, 0); +} + void cfg80211_dfs_channels_update_work(struct work_struct *work) { struct delayed_work *delayed_work = to_delayed_work(work); @@ -755,6 +761,8 @@ void cfg80211_dfs_channels_update_work(struct work_struct *work) struct wiphy *wiphy; bool check_again = false; unsigned long timeout, next_time = 0; + unsigned long time_dfs_update; + enum nl80211_radar_event radar_event; int bandid, i; rdev = container_of(delayed_work, struct cfg80211_registered_device, @@ -770,11 +778,27 @@ void cfg80211_dfs_channels_update_work(struct work_struct *work) for (i = 0; i < sband->n_channels; i++) { c = &sband->channels[i]; - if (c->dfs_state != NL80211_DFS_UNAVAILABLE) + if (!(c->flags & IEEE80211_CHAN_RADAR)) + continue; + + if (c->dfs_state != NL80211_DFS_UNAVAILABLE && + c->dfs_state != NL80211_DFS_AVAILABLE) continue; - timeout = c->dfs_state_entered + msecs_to_jiffies( - IEEE80211_DFS_MIN_NOP_TIME_MS); + if (c->dfs_state == NL80211_DFS_UNAVAILABLE) { + time_dfs_update = IEEE80211_DFS_MIN_NOP_TIME_MS; + radar_event = NL80211_RADAR_NOP_FINISHED; + } else { + if (regulatory_pre_cac_allowed(wiphy) || + cfg80211_any_wiphy_oper_chan(wiphy, c)) + continue; + + time_dfs_update = REG_PRE_CAC_EXPIRY_GRACE_MS; + radar_event = NL80211_RADAR_PRE_CAC_EXPIRED; + } + + timeout = 
c->dfs_state_entered + + msecs_to_jiffies(time_dfs_update); if (time_after_eq(jiffies, timeout)) { c->dfs_state = NL80211_DFS_USABLE; @@ -784,8 +808,8 @@ void cfg80211_dfs_channels_update_work(struct work_struct *work) NL80211_CHAN_NO_HT); nl80211_radar_notify(rdev, &chandef, - NL80211_RADAR_NOP_FINISHED, - NULL, GFP_ATOMIC); + radar_event, NULL, + GFP_ATOMIC); continue; } @@ -810,7 +834,6 @@ void cfg80211_radar_event(struct wiphy *wiphy, gfp_t gfp) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); - unsigned long timeout; trace_cfg80211_radar_event(wiphy, chandef); @@ -820,9 +843,7 @@ void cfg80211_radar_event(struct wiphy *wiphy, */ cfg80211_set_dfs_state(wiphy, chandef, NL80211_DFS_UNAVAILABLE); - timeout = msecs_to_jiffies(IEEE80211_DFS_MIN_NOP_TIME_MS); - queue_delayed_work(cfg80211_wq, &rdev->dfs_update_channels_wk, - timeout); + cfg80211_sched_dfs_chan_update(rdev); nl80211_radar_notify(rdev, chandef, NL80211_RADAR_DETECTED, NULL, gfp); } @@ -851,6 +872,7 @@ void cfg80211_cac_event(struct net_device *netdev, msecs_to_jiffies(wdev->cac_time_ms); WARN_ON(!time_after_eq(jiffies, timeout)); cfg80211_set_dfs_state(wiphy, chandef, NL80211_DFS_AVAILABLE); + cfg80211_sched_dfs_chan_update(rdev); break; case NL80211_RADAR_CAC_ABORTED: break; diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 753efcd51fa3..e59b192459e8 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -3120,6 +3120,34 @@ bool regulatory_indoor_allowed(void) return reg_is_indoor; } +bool regulatory_pre_cac_allowed(struct wiphy *wiphy) +{ + const struct ieee80211_regdomain *regd = NULL; + const struct ieee80211_regdomain *wiphy_regd = NULL; + bool pre_cac_allowed = false; + + rcu_read_lock(); + + regd = rcu_dereference(cfg80211_regdomain); + wiphy_regd = rcu_dereference(wiphy->regd); + if (!wiphy_regd) { + if (regd->dfs_region == NL80211_DFS_ETSI) + pre_cac_allowed = true; + + rcu_read_unlock(); + + return pre_cac_allowed; + } + + if (regd->dfs_region == 
wiphy_regd->dfs_region && + wiphy_regd->dfs_region == NL80211_DFS_ETSI) + pre_cac_allowed = true; + + rcu_read_unlock(); + + return pre_cac_allowed; +} + int __init regulatory_init(void) { int err = 0; diff --git a/net/wireless/reg.h b/net/wireless/reg.h index f6ced316b5a4..ff078f093989 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -143,4 +143,18 @@ int cfg80211_get_unii(int freq); */ bool regulatory_indoor_allowed(void); +/* + * Grace period to timeout pre-CAC results on the dfs channels. This timeout + * value is used for Non-ETSI domain. + * TODO: May be make this timeout available through regdb? + */ +#define REG_PRE_CAC_EXPIRY_GRACE_MS 2000 + +/** + * regulatory_pre_cac_allowed - if pre-CAC allowed in the current dfs domain + * @wiphy: wiphy for which pre-CAC capability is checked. + + * Pre-CAC is allowed only in ETSI domain. + */ +bool regulatory_pre_cac_allowed(struct wiphy *wiphy); #endif /* __NET_WIRELESS_REG_H */ -- cgit v1.2.3-71-gd317 From 4eb220cb35a9c4f69a2438b987bb3d509d56cc80 Mon Sep 17 00:00:00 2001 From: Ping Cheng Date: Tue, 14 Feb 2017 21:26:21 -0800 Subject: HID: wacom: generic: add 3 tablet touch keys This patch adds support for the 3 touch keys on Wacom Cintiq Pro. These touch keys are in the middle of the other two keys on the top edge of the tablet.
Signed-off-by: Ping Cheng Reviewed-by: Benjamin Tissoires Tested-by: Aaron Armstrong Skomra Acked-by: Dmitry Torokhov Signed-off-by: Jiri Kosina --- drivers/hid/wacom_wac.c | 12 ++++++++++++ drivers/hid/wacom_wac.h | 3 +++ include/uapi/linux/input-event-codes.h | 1 + 3 files changed, 16 insertions(+) (limited to 'include/uapi') diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 4aa3de9f1163..dbda99272374 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -1768,6 +1768,18 @@ static void wacom_wac_pad_usage_mapping(struct hid_device *hdev, wacom_map_usage(input, usage, field, EV_ABS, ABS_WHEEL, 0); features->device_type |= WACOM_DEVICETYPE_PAD; break; + case WACOM_HID_WD_BUTTONCONFIG: + wacom_map_usage(input, usage, field, EV_KEY, KEY_BUTTONCONFIG, 0); + features->device_type |= WACOM_DEVICETYPE_PAD; + break; + case WACOM_HID_WD_ONSCREEN_KEYBOARD: + wacom_map_usage(input, usage, field, EV_KEY, KEY_ONSCREEN_KEYBOARD, 0); + features->device_type |= WACOM_DEVICETYPE_PAD; + break; + case WACOM_HID_WD_CONTROLPANEL: + wacom_map_usage(input, usage, field, EV_KEY, KEY_CONTROLPANEL, 0); + features->device_type |= WACOM_DEVICETYPE_PAD; + break; } switch (equivalent_usage & 0xfffffff0) { diff --git a/drivers/hid/wacom_wac.h b/drivers/hid/wacom_wac.h index 857ccee16f38..5eba31d6c46a 100644 --- a/drivers/hid/wacom_wac.h +++ b/drivers/hid/wacom_wac.h @@ -120,6 +120,9 @@ #define WACOM_HID_WD_BATTERY_LEVEL (WACOM_HID_UP_WACOMDIGITIZER | 0x043b) #define WACOM_HID_WD_EXPRESSKEY00 (WACOM_HID_UP_WACOMDIGITIZER | 0x0910) #define WACOM_HID_WD_EXPRESSKEYCAP00 (WACOM_HID_UP_WACOMDIGITIZER | 0x0950) +#define WACOM_HID_WD_CONTROLPANEL (WACOM_HID_UP_WACOMDIGITIZER | 0x0982) +#define WACOM_HID_WD_ONSCREEN_KEYBOARD (WACOM_HID_UP_WACOMDIGITIZER | 0x0983) +#define WACOM_HID_WD_BUTTONCONFIG (WACOM_HID_UP_WACOMDIGITIZER | 0x0986) #define WACOM_HID_WD_BUTTONHOME (WACOM_HID_UP_WACOMDIGITIZER | 0x0990) #define WACOM_HID_WD_BUTTONUP (WACOM_HID_UP_WACOMDIGITIZER | 
0x0991) #define WACOM_HID_WD_BUTTONDOWN (WACOM_HID_UP_WACOMDIGITIZER | 0x0992) diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index 3af60ee69053..f5a8d96e1e09 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -641,6 +641,7 @@ * e.g. teletext or data broadcast application (MHEG, MHP, HbbTV, etc.) */ #define KEY_DATA 0x277 +#define KEY_ONSCREEN_KEYBOARD 0x278 #define BTN_TRIGGER_HAPPY 0x2c0 #define BTN_TRIGGER_HAPPY1 0x2c0 -- cgit v1.2.3-71-gd317 From 3206caded81ad9bdb2e7ff4c0b94ec5913df8618 Mon Sep 17 00:00:00 2001 From: Laura Garcia Liebana Date: Thu, 2 Mar 2017 17:00:14 +0100 Subject: netfilter: nft_hash: support of symmetric hash This patch provides symmetric hash support according to source ip address and port, and destination ip address and port. For this purpose, the __skb_get_hash_symmetric() is used to identify the flow as it uses FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL flag by default. The new attribute NFTA_HASH_TYPE has been included to support different types of hashing functions. Currently supported NFT_HASH_JENKINS through jhash and NFT_HASH_SYM through symhash. The main differences between the two types are: - jhash requires an expression with sreg, symhash doesn't. - symhash supports modulus and offset, but not seed. Examples: nft add rule ip nat prerouting ct mark set jhash ip saddr mod 2 nft add rule ip nat prerouting ct mark set symhash mod 2 By default, jenkins hash will be used if no hash type is provided for compatibility reasons.
Signed-off-by: Laura Garcia Liebana Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 13 +++++ net/netfilter/nft_hash.c | 99 +++++++++++++++++++++++++++++++- 2 files changed, 111 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 05215d30fe5c..4f7d75682c59 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -815,6 +815,17 @@ enum nft_rt_keys { NFT_RT_NEXTHOP6, }; +/** + * enum nft_hash_types - nf_tables hash expression types + * + * @NFT_HASH_JENKINS: Jenkins Hash + * @NFT_HASH_SYM: Symmetric Hash + */ +enum nft_hash_types { + NFT_HASH_JENKINS, + NFT_HASH_SYM, +}; + /** * enum nft_hash_attributes - nf_tables hash expression netlink attributes * @@ -824,6 +835,7 @@ enum nft_rt_keys { * @NFTA_HASH_MODULUS: modulus value (NLA_U32) * @NFTA_HASH_SEED: seed value (NLA_U32) * @NFTA_HASH_OFFSET: add this offset value to hash result (NLA_U32) + * @NFTA_HASH_TYPE: hash operation (NLA_U32: nft_hash_types) */ enum nft_hash_attributes { NFTA_HASH_UNSPEC, @@ -833,6 +845,7 @@ enum nft_hash_attributes { NFTA_HASH_MODULUS, NFTA_HASH_SEED, NFTA_HASH_OFFSET, + NFTA_HASH_TYPE, __NFTA_HASH_MAX, }; #define NFTA_HASH_MAX (__NFTA_HASH_MAX - 1) diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index ccb834ef049b..a6a4633725bb 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -38,6 +38,25 @@ static void nft_jhash_eval(const struct nft_expr *expr, regs->data[priv->dreg] = h + priv->offset; } +struct nft_symhash { + enum nft_registers dreg:8; + u32 modulus; + u32 offset; +}; + +static void nft_symhash_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_symhash *priv = nft_expr_priv(expr); + struct sk_buff *skb = pkt->skb; + u32 h; + + h = reciprocal_scale(__skb_get_hash_symmetric(skb), priv->modulus); + + 
regs->data[priv->dreg] = h + priv->offset; +} + static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = { [NFTA_HASH_SREG] = { .type = NLA_U32 }, [NFTA_HASH_DREG] = { .type = NLA_U32 }, @@ -45,6 +64,7 @@ static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = { [NFTA_HASH_MODULUS] = { .type = NLA_U32 }, [NFTA_HASH_SEED] = { .type = NLA_U32 }, [NFTA_HASH_OFFSET] = { .type = NLA_U32 }, + [NFTA_HASH_TYPE] = { .type = NLA_U32 }, }; static int nft_jhash_init(const struct nft_ctx *ctx, @@ -92,6 +112,32 @@ static int nft_jhash_init(const struct nft_ctx *ctx, NFT_DATA_VALUE, sizeof(u32)); } +static int nft_symhash_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_symhash *priv = nft_expr_priv(expr); + + if (!tb[NFTA_HASH_DREG] || + !tb[NFTA_HASH_MODULUS]) + return -EINVAL; + + if (tb[NFTA_HASH_OFFSET]) + priv->offset = ntohl(nla_get_be32(tb[NFTA_HASH_OFFSET])); + + priv->dreg = nft_parse_register(tb[NFTA_HASH_DREG]); + + priv->modulus = ntohl(nla_get_be32(tb[NFTA_HASH_MODULUS])); + if (priv->modulus <= 1) + return -ERANGE; + + if (priv->offset + priv->modulus - 1 < priv->offset) + return -EOVERFLOW; + + return nft_validate_register_store(ctx, priv->dreg, NULL, + NFT_DATA_VALUE, sizeof(u32)); +} + static int nft_jhash_dump(struct sk_buff *skb, const struct nft_expr *expr) { @@ -110,6 +156,28 @@ static int nft_jhash_dump(struct sk_buff *skb, if (priv->offset != 0) if (nla_put_be32(skb, NFTA_HASH_OFFSET, htonl(priv->offset))) goto nla_put_failure; + if (nla_put_be32(skb, NFTA_HASH_TYPE, htonl(NFT_HASH_JENKINS))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static int nft_symhash_dump(struct sk_buff *skb, + const struct nft_expr *expr) +{ + const struct nft_symhash *priv = nft_expr_priv(expr); + + if (nft_dump_register(skb, NFTA_HASH_DREG, priv->dreg)) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_HASH_MODULUS, htonl(priv->modulus))) + goto 
nla_put_failure; + if (priv->offset != 0) + if (nla_put_be32(skb, NFTA_HASH_OFFSET, htonl(priv->offset))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_HASH_TYPE, htonl(NFT_HASH_SYM))) + goto nla_put_failure; return 0; nla_put_failure: @@ -125,9 +193,38 @@ static const struct nft_expr_ops nft_jhash_ops = { .dump = nft_jhash_dump, }; +static const struct nft_expr_ops nft_symhash_ops = { + .type = &nft_hash_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_symhash)), + .eval = nft_symhash_eval, + .init = nft_symhash_init, + .dump = nft_symhash_dump, +}; + +static const struct nft_expr_ops * +nft_hash_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) +{ + u32 type; + + if (!tb[NFTA_HASH_TYPE]) + return &nft_jhash_ops; + + type = ntohl(nla_get_be32(tb[NFTA_HASH_TYPE])); + switch (type) { + case NFT_HASH_SYM: + return &nft_symhash_ops; + case NFT_HASH_JENKINS: + return &nft_jhash_ops; + default: + break; + } + return ERR_PTR(-EOPNOTSUPP); +} + static struct nft_expr_type nft_hash_type __read_mostly = { .name = "hash", - .ops = &nft_jhash_ops, + .select_ops = &nft_hash_select_ops, .policy = nft_hash_policy, .maxattr = NFTA_HASH_MAX, .owner = THIS_MODULE, -- cgit v1.2.3-71-gd317 From df789fe752065f2ce761ba434125e335b514899f Mon Sep 17 00:00:00 2001 From: David Forster Date: Thu, 23 Feb 2017 16:27:18 +0000 Subject: ipv6: Provide ipv6 version of "disable_policy" sysctl This provides equivalent functionality to the existing ipv4 "disable_policy" sysctl, i.e. it allows IPsec processing to be skipped on terminating packets on a per-interface basis. Signed-off-by: David Forster Signed-off-by: David S.
Miller --- include/linux/ipv6.h | 1 + include/uapi/linux/ipv6.h | 1 + net/ipv6/addrconf.c | 114 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 116 insertions(+) (limited to 'include/uapi') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 71be5b330d21..f0d79bd054ca 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -70,6 +70,7 @@ struct ipv6_devconf { #endif __u32 enhanced_dad; __u32 addr_gen_mode; + __s32 disable_policy; struct ctl_table_header *sysctl_header; }; diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 8ef9e75e004e..d8f6a1ac9af4 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -183,6 +183,7 @@ enum { DEVCONF_SEG6_REQUIRE_HMAC, DEVCONF_ENHANCED_DAD, DEVCONF_ADDR_GEN_MODE, + DEVCONF_DISABLE_POLICY, DEVCONF_MAX }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 363172527e43..8c69768a5c46 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -245,6 +245,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { #endif .enhanced_dad = 1, .addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64, + .disable_policy = 0, }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -297,6 +298,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { #endif .enhanced_dad = 1, .addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64, + .disable_policy = 0, }; /* Check if a valid qdisc is available */ @@ -944,6 +946,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, const struct in6_addr *peer_addr, int pfxlen, int scope, u32 flags, u32 valid_lft, u32 prefered_lft) { + struct net *net = dev_net(idev->dev); struct inet6_ifaddr *ifa = NULL; struct rt6_info *rt; unsigned int hash; @@ -990,6 +993,10 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, goto out; } + if (net->ipv6.devconf_all->disable_policy || + idev->cnf.disable_policy) + rt->dst.flags |= DST_NOPOLICY; + neigh_parms_data_state_setall(idev->nd_parms); ifa->addr = 
*addr; @@ -5003,6 +5010,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, #endif array[DEVCONF_ENHANCED_DAD] = cnf->enhanced_dad; array[DEVCONF_ADDR_GEN_MODE] = cnf->addr_gen_mode; + array[DEVCONF_DISABLE_POLICY] = cnf->disable_policy; } static inline size_t inet6_ifla6_size(void) @@ -5827,6 +5835,105 @@ int addrconf_sysctl_ignore_routes_with_linkdown(struct ctl_table *ctl, return ret; } +static +void addrconf_set_nopolicy(struct rt6_info *rt, int action) +{ + if (rt) { + if (action) + rt->dst.flags |= DST_NOPOLICY; + else + rt->dst.flags &= ~DST_NOPOLICY; + } +} + +static +void addrconf_disable_policy_idev(struct inet6_dev *idev, int val) +{ + struct inet6_ifaddr *ifa; + + read_lock_bh(&idev->lock); + list_for_each_entry(ifa, &idev->addr_list, if_list) { + spin_lock(&ifa->lock); + if (ifa->rt) { + struct rt6_info *rt = ifa->rt; + struct fib6_table *table = rt->rt6i_table; + int cpu; + + read_lock(&table->tb6_lock); + addrconf_set_nopolicy(ifa->rt, val); + if (rt->rt6i_pcpu) { + for_each_possible_cpu(cpu) { + struct rt6_info **rtp; + + rtp = per_cpu_ptr(rt->rt6i_pcpu, cpu); + addrconf_set_nopolicy(*rtp, val); + } + } + read_unlock(&table->tb6_lock); + } + spin_unlock(&ifa->lock); + } + read_unlock_bh(&idev->lock); +} + +static +int addrconf_disable_policy(struct ctl_table *ctl, int *valp, int val) +{ + struct inet6_dev *idev; + struct net *net; + + if (!rtnl_trylock()) + return restart_syscall(); + + *valp = val; + + net = (struct net *)ctl->extra2; + if (valp == &net->ipv6.devconf_dflt->disable_policy) { + rtnl_unlock(); + return 0; + } + + if (valp == &net->ipv6.devconf_all->disable_policy) { + struct net_device *dev; + + for_each_netdev(net, dev) { + idev = __in6_dev_get(dev); + if (idev) + addrconf_disable_policy_idev(idev, val); + } + } else { + idev = (struct inet6_dev *)ctl->extra1; + addrconf_disable_policy_idev(idev, val); + } + + rtnl_unlock(); + return 0; +} + +static +int addrconf_sysctl_disable_policy(struct ctl_table *ctl, int 
write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int *valp = ctl->data; + int val = *valp; + loff_t pos = *ppos; + struct ctl_table lctl; + int ret; + + lctl = *ctl; + lctl.data = &val; + ret = proc_dointvec(&lctl, write, buffer, lenp, ppos); + + if (write && (*valp != val)) + ret = addrconf_disable_policy(ctl, valp, val); + + if (ret) + *ppos = pos; + + return ret; +} + static int minus_one = -1; static const int one = 1; static const int two_five_five = 255; @@ -6184,6 +6291,13 @@ static const struct ctl_table addrconf_sysctl[] = { .mode = 0644, .proc_handler = addrconf_sysctl_addr_gen_mode, }, + { + .procname = "disable_policy", + .data = &ipv6_devconf.disable_policy, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = addrconf_sysctl_disable_policy, + }, { /* sentinel */ } -- cgit v1.2.3-71-gd317 From 967c9cca2cc50569efc65945325c173cecba83bd Mon Sep 17 00:00:00 2001 From: Jens Wiklander Date: Wed, 11 Mar 2015 14:39:39 +0100 Subject: tee: generic TEE subsystem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Initial patch for generic TEE subsystem. This subsystem provides: * Registration/un-registration of TEE drivers. * Shared memory between normal world and secure world. * Ioctl interface for interaction with user space. * Sysfs implementation_id of TEE driver A TEE (Trusted Execution Environment) driver is a driver that interfaces with a trusted OS running in some secure environment, for example, TrustZone on ARM cpus, or a separate secure co-processor etc. The TEE subsystem can serve a TEE driver for a Global Platform compliant TEE, but it's not limited to only Global Platform TEEs. 
This patch builds on other similar implementations trying to solve the same problem: * "optee_linuxdriver" by among others Jean-michel DELORME and Emmanuel MICHEL * "Generic TrustZone Driver" by Javier González Acked-by: Andreas Dannenberg Tested-by: Jerome Forissier (HiKey) Tested-by: Volodymyr Babchuk (RCAR H3) Tested-by: Scott Branden Reviewed-by: Javier González Signed-off-by: Jens Wiklander --- Documentation/ioctl/ioctl-number.txt | 1 + MAINTAINERS | 7 + drivers/Kconfig | 2 + drivers/Makefile | 1 + drivers/tee/Kconfig | 8 + drivers/tee/Makefile | 4 + drivers/tee/tee_core.c | 893 +++++++++++++++++++++++++++++++++++ drivers/tee/tee_private.h | 129 +++++ drivers/tee/tee_shm.c | 358 ++++++++++++++ drivers/tee/tee_shm_pool.c | 156 ++++++ include/linux/tee_drv.h | 277 +++++++++++ include/uapi/linux/tee.h | 346 ++++++++++++++ 12 files changed, 2182 insertions(+) create mode 100644 drivers/tee/Kconfig create mode 100644 drivers/tee/Makefile create mode 100644 drivers/tee/tee_core.c create mode 100644 drivers/tee/tee_private.h create mode 100644 drivers/tee/tee_shm.c create mode 100644 drivers/tee/tee_shm_pool.c create mode 100644 include/linux/tee_drv.h create mode 100644 include/uapi/linux/tee.h (limited to 'include/uapi') diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt index 08244bea5048..002331b0b48a 100644 --- a/Documentation/ioctl/ioctl-number.txt +++ b/Documentation/ioctl/ioctl-number.txt @@ -308,6 +308,7 @@ Code Seq#(hex) Include File Comments 0xA3 80-8F Port ACL in development: 0xA3 90-9F linux/dtlk.h +0xA4 00-1F uapi/linux/tee.h Generic TEE subsystem 0xAA 00-3F linux/uapi/linux/userfaultfd.h 0xAB 00-1F linux/nbd.h 0xAC 00-1F linux/raw.h diff --git a/MAINTAINERS b/MAINTAINERS index c265a5fe4848..017521958c86 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11086,6 +11086,13 @@ F: drivers/hwtracing/stm/ F: include/linux/stm.h F: include/uapi/linux/stm.h +TEE SUBSYSTEM +M: Jens Wiklander +S: Maintained +F: 
include/linux/tee_drv.h +F: include/uapi/linux/tee.h +F: drivers/tee/ + THUNDERBOLT DRIVER M: Andreas Noever S: Maintained diff --git a/drivers/Kconfig b/drivers/Kconfig index 117ca14ccf85..ba2901e76769 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -204,4 +204,6 @@ source "drivers/fpga/Kconfig" source "drivers/fsi/Kconfig" +source "drivers/tee/Kconfig" + endmenu diff --git a/drivers/Makefile b/drivers/Makefile index 2eced9afba53..5db9aa6beeaf 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -177,3 +177,4 @@ obj-$(CONFIG_ANDROID) += android/ obj-$(CONFIG_NVMEM) += nvmem/ obj-$(CONFIG_FPGA) += fpga/ obj-$(CONFIG_FSI) += fsi/ +obj-$(CONFIG_TEE) += tee/ diff --git a/drivers/tee/Kconfig b/drivers/tee/Kconfig new file mode 100644 index 000000000000..50c244ead46d --- /dev/null +++ b/drivers/tee/Kconfig @@ -0,0 +1,8 @@ +# Generic Trusted Execution Environment Configuration +config TEE + tristate "Trusted Execution Environment support" + select DMA_SHARED_BUFFER + select GENERIC_ALLOCATOR + help + This implements a generic interface towards a Trusted Execution + Environment (TEE). diff --git a/drivers/tee/Makefile b/drivers/tee/Makefile new file mode 100644 index 000000000000..ec64047a86e2 --- /dev/null +++ b/drivers/tee/Makefile @@ -0,0 +1,4 @@ +obj-$(CONFIG_TEE) += tee.o +tee-objs += tee_core.o +tee-objs += tee_shm.o +tee-objs += tee_shm_pool.o diff --git a/drivers/tee/tee_core.c b/drivers/tee/tee_core.c new file mode 100644 index 000000000000..5c60bf4423e6 --- /dev/null +++ b/drivers/tee/tee_core.c @@ -0,0 +1,893 @@ +/* + * Copyright (c) 2015-2016, Linaro Limited + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#define pr_fmt(fmt) "%s: " fmt, __func__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "tee_private.h" + +#define TEE_NUM_DEVICES 32 + +#define TEE_IOCTL_PARAM_SIZE(x) (sizeof(struct tee_param) * (x)) + +/* + * Unprivileged devices in the lower half range and privileged devices in + * the upper half range. + */ +static DECLARE_BITMAP(dev_mask, TEE_NUM_DEVICES); +static DEFINE_SPINLOCK(driver_lock); + +static struct class *tee_class; +static dev_t tee_devt; + +static int tee_open(struct inode *inode, struct file *filp) +{ + int rc; + struct tee_device *teedev; + struct tee_context *ctx; + + teedev = container_of(inode->i_cdev, struct tee_device, cdev); + if (!tee_device_get(teedev)) + return -EINVAL; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) { + rc = -ENOMEM; + goto err; + } + + ctx->teedev = teedev; + INIT_LIST_HEAD(&ctx->list_shm); + filp->private_data = ctx; + rc = teedev->desc->ops->open(ctx); + if (rc) + goto err; + + return 0; +err: + kfree(ctx); + tee_device_put(teedev); + return rc; +} + +static int tee_release(struct inode *inode, struct file *filp) +{ + struct tee_context *ctx = filp->private_data; + struct tee_device *teedev = ctx->teedev; + struct tee_shm *shm; + + ctx->teedev->desc->ops->release(ctx); + mutex_lock(&ctx->teedev->mutex); + list_for_each_entry(shm, &ctx->list_shm, link) + shm->ctx = NULL; + mutex_unlock(&ctx->teedev->mutex); + kfree(ctx); + tee_device_put(teedev); + return 0; +} + +static int tee_ioctl_version(struct tee_context *ctx, + struct tee_ioctl_version_data __user *uvers) +{ + struct tee_ioctl_version_data vers; + + ctx->teedev->desc->ops->get_version(ctx->teedev, &vers); + if (copy_to_user(uvers, &vers, 
sizeof(vers))) + return -EFAULT; + return 0; +} + +static int tee_ioctl_shm_alloc(struct tee_context *ctx, + struct tee_ioctl_shm_alloc_data __user *udata) +{ + long ret; + struct tee_ioctl_shm_alloc_data data; + struct tee_shm *shm; + + if (copy_from_user(&data, udata, sizeof(data))) + return -EFAULT; + + /* Currently no input flags are supported */ + if (data.flags) + return -EINVAL; + + data.id = -1; + + shm = tee_shm_alloc(ctx, data.size, TEE_SHM_MAPPED | TEE_SHM_DMA_BUF); + if (IS_ERR(shm)) + return PTR_ERR(shm); + + data.id = shm->id; + data.flags = shm->flags; + data.size = shm->size; + + if (copy_to_user(udata, &data, sizeof(data))) + ret = -EFAULT; + else + ret = tee_shm_get_fd(shm); + + /* + * When user space closes the file descriptor the shared memory + * should be freed or if tee_shm_get_fd() failed then it will + * be freed immediately. + */ + tee_shm_put(shm); + return ret; +} + +static int params_from_user(struct tee_context *ctx, struct tee_param *params, + size_t num_params, + struct tee_ioctl_param __user *uparams) +{ + size_t n; + + for (n = 0; n < num_params; n++) { + struct tee_shm *shm; + struct tee_ioctl_param ip; + + if (copy_from_user(&ip, uparams + n, sizeof(ip))) + return -EFAULT; + + /* All unused attribute bits has to be zero */ + if (ip.attr & ~TEE_IOCTL_PARAM_ATTR_TYPE_MASK) + return -EINVAL; + + params[n].attr = ip.attr; + switch (ip.attr) { + case TEE_IOCTL_PARAM_ATTR_TYPE_NONE: + case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_OUTPUT: + break; + case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT: + case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT: + params[n].u.value.a = ip.a; + params[n].u.value.b = ip.b; + params[n].u.value.c = ip.c; + break; + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT: + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT: + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT: + /* + * If we fail to get a pointer to a shared memory + * object (and increase the ref count) from an + * identifier we return an error. 
All pointers that + * has been added in params have an increased ref + * count. It's the callers responibility to do + * tee_shm_put() on all resolved pointers. + */ + shm = tee_shm_get_from_id(ctx, ip.c); + if (IS_ERR(shm)) + return PTR_ERR(shm); + + params[n].u.memref.shm_offs = ip.a; + params[n].u.memref.size = ip.b; + params[n].u.memref.shm = shm; + break; + default: + /* Unknown attribute */ + return -EINVAL; + } + } + return 0; +} + +static int params_to_user(struct tee_ioctl_param __user *uparams, + size_t num_params, struct tee_param *params) +{ + size_t n; + + for (n = 0; n < num_params; n++) { + struct tee_ioctl_param __user *up = uparams + n; + struct tee_param *p = params + n; + + switch (p->attr) { + case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_OUTPUT: + case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT: + if (put_user(p->u.value.a, &up->a) || + put_user(p->u.value.b, &up->b) || + put_user(p->u.value.c, &up->c)) + return -EFAULT; + break; + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT: + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT: + if (put_user((u64)p->u.memref.size, &up->b)) + return -EFAULT; + default: + break; + } + } + return 0; +} + +static bool param_is_memref(struct tee_param *param) +{ + switch (param->attr & TEE_IOCTL_PARAM_ATTR_TYPE_MASK) { + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT: + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT: + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT: + return true; + default: + return false; + } +} + +static int tee_ioctl_open_session(struct tee_context *ctx, + struct tee_ioctl_buf_data __user *ubuf) +{ + int rc; + size_t n; + struct tee_ioctl_buf_data buf; + struct tee_ioctl_open_session_arg __user *uarg; + struct tee_ioctl_open_session_arg arg; + struct tee_ioctl_param __user *uparams = NULL; + struct tee_param *params = NULL; + bool have_session = false; + + if (!ctx->teedev->desc->ops->open_session) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, sizeof(buf))) + return -EFAULT; + + if (buf.buf_len > TEE_MAX_ARG_SIZE 
|| + buf.buf_len < sizeof(struct tee_ioctl_open_session_arg)) + return -EINVAL; + + uarg = u64_to_user_ptr(buf.buf_ptr); + if (copy_from_user(&arg, uarg, sizeof(arg))) + return -EFAULT; + + if (sizeof(arg) + TEE_IOCTL_PARAM_SIZE(arg.num_params) != buf.buf_len) + return -EINVAL; + + if (arg.num_params) { + params = kcalloc(arg.num_params, sizeof(struct tee_param), + GFP_KERNEL); + if (!params) + return -ENOMEM; + uparams = uarg->params; + rc = params_from_user(ctx, params, arg.num_params, uparams); + if (rc) + goto out; + } + + rc = ctx->teedev->desc->ops->open_session(ctx, &arg, params); + if (rc) + goto out; + have_session = true; + + if (put_user(arg.session, &uarg->session) || + put_user(arg.ret, &uarg->ret) || + put_user(arg.ret_origin, &uarg->ret_origin)) { + rc = -EFAULT; + goto out; + } + rc = params_to_user(uparams, arg.num_params, params); +out: + /* + * If we've succeeded to open the session but failed to communicate + * it back to user space, close the session again to avoid leakage. 
+ */ + if (rc && have_session && ctx->teedev->desc->ops->close_session) + ctx->teedev->desc->ops->close_session(ctx, arg.session); + + if (params) { + /* Decrease ref count for all valid shared memory pointers */ + for (n = 0; n < arg.num_params; n++) + if (param_is_memref(params + n) && + params[n].u.memref.shm) + tee_shm_put(params[n].u.memref.shm); + kfree(params); + } + + return rc; +} + +static int tee_ioctl_invoke(struct tee_context *ctx, + struct tee_ioctl_buf_data __user *ubuf) +{ + int rc; + size_t n; + struct tee_ioctl_buf_data buf; + struct tee_ioctl_invoke_arg __user *uarg; + struct tee_ioctl_invoke_arg arg; + struct tee_ioctl_param __user *uparams = NULL; + struct tee_param *params = NULL; + + if (!ctx->teedev->desc->ops->invoke_func) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, sizeof(buf))) + return -EFAULT; + + if (buf.buf_len > TEE_MAX_ARG_SIZE || + buf.buf_len < sizeof(struct tee_ioctl_invoke_arg)) + return -EINVAL; + + uarg = u64_to_user_ptr(buf.buf_ptr); + if (copy_from_user(&arg, uarg, sizeof(arg))) + return -EFAULT; + + if (sizeof(arg) + TEE_IOCTL_PARAM_SIZE(arg.num_params) != buf.buf_len) + return -EINVAL; + + if (arg.num_params) { + params = kcalloc(arg.num_params, sizeof(struct tee_param), + GFP_KERNEL); + if (!params) + return -ENOMEM; + uparams = uarg->params; + rc = params_from_user(ctx, params, arg.num_params, uparams); + if (rc) + goto out; + } + + rc = ctx->teedev->desc->ops->invoke_func(ctx, &arg, params); + if (rc) + goto out; + + if (put_user(arg.ret, &uarg->ret) || + put_user(arg.ret_origin, &uarg->ret_origin)) { + rc = -EFAULT; + goto out; + } + rc = params_to_user(uparams, arg.num_params, params); +out: + if (params) { + /* Decrease ref count for all valid shared memory pointers */ + for (n = 0; n < arg.num_params; n++) + if (param_is_memref(params + n) && + params[n].u.memref.shm) + tee_shm_put(params[n].u.memref.shm); + kfree(params); + } + return rc; +} + +static int tee_ioctl_cancel(struct tee_context *ctx, + struct 
tee_ioctl_cancel_arg __user *uarg) +{ + struct tee_ioctl_cancel_arg arg; + + if (!ctx->teedev->desc->ops->cancel_req) + return -EINVAL; + + if (copy_from_user(&arg, uarg, sizeof(arg))) + return -EFAULT; + + return ctx->teedev->desc->ops->cancel_req(ctx, arg.cancel_id, + arg.session); +} + +static int +tee_ioctl_close_session(struct tee_context *ctx, + struct tee_ioctl_close_session_arg __user *uarg) +{ + struct tee_ioctl_close_session_arg arg; + + if (!ctx->teedev->desc->ops->close_session) + return -EINVAL; + + if (copy_from_user(&arg, uarg, sizeof(arg))) + return -EFAULT; + + return ctx->teedev->desc->ops->close_session(ctx, arg.session); +} + +static int params_to_supp(struct tee_context *ctx, + struct tee_ioctl_param __user *uparams, + size_t num_params, struct tee_param *params) +{ + size_t n; + + for (n = 0; n < num_params; n++) { + struct tee_ioctl_param ip; + struct tee_param *p = params + n; + + ip.attr = p->attr & TEE_IOCTL_PARAM_ATTR_TYPE_MASK; + switch (p->attr) { + case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT: + case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT: + ip.a = p->u.value.a; + ip.b = p->u.value.b; + ip.c = p->u.value.c; + break; + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT: + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT: + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT: + ip.b = p->u.memref.size; + if (!p->u.memref.shm) { + ip.a = 0; + ip.c = (u64)-1; /* invalid shm id */ + break; + } + ip.a = p->u.memref.shm_offs; + ip.c = p->u.memref.shm->id; + break; + default: + ip.a = 0; + ip.b = 0; + ip.c = 0; + break; + } + + if (copy_to_user(uparams + n, &ip, sizeof(ip))) + return -EFAULT; + } + + return 0; +} + +static int tee_ioctl_supp_recv(struct tee_context *ctx, + struct tee_ioctl_buf_data __user *ubuf) +{ + int rc; + struct tee_ioctl_buf_data buf; + struct tee_iocl_supp_recv_arg __user *uarg; + struct tee_param *params; + u32 num_params; + u32 func; + + if (!ctx->teedev->desc->ops->supp_recv) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, 
sizeof(buf))) + return -EFAULT; + + if (buf.buf_len > TEE_MAX_ARG_SIZE || + buf.buf_len < sizeof(struct tee_iocl_supp_recv_arg)) + return -EINVAL; + + uarg = u64_to_user_ptr(buf.buf_ptr); + if (get_user(num_params, &uarg->num_params)) + return -EFAULT; + + if (sizeof(*uarg) + TEE_IOCTL_PARAM_SIZE(num_params) != buf.buf_len) + return -EINVAL; + + params = kcalloc(num_params, sizeof(struct tee_param), GFP_KERNEL); + if (!params) + return -ENOMEM; + + rc = ctx->teedev->desc->ops->supp_recv(ctx, &func, &num_params, params); + if (rc) + goto out; + + if (put_user(func, &uarg->func) || + put_user(num_params, &uarg->num_params)) { + rc = -EFAULT; + goto out; + } + + rc = params_to_supp(ctx, uarg->params, num_params, params); +out: + kfree(params); + return rc; +} + +static int params_from_supp(struct tee_param *params, size_t num_params, + struct tee_ioctl_param __user *uparams) +{ + size_t n; + + for (n = 0; n < num_params; n++) { + struct tee_param *p = params + n; + struct tee_ioctl_param ip; + + if (copy_from_user(&ip, uparams + n, sizeof(ip))) + return -EFAULT; + + /* All unused attribute bits has to be zero */ + if (ip.attr & ~TEE_IOCTL_PARAM_ATTR_TYPE_MASK) + return -EINVAL; + + p->attr = ip.attr; + switch (ip.attr) { + case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_OUTPUT: + case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT: + /* Only out and in/out values can be updated */ + p->u.value.a = ip.a; + p->u.value.b = ip.b; + p->u.value.c = ip.c; + break; + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT: + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT: + /* + * Only the size of the memref can be updated. + * Since we don't have access to the original + * parameters here, only store the supplied size. + * The driver will copy the updated size into the + * original parameters. 
+ */ + p->u.memref.shm = NULL; + p->u.memref.shm_offs = 0; + p->u.memref.size = ip.b; + break; + default: + memset(&p->u, 0, sizeof(p->u)); + break; + } + } + return 0; +} + +static int tee_ioctl_supp_send(struct tee_context *ctx, + struct tee_ioctl_buf_data __user *ubuf) +{ + long rc; + struct tee_ioctl_buf_data buf; + struct tee_iocl_supp_send_arg __user *uarg; + struct tee_param *params; + u32 num_params; + u32 ret; + + /* Not valid for this driver */ + if (!ctx->teedev->desc->ops->supp_send) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, sizeof(buf))) + return -EFAULT; + + if (buf.buf_len > TEE_MAX_ARG_SIZE || + buf.buf_len < sizeof(struct tee_iocl_supp_send_arg)) + return -EINVAL; + + uarg = u64_to_user_ptr(buf.buf_ptr); + if (get_user(ret, &uarg->ret) || + get_user(num_params, &uarg->num_params)) + return -EFAULT; + + if (sizeof(*uarg) + TEE_IOCTL_PARAM_SIZE(num_params) > buf.buf_len) + return -EINVAL; + + params = kcalloc(num_params, sizeof(struct tee_param), GFP_KERNEL); + if (!params) + return -ENOMEM; + + rc = params_from_supp(params, num_params, uarg->params); + if (rc) + goto out; + + rc = ctx->teedev->desc->ops->supp_send(ctx, ret, num_params, params); +out: + kfree(params); + return rc; +} + +static long tee_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + struct tee_context *ctx = filp->private_data; + void __user *uarg = (void __user *)arg; + + switch (cmd) { + case TEE_IOC_VERSION: + return tee_ioctl_version(ctx, uarg); + case TEE_IOC_SHM_ALLOC: + return tee_ioctl_shm_alloc(ctx, uarg); + case TEE_IOC_OPEN_SESSION: + return tee_ioctl_open_session(ctx, uarg); + case TEE_IOC_INVOKE: + return tee_ioctl_invoke(ctx, uarg); + case TEE_IOC_CANCEL: + return tee_ioctl_cancel(ctx, uarg); + case TEE_IOC_CLOSE_SESSION: + return tee_ioctl_close_session(ctx, uarg); + case TEE_IOC_SUPPL_RECV: + return tee_ioctl_supp_recv(ctx, uarg); + case TEE_IOC_SUPPL_SEND: + return tee_ioctl_supp_send(ctx, uarg); + default: + return -EINVAL; + } +} + 
+static const struct file_operations tee_fops = { + .owner = THIS_MODULE, + .open = tee_open, + .release = tee_release, + .unlocked_ioctl = tee_ioctl, + .compat_ioctl = tee_ioctl, +}; + +static void tee_release_device(struct device *dev) +{ + struct tee_device *teedev = container_of(dev, struct tee_device, dev); + + spin_lock(&driver_lock); + clear_bit(teedev->id, dev_mask); + spin_unlock(&driver_lock); + mutex_destroy(&teedev->mutex); + idr_destroy(&teedev->idr); + kfree(teedev); +} + +/** + * tee_device_alloc() - Allocate a new struct tee_device instance + * @teedesc: Descriptor for this driver + * @dev: Parent device for this device + * @pool: Shared memory pool, NULL if not used + * @driver_data: Private driver data for this device + * + * Allocates a new struct tee_device instance. The device is + * removed by tee_device_unregister(). + * + * @returns a pointer to a 'struct tee_device' or an ERR_PTR on failure + */ +struct tee_device *tee_device_alloc(const struct tee_desc *teedesc, + struct device *dev, + struct tee_shm_pool *pool, + void *driver_data) +{ + struct tee_device *teedev; + void *ret; + int rc; + int offs = 0; + + if (!teedesc || !teedesc->name || !teedesc->ops || + !teedesc->ops->get_version || !teedesc->ops->open || + !teedesc->ops->release || !pool) + return ERR_PTR(-EINVAL); + + teedev = kzalloc(sizeof(*teedev), GFP_KERNEL); + if (!teedev) { + ret = ERR_PTR(-ENOMEM); + goto err; + } + + if (teedesc->flags & TEE_DESC_PRIVILEGED) + offs = TEE_NUM_DEVICES / 2; + + spin_lock(&driver_lock); + teedev->id = find_next_zero_bit(dev_mask, TEE_NUM_DEVICES, offs); + if (teedev->id < TEE_NUM_DEVICES) + set_bit(teedev->id, dev_mask); + spin_unlock(&driver_lock); + + if (teedev->id >= TEE_NUM_DEVICES) { + ret = ERR_PTR(-ENOMEM); + goto err; + } + + snprintf(teedev->name, sizeof(teedev->name), "tee%s%d", + teedesc->flags & TEE_DESC_PRIVILEGED ? 
"priv" : "", + teedev->id - offs); + + teedev->dev.class = tee_class; + teedev->dev.release = tee_release_device; + teedev->dev.parent = dev; + + teedev->dev.devt = MKDEV(MAJOR(tee_devt), teedev->id); + + rc = dev_set_name(&teedev->dev, "%s", teedev->name); + if (rc) { + ret = ERR_PTR(rc); + goto err_devt; + } + + cdev_init(&teedev->cdev, &tee_fops); + teedev->cdev.owner = teedesc->owner; + teedev->cdev.kobj.parent = &teedev->dev.kobj; + + dev_set_drvdata(&teedev->dev, driver_data); + device_initialize(&teedev->dev); + + /* 1 as tee_device_unregister() does one final tee_device_put() */ + teedev->num_users = 1; + init_completion(&teedev->c_no_users); + mutex_init(&teedev->mutex); + idr_init(&teedev->idr); + + teedev->desc = teedesc; + teedev->pool = pool; + + return teedev; +err_devt: + unregister_chrdev_region(teedev->dev.devt, 1); +err: + pr_err("could not register %s driver\n", + teedesc->flags & TEE_DESC_PRIVILEGED ? "privileged" : "client"); + if (teedev && teedev->id < TEE_NUM_DEVICES) { + spin_lock(&driver_lock); + clear_bit(teedev->id, dev_mask); + spin_unlock(&driver_lock); + } + kfree(teedev); + return ret; +} +EXPORT_SYMBOL_GPL(tee_device_alloc); + +static ssize_t implementation_id_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct tee_device *teedev = container_of(dev, struct tee_device, dev); + struct tee_ioctl_version_data vers; + + teedev->desc->ops->get_version(teedev, &vers); + return scnprintf(buf, PAGE_SIZE, "%d\n", vers.impl_id); +} +static DEVICE_ATTR_RO(implementation_id); + +static struct attribute *tee_dev_attrs[] = { + &dev_attr_implementation_id.attr, + NULL +}; + +static const struct attribute_group tee_dev_group = { + .attrs = tee_dev_attrs, +}; + +/** + * tee_device_register() - Registers a TEE device + * @teedev: Device to register + * + * tee_device_unregister() need to be called to remove the @teedev if + * this function fails. 
+ * + * @returns < 0 on failure + */ +int tee_device_register(struct tee_device *teedev) +{ + int rc; + + if (teedev->flags & TEE_DEVICE_FLAG_REGISTERED) { + dev_err(&teedev->dev, "attempt to register twice\n"); + return -EINVAL; + } + + rc = cdev_add(&teedev->cdev, teedev->dev.devt, 1); + if (rc) { + dev_err(&teedev->dev, + "unable to cdev_add() %s, major %d, minor %d, err=%d\n", + teedev->name, MAJOR(teedev->dev.devt), + MINOR(teedev->dev.devt), rc); + return rc; + } + + rc = device_add(&teedev->dev); + if (rc) { + dev_err(&teedev->dev, + "unable to device_add() %s, major %d, minor %d, err=%d\n", + teedev->name, MAJOR(teedev->dev.devt), + MINOR(teedev->dev.devt), rc); + goto err_device_add; + } + + rc = sysfs_create_group(&teedev->dev.kobj, &tee_dev_group); + if (rc) { + dev_err(&teedev->dev, + "failed to create sysfs attributes, err=%d\n", rc); + goto err_sysfs_create_group; + } + + teedev->flags |= TEE_DEVICE_FLAG_REGISTERED; + return 0; + +err_sysfs_create_group: + device_del(&teedev->dev); +err_device_add: + cdev_del(&teedev->cdev); + return rc; +} +EXPORT_SYMBOL_GPL(tee_device_register); + +void tee_device_put(struct tee_device *teedev) +{ + mutex_lock(&teedev->mutex); + /* Shouldn't put in this state */ + if (!WARN_ON(!teedev->desc)) { + teedev->num_users--; + if (!teedev->num_users) { + teedev->desc = NULL; + complete(&teedev->c_no_users); + } + } + mutex_unlock(&teedev->mutex); +} + +bool tee_device_get(struct tee_device *teedev) +{ + mutex_lock(&teedev->mutex); + if (!teedev->desc) { + mutex_unlock(&teedev->mutex); + return false; + } + teedev->num_users++; + mutex_unlock(&teedev->mutex); + return true; +} + +/** + * tee_device_unregister() - Removes a TEE device + * @teedev: Device to unregister + * + * This function should be called to remove the @teedev even if + * tee_device_register() hasn't been called yet. Does nothing if + * @teedev is NULL. 
+ */ +void tee_device_unregister(struct tee_device *teedev) +{ + if (!teedev) + return; + + if (teedev->flags & TEE_DEVICE_FLAG_REGISTERED) { + sysfs_remove_group(&teedev->dev.kobj, &tee_dev_group); + cdev_del(&teedev->cdev); + device_del(&teedev->dev); + } + + tee_device_put(teedev); + wait_for_completion(&teedev->c_no_users); + + /* + * No need to take a mutex any longer now since teedev->desc was + * set to NULL before teedev->c_no_users was completed. + */ + + teedev->pool = NULL; + + put_device(&teedev->dev); +} +EXPORT_SYMBOL_GPL(tee_device_unregister); + +/** + * tee_get_drvdata() - Return driver_data pointer + * @teedev: Device containing the driver_data pointer + * @returns the driver_data pointer supplied to tee_register(). + */ +void *tee_get_drvdata(struct tee_device *teedev) +{ + return dev_get_drvdata(&teedev->dev); +} +EXPORT_SYMBOL_GPL(tee_get_drvdata); + +static int __init tee_init(void) +{ + int rc; + + tee_class = class_create(THIS_MODULE, "tee"); + if (IS_ERR(tee_class)) { + pr_err("couldn't create class\n"); + return PTR_ERR(tee_class); + } + + rc = alloc_chrdev_region(&tee_devt, 0, TEE_NUM_DEVICES, "tee"); + if (rc) { + pr_err("failed to allocate char dev region\n"); + class_destroy(tee_class); + tee_class = NULL; + } + + return rc; +} + +static void __exit tee_exit(void) +{ + class_destroy(tee_class); + tee_class = NULL; + unregister_chrdev_region(tee_devt, TEE_NUM_DEVICES); +} + +subsys_initcall(tee_init); +module_exit(tee_exit); + +MODULE_AUTHOR("Linaro"); +MODULE_DESCRIPTION("TEE Driver"); +MODULE_VERSION("1.0"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/tee/tee_private.h b/drivers/tee/tee_private.h new file mode 100644 index 000000000000..21cb6be8bce9 --- /dev/null +++ b/drivers/tee/tee_private.h @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2015-2016, Linaro Limited + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, 
distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#ifndef TEE_PRIVATE_H +#define TEE_PRIVATE_H + +#include +#include +#include +#include +#include +#include + +struct tee_device; + +/** + * struct tee_shm - shared memory object + * @teedev: device used to allocate the object + * @ctx: context using the object, if NULL the context is gone + * @link link element + * @paddr: physical address of the shared memory + * @kaddr: virtual address of the shared memory + * @size: size of shared memory + * @dmabuf: dmabuf used to for exporting to user space + * @flags: defined by TEE_SHM_* in tee_drv.h + * @id: unique id of a shared memory object on this device + */ +struct tee_shm { + struct tee_device *teedev; + struct tee_context *ctx; + struct list_head link; + phys_addr_t paddr; + void *kaddr; + size_t size; + struct dma_buf *dmabuf; + u32 flags; + int id; +}; + +struct tee_shm_pool_mgr; + +/** + * struct tee_shm_pool_mgr_ops - shared memory pool manager operations + * @alloc: called when allocating shared memory + * @free: called when freeing shared memory + */ +struct tee_shm_pool_mgr_ops { + int (*alloc)(struct tee_shm_pool_mgr *poolmgr, struct tee_shm *shm, + size_t size); + void (*free)(struct tee_shm_pool_mgr *poolmgr, struct tee_shm *shm); +}; + +/** + * struct tee_shm_pool_mgr - shared memory manager + * @ops: operations + * @private_data: private data for the shared memory manager + */ +struct tee_shm_pool_mgr { + const struct tee_shm_pool_mgr_ops *ops; + void *private_data; +}; + +/** + * struct tee_shm_pool - shared memory pool + * @private_mgr: pool manager for shared memory only between kernel + * and secure world + * @dma_buf_mgr: pool manager for shared memory exported to user space + * @destroy: called 
when destroying the pool + * @private_data: private data for the pool + */ +struct tee_shm_pool { + struct tee_shm_pool_mgr private_mgr; + struct tee_shm_pool_mgr dma_buf_mgr; + void (*destroy)(struct tee_shm_pool *pool); + void *private_data; +}; + +#define TEE_DEVICE_FLAG_REGISTERED 0x1 +#define TEE_MAX_DEV_NAME_LEN 32 + +/** + * struct tee_device - TEE Device representation + * @name: name of device + * @desc: description of device + * @id: unique id of device + * @flags: represented by TEE_DEVICE_FLAG_REGISTERED above + * @dev: embedded basic device structure + * @cdev: embedded cdev + * @num_users: number of active users of this device + * @c_no_user: completion used when unregistering the device + * @mutex: mutex protecting @num_users and @idr + * @idr: register of shared memory object allocated on this device + * @pool: shared memory pool + */ +struct tee_device { + char name[TEE_MAX_DEV_NAME_LEN]; + const struct tee_desc *desc; + int id; + unsigned int flags; + + struct device dev; + struct cdev cdev; + + size_t num_users; + struct completion c_no_users; + struct mutex mutex; /* protects num_users and idr */ + + struct idr idr; + struct tee_shm_pool *pool; +}; + +int tee_shm_init(void); + +int tee_shm_get_fd(struct tee_shm *shm); + +bool tee_device_get(struct tee_device *teedev); +void tee_device_put(struct tee_device *teedev); + +#endif /*TEE_PRIVATE_H*/ diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c new file mode 100644 index 000000000000..0be1e3e93bee --- /dev/null +++ b/drivers/tee/tee_shm.c @@ -0,0 +1,358 @@ +/* + * Copyright (c) 2015-2016, Linaro Limited + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include "tee_private.h" + +static void tee_shm_release(struct tee_shm *shm) +{ + struct tee_device *teedev = shm->teedev; + struct tee_shm_pool_mgr *poolm; + + mutex_lock(&teedev->mutex); + idr_remove(&teedev->idr, shm->id); + if (shm->ctx) + list_del(&shm->link); + mutex_unlock(&teedev->mutex); + + if (shm->flags & TEE_SHM_DMA_BUF) + poolm = &teedev->pool->dma_buf_mgr; + else + poolm = &teedev->pool->private_mgr; + + poolm->ops->free(poolm, shm); + kfree(shm); + + tee_device_put(teedev); +} + +static struct sg_table *tee_shm_op_map_dma_buf(struct dma_buf_attachment + *attach, enum dma_data_direction dir) +{ + return NULL; +} + +static void tee_shm_op_unmap_dma_buf(struct dma_buf_attachment *attach, + struct sg_table *table, + enum dma_data_direction dir) +{ +} + +static void tee_shm_op_release(struct dma_buf *dmabuf) +{ + struct tee_shm *shm = dmabuf->priv; + + tee_shm_release(shm); +} + +static void *tee_shm_op_kmap_atomic(struct dma_buf *dmabuf, unsigned long pgnum) +{ + return NULL; +} + +static void *tee_shm_op_kmap(struct dma_buf *dmabuf, unsigned long pgnum) +{ + return NULL; +} + +static int tee_shm_op_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma) +{ + struct tee_shm *shm = dmabuf->priv; + size_t size = vma->vm_end - vma->vm_start; + + return remap_pfn_range(vma, vma->vm_start, shm->paddr >> PAGE_SHIFT, + size, vma->vm_page_prot); +} + +static struct dma_buf_ops tee_shm_dma_buf_ops = { + .map_dma_buf = tee_shm_op_map_dma_buf, + .unmap_dma_buf = tee_shm_op_unmap_dma_buf, + .release = tee_shm_op_release, + .kmap_atomic = tee_shm_op_kmap_atomic, + .kmap = tee_shm_op_kmap, + .mmap = tee_shm_op_mmap, +}; + +/** + * 
tee_shm_alloc() - Allocate shared memory + * @ctx: Context that allocates the shared memory + * @size: Requested size of shared memory + * @flags: Flags setting properties for the requested shared memory. + * + * Memory allocated as global shared memory is automatically freed when the + * TEE file pointer is closed. The @flags field uses the bits defined by + * TEE_SHM_* in <linux/tee_drv.h>. TEE_SHM_MAPPED must currently always be + * set. If TEE_SHM_DMA_BUF global shared memory will be allocated and + * associated with a dma-buf handle, else driver private memory. + */ +struct tee_shm *tee_shm_alloc(struct tee_context *ctx, size_t size, u32 flags) +{ + struct tee_device *teedev = ctx->teedev; + struct tee_shm_pool_mgr *poolm = NULL; + struct tee_shm *shm; + void *ret; + int rc; + + if (!(flags & TEE_SHM_MAPPED)) { + dev_err(teedev->dev.parent, + "only mapped allocations supported\n"); + return ERR_PTR(-EINVAL); + } + + if ((flags & ~(TEE_SHM_MAPPED | TEE_SHM_DMA_BUF))) { + dev_err(teedev->dev.parent, "invalid shm flags 0x%x", flags); + return ERR_PTR(-EINVAL); + } + + if (!tee_device_get(teedev)) + return ERR_PTR(-EINVAL); + + if (!teedev->pool) { + /* teedev has been detached from driver */ + ret = ERR_PTR(-EINVAL); + goto err_dev_put; + } + + shm = kzalloc(sizeof(*shm), GFP_KERNEL); + if (!shm) { + ret = ERR_PTR(-ENOMEM); + goto err_dev_put; + } + + shm->flags = flags; + shm->teedev = teedev; + shm->ctx = ctx; + if (flags & TEE_SHM_DMA_BUF) + poolm = &teedev->pool->dma_buf_mgr; + else + poolm = &teedev->pool->private_mgr; + + rc = poolm->ops->alloc(poolm, shm, size); + if (rc) { + ret = ERR_PTR(rc); + goto err_kfree; + } + + mutex_lock(&teedev->mutex); + shm->id = idr_alloc(&teedev->idr, shm, 1, 0, GFP_KERNEL); + mutex_unlock(&teedev->mutex); + if (shm->id < 0) { + ret = ERR_PTR(shm->id); + goto err_pool_free; + } + + if (flags & TEE_SHM_DMA_BUF) { + DEFINE_DMA_BUF_EXPORT_INFO(exp_info); + + exp_info.ops = &tee_shm_dma_buf_ops; + exp_info.size = shm->size; + exp_info.flags =
O_RDWR; + exp_info.priv = shm; + + shm->dmabuf = dma_buf_export(&exp_info); + if (IS_ERR(shm->dmabuf)) { + ret = ERR_CAST(shm->dmabuf); + goto err_rem; + } + } + mutex_lock(&teedev->mutex); + list_add_tail(&shm->link, &ctx->list_shm); + mutex_unlock(&teedev->mutex); + + return shm; +err_rem: + mutex_lock(&teedev->mutex); + idr_remove(&teedev->idr, shm->id); + mutex_unlock(&teedev->mutex); +err_pool_free: + poolm->ops->free(poolm, shm); +err_kfree: + kfree(shm); +err_dev_put: + tee_device_put(teedev); + return ret; +} +EXPORT_SYMBOL_GPL(tee_shm_alloc); + +/** + * tee_shm_get_fd() - Increase reference count and return file descriptor + * @shm: Shared memory handle + * @returns user space file descriptor to shared memory + */ +int tee_shm_get_fd(struct tee_shm *shm) +{ + u32 req_flags = TEE_SHM_MAPPED | TEE_SHM_DMA_BUF; + int fd; + + if ((shm->flags & req_flags) != req_flags) + return -EINVAL; + + fd = dma_buf_fd(shm->dmabuf, O_CLOEXEC); + if (fd >= 0) + get_dma_buf(shm->dmabuf); + return fd; +} + +/** + * tee_shm_free() - Free shared memory + * @shm: Handle to shared memory to free + */ +void tee_shm_free(struct tee_shm *shm) +{ + /* + * dma_buf_put() decreases the dmabuf reference counter and will + * call tee_shm_release() when the last reference is gone. + * + * In the case of driver private memory we call tee_shm_release + * directly instead as it doesn't have a reference counter. 
+ */ + if (shm->flags & TEE_SHM_DMA_BUF) + dma_buf_put(shm->dmabuf); + else + tee_shm_release(shm); +} +EXPORT_SYMBOL_GPL(tee_shm_free); + +/** + * tee_shm_va2pa() - Get physical address of a virtual address + * @shm: Shared memory handle + * @va: Virtual address to translate + * @pa: Returned physical address + * @returns 0 on success and < 0 on failure + */ +int tee_shm_va2pa(struct tee_shm *shm, void *va, phys_addr_t *pa) +{ + /* Check that we're in the range of the shm */ + if ((char *)va < (char *)shm->kaddr) + return -EINVAL; + if ((char *)va >= ((char *)shm->kaddr + shm->size)) + return -EINVAL; + + return tee_shm_get_pa( + shm, (unsigned long)va - (unsigned long)shm->kaddr, pa); +} +EXPORT_SYMBOL_GPL(tee_shm_va2pa); + +/** + * tee_shm_pa2va() - Get virtual address of a physical address + * @shm: Shared memory handle + * @pa: Physical address to translate + * @va: Returned virtual address + * @returns 0 on success and < 0 on failure + */ +int tee_shm_pa2va(struct tee_shm *shm, phys_addr_t pa, void **va) +{ + /* Check that we're in the range of the shm */ + if (pa < shm->paddr) + return -EINVAL; + if (pa >= (shm->paddr + shm->size)) + return -EINVAL; + + if (va) { + void *v = tee_shm_get_va(shm, pa - shm->paddr); + + if (IS_ERR(v)) + return PTR_ERR(v); + *va = v; + } + return 0; +} +EXPORT_SYMBOL_GPL(tee_shm_pa2va); + +/** + * tee_shm_get_va() - Get virtual address of a shared memory plus an offset + * @shm: Shared memory handle + * @offs: Offset from start of this shared memory + * @returns virtual address of the shared memory + offs if offs is within + * the bounds of this shared memory, else an ERR_PTR + */ +void *tee_shm_get_va(struct tee_shm *shm, size_t offs) +{ + if (offs >= shm->size) + return ERR_PTR(-EINVAL); + return (char *)shm->kaddr + offs; +} +EXPORT_SYMBOL_GPL(tee_shm_get_va); + +/** + * tee_shm_get_pa() - Get physical address of a shared memory plus an offset + * @shm: Shared memory handle + * @offs: Offset from start of this shared memory +
* @pa: Physical address to return + * @returns 0 if offs is within the bounds of this shared memory, else an + * error code. + */ +int tee_shm_get_pa(struct tee_shm *shm, size_t offs, phys_addr_t *pa) +{ + if (offs >= shm->size) + return -EINVAL; + if (pa) + *pa = shm->paddr + offs; + return 0; +} +EXPORT_SYMBOL_GPL(tee_shm_get_pa); + +/** + * tee_shm_get_from_id() - Find shared memory object and increase reference + * count + * @ctx: Context owning the shared memory + * @id: Id of shared memory object + * @returns a pointer to 'struct tee_shm' on success or an ERR_PTR on failure + */ +struct tee_shm *tee_shm_get_from_id(struct tee_context *ctx, int id) +{ + struct tee_device *teedev; + struct tee_shm *shm; + + if (!ctx) + return ERR_PTR(-EINVAL); + + teedev = ctx->teedev; + mutex_lock(&teedev->mutex); + shm = idr_find(&teedev->idr, id); + if (!shm || shm->ctx != ctx) + shm = ERR_PTR(-EINVAL); + else if (shm->flags & TEE_SHM_DMA_BUF) + get_dma_buf(shm->dmabuf); + mutex_unlock(&teedev->mutex); + return shm; +} +EXPORT_SYMBOL_GPL(tee_shm_get_from_id); + +/** + * tee_shm_get_id() - Get id of a shared memory object + * @shm: Shared memory handle + * @returns id + */ +int tee_shm_get_id(struct tee_shm *shm) +{ + return shm->id; +} +EXPORT_SYMBOL_GPL(tee_shm_get_id); + +/** + * tee_shm_put() - Decrease reference count on a shared memory handle + * @shm: Shared memory handle + */ +void tee_shm_put(struct tee_shm *shm) +{ + if (shm->flags & TEE_SHM_DMA_BUF) + dma_buf_put(shm->dmabuf); +} +EXPORT_SYMBOL_GPL(tee_shm_put); diff --git a/drivers/tee/tee_shm_pool.c b/drivers/tee/tee_shm_pool.c new file mode 100644 index 000000000000..fb4f8522a526 --- /dev/null +++ b/drivers/tee/tee_shm_pool.c @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2015, Linaro Limited + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include +#include +#include +#include +#include +#include "tee_private.h" + +static int pool_op_gen_alloc(struct tee_shm_pool_mgr *poolm, + struct tee_shm *shm, size_t size) +{ + unsigned long va; + struct gen_pool *genpool = poolm->private_data; + size_t s = roundup(size, 1 << genpool->min_alloc_order); + + va = gen_pool_alloc(genpool, s); + if (!va) + return -ENOMEM; + + memset((void *)va, 0, s); + shm->kaddr = (void *)va; + shm->paddr = gen_pool_virt_to_phys(genpool, va); + shm->size = s; + return 0; +} + +static void pool_op_gen_free(struct tee_shm_pool_mgr *poolm, + struct tee_shm *shm) +{ + gen_pool_free(poolm->private_data, (unsigned long)shm->kaddr, + shm->size); + shm->kaddr = NULL; +} + +static const struct tee_shm_pool_mgr_ops pool_ops_generic = { + .alloc = pool_op_gen_alloc, + .free = pool_op_gen_free, +}; + +static void pool_res_mem_destroy(struct tee_shm_pool *pool) +{ + gen_pool_destroy(pool->private_mgr.private_data); + gen_pool_destroy(pool->dma_buf_mgr.private_data); +} + +static int pool_res_mem_mgr_init(struct tee_shm_pool_mgr *mgr, + struct tee_shm_pool_mem_info *info, + int min_alloc_order) +{ + size_t page_mask = PAGE_SIZE - 1; + struct gen_pool *genpool = NULL; + int rc; + + /* + * Start and end must be page aligned + */ + if ((info->vaddr & page_mask) || (info->paddr & page_mask) || + (info->size & page_mask)) + return -EINVAL; + + genpool = gen_pool_create(min_alloc_order, -1); + if (!genpool) + return -ENOMEM; + + gen_pool_set_algo(genpool, gen_pool_best_fit, NULL); + rc = gen_pool_add_virt(genpool, info->vaddr, info->paddr, info->size, + -1); + if (rc) { + gen_pool_destroy(genpool); + return rc; + } + + mgr->private_data = genpool; + mgr->ops = &pool_ops_generic; + return 0; +} 
+ +/** + * tee_shm_pool_alloc_res_mem() - Create a shared memory pool from reserved + * memory range + * @priv_info: Information for driver private shared memory pool + * @dmabuf_info: Information for dma-buf shared memory pool + * + * Start and end of pools must be page aligned. + * + * Allocation with the flag TEE_SHM_DMA_BUF set will use the range supplied + * in @dmabuf_info, others will use the range provided by @priv_info. + * + * @returns pointer to a 'struct tee_shm_pool' or an ERR_PTR on failure. + */ +struct tee_shm_pool * +tee_shm_pool_alloc_res_mem(struct tee_shm_pool_mem_info *priv_info, + struct tee_shm_pool_mem_info *dmabuf_info) +{ + struct tee_shm_pool *pool = NULL; + int ret; + + pool = kzalloc(sizeof(*pool), GFP_KERNEL); + if (!pool) { + ret = -ENOMEM; + goto err; + } + + /* + * Create the pool for driver private shared memory + */ + ret = pool_res_mem_mgr_init(&pool->private_mgr, priv_info, + 3 /* 8 byte aligned */); + if (ret) + goto err; + + /* + * Create the pool for dma_buf shared memory + */ + ret = pool_res_mem_mgr_init(&pool->dma_buf_mgr, dmabuf_info, + PAGE_SHIFT); + if (ret) + goto err; + + pool->destroy = pool_res_mem_destroy; + return pool; +err: + if (ret == -ENOMEM) + pr_err("%s: can't allocate memory for res_mem shared memory pool\n", __func__); + if (pool && pool->private_mgr.private_data) + gen_pool_destroy(pool->private_mgr.private_data); + kfree(pool); + return ERR_PTR(ret); +} +EXPORT_SYMBOL_GPL(tee_shm_pool_alloc_res_mem); + +/** + * tee_shm_pool_free() - Free a shared memory pool + * @pool: The shared memory pool to free + * + * There must be no remaining shared memory allocated from this pool when + * this function is called.
+ */ +void tee_shm_pool_free(struct tee_shm_pool *pool) +{ + pool->destroy(pool); + kfree(pool); +} +EXPORT_SYMBOL_GPL(tee_shm_pool_free); diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h new file mode 100644 index 000000000000..0f175b8f6456 --- /dev/null +++ b/include/linux/tee_drv.h @@ -0,0 +1,277 @@ +/* + * Copyright (c) 2015-2016, Linaro Limited + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef __TEE_DRV_H +#define __TEE_DRV_H + +#include +#include +#include +#include + +/* + * The file describes the API provided by the generic TEE driver to the + * specific TEE driver. 
+ */ + +#define TEE_SHM_MAPPED 0x1 /* Memory mapped by the kernel */ +#define TEE_SHM_DMA_BUF 0x2 /* Memory with dma-buf handle */ + +struct tee_device; +struct tee_shm; +struct tee_shm_pool; + +/** + * struct tee_context - driver specific context on file pointer data + * @teedev: pointer to this drivers struct tee_device + * @list_shm: List of shared memory object owned by this context + * @data: driver specific context data, managed by the driver + */ +struct tee_context { + struct tee_device *teedev; + struct list_head list_shm; + void *data; +}; + +struct tee_param_memref { + size_t shm_offs; + size_t size; + struct tee_shm *shm; +}; + +struct tee_param_value { + u64 a; + u64 b; + u64 c; +}; + +struct tee_param { + u64 attr; + union { + struct tee_param_memref memref; + struct tee_param_value value; + } u; +}; + +/** + * struct tee_driver_ops - driver operations vtable + * @get_version: returns version of driver + * @open: called when the device file is opened + * @release: release this open file + * @open_session: open a new session + * @close_session: close a session + * @invoke_func: invoke a trusted function + * @cancel_req: request cancel of an ongoing invoke or open + * @supp_recv: called for supplicant to get a command + * @supp_send: called for supplicant to send a response + */ +struct tee_driver_ops { + void (*get_version)(struct tee_device *teedev, + struct tee_ioctl_version_data *vers); + int (*open)(struct tee_context *ctx); + void (*release)(struct tee_context *ctx); + int (*open_session)(struct tee_context *ctx, + struct tee_ioctl_open_session_arg *arg, + struct tee_param *param); + int (*close_session)(struct tee_context *ctx, u32 session); + int (*invoke_func)(struct tee_context *ctx, + struct tee_ioctl_invoke_arg *arg, + struct tee_param *param); + int (*cancel_req)(struct tee_context *ctx, u32 cancel_id, u32 session); + int (*supp_recv)(struct tee_context *ctx, u32 *func, u32 *num_params, + struct tee_param *param); + int (*supp_send)(struct
tee_context *ctx, u32 ret, u32 num_params, + struct tee_param *param); +}; + +/** + * struct tee_desc - Describes the TEE driver to the subsystem + * @name: name of driver + * @ops: driver operations vtable + * @owner: module providing the driver + * @flags: Extra properties of driver, defined by TEE_DESC_* below + */ +#define TEE_DESC_PRIVILEGED 0x1 +struct tee_desc { + const char *name; + const struct tee_driver_ops *ops; + struct module *owner; + u32 flags; +}; + +/** + * tee_device_alloc() - Allocate a new struct tee_device instance + * @teedesc: Descriptor for this driver + * @dev: Parent device for this device + * @pool: Shared memory pool, NULL if not used + * @driver_data: Private driver data for this device + * + * Allocates a new struct tee_device instance. The device is + * removed by tee_device_unregister(). + * + * @returns a pointer to a 'struct tee_device' or an ERR_PTR on failure + */ +struct tee_device *tee_device_alloc(const struct tee_desc *teedesc, + struct device *dev, + struct tee_shm_pool *pool, + void *driver_data); + +/** + * tee_device_register() - Registers a TEE device + * @teedev: Device to register + * + * tee_device_unregister() need to be called to remove the @teedev if + * this function fails. + * + * @returns < 0 on failure + */ +int tee_device_register(struct tee_device *teedev); + +/** + * tee_device_unregister() - Removes a TEE device + * @teedev: Device to unregister + * + * This function should be called to remove the @teedev even if + * tee_device_register() hasn't been called yet. Does nothing if + * @teedev is NULL. 
+ */ +void tee_device_unregister(struct tee_device *teedev); + +/** + * struct tee_shm_pool_mem_info - holds information needed to create a shared + * memory pool + * @vaddr: Virtual address of start of pool + * @paddr: Physical address of start of pool + * @size: Size in bytes of the pool + */ +struct tee_shm_pool_mem_info { + unsigned long vaddr; + phys_addr_t paddr; + size_t size; +}; + +/** + * tee_shm_pool_alloc_res_mem() - Create a shared memory pool from reserved + * memory range + * @priv_info: Information for driver private shared memory pool + * @dmabuf_info: Information for dma-buf shared memory pool + * + * Start and end of pools must be page aligned. + * + * Allocation with the flag TEE_SHM_DMA_BUF set will use the range supplied + * in @dmabuf_info, others will use the range provided by @priv_info. + * + * @returns pointer to a 'struct tee_shm_pool' or an ERR_PTR on failure. + */ +struct tee_shm_pool * +tee_shm_pool_alloc_res_mem(struct tee_shm_pool_mem_info *priv_info, + struct tee_shm_pool_mem_info *dmabuf_info); + +/** + * tee_shm_pool_free() - Free a shared memory pool + * @pool: The shared memory pool to free + * + * There must be no remaining shared memory allocated from this pool when + * this function is called. + */ +void tee_shm_pool_free(struct tee_shm_pool *pool); + +/** + * tee_get_drvdata() - Return driver_data pointer + * @returns the driver_data pointer supplied to tee_device_alloc(). + */ +void *tee_get_drvdata(struct tee_device *teedev); + +/** + * tee_shm_alloc() - Allocate shared memory + * @ctx: Context that allocates the shared memory + * @size: Requested size of shared memory + * @flags: Flags setting properties for the requested shared memory. + * + * Memory allocated as global shared memory is automatically freed when the + * TEE file pointer is closed. The @flags field uses the bits defined by + * TEE_SHM_* above. TEE_SHM_MAPPED must currently always be set.
If + * TEE_SHM_DMA_BUF global shared memory will be allocated and associated + * with a dma-buf handle, else driver private memory. + * + * @returns a pointer to 'struct tee_shm' + */ +struct tee_shm *tee_shm_alloc(struct tee_context *ctx, size_t size, u32 flags); + +/** + * tee_shm_free() - Free shared memory + * @shm: Handle to shared memory to free + */ +void tee_shm_free(struct tee_shm *shm); + +/** + * tee_shm_put() - Decrease reference count on a shared memory handle + * @shm: Shared memory handle + */ +void tee_shm_put(struct tee_shm *shm); + +/** + * tee_shm_va2pa() - Get physical address of a virtual address + * @shm: Shared memory handle + * @va: Virtual address to translate + * @pa: Returned physical address + * @returns 0 on success and < 0 on failure + */ +int tee_shm_va2pa(struct tee_shm *shm, void *va, phys_addr_t *pa); + +/** + * tee_shm_pa2va() - Get virtual address of a physical address + * @shm: Shared memory handle + * @pa: Physical address to translate + * @va: Returned virtual address + * @returns 0 on success and < 0 on failure + */ +int tee_shm_pa2va(struct tee_shm *shm, phys_addr_t pa, void **va); + +/** + * tee_shm_get_va() - Get virtual address of a shared memory plus an offset + * @shm: Shared memory handle + * @offs: Offset from start of this shared memory + * @returns virtual address of the shared memory + offs if offs is within + * the bounds of this shared memory, else an ERR_PTR + */ +void *tee_shm_get_va(struct tee_shm *shm, size_t offs); + +/** + * tee_shm_get_pa() - Get physical address of a shared memory plus an offset + * @shm: Shared memory handle + * @offs: Offset from start of this shared memory + * @pa: Physical address to return + * @returns 0 if offs is within the bounds of this shared memory, else an + * error code.
+ */ +int tee_shm_get_pa(struct tee_shm *shm, size_t offs, phys_addr_t *pa); + +/** + * tee_shm_get_id() - Get id of a shared memory object + * @shm: Shared memory handle + * @returns id + */ +int tee_shm_get_id(struct tee_shm *shm); + +/** + * tee_shm_get_from_id() - Find shared memory object and increase reference + * count + * @ctx: Context owning the shared memory + * @id: Id of shared memory object + * @returns a pointer to 'struct tee_shm' on success or an ERR_PTR on failure + */ +struct tee_shm *tee_shm_get_from_id(struct tee_context *ctx, int id); + +#endif /*__TEE_DRV_H*/ diff --git a/include/uapi/linux/tee.h b/include/uapi/linux/tee.h new file mode 100644 index 000000000000..370d8845ab21 --- /dev/null +++ b/include/uapi/linux/tee.h @@ -0,0 +1,346 @@ +/* + * Copyright (c) 2015-2016, Linaro Limited + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __TEE_H +#define __TEE_H + +#include +#include + +/* + * This file describes the API provided by a TEE driver to user space. + * + * Each TEE driver defines a TEE specific protocol which is used for the + * data passed back and forth using TEE_IOC_CMD. + */ + +/* Helpers to make the ioctl defines */ +#define TEE_IOC_MAGIC 0xa4 +#define TEE_IOC_BASE 0 + +/* Flags relating to shared memory */ +#define TEE_IOCTL_SHM_MAPPED 0x1 /* memory mapped in normal world */ +#define TEE_IOCTL_SHM_DMA_BUF 0x2 /* dma-buf handle on shared memory */ + +#define TEE_MAX_ARG_SIZE 1024 + +#define TEE_GEN_CAP_GP (1 << 0)/* GlobalPlatform compliant TEE */ + +/* + * TEE Implementation ID + */ +#define TEE_IMPL_ID_OPTEE 1 + +/* + * OP-TEE specific capabilities + */ +#define TEE_OPTEE_CAP_TZ (1 << 0) + +/** + * struct tee_ioctl_version_data - TEE version + * @impl_id: [out] TEE implementation id + * @impl_caps: [out] Implementation specific capabilities + * @gen_caps: [out] Generic capabilities, defined by TEE_GEN_CAPS_* above + * + * Identifies the TEE implementation, @impl_id is one of TEE_IMPL_ID_* above. + * @impl_caps is implementation specific, for example TEE_OPTEE_CAP_* + * is valid when @impl_id == TEE_IMPL_ID_OPTEE. 
+ */ +struct tee_ioctl_version_data { + __u32 impl_id; + __u32 impl_caps; + __u32 gen_caps; +}; + +/** + * TEE_IOC_VERSION - query version of TEE + * + * Takes a tee_ioctl_version_data struct and returns with the TEE version + * data filled in. + */ +#define TEE_IOC_VERSION _IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 0, \ + struct tee_ioctl_version_data) + +/** + * struct tee_ioctl_shm_alloc_data - Shared memory allocate argument + * @size: [in/out] Size of shared memory to allocate + * @flags: [in/out] Flags to/from allocation. + * @id: [out] Identifier of the shared memory + * + * The flags field should currently be zero as input. Updated by the call + * with actual flags as defined by TEE_IOCTL_SHM_* above. + * This structure is used as argument for TEE_IOC_SHM_ALLOC below. + */ +struct tee_ioctl_shm_alloc_data { + __u64 size; + __u32 flags; + __s32 id; +}; + +/** + * TEE_IOC_SHM_ALLOC - allocate shared memory + * + * Allocates shared memory between the user space process and secure OS. + * + * Returns a file descriptor on success or < 0 on failure + * + * The returned file descriptor is used to map the shared memory into user + * space. The shared memory is freed when the descriptor is closed and the + * memory is unmapped. + */ +#define TEE_IOC_SHM_ALLOC _IOWR(TEE_IOC_MAGIC, TEE_IOC_BASE + 1, \ + struct tee_ioctl_shm_alloc_data) + +/** + * struct tee_ioctl_buf_data - Variable sized buffer + * @buf_ptr: [in] A __user pointer to a buffer + * @buf_len: [in] Length of the buffer above + * + * Used as argument for TEE_IOC_OPEN_SESSION, TEE_IOC_INVOKE, + * TEE_IOC_SUPPL_RECV, and TEE_IOC_SUPPL_SEND below. 
+ */ +struct tee_ioctl_buf_data { + __u64 buf_ptr; + __u64 buf_len; +}; + +/* + * Attributes for struct tee_ioctl_param, selects field in the union + */ +#define TEE_IOCTL_PARAM_ATTR_TYPE_NONE 0 /* parameter not used */ + +/* + * These defines value parameters (struct tee_ioctl_param_value) + */ +#define TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT 1 +#define TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_OUTPUT 2 +#define TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT 3 /* input and output */ + +/* + * These defines shared memory reference parameters (struct + * tee_ioctl_param_memref) + */ +#define TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT 5 +#define TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT 6 +#define TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT 7 /* input and output */ + +/* + * Mask for the type part of the attribute, leaves room for more types + */ +#define TEE_IOCTL_PARAM_ATTR_TYPE_MASK 0xff + +/* + * Matches TEEC_LOGIN_* in GP TEE Client API + * Are only defined for GP compliant TEEs + */ +#define TEE_IOCTL_LOGIN_PUBLIC 0 +#define TEE_IOCTL_LOGIN_USER 1 +#define TEE_IOCTL_LOGIN_GROUP 2 +#define TEE_IOCTL_LOGIN_APPLICATION 4 +#define TEE_IOCTL_LOGIN_USER_APPLICATION 5 +#define TEE_IOCTL_LOGIN_GROUP_APPLICATION 6 + +/** + * struct tee_ioctl_param - parameter + * @attr: attributes + * @a: if a memref, offset into the shared memory object, else a value parameter + * @b: if a memref, size of the buffer, else a value parameter + * @c: if a memref, shared memory identifier, else a value parameter + * + * @attr & TEE_PARAM_ATTR_TYPE_MASK indicates if memref or value is used in + * the union. TEE_PARAM_ATTR_TYPE_VALUE_* indicates value and + * TEE_PARAM_ATTR_TYPE_MEMREF_* indicates memref. TEE_PARAM_ATTR_TYPE_NONE + * indicates that none of the members are used. + * + * Shared memory is allocated with TEE_IOC_SHM_ALLOC which returns an + * identifier representing the shared memory object. A memref can reference + * a part of a shared memory by specifying an offset (@a) and size (@b) of + * the object. 
To supply the entire shared memory object set the offset + * (@a) to 0 and size (@b) to the previously returned size of the object. + */ +struct tee_ioctl_param { + __u64 attr; + __u64 a; + __u64 b; + __u64 c; +}; + +#define TEE_IOCTL_UUID_LEN 16 + +/** + * struct tee_ioctl_open_session_arg - Open session argument + * @uuid: [in] UUID of the Trusted Application + * @clnt_uuid: [in] UUID of client + * @clnt_login: [in] Login class of client, TEE_IOCTL_LOGIN_* above + * @cancel_id: [in] Cancellation id, a unique value to identify this request + * @session: [out] Session id + * @ret: [out] return value + * @ret_origin [out] origin of the return value + * @num_params [in] number of parameters following this struct + */ +struct tee_ioctl_open_session_arg { + __u8 uuid[TEE_IOCTL_UUID_LEN]; + __u8 clnt_uuid[TEE_IOCTL_UUID_LEN]; + __u32 clnt_login; + __u32 cancel_id; + __u32 session; + __u32 ret; + __u32 ret_origin; + __u32 num_params; + /* num_params tells the actual number of element in params */ + struct tee_ioctl_param params[]; +}; + +/** + * TEE_IOC_OPEN_SESSION - opens a session to a Trusted Application + * + * Takes a struct tee_ioctl_buf_data which contains a struct + * tee_ioctl_open_session_arg followed by any array of struct + * tee_ioctl_param + */ +#define TEE_IOC_OPEN_SESSION _IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 2, \ + struct tee_ioctl_buf_data) + +/** + * struct tee_ioctl_invoke_func_arg - Invokes a function in a Trusted + * Application + * @func: [in] Trusted Application function, specific to the TA + * @session: [in] Session id + * @cancel_id: [in] Cancellation id, a unique value to identify this request + * @ret: [out] return value + * @ret_origin [out] origin of the return value + * @num_params [in] number of parameters following this struct + */ +struct tee_ioctl_invoke_arg { + __u32 func; + __u32 session; + __u32 cancel_id; + __u32 ret; + __u32 ret_origin; + __u32 num_params; + /* num_params tells the actual number of element in params */ + struct 
tee_ioctl_param params[]; +}; + +/** + * TEE_IOC_INVOKE - Invokes a function in a Trusted Application + * + * Takes a struct tee_ioctl_buf_data which contains a struct + * tee_invoke_func_arg followed by any array of struct tee_param + */ +#define TEE_IOC_INVOKE _IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 3, \ + struct tee_ioctl_buf_data) + +/** + * struct tee_ioctl_cancel_arg - Cancels an open session or invoke ioctl + * @cancel_id: [in] Cancellation id, a unique value to identify this request + * @session: [in] Session id, if the session is opened, else set to 0 + */ +struct tee_ioctl_cancel_arg { + __u32 cancel_id; + __u32 session; +}; + +/** + * TEE_IOC_CANCEL - Cancels an open session or invoke + */ +#define TEE_IOC_CANCEL _IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 4, \ + struct tee_ioctl_cancel_arg) + +/** + * struct tee_ioctl_close_session_arg - Closes an open session + * @session: [in] Session id + */ +struct tee_ioctl_close_session_arg { + __u32 session; +}; + +/** + * TEE_IOC_CLOSE_SESSION - Closes a session + */ +#define TEE_IOC_CLOSE_SESSION _IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 5, \ + struct tee_ioctl_close_session_arg) + +/** + * struct tee_iocl_supp_recv_arg - Receive a request for a supplicant function + * @func: [in] supplicant function + * @num_params [in/out] number of parameters following this struct + * + * @num_params is the number of params that tee-supplicant has room to + * receive when input, @num_params is the number of actual params + * tee-supplicant receives when output. 
+ */ +struct tee_iocl_supp_recv_arg { + __u32 func; + __u32 num_params; + /* num_params tells the actual number of element in params */ + struct tee_ioctl_param params[]; +}; + +/** + * TEE_IOC_SUPPL_RECV - Receive a request for a supplicant function + * + * Takes a struct tee_ioctl_buf_data which contains a struct + * tee_iocl_supp_recv_arg followed by any array of struct tee_param + */ +#define TEE_IOC_SUPPL_RECV _IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 6, \ + struct tee_ioctl_buf_data) + +/** + * struct tee_iocl_supp_send_arg - Send a response to a received request + * @ret: [out] return value + * @num_params [in] number of parameters following this struct + */ +struct tee_iocl_supp_send_arg { + __u32 ret; + __u32 num_params; + /* num_params tells the actual number of element in params */ + struct tee_ioctl_param params[]; +}; + +/** + * TEE_IOC_SUPPL_SEND - Send a response to a received request + * + * Takes a struct tee_ioctl_buf_data which contains a struct + * tee_iocl_supp_send_arg followed by any array of struct tee_param + */ +#define TEE_IOC_SUPPL_SEND _IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 7, \ + struct tee_ioctl_buf_data) + +/* + * Five syscalls are used when communicating with the TEE driver.
+ * open(): opens the device associated with the driver + * ioctl(): as described above operating on the file descriptor from open() + * close(): two cases + * - closes the device file descriptor + * - closes a file descriptor connected to allocated shared memory + * mmap(): maps shared memory into user space using information from struct + * tee_ioctl_shm_alloc_data + * munmap(): unmaps previously shared memory + */ + +#endif /*__TEE_H*/ -- cgit v1.2.3-71-gd317 From 49b499718fa1b0d639663cfd03085b9bfd23cdc8 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Wed, 8 Mar 2017 16:03:32 +0100 Subject: net: sched: make default fifo qdiscs appear in the dump The original reason [1] for having hidden qdiscs (potential scalability issues in qdisc_match_from_root() with single linked list in case of large amount of qdiscs) has been invalidated by 59cc1f61f0 ("net: sched: convert qdisc linked list to hashtable"). This allows us to bring more clarity and determinism into the dump by making default pfifo qdiscs visible. We're not turning this on by default though, as it was deemed [2] too intrusive / unnecessary change of default behavior towards userspace. Instead, TCA_DUMP_INVISIBLE netlink attribute is introduced, which allows applications to request complete qdisc hierarchy dump, including the ones that have always been implicit/invisible. Singleton noop_qdisc stays invisible, as teaching the whole infrastructure about singletons would require quite some surgery with very little gain (seeing no qdisc or seeing noop qdisc in the dump is probably setting the same user expectation). [1] http://lkml.kernel.org/r/1460732328.10638.74.camel@edumazet-glaptop3.roam.corp.google.com [2] http://lkml.kernel.org/r/20161021.105935.1907696543877061916.davem@davemloft.net Signed-off-by: Jiri Kosina Signed-off-by: David S.
Miller --- include/net/pkt_sched.h | 2 +- include/net/sch_generic.h | 1 + include/uapi/linux/rtnetlink.h | 1 + net/sched/sch_api.c | 42 ++++++++++++++++++++++++++++++------------ net/sched/sch_cbq.c | 5 +++++ net/sched/sch_drr.c | 2 ++ net/sched/sch_dsmark.c | 2 ++ net/sched/sch_generic.c | 2 +- net/sched/sch_hfsc.c | 4 ++++ net/sched/sch_htb.c | 2 ++ net/sched/sch_mq.c | 2 +- net/sched/sch_mqprio.c | 2 +- net/sched/sch_multiq.c | 2 ++ net/sched/sch_prio.c | 5 ++++- net/sched/sch_qfq.c | 2 ++ net/sched/sch_red.c | 2 ++ net/sched/sch_sfb.c | 2 ++ net/sched/sch_tbf.c | 2 ++ 18 files changed, 65 insertions(+), 17 deletions(-) (limited to 'include/uapi') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index f1b76b8e6d2d..bec46f63f10c 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -92,7 +92,7 @@ int unregister_qdisc(struct Qdisc_ops *qops); void qdisc_get_default(char *id, size_t len); int qdisc_set_default(const char *id); -void qdisc_hash_add(struct Qdisc *q); +void qdisc_hash_add(struct Qdisc *q, bool invisible); void qdisc_hash_del(struct Qdisc *q); struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle); struct Qdisc *qdisc_lookup_class(struct net_device *dev, u32 handle); diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index aeec4086afb2..65d502610314 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -66,6 +66,7 @@ struct Qdisc { #define TCQ_F_NOPARENT 0x40 /* root of its hierarchy : * qdisc_tree_decrease_qlen() should stop. 
*/ +#define TCQ_F_INVISIBLE 0x80 /* invisible by default in dump */ u32 limit; const struct Qdisc_ops *ops; struct qdisc_size_table __rcu *stab; diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 6546917d605a..75fcf5eff093 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -545,6 +545,7 @@ enum { TCA_STATS2, TCA_STAB, TCA_PAD, + TCA_DUMP_INVISIBLE, __TCA_MAX }; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index bcf49cd22786..62567bfe52c7 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -274,7 +274,7 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) return NULL; } -void qdisc_hash_add(struct Qdisc *q) +void qdisc_hash_add(struct Qdisc *q, bool invisible) { if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { struct Qdisc *root = qdisc_dev(q)->qdisc; @@ -282,6 +282,8 @@ void qdisc_hash_add(struct Qdisc *q) WARN_ON_ONCE(root == &noop_qdisc); ASSERT_RTNL(); hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle); + if (invisible) + q->flags |= TCQ_F_INVISIBLE; } } EXPORT_SYMBOL(qdisc_hash_add); @@ -1003,7 +1005,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev, goto err_out4; } - qdisc_hash_add(sch); + qdisc_hash_add(sch, false); return sch; } @@ -1401,9 +1403,14 @@ nla_put_failure: return -1; } -static bool tc_qdisc_dump_ignore(struct Qdisc *q) +static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible) { - return (q->flags & TCQ_F_BUILTIN) ? 
true : false; + if (q->flags & TCQ_F_BUILTIN) + return true; + if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible) + return true; + + return false; } static int qdisc_notify(struct net *net, struct sk_buff *oskb, @@ -1417,12 +1424,12 @@ static int qdisc_notify(struct net *net, struct sk_buff *oskb, if (!skb) return -ENOBUFS; - if (old && !tc_qdisc_dump_ignore(old)) { + if (old && !tc_qdisc_dump_ignore(old, false)) { if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0) goto err_out; } - if (new && !tc_qdisc_dump_ignore(new)) { + if (new && !tc_qdisc_dump_ignore(new, false)) { if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0) goto err_out; @@ -1439,7 +1446,8 @@ err_out: static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, struct netlink_callback *cb, - int *q_idx_p, int s_q_idx, bool recur) + int *q_idx_p, int s_q_idx, bool recur, + bool dump_invisible) { int ret = 0, q_idx = *q_idx_p; struct Qdisc *q; @@ -1452,7 +1460,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, if (q_idx < s_q_idx) { q_idx++; } else { - if (!tc_qdisc_dump_ignore(q) && + if (!tc_qdisc_dump_ignore(q, dump_invisible) && tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) @@ -1474,7 +1482,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, q_idx++; continue; } - if (!tc_qdisc_dump_ignore(q) && + if (!tc_qdisc_dump_ignore(q, dump_invisible) && tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) @@ -1496,12 +1504,21 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) int idx, q_idx; int s_idx, s_q_idx; struct net_device *dev; + const struct nlmsghdr *nlh = cb->nlh; + struct tcmsg *tcm = nlmsg_data(nlh); + struct nlattr *tca[TCA_MAX + 1]; + int err; s_idx = cb->args[0]; s_q_idx = q_idx = cb->args[1]; idx = 0; 
ASSERT_RTNL(); + + err = nlmsg_parse(nlh, sizeof(*tcm), tca, TCA_MAX, NULL); + if (err < 0) + return err; + for_each_netdev(net, dev) { struct netdev_queue *dev_queue; @@ -1512,13 +1529,14 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) q_idx = 0; if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx, - true) < 0) + true, tca[TCA_DUMP_INVISIBLE]) < 0) goto done; dev_queue = dev_ingress_queue(dev); if (dev_queue && tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, - &q_idx, s_q_idx, false) < 0) + &q_idx, s_q_idx, false, + tca[TCA_DUMP_INVISIBLE]) < 0) goto done; cont: @@ -1762,7 +1780,7 @@ static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb, { struct qdisc_dump_args arg; - if (tc_qdisc_dump_ignore(q) || + if (tc_qdisc_dump_ignore(q, false) || *t_p < s_t || !q->ops->cl_ops || (tcm->tcm_parent && TC_H_MAJ(tcm->tcm_parent) != q->handle)) { diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index d6ca18dc04c3..cf93e5ff3d63 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1161,6 +1161,8 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt) sch->handle); if (!q->link.q) q->link.q = &noop_qdisc; + else + qdisc_hash_add(q->link.q, true); q->link.priority = TC_CBQ_MAXPRIO - 1; q->link.priority2 = TC_CBQ_MAXPRIO - 1; @@ -1600,6 +1602,9 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid); if (!cl->q) cl->q = &noop_qdisc; + else + qdisc_hash_add(cl->q, true); + cl->common.classid = classid; cl->tparent = parent; cl->qdisc = sch; diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index bb4cbdf75004..9fe67e257dfa 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -117,6 +117,8 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, &pfifo_qdisc_ops, classid); if (cl->qdisc == NULL) cl->qdisc = &noop_qdisc; + else + qdisc_hash_add(cl->qdisc, true); if 
(tca[TCA_RATE]) { err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est, diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 802ac7c2e5e8..1b98cb2160ff 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -368,6 +368,8 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt) p->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, sch->handle); if (p->q == NULL) p->q = &noop_qdisc; + else + qdisc_hash_add(p->q, true); pr_debug("%s: qdisc %p\n", __func__, p->q); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index b052b27a984e..3e64d23e098c 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -795,7 +795,7 @@ static void attach_default_qdiscs(struct net_device *dev) } #ifdef CONFIG_NET_SCHED if (dev->qdisc) - qdisc_hash_add(dev->qdisc); + qdisc_hash_add(dev->qdisc, false); #endif } diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 3ffaa6fb0990..0198c6cdda49 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1066,6 +1066,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, &pfifo_qdisc_ops, classid); if (cl->qdisc == NULL) cl->qdisc = &noop_qdisc; + else + qdisc_hash_add(cl->qdisc, true); INIT_LIST_HEAD(&cl->children); cl->vt_tree = RB_ROOT; cl->cf_tree = RB_ROOT; @@ -1425,6 +1427,8 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt) sch->handle); if (q->root.qdisc == NULL) q->root.qdisc = &noop_qdisc; + else + qdisc_hash_add(q->root.qdisc, true); INIT_LIST_HEAD(&q->root.children); q->root.vt_tree = RB_ROOT; q->root.cf_tree = RB_ROOT; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 4cd5fb134bc9..95867033542e 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1460,6 +1460,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, qdisc_class_hash_insert(&q->clhash, &cl->common); if (parent) parent->children++; + if (cl->un.leaf.q != &noop_qdisc) + qdisc_hash_add(cl->un.leaf.q, true); } else { if (tca[TCA_RATE]) 
{ err = gen_replace_estimator(&cl->bstats, NULL, diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index 20b7f1646f69..cadfdd4f1e52 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -84,7 +84,7 @@ static void mq_attach(struct Qdisc *sch) qdisc_destroy(old); #ifdef CONFIG_NET_SCHED if (ntx < dev->real_num_tx_queues) - qdisc_hash_add(qdisc); + qdisc_hash_add(qdisc, false); #endif } diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 922683418e53..b851e209da4d 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -175,7 +175,7 @@ static void mqprio_attach(struct Qdisc *sch) if (old) qdisc_destroy(old); if (ntx < dev->real_num_tx_queues) - qdisc_hash_add(qdisc); + qdisc_hash_add(qdisc, false); } kfree(priv->qdiscs); priv->qdiscs = NULL; diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index e7839a0d0eaa..43a3a10b3c81 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -217,6 +217,8 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt) sch_tree_lock(sch); old = q->queues[i]; q->queues[i] = child; + if (child != &noop_qdisc) + qdisc_hash_add(child, true); if (old != &noop_qdisc) { qdisc_tree_reduce_backlog(old, diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index d4d7db267b6e..92c2e6d448d7 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -192,8 +192,11 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt) qdisc_destroy(child); } - for (i = oldbands; i < q->bands; i++) + for (i = oldbands; i < q->bands; i++) { q->queues[i] = queues[i]; + if (q->queues[i] != &noop_qdisc) + qdisc_hash_add(q->queues[i], true); + } sch_tree_unlock(sch); return 0; diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index f9e712ce2d15..6c85f3e9239b 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -494,6 +494,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, goto destroy_class; } + if (cl->qdisc != &noop_qdisc) + qdisc_hash_add(cl->qdisc, 
true); sch_tree_lock(sch); qdisc_class_hash_insert(&q->clhash, &cl->common); sch_tree_unlock(sch); diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 249b2a18acbd..799ea6dd69b2 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -191,6 +191,8 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt) return PTR_ERR(child); } + if (child != &noop_qdisc) + qdisc_hash_add(child, true); sch_tree_lock(sch); q->flags = ctl->flags; q->limit = ctl->limit; diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index fe6963d21519..ae862f172c94 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -513,6 +513,8 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt) if (IS_ERR(child)) return PTR_ERR(child); + if (child != &noop_qdisc) + qdisc_hash_add(child, true); sch_tree_lock(sch); qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen, diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 303355c449ab..40c29a801391 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -396,6 +396,8 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) q->qdisc->qstats.backlog); qdisc_destroy(q->qdisc); q->qdisc = child; + if (child != &noop_qdisc); + qdisc_hash_add(child, true); } q->limit = qopt->limit; if (tb[TCA_TBF_PBURST]) -- cgit v1.2.3-71-gd317 From c95129d127c6d3d9fca189c6f94c539a7f086b1a Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 10 Mar 2017 12:11:06 +0800 Subject: sctp: add support for generating assoc reset event notification This patch is to add Association Reset Event described in rfc6525 section 6.1.2. Signed-off-by: Xin Long Signed-off-by: David S. 
Miller --- include/net/sctp/ulpevent.h | 4 ++++ include/uapi/linux/sctp.h | 15 +++++++++++++++ net/sctp/ulpevent.c | 28 ++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+) (limited to 'include/uapi') diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h index 324b5965fc4d..2ab7ed44de52 100644 --- a/include/net/sctp/ulpevent.h +++ b/include/net/sctp/ulpevent.h @@ -132,6 +132,10 @@ struct sctp_ulpevent *sctp_ulpevent_make_stream_reset_event( const struct sctp_association *asoc, __u16 flags, __u16 stream_num, __u16 *stream_list, gfp_t gfp); +struct sctp_ulpevent *sctp_ulpevent_make_assoc_reset_event( + const struct sctp_association *asoc, __u16 flags, + __u32 local_tsn, __u32 remote_tsn, gfp_t gfp); + void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event, struct msghdr *); void sctp_ulpevent_read_rcvinfo(const struct sctp_ulpevent *event, diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index d3ae381fcf33..77358297c2f9 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -502,6 +502,17 @@ struct sctp_stream_reset_event { __u16 strreset_stream_list[]; }; +#define SCTP_ASSOC_RESET_DENIED 0x0004 +#define SCTP_ASSOC_RESET_FAILED 0x0008 +struct sctp_assoc_reset_event { + __u16 assocreset_type; + __u16 assocreset_flags; + __u32 assocreset_length; + sctp_assoc_t assocreset_assoc_id; + __u32 assocreset_local_tsn; + __u32 assocreset_remote_tsn; +}; + /* * Described in Section 7.3 * Ancillary Data and Notification Interest Options @@ -518,6 +529,7 @@ struct sctp_event_subscribe { __u8 sctp_authentication_event; __u8 sctp_sender_dry_event; __u8 sctp_stream_reset_event; + __u8 sctp_assoc_reset_event; }; /* @@ -543,6 +555,7 @@ union sctp_notification { struct sctp_authkey_event sn_authkey_event; struct sctp_sender_dry_event sn_sender_dry_event; struct sctp_stream_reset_event sn_strreset_event; + struct sctp_assoc_reset_event sn_assocreset_event; }; /* Section 5.3.1 @@ -572,6 +585,8 @@ enum 
sctp_sn_type { #define SCTP_SENDER_DRY_EVENT SCTP_SENDER_DRY_EVENT SCTP_STREAM_RESET_EVENT, #define SCTP_STREAM_RESET_EVENT SCTP_STREAM_RESET_EVENT + SCTP_ASSOC_RESET_EVENT, +#define SCTP_ASSOC_RESET_EVENT SCTP_ASSOC_RESET_EVENT }; /* Notification error codes used to fill up the error fields in some diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index c8881bc542a0..420d7f35256a 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -883,6 +883,34 @@ struct sctp_ulpevent *sctp_ulpevent_make_stream_reset_event( return event; } +struct sctp_ulpevent *sctp_ulpevent_make_assoc_reset_event( + const struct sctp_association *asoc, __u16 flags, __u32 local_tsn, + __u32 remote_tsn, gfp_t gfp) +{ + struct sctp_assoc_reset_event *areset; + struct sctp_ulpevent *event; + struct sk_buff *skb; + + event = sctp_ulpevent_new(sizeof(struct sctp_assoc_reset_event), + MSG_NOTIFICATION, gfp); + if (!event) + return NULL; + + skb = sctp_event2skb(event); + areset = (struct sctp_assoc_reset_event *) + skb_put(skb, sizeof(struct sctp_assoc_reset_event)); + + areset->assocreset_type = SCTP_ASSOC_RESET_EVENT; + areset->assocreset_flags = flags; + areset->assocreset_length = sizeof(struct sctp_assoc_reset_event); + sctp_ulpevent_set_owner(event, asoc); + areset->assocreset_assoc_id = sctp_assoc2id(asoc); + areset->assocreset_local_tsn = local_tsn; + areset->assocreset_remote_tsn = remote_tsn; + + return event; +} + /* Return the notification type, assuming this is a notification * event. */ -- cgit v1.2.3-71-gd317 From b444153fb5a647448c2080ad28656ad183cae4fc Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 10 Mar 2017 12:11:08 +0800 Subject: sctp: add support for generating add stream change event notification This patch is to add Stream Change Event described in rfc6525 section 6.1.3. Signed-off-by: Xin Long Signed-off-by: David S. 
Miller --- include/net/sctp/ulpevent.h | 4 ++++ include/uapi/linux/sctp.h | 15 +++++++++++++++ net/sctp/ulpevent.c | 28 ++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+) (limited to 'include/uapi') diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h index 2ab7ed44de52..1060494ac230 100644 --- a/include/net/sctp/ulpevent.h +++ b/include/net/sctp/ulpevent.h @@ -136,6 +136,10 @@ struct sctp_ulpevent *sctp_ulpevent_make_assoc_reset_event( const struct sctp_association *asoc, __u16 flags, __u32 local_tsn, __u32 remote_tsn, gfp_t gfp); +struct sctp_ulpevent *sctp_ulpevent_make_stream_change_event( + const struct sctp_association *asoc, __u16 flags, + __u32 strchange_instrms, __u32 strchange_outstrms, gfp_t gfp); + void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event, struct msghdr *); void sctp_ulpevent_read_rcvinfo(const struct sctp_ulpevent *event, diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index 77358297c2f9..fd652e6501b4 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -513,6 +513,17 @@ struct sctp_assoc_reset_event { __u32 assocreset_remote_tsn; }; +#define SCTP_ASSOC_CHANGE_DENIED 0x0004 +#define SCTP_ASSOC_CHANGE_FAILED 0x0008 +struct sctp_stream_change_event { + __u16 strchange_type; + __u16 strchange_flags; + __u32 strchange_length; + sctp_assoc_t strchange_assoc_id; + __u16 strchange_instrms; + __u16 strchange_outstrms; +}; + /* * Described in Section 7.3 * Ancillary Data and Notification Interest Options @@ -530,6 +541,7 @@ struct sctp_event_subscribe { __u8 sctp_sender_dry_event; __u8 sctp_stream_reset_event; __u8 sctp_assoc_reset_event; + __u8 sctp_stream_change_event; }; /* @@ -556,6 +568,7 @@ union sctp_notification { struct sctp_sender_dry_event sn_sender_dry_event; struct sctp_stream_reset_event sn_strreset_event; struct sctp_assoc_reset_event sn_assocreset_event; + struct sctp_stream_change_event sn_strchange_event; }; /* Section 5.3.1 @@ -587,6 +600,8 @@ 
enum sctp_sn_type { #define SCTP_STREAM_RESET_EVENT SCTP_STREAM_RESET_EVENT SCTP_ASSOC_RESET_EVENT, #define SCTP_ASSOC_RESET_EVENT SCTP_ASSOC_RESET_EVENT + SCTP_STREAM_CHANGE_EVENT, +#define SCTP_STREAM_CHANGE_EVENT SCTP_STREAM_CHANGE_EVENT }; /* Notification error codes used to fill up the error fields in some diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index 420d7f35256a..ec2b3e013c2f 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -911,6 +911,34 @@ struct sctp_ulpevent *sctp_ulpevent_make_assoc_reset_event( return event; } +struct sctp_ulpevent *sctp_ulpevent_make_stream_change_event( + const struct sctp_association *asoc, __u16 flags, + __u32 strchange_instrms, __u32 strchange_outstrms, gfp_t gfp) +{ + struct sctp_stream_change_event *schange; + struct sctp_ulpevent *event; + struct sk_buff *skb; + + event = sctp_ulpevent_new(sizeof(struct sctp_stream_change_event), + MSG_NOTIFICATION, gfp); + if (!event) + return NULL; + + skb = sctp_event2skb(event); + schange = (struct sctp_stream_change_event *) + skb_put(skb, sizeof(struct sctp_stream_change_event)); + + schange->strchange_type = SCTP_STREAM_CHANGE_EVENT; + schange->strchange_flags = flags; + schange->strchange_length = sizeof(struct sctp_stream_change_event); + sctp_ulpevent_set_owner(event, asoc); + schange->strchange_assoc_id = sctp_assoc2id(asoc); + schange->strchange_instrms = strchange_instrms; + schange->strchange_outstrms = strchange_outstrms; + + return event; +} + /* Return the notification type, assuming this is a notification * event. */ -- cgit v1.2.3-71-gd317 From c0d8bab6ae518cedfb5246e99ece43fe51d79b56 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 10 Mar 2017 12:11:12 +0800 Subject: sctp: add get and set sockopt for reconf_enable This patchset is to add SCTP_RECONFIG_SUPPORTED sockopt, it would set and get asoc reconf_enable value when asoc_id is set, or it would set and get ep reconf_enable value if asoc_id is 0.
It is also to add sysctl interface for users to set the default value for reconf_enable. After this patch, stream reconf will work. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/uapi/linux/sctp.h | 1 + net/sctp/socket.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++ net/sctp/sysctl.c | 7 ++++ 3 files changed, 89 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index fd652e6501b4..7212870ef5d7 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -115,6 +115,7 @@ typedef __s32 sctp_assoc_t; #define SCTP_PR_SUPPORTED 113 #define SCTP_DEFAULT_PRINFO 114 #define SCTP_PR_ASSOC_STATUS 115 +#define SCTP_RECONFIG_SUPPORTED 117 #define SCTP_ENABLE_STREAM_RESET 118 #define SCTP_RESET_STREAMS 119 #define SCTP_RESET_ASSOC 120 diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 6f0a9be50f50..24e28cfb542b 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3758,6 +3758,39 @@ out: return retval; } +static int sctp_setsockopt_reconfig_supported(struct sock *sk, + char __user *optval, + unsigned int optlen) +{ + struct sctp_assoc_value params; + struct sctp_association *asoc; + int retval = -EINVAL; + + if (optlen != sizeof(params)) + goto out; + + if (copy_from_user(&params, optval, optlen)) { + retval = -EFAULT; + goto out; + } + + asoc = sctp_id2assoc(sk, params.assoc_id); + if (asoc) { + asoc->reconf_enable = !!params.assoc_value; + } else if (!params.assoc_id) { + struct sctp_sock *sp = sctp_sk(sk); + + sp->ep->reconf_enable = !!params.assoc_value; + } else { + goto out; + } + + retval = 0; + +out: + return retval; +} + static int sctp_setsockopt_enable_strreset(struct sock *sk, char __user *optval, unsigned int optlen) @@ -4038,6 +4071,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, case SCTP_DEFAULT_PRINFO: retval = sctp_setsockopt_default_prinfo(sk, optval, optlen); break; + case SCTP_RECONFIG_SUPPORTED: + retval =
sctp_setsockopt_reconfig_supported(sk, optval, optlen); + break; case SCTP_ENABLE_STREAM_RESET: retval = sctp_setsockopt_enable_strreset(sk, optval, optlen); break; @@ -6540,6 +6576,47 @@ out: return retval; } +static int sctp_getsockopt_reconfig_supported(struct sock *sk, int len, + char __user *optval, + int __user *optlen) +{ + struct sctp_assoc_value params; + struct sctp_association *asoc; + int retval = -EFAULT; + + if (len < sizeof(params)) { + retval = -EINVAL; + goto out; + } + + len = sizeof(params); + if (copy_from_user(&params, optval, len)) + goto out; + + asoc = sctp_id2assoc(sk, params.assoc_id); + if (asoc) { + params.assoc_value = asoc->reconf_enable; + } else if (!params.assoc_id) { + struct sctp_sock *sp = sctp_sk(sk); + + params.assoc_value = sp->ep->reconf_enable; + } else { + retval = -EINVAL; + goto out; + } + + if (put_user(len, optlen)) + goto out; + + if (copy_to_user(optval, &params, len)) + goto out; + + retval = 0; + +out: + return retval; +} + static int sctp_getsockopt_enable_strreset(struct sock *sk, int len, char __user *optval, int __user *optlen) @@ -6748,6 +6825,10 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname, retval = sctp_getsockopt_pr_assocstatus(sk, len, optval, optlen); break; + case SCTP_RECONFIG_SUPPORTED: + retval = sctp_getsockopt_reconfig_supported(sk, len, optval, + optlen); + break; case SCTP_ENABLE_STREAM_RESET: retval = sctp_getsockopt_enable_strreset(sk, len, optval, optlen); diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index daf8554fd42a..0e732f68c2bf 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -274,6 +274,13 @@ static struct ctl_table sctp_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "reconf_enable", + .data = &init_net.sctp.reconf_enable, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { .procname = "auth_enable", .data = &init_net.sctp.auth_enable, -- cgit v1.2.3-71-gd317 From
1a64edf54f55d7956cf5a0d95898bc1f84f9b818 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 8 Mar 2017 16:48:44 +0100 Subject: netfilter: nft_ct: add helper set support this allows to assign connection tracking helpers to connections via nft objref infrastructure. The idea is to first specify a helper object: table ip filter { ct helper some-name { type "ftp" protocol tcp l3proto ip } } and then assign it via nft add ... ct helper set "some-name" helper assignment works for new conntracks only as we cannot expand the conntrack extension area once it has been committed to the main conntrack table. ipv4 and ipv6 protocols are tracked stored separately so we can also handle families that observe both ipv4 and ipv6 traffic. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 12 ++- net/netfilter/nft_ct.c | 171 +++++++++++++++++++++++++++++++ 2 files changed, 182 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 4f7d75682c59..34c8d08b687a 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1259,10 +1259,20 @@ enum nft_fib_flags { NFTA_FIB_F_OIF = 1 << 4, /* restrict to oif */ }; +enum nft_ct_helper_attributes { + NFTA_CT_HELPER_UNSPEC, + NFTA_CT_HELPER_NAME, + NFTA_CT_HELPER_L3PROTO, + NFTA_CT_HELPER_L4PROTO, + __NFTA_CT_HELPER_MAX, +}; +#define NFTA_CT_HELPER_MAX (__NFTA_CT_HELPER_MAX - 1) + #define NFT_OBJECT_UNSPEC 0 #define NFT_OBJECT_COUNTER 1 #define NFT_OBJECT_QUOTA 2 -#define __NFT_OBJECT_MAX 3 +#define NFT_OBJECT_CT_HELPER 3 +#define __NFT_OBJECT_MAX 4 #define NFT_OBJECT_MAX (__NFT_OBJECT_MAX - 1) /** diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index bf548a7a71ec..4144ae845bdd 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -32,6 +32,12 @@ struct nft_ct { }; }; +struct nft_ct_helper_obj { + struct
nf_conntrack_helper *helper4; + struct nf_conntrack_helper *helper6; + u8 l4proto; +}; + #ifdef CONFIG_NF_CONNTRACK_ZONES static DEFINE_PER_CPU(struct nf_conn *, nft_ct_pcpu_template); static unsigned int nft_ct_pcpu_template_refcnt __read_mostly; @@ -730,6 +736,162 @@ static struct nft_expr_type nft_notrack_type __read_mostly = { .owner = THIS_MODULE, }; +static int nft_ct_helper_obj_init(const struct nft_ctx *ctx, + const struct nlattr * const tb[], + struct nft_object *obj) +{ + struct nft_ct_helper_obj *priv = nft_obj_data(obj); + struct nf_conntrack_helper *help4, *help6; + char name[NF_CT_HELPER_NAME_LEN]; + int family = ctx->afi->family; + + if (!tb[NFTA_CT_HELPER_NAME] || !tb[NFTA_CT_HELPER_L4PROTO]) + return -EINVAL; + + priv->l4proto = nla_get_u8(tb[NFTA_CT_HELPER_L4PROTO]); + if (!priv->l4proto) + return -ENOENT; + + nla_strlcpy(name, tb[NFTA_CT_HELPER_NAME], sizeof(name)); + + if (tb[NFTA_CT_HELPER_L3PROTO]) + family = ntohs(nla_get_be16(tb[NFTA_CT_HELPER_L3PROTO])); + + help4 = NULL; + help6 = NULL; + + switch (family) { + case NFPROTO_IPV4: + if (ctx->afi->family == NFPROTO_IPV6) + return -EINVAL; + + help4 = nf_conntrack_helper_try_module_get(name, family, + priv->l4proto); + break; + case NFPROTO_IPV6: + if (ctx->afi->family == NFPROTO_IPV4) + return -EINVAL; + + help6 = nf_conntrack_helper_try_module_get(name, family, + priv->l4proto); + break; + case NFPROTO_NETDEV: /* fallthrough */ + case NFPROTO_BRIDGE: /* same */ + case NFPROTO_INET: + help4 = nf_conntrack_helper_try_module_get(name, NFPROTO_IPV4, + priv->l4proto); + help6 = nf_conntrack_helper_try_module_get(name, NFPROTO_IPV6, + priv->l4proto); + break; + default: + return -EAFNOSUPPORT; + } + + /* && is intentional; only error if INET found neither ipv4 or ipv6 */ + if (!help4 && !help6) + return -ENOENT; + + priv->helper4 = help4; + priv->helper6 = help6; + + return 0; +} + +static void nft_ct_helper_obj_destroy(struct nft_object *obj) +{ + struct nft_ct_helper_obj *priv = 
nft_obj_data(obj); + + if (priv->helper4) + module_put(priv->helper4->me); + if (priv->helper6) + module_put(priv->helper6->me); +} + +static void nft_ct_helper_obj_eval(struct nft_object *obj, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_ct_helper_obj *priv = nft_obj_data(obj); + struct nf_conn *ct = (struct nf_conn *)skb_nfct(pkt->skb); + struct nf_conntrack_helper *to_assign = NULL; + struct nf_conn_help *help; + + if (!ct || + nf_ct_is_confirmed(ct) || + nf_ct_is_template(ct) || + priv->l4proto != nf_ct_protonum(ct)) + return; + + switch (nf_ct_l3num(ct)) { + case NFPROTO_IPV4: + to_assign = priv->helper4; + break; + case NFPROTO_IPV6: + to_assign = priv->helper6; + break; + default: + WARN_ON_ONCE(1); + return; + } + + if (!to_assign) + return; + + if (test_bit(IPS_HELPER_BIT, &ct->status)) + return; + + help = nf_ct_helper_ext_add(ct, to_assign, GFP_ATOMIC); + if (help) { + rcu_assign_pointer(help->helper, to_assign); + set_bit(IPS_HELPER_BIT, &ct->status); + } +} + +static int nft_ct_helper_obj_dump(struct sk_buff *skb, + struct nft_object *obj, bool reset) +{ + const struct nft_ct_helper_obj *priv = nft_obj_data(obj); + const struct nf_conntrack_helper *helper = priv->helper4; + u16 family; + + if (nla_put_string(skb, NFTA_CT_HELPER_NAME, helper->name)) + return -1; + + if (nla_put_u8(skb, NFTA_CT_HELPER_L4PROTO, priv->l4proto)) + return -1; + + if (priv->helper4 && priv->helper6) + family = NFPROTO_INET; + else if (priv->helper6) + family = NFPROTO_IPV6; + else + family = NFPROTO_IPV4; + + if (nla_put_be16(skb, NFTA_CT_HELPER_L3PROTO, htons(family))) + return -1; + + return 0; +} + +static const struct nla_policy nft_ct_helper_policy[NFTA_CT_HELPER_MAX + 1] = { + [NFTA_CT_HELPER_NAME] = { .type = NLA_STRING, + .len = NF_CT_HELPER_NAME_LEN - 1 }, + [NFTA_CT_HELPER_L3PROTO] = { .type = NLA_U16 }, + [NFTA_CT_HELPER_L4PROTO] = { .type = NLA_U8 }, +}; + +static struct nft_object_type nft_ct_helper_obj __read_mostly = { + .type 
= NFT_OBJECT_CT_HELPER, + .size = sizeof(struct nft_ct_helper_obj), + .maxattr = NFTA_CT_HELPER_MAX, + .policy = nft_ct_helper_policy, + .eval = nft_ct_helper_obj_eval, + .init = nft_ct_helper_obj_init, + .destroy = nft_ct_helper_obj_destroy, + .dump = nft_ct_helper_obj_dump, + .owner = THIS_MODULE, +}; + static int __init nft_ct_module_init(void) { int err; @@ -744,7 +906,14 @@ static int __init nft_ct_module_init(void) if (err < 0) goto err1; + err = nft_register_obj(&nft_ct_helper_obj); + if (err < 0) + goto err2; + return 0; + +err2: + nft_unregister_expr(&nft_notrack_type); err1: nft_unregister_expr(&nft_ct_type); return err; @@ -752,6 +921,7 @@ err1: static void __exit nft_ct_module_exit(void) { + nft_unregister_obj(&nft_ct_helper_obj); nft_unregister_expr(&nft_notrack_type); nft_unregister_expr(&nft_ct_type); } @@ -763,3 +933,4 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy "); MODULE_ALIAS_NFT_EXPR("ct"); MODULE_ALIAS_NFT_EXPR("notrack"); +MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_CT_HELPER); -- cgit v1.2.3-71-gd317 From 055c4b34b94f696d9bd9aad53a11378a0fc409c9 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Fri, 10 Mar 2017 18:08:02 +0100 Subject: netfilter: nft_fib: Support existence check Instead of the actual interface index or name, set destination register to just 1 or 0 depending on whether the lookup succeeded or not if NFTA_FIB_F_PRESENT was set in userspace. 
Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nft_fib.h | 2 +- include/uapi/linux/netfilter/nf_tables.h | 1 + net/ipv4/netfilter/nft_fib_ipv4.c | 4 ++-- net/ipv6/netfilter/nft_fib_ipv6.c | 2 +- net/netfilter/nft_fib.c | 14 +++++++++----- 5 files changed, 14 insertions(+), 9 deletions(-) (limited to 'include/uapi') diff --git a/include/net/netfilter/nft_fib.h b/include/net/netfilter/nft_fib.h index 5ceb2205e4e3..381af9469e6a 100644 --- a/include/net/netfilter/nft_fib.h +++ b/include/net/netfilter/nft_fib.h @@ -32,6 +32,6 @@ void nft_fib6_eval_type(const struct nft_expr *expr, struct nft_regs *regs, void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt); -void nft_fib_store_result(void *reg, enum nft_fib_result r, +void nft_fib_store_result(void *reg, const struct nft_fib *priv, const struct nft_pktinfo *pkt, int index); #endif diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 34c8d08b687a..8f3842690d17 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1257,6 +1257,7 @@ enum nft_fib_flags { NFTA_FIB_F_MARK = 1 << 2, /* use skb->mark */ NFTA_FIB_F_IIF = 1 << 3, /* restrict to iif */ NFTA_FIB_F_OIF = 1 << 4, /* restrict to oif */ + NFTA_FIB_F_PRESENT = 1 << 5, /* check existence only */ }; enum nft_ct_helper_attributes { diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c index 2981291910dd..f4e4462cb5bb 100644 --- a/net/ipv4/netfilter/nft_fib_ipv4.c +++ b/net/ipv4/netfilter/nft_fib_ipv4.c @@ -90,7 +90,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, if (nft_hook(pkt) == NF_INET_PRE_ROUTING && nft_fib_is_loopback(pkt->skb, nft_in(pkt))) { - nft_fib_store_result(dest, priv->result, pkt, + nft_fib_store_result(dest, priv, pkt, nft_in(pkt)->ifindex); return; } @@ -99,7 +99,7 @@ void nft_fib4_eval(const struct nft_expr 
*expr, struct nft_regs *regs, if (ipv4_is_zeronet(iph->saddr)) { if (ipv4_is_lbcast(iph->daddr) || ipv4_is_local_multicast(iph->daddr)) { - nft_fib_store_result(dest, priv->result, pkt, + nft_fib_store_result(dest, priv, pkt, get_ifindex(pkt->skb->dev)); return; } diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index 765facf03d45..e8d88d82636b 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -159,7 +159,7 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, if (nft_hook(pkt) == NF_INET_PRE_ROUTING && nft_fib_is_loopback(pkt->skb, nft_in(pkt))) { - nft_fib_store_result(dest, priv->result, pkt, + nft_fib_store_result(dest, priv, pkt, nft_in(pkt)->ifindex); return; } diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c index fd0b19303b0d..21df8cccea65 100644 --- a/net/netfilter/nft_fib.c +++ b/net/netfilter/nft_fib.c @@ -24,7 +24,8 @@ const struct nla_policy nft_fib_policy[NFTA_FIB_MAX + 1] = { EXPORT_SYMBOL(nft_fib_policy); #define NFTA_FIB_F_ALL (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR | \ - NFTA_FIB_F_MARK | NFTA_FIB_F_IIF | NFTA_FIB_F_OIF) + NFTA_FIB_F_MARK | NFTA_FIB_F_IIF | NFTA_FIB_F_OIF | \ + NFTA_FIB_F_PRESENT) int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nft_data **data) @@ -133,19 +134,22 @@ int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr) } EXPORT_SYMBOL_GPL(nft_fib_dump); -void nft_fib_store_result(void *reg, enum nft_fib_result r, +void nft_fib_store_result(void *reg, const struct nft_fib *priv, const struct nft_pktinfo *pkt, int index) { struct net_device *dev; u32 *dreg = reg; - switch (r) { + switch (priv->result) { case NFT_FIB_RESULT_OIF: - *dreg = index; + *dreg = (priv->flags & NFTA_FIB_F_PRESENT) ? !!index : index; break; case NFT_FIB_RESULT_OIFNAME: dev = dev_get_by_index_rcu(nft_net(pkt), index); - strncpy(reg, dev ? 
dev->name : "", IFNAMSIZ); + if (priv->flags & NFTA_FIB_F_PRESENT) + *dreg = !!dev; + else + strncpy(reg, dev ? dev->name : "", IFNAMSIZ); break; default: WARN_ON_ONCE(1); -- cgit v1.2.3-71-gd317 From e422267322cd319e2695a535e47c5b1feeac45eb Mon Sep 17 00:00:00 2001 From: Hari Bathini Date: Wed, 8 Mar 2017 02:11:36 +0530 Subject: perf: Add PERF_RECORD_NAMESPACES to include namespaces related info With the advent of container technologies like docker, that depend on namespaces for isolation, there is a need for tracing support for namespaces. This patch introduces a new PERF_RECORD_NAMESPACES event for recording namespaces related info. By recording info for every namespace, it is left to userspace to take a call on the definition of a container and trace containers by updating perf tool accordingly. Each namespace has a combination of device and inode numbers. Though every namespace has the same device number currently, that may change in future to avoid the need for a namespace of namespaces. Considering such a possibility, record both device and inode numbers separately for each namespace.
Signed-off-by: Hari Bathini Acked-by: Jiri Olsa Acked-by: Peter Zijlstra Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Ananth N Mavinakayanahalli Cc: Aravinda Prasad Cc: Brendan Gregg Cc: Daniel Borkmann Cc: Eric Biederman Cc: Sargun Dhillon Cc: Steven Rostedt Link: http://lkml.kernel.org/r/148891929686.25309.2827618988917007768.stgit@hbathini.in.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 2 + include/uapi/linux/perf_event.h | 32 ++++++++- kernel/events/core.c | 139 ++++++++++++++++++++++++++++++++++++++++ kernel/fork.c | 2 + kernel/nsproxy.c | 3 + 5 files changed, 177 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 000fdb211c7d..f19a82362851 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1112,6 +1112,7 @@ extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks extern void perf_event_exec(void); extern void perf_event_comm(struct task_struct *tsk, bool exec); +extern void perf_event_namespaces(struct task_struct *tsk); extern void perf_event_fork(struct task_struct *tsk); /* Callchains */ @@ -1315,6 +1316,7 @@ static inline int perf_unregister_guest_info_callbacks static inline void perf_event_mmap(struct vm_area_struct *vma) { } static inline void perf_event_exec(void) { } static inline void perf_event_comm(struct task_struct *tsk, bool exec) { } +static inline void perf_event_namespaces(struct task_struct *tsk) { } static inline void perf_event_fork(struct task_struct *tsk) { } static inline void perf_event_init(void) { } static inline int perf_swevent_get_recursion_context(void) { return -1; } diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index c66a485a24ac..bec0aad0e15c 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -344,7 +344,8 @@ struct perf_event_attr { use_clockid : 1, /* use @clockid for time fields */ 
context_switch : 1, /* context switch data */ write_backward : 1, /* Write ring buffer from end to beginning */ - __reserved_1 : 36; + namespaces : 1, /* include namespaces data */ + __reserved_1 : 35; union { __u32 wakeup_events; /* wakeup every n events */ @@ -610,6 +611,23 @@ struct perf_event_header { __u16 size; }; +struct perf_ns_link_info { + __u64 dev; + __u64 ino; +}; + +enum { + NET_NS_INDEX = 0, + UTS_NS_INDEX = 1, + IPC_NS_INDEX = 2, + PID_NS_INDEX = 3, + USER_NS_INDEX = 4, + MNT_NS_INDEX = 5, + CGROUP_NS_INDEX = 6, + + NR_NAMESPACES, /* number of available namespaces */ +}; + enum perf_event_type { /* @@ -862,6 +880,18 @@ enum perf_event_type { */ PERF_RECORD_SWITCH_CPU_WIDE = 15, + /* + * struct { + * struct perf_event_header header; + * u32 pid; + * u32 tid; + * u64 nr_namespaces; + * { u64 dev, inode; } [nr_namespaces]; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_NAMESPACES = 16, + PERF_RECORD_MAX, /* non-ABI */ }; diff --git a/kernel/events/core.c b/kernel/events/core.c index 6f41548f2e32..16c877a121c8 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -48,6 +48,8 @@ #include #include #include +#include +#include #include "internal.h" @@ -379,6 +381,7 @@ static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events); static atomic_t nr_mmap_events __read_mostly; static atomic_t nr_comm_events __read_mostly; +static atomic_t nr_namespaces_events __read_mostly; static atomic_t nr_task_events __read_mostly; static atomic_t nr_freq_events __read_mostly; static atomic_t nr_switch_events __read_mostly; @@ -3991,6 +3994,8 @@ static void unaccount_event(struct perf_event *event) atomic_dec(&nr_mmap_events); if (event->attr.comm) atomic_dec(&nr_comm_events); + if (event->attr.namespaces) + atomic_dec(&nr_namespaces_events); if (event->attr.task) atomic_dec(&nr_task_events); if (event->attr.freq) @@ -6491,6 +6496,7 @@ static void perf_event_task(struct task_struct *task, void perf_event_fork(struct task_struct *task) { 
perf_event_task(task, NULL, 1); + perf_event_namespaces(task); } /* @@ -6592,6 +6598,132 @@ void perf_event_comm(struct task_struct *task, bool exec) perf_event_comm_event(&comm_event); } +/* + * namespaces tracking + */ + +struct perf_namespaces_event { + struct task_struct *task; + + struct { + struct perf_event_header header; + + u32 pid; + u32 tid; + u64 nr_namespaces; + struct perf_ns_link_info link_info[NR_NAMESPACES]; + } event_id; +}; + +static int perf_event_namespaces_match(struct perf_event *event) +{ + return event->attr.namespaces; +} + +static void perf_event_namespaces_output(struct perf_event *event, + void *data) +{ + struct perf_namespaces_event *namespaces_event = data; + struct perf_output_handle handle; + struct perf_sample_data sample; + int ret; + + if (!perf_event_namespaces_match(event)) + return; + + perf_event_header__init_id(&namespaces_event->event_id.header, + &sample, event); + ret = perf_output_begin(&handle, event, + namespaces_event->event_id.header.size); + if (ret) + return; + + namespaces_event->event_id.pid = perf_event_pid(event, + namespaces_event->task); + namespaces_event->event_id.tid = perf_event_tid(event, + namespaces_event->task); + + perf_output_put(&handle, namespaces_event->event_id); + + perf_event__output_id_sample(event, &handle, &sample); + + perf_output_end(&handle); +} + +static void perf_fill_ns_link_info(struct perf_ns_link_info *ns_link_info, + struct task_struct *task, + const struct proc_ns_operations *ns_ops) +{ + struct path ns_path; + struct inode *ns_inode; + void *error; + + error = ns_get_path(&ns_path, task, ns_ops); + if (!error) { + ns_inode = ns_path.dentry->d_inode; + ns_link_info->dev = new_encode_dev(ns_inode->i_sb->s_dev); + ns_link_info->ino = ns_inode->i_ino; + } +} + +void perf_event_namespaces(struct task_struct *task) +{ + struct perf_namespaces_event namespaces_event; + struct perf_ns_link_info *ns_link_info; + + if (!atomic_read(&nr_namespaces_events)) + return; + + namespaces_event = 
(struct perf_namespaces_event){ + .task = task, + .event_id = { + .header = { + .type = PERF_RECORD_NAMESPACES, + .misc = 0, + .size = sizeof(namespaces_event.event_id), + }, + /* .pid */ + /* .tid */ + .nr_namespaces = NR_NAMESPACES, + /* .link_info[NR_NAMESPACES] */ + }, + }; + + ns_link_info = namespaces_event.event_id.link_info; + + perf_fill_ns_link_info(&ns_link_info[MNT_NS_INDEX], + task, &mntns_operations); + +#ifdef CONFIG_USER_NS + perf_fill_ns_link_info(&ns_link_info[USER_NS_INDEX], + task, &userns_operations); +#endif +#ifdef CONFIG_NET_NS + perf_fill_ns_link_info(&ns_link_info[NET_NS_INDEX], + task, &netns_operations); +#endif +#ifdef CONFIG_UTS_NS + perf_fill_ns_link_info(&ns_link_info[UTS_NS_INDEX], + task, &utsns_operations); +#endif +#ifdef CONFIG_IPC_NS + perf_fill_ns_link_info(&ns_link_info[IPC_NS_INDEX], + task, &ipcns_operations); +#endif +#ifdef CONFIG_PID_NS + perf_fill_ns_link_info(&ns_link_info[PID_NS_INDEX], + task, &pidns_operations); +#endif +#ifdef CONFIG_CGROUPS + perf_fill_ns_link_info(&ns_link_info[CGROUP_NS_INDEX], + task, &cgroupns_operations); +#endif + + perf_iterate_sb(perf_event_namespaces_output, + &namespaces_event, + NULL); +} + /* * mmap tracking */ @@ -9146,6 +9278,8 @@ static void account_event(struct perf_event *event) atomic_inc(&nr_mmap_events); if (event->attr.comm) atomic_inc(&nr_comm_events); + if (event->attr.namespaces) + atomic_inc(&nr_namespaces_events); if (event->attr.task) atomic_inc(&nr_task_events); if (event->attr.freq) @@ -9691,6 +9825,11 @@ SYSCALL_DEFINE5(perf_event_open, return -EACCES; } + if (attr.namespaces) { + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + } + if (attr.freq) { if (attr.sample_freq > sysctl_perf_event_sample_rate) return -EINVAL; diff --git a/kernel/fork.c b/kernel/fork.c index 6c463c80e93d..afa2947286cd 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2352,6 +2352,8 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) } } + perf_event_namespaces(current); + 
bad_unshare_cleanup_cred: if (new_cred) put_cred(new_cred); diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 782102e59eed..f6c5d330059a 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -26,6 +26,7 @@ #include #include #include +#include static struct kmem_cache *nsproxy_cachep; @@ -262,6 +263,8 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype) goto out; } switch_task_namespaces(tsk, new_nsproxy); + + perf_event_namespaces(tsk); out: fput(file); return err; -- cgit v1.2.3-71-gd317 From 5b441ac8784c1e7f3c619f14da4c3f52e87348d5 Mon Sep 17 00:00:00 2001 From: Robert Shearman Date: Fri, 10 Mar 2017 20:43:24 +0000 Subject: mpls: allow TTL propagation to IP packets to be configured Provide the ability to control on a per-route basis whether the TTL value from an MPLS packet is propagated to an IPv4/IPv6 packet when the last label is popped as per the theoretical model in RFC 3443 through a new route attribute, RTA_TTL_PROPAGATE which can be 0 to mean disable propagation and 1 to mean enable propagation. In order to provide the ability to change the behaviour for packets arriving with IPv4/IPv6 Explicit Null labels and to provide an easy way for a user to change the behaviour for all existing routes without having to reprogram them, a global knob is provided. This is done through the addition of a new per-namespace sysctl, "net.mpls.ip_ttl_propagate", which defaults to enabled. If the per-route attribute is set (either enabled or disabled) then it overrides the global configuration. Signed-off-by: Robert Shearman Acked-by: David Ahern Tested-by: David Ahern Signed-off-by: David S. 
Miller --- Documentation/networking/mpls-sysctl.txt | 11 ++++ include/net/netns/mpls.h | 2 + include/uapi/linux/rtnetlink.h | 1 + net/mpls/af_mpls.c | 87 +++++++++++++++++++++++++++++--- net/mpls/internal.h | 7 +++ 5 files changed, 100 insertions(+), 8 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/networking/mpls-sysctl.txt b/Documentation/networking/mpls-sysctl.txt index 15d8d16934fd..9badd1d6685f 100644 --- a/Documentation/networking/mpls-sysctl.txt +++ b/Documentation/networking/mpls-sysctl.txt @@ -19,6 +19,17 @@ platform_labels - INTEGER Possible values: 0 - 1048575 Default: 0 +ip_ttl_propagate - BOOL + Control whether TTL is propagated from the IPv4/IPv6 header to + the MPLS header on imposing labels and propagated from the + MPLS header to the IPv4/IPv6 header on popping the last label. + + If disabled, the MPLS transport network will appear as a + single hop to transit traffic. + + 0 - disabled / RFC 3443 [Short] Pipe Model + 1 - enabled / RFC 3443 Uniform Model (default) + conf//input - BOOL Control whether packets can be input on this interface. 
diff --git a/include/net/netns/mpls.h b/include/net/netns/mpls.h index d29203651c01..08652eedabb2 100644 --- a/include/net/netns/mpls.h +++ b/include/net/netns/mpls.h @@ -9,8 +9,10 @@ struct mpls_route; struct ctl_table_header; struct netns_mpls { + int ip_ttl_propagate; size_t platform_labels; struct mpls_route __rcu * __rcu *platform_label; + struct ctl_table_header *ctl; }; diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 75fcf5eff093..3dd72aee4d32 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -319,6 +319,7 @@ enum rtattr_type_t { RTA_EXPIRES, RTA_PAD, RTA_UID, + RTA_TTL_PROPAGATE, __RTA_MAX }; diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 3818686182b2..0e1046f21af8 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -32,6 +32,7 @@ #define MPLS_NEIGH_TABLE_UNSPEC (NEIGH_LINK_TABLE + 1) static int zero = 0; +static int one = 1; static int label_limit = (1 << 20) - 1; static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt, @@ -220,8 +221,8 @@ out: return &rt->rt_nh[nh_index]; } -static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb, - struct mpls_entry_decoded dec) +static bool mpls_egress(struct net *net, struct mpls_route *rt, + struct sk_buff *skb, struct mpls_entry_decoded dec) { enum mpls_payload_type payload_type; bool success = false; @@ -246,22 +247,46 @@ static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb, switch (payload_type) { case MPT_IPV4: { struct iphdr *hdr4 = ip_hdr(skb); + u8 new_ttl; skb->protocol = htons(ETH_P_IP); + + /* If propagating TTL, take the decremented TTL from + * the incoming MPLS header, otherwise decrement the + * TTL, but only if not 0 to avoid underflow. + */ + if (rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED || + (rt->rt_ttl_propagate == MPLS_TTL_PROP_DEFAULT && + net->mpls.ip_ttl_propagate)) + new_ttl = dec.ttl; + else + new_ttl = hdr4->ttl ? 
hdr4->ttl - 1 : 0; + csum_replace2(&hdr4->check, htons(hdr4->ttl << 8), - htons(dec.ttl << 8)); - hdr4->ttl = dec.ttl; + htons(new_ttl << 8)); + hdr4->ttl = new_ttl; success = true; break; } case MPT_IPV6: { struct ipv6hdr *hdr6 = ipv6_hdr(skb); skb->protocol = htons(ETH_P_IPV6); - hdr6->hop_limit = dec.ttl; + + /* If propagating TTL, take the decremented TTL from + * the incoming MPLS header, otherwise decrement the + * hop limit, but only if not 0 to avoid underflow. + */ + if (rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED || + (rt->rt_ttl_propagate == MPLS_TTL_PROP_DEFAULT && + net->mpls.ip_ttl_propagate)) + hdr6->hop_limit = dec.ttl; + else if (hdr6->hop_limit) + hdr6->hop_limit = hdr6->hop_limit - 1; success = true; break; } case MPT_UNSPEC: + /* Should have decided which protocol it is by now */ break; } @@ -361,7 +386,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, if (unlikely(!new_header_size && dec.bos)) { /* Penultimate hop popping */ - if (!mpls_egress(rt, skb, dec)) + if (!mpls_egress(dev_net(out_dev), rt, skb, dec)) goto err; } else { bool bos; @@ -412,6 +437,7 @@ static struct packet_type mpls_packet_type __read_mostly = { static const struct nla_policy rtm_mpls_policy[RTA_MAX+1] = { [RTA_DST] = { .type = NLA_U32 }, [RTA_OIF] = { .type = NLA_U32 }, + [RTA_TTL_PROPAGATE] = { .type = NLA_U8 }, }; struct mpls_route_config { @@ -421,6 +447,7 @@ struct mpls_route_config { u8 rc_via_alen; u8 rc_via[MAX_VIA_ALEN]; u32 rc_label; + u8 rc_ttl_propagate; u8 rc_output_labels; u32 rc_output_label[MAX_NEW_LABELS]; u32 rc_nlflags; @@ -856,6 +883,7 @@ static int mpls_route_add(struct mpls_route_config *cfg) rt->rt_protocol = cfg->rc_protocol; rt->rt_payload_type = cfg->rc_payload_type; + rt->rt_ttl_propagate = cfg->rc_ttl_propagate; if (cfg->rc_mp) err = mpls_nh_build_multi(cfg, rt); @@ -1576,6 +1604,7 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh, cfg->rc_label = LABEL_NOT_SPECIFIED; cfg->rc_protocol = 
rtm->rtm_protocol; cfg->rc_via_table = MPLS_NEIGH_TABLE_UNSPEC; + cfg->rc_ttl_propagate = MPLS_TTL_PROP_DEFAULT; cfg->rc_nlflags = nlh->nlmsg_flags; cfg->rc_nlinfo.portid = NETLINK_CB(skb).portid; cfg->rc_nlinfo.nlh = nlh; @@ -1622,6 +1651,17 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh, cfg->rc_mp_len = nla_len(nla); break; } + case RTA_TTL_PROPAGATE: + { + u8 ttl_propagate = nla_get_u8(nla); + + if (ttl_propagate > 1) + goto errout; + cfg->rc_ttl_propagate = ttl_propagate ? + MPLS_TTL_PROP_ENABLED : + MPLS_TTL_PROP_DISABLED; + break; + } default: /* Unsupported attribute */ goto errout; @@ -1682,6 +1722,15 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event, if (nla_put_labels(skb, RTA_DST, 1, &label)) goto nla_put_failure; + + if (rt->rt_ttl_propagate != MPLS_TTL_PROP_DEFAULT) { + bool ttl_propagate = + rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED; + + if (nla_put_u8(skb, RTA_TTL_PROPAGATE, + ttl_propagate)) + goto nla_put_failure; + } if (rt->rt_nhn == 1) { const struct mpls_nh *nh = rt->rt_nh; @@ -1792,7 +1841,8 @@ static inline size_t lfib_nlmsg_size(struct mpls_route *rt) { size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg)) - + nla_total_size(4); /* RTA_DST */ + + nla_total_size(4) /* RTA_DST */ + + nla_total_size(1); /* RTA_TTL_PROPAGATE */ if (rt->rt_nhn == 1) { struct mpls_nh *nh = rt->rt_nh; @@ -1876,6 +1926,7 @@ static int resize_platform_label_table(struct net *net, size_t limit) RCU_INIT_POINTER(rt0->rt_nh->nh_dev, lo); rt0->rt_protocol = RTPROT_KERNEL; rt0->rt_payload_type = MPT_IPV4; + rt0->rt_ttl_propagate = MPLS_TTL_PROP_DEFAULT; rt0->rt_nh->nh_via_table = NEIGH_LINK_TABLE; rt0->rt_nh->nh_via_alen = lo->addr_len; memcpy(__mpls_nh_via(rt0, rt0->rt_nh), lo->dev_addr, @@ -1889,6 +1940,7 @@ static int resize_platform_label_table(struct net *net, size_t limit) RCU_INIT_POINTER(rt2->rt_nh->nh_dev, lo); rt2->rt_protocol = RTPROT_KERNEL; rt2->rt_payload_type = MPT_IPV6; + rt0->rt_ttl_propagate 
= MPLS_TTL_PROP_DEFAULT; rt2->rt_nh->nh_via_table = NEIGH_LINK_TABLE; rt2->rt_nh->nh_via_alen = lo->addr_len; memcpy(__mpls_nh_via(rt2, rt2->rt_nh), lo->dev_addr, @@ -1970,6 +2022,9 @@ static int mpls_platform_labels(struct ctl_table *table, int write, return ret; } +#define MPLS_NS_SYSCTL_OFFSET(field) \ + (&((struct net *)0)->field) + static const struct ctl_table mpls_table[] = { { .procname = "platform_labels", @@ -1978,21 +2033,37 @@ static const struct ctl_table mpls_table[] = { .mode = 0644, .proc_handler = mpls_platform_labels, }, + { + .procname = "ip_ttl_propagate", + .data = MPLS_NS_SYSCTL_OFFSET(mpls.ip_ttl_propagate), + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, { } }; static int mpls_net_init(struct net *net) { struct ctl_table *table; + int i; net->mpls.platform_labels = 0; net->mpls.platform_label = NULL; + net->mpls.ip_ttl_propagate = 1; table = kmemdup(mpls_table, sizeof(mpls_table), GFP_KERNEL); if (table == NULL) return -ENOMEM; - table[0].data = net; + /* Table data contains only offsets relative to the base of + * the mdev at this point, so make them absolute. 
+ */ + for (i = 0; i < ARRAY_SIZE(mpls_table) - 1; i++) + table[i].data = (char *)net + (uintptr_t)table[i].data; + net->mpls.ctl = register_net_sysctl(net, "net/mpls", table); if (net->mpls.ctl == NULL) { kfree(table); diff --git a/net/mpls/internal.h b/net/mpls/internal.h index 76360d8b9579..62928d8fabd1 100644 --- a/net/mpls/internal.h +++ b/net/mpls/internal.h @@ -90,6 +90,12 @@ struct mpls_nh { /* next hop label forwarding entry */ u8 nh_via_table; }; +enum mpls_ttl_propagation { + MPLS_TTL_PROP_DEFAULT, + MPLS_TTL_PROP_ENABLED, + MPLS_TTL_PROP_DISABLED, +}; + /* The route, nexthops and vias are stored together in the same memory * block: * @@ -116,6 +122,7 @@ struct mpls_route { /* next hop label forwarding entry */ u8 rt_protocol; u8 rt_payload_type; u8 rt_max_alen; + u8 rt_ttl_propagate; unsigned int rt_nhn; unsigned int rt_nhn_alive; struct mpls_nh rt_nh[0]; -- cgit v1.2.3-71-gd317 From a59166e470868d92f0813977817e99e699398af5 Mon Sep 17 00:00:00 2001 From: Robert Shearman Date: Fri, 10 Mar 2017 20:43:25 +0000 Subject: mpls: allow TTL propagation from IP packets to be configured Allow TTL propagation from IP packets to MPLS packets to be configured. Add a new optional LWT attribute, MPLS_IPTUNNEL_TTL, which allows the TTL to be set in the resulting MPLS packet, with the value of 0 having the semantics of enabling propagation of the TTL from the IP header (i.e. non-zero values disable propagation). Also allow the configuration to be overridden globally by reusing the same sysctl to control whether the TTL is propagated from IP packets into the MPLS header. If the per-LWT attribute is set then it overrides the global configuration. If the TTL isn't propagated then a default TTL value is used which can be configured via a new sysctl, "net.mpls.default_ttl". This is kept separate from the configuration of whether IP TTL propagation is enabled as it can be used in the future when non-IP payloads are supported (i.e. 
where there is no payload TTL that can be propagated). Signed-off-by: Robert Shearman Acked-by: David Ahern Tested-by: David Ahern Signed-off-by: David S. Miller --- Documentation/networking/mpls-sysctl.txt | 8 ++++ include/net/mpls_iptunnel.h | 2 + include/net/netns/mpls.h | 1 + include/uapi/linux/mpls_iptunnel.h | 2 + net/mpls/af_mpls.c | 11 +++++ net/mpls/mpls_iptunnel.c | 73 ++++++++++++++++++++++++++------ 6 files changed, 84 insertions(+), 13 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/networking/mpls-sysctl.txt b/Documentation/networking/mpls-sysctl.txt index 9badd1d6685f..2f24a1912a48 100644 --- a/Documentation/networking/mpls-sysctl.txt +++ b/Documentation/networking/mpls-sysctl.txt @@ -30,6 +30,14 @@ ip_ttl_propagate - BOOL 0 - disabled / RFC 3443 [Short] Pipe Model 1 - enabled / RFC 3443 Uniform Model (default) +default_ttl - INTEGER + Default TTL value to use for MPLS packets where it cannot be + propagated from an IP header, either because one isn't present + or ip_ttl_propagate has been disabled. + + Possible values: 1 - 255 + Default: 255 + conf//input - BOOL Control whether packets can be input on this interface.
diff --git a/include/net/mpls_iptunnel.h b/include/net/mpls_iptunnel.h index 179253f9dcfd..a18af6a16eb5 100644 --- a/include/net/mpls_iptunnel.h +++ b/include/net/mpls_iptunnel.h @@ -19,6 +19,8 @@ struct mpls_iptunnel_encap { u32 label[MAX_NEW_LABELS]; u8 labels; + u8 ttl_propagate; + u8 default_ttl; }; static inline struct mpls_iptunnel_encap *mpls_lwtunnel_encap(struct lwtunnel_state *lwtstate) diff --git a/include/net/netns/mpls.h b/include/net/netns/mpls.h index 08652eedabb2..6608b3693385 100644 --- a/include/net/netns/mpls.h +++ b/include/net/netns/mpls.h @@ -10,6 +10,7 @@ struct ctl_table_header; struct netns_mpls { int ip_ttl_propagate; + int default_ttl; size_t platform_labels; struct mpls_route __rcu * __rcu *platform_label; diff --git a/include/uapi/linux/mpls_iptunnel.h b/include/uapi/linux/mpls_iptunnel.h index d80a0498f77e..f5e45095b0bb 100644 --- a/include/uapi/linux/mpls_iptunnel.h +++ b/include/uapi/linux/mpls_iptunnel.h @@ -16,11 +16,13 @@ /* MPLS tunnel attributes * [RTA_ENCAP] = { * [MPLS_IPTUNNEL_DST] + * [MPLS_IPTUNNEL_TTL] * } */ enum { MPLS_IPTUNNEL_UNSPEC, MPLS_IPTUNNEL_DST, + MPLS_IPTUNNEL_TTL, __MPLS_IPTUNNEL_MAX, }; #define MPLS_IPTUNNEL_MAX (__MPLS_IPTUNNEL_MAX - 1) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 0e1046f21af8..0c5d111abe36 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -34,6 +34,7 @@ static int zero = 0; static int one = 1; static int label_limit = (1 << 20) - 1; +static int ttl_max = 255; static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt, struct nlmsghdr *nlh, struct net *net, u32 portid, @@ -2042,6 +2043,15 @@ static const struct ctl_table mpls_table[] = { .extra1 = &zero, .extra2 = &one, }, + { + .procname = "default_ttl", + .data = MPLS_NS_SYSCTL_OFFSET(mpls.default_ttl), + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, + .extra2 = &ttl_max, + }, { } }; @@ -2053,6 +2063,7 @@ static int mpls_net_init(struct net *net) 
net->mpls.platform_labels = 0; net->mpls.platform_label = NULL; net->mpls.ip_ttl_propagate = 1; + net->mpls.default_ttl = 255; table = kmemdup(mpls_table, sizeof(mpls_table), GFP_KERNEL); if (table == NULL) diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c index e4e4424f9eb1..22f71fce0bfb 100644 --- a/net/mpls/mpls_iptunnel.c +++ b/net/mpls/mpls_iptunnel.c @@ -29,6 +29,7 @@ static const struct nla_policy mpls_iptunnel_policy[MPLS_IPTUNNEL_MAX + 1] = { [MPLS_IPTUNNEL_DST] = { .type = NLA_U32 }, + [MPLS_IPTUNNEL_TTL] = { .type = NLA_U8 }, }; static unsigned int mpls_encap_size(struct mpls_iptunnel_encap *en) @@ -49,6 +50,7 @@ static int mpls_xmit(struct sk_buff *skb) struct rtable *rt = NULL; struct rt6_info *rt6 = NULL; struct mpls_dev *out_mdev; + struct net *net; int err = 0; bool bos; int i; @@ -56,17 +58,7 @@ static int mpls_xmit(struct sk_buff *skb) /* Find the output device */ out_dev = dst->dev; - - /* Obtain the ttl */ - if (dst->ops->family == AF_INET) { - ttl = ip_hdr(skb)->ttl; - rt = (struct rtable *)dst; - } else if (dst->ops->family == AF_INET6) { - ttl = ipv6_hdr(skb)->hop_limit; - rt6 = (struct rt6_info *)dst; - } else { - goto drop; - } + net = dev_net(out_dev); skb_orphan(skb); @@ -78,6 +70,38 @@ static int mpls_xmit(struct sk_buff *skb) tun_encap_info = mpls_lwtunnel_encap(dst->lwtstate); + /* Obtain the ttl using the following set of rules. 
+ * + * LWT ttl propagation setting: + * - disabled => use default TTL value from LWT + * - enabled => use TTL value from IPv4/IPv6 header + * - default => + * Global ttl propagation setting: + * - disabled => use default TTL value from global setting + * - enabled => use TTL value from IPv4/IPv6 header + */ + if (dst->ops->family == AF_INET) { + if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DISABLED) + ttl = tun_encap_info->default_ttl; + else if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DEFAULT && + !net->mpls.ip_ttl_propagate) + ttl = net->mpls.default_ttl; + else + ttl = ip_hdr(skb)->ttl; + rt = (struct rtable *)dst; + } else if (dst->ops->family == AF_INET6) { + if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DISABLED) + ttl = tun_encap_info->default_ttl; + else if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DEFAULT && + !net->mpls.ip_ttl_propagate) + ttl = net->mpls.default_ttl; + else + ttl = ipv6_hdr(skb)->hop_limit; + rt6 = (struct rt6_info *)dst; + } else { + goto drop; + } + /* Verify the destination can hold the packet */ new_header_size = mpls_encap_size(tun_encap_info); mtu = mpls_dev_mtu(out_dev); @@ -160,6 +184,17 @@ static int mpls_build_state(struct nlattr *nla, &tun_encap_info->labels, tun_encap_info->label); if (ret) goto errout; + + tun_encap_info->ttl_propagate = MPLS_TTL_PROP_DEFAULT; + + if (tb[MPLS_IPTUNNEL_TTL]) { + tun_encap_info->default_ttl = nla_get_u8(tb[MPLS_IPTUNNEL_TTL]); + /* TTL 0 implies propagate from IP header */ + tun_encap_info->ttl_propagate = tun_encap_info->default_ttl ? 
+ MPLS_TTL_PROP_DISABLED : + MPLS_TTL_PROP_ENABLED; + } + newts->type = LWTUNNEL_ENCAP_MPLS; newts->flags |= LWTUNNEL_STATE_XMIT_REDIRECT; newts->headroom = mpls_encap_size(tun_encap_info); @@ -186,6 +221,10 @@ static int mpls_fill_encap_info(struct sk_buff *skb, tun_encap_info->label)) goto nla_put_failure; + if (tun_encap_info->ttl_propagate != MPLS_TTL_PROP_DEFAULT && + nla_put_u8(skb, MPLS_IPTUNNEL_TTL, tun_encap_info->default_ttl)) + goto nla_put_failure; + return 0; nla_put_failure: @@ -195,10 +234,16 @@ nla_put_failure: static int mpls_encap_nlsize(struct lwtunnel_state *lwtstate) { struct mpls_iptunnel_encap *tun_encap_info; + int nlsize; tun_encap_info = mpls_lwtunnel_encap(lwtstate); - return nla_total_size(tun_encap_info->labels * 4); + nlsize = nla_total_size(tun_encap_info->labels * 4); + + if (tun_encap_info->ttl_propagate != MPLS_TTL_PROP_DEFAULT) + nlsize += nla_total_size(1); + + return nlsize; } static int mpls_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) @@ -207,7 +252,9 @@ static int mpls_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) struct mpls_iptunnel_encap *b_hdr = mpls_lwtunnel_encap(b); int l; - if (a_hdr->labels != b_hdr->labels) + if (a_hdr->labels != b_hdr->labels || + a_hdr->ttl_propagate != b_hdr->ttl_propagate || + a_hdr->default_ttl != b_hdr->default_ttl) return 1; for (l = 0; l < MAX_NEW_LABELS; l++) -- cgit v1.2.3-71-gd317 From 6f051e4a685b768f3704c7c069aa1edee3010622 Mon Sep 17 00:00:00 2001 From: Greg KH Date: Wed, 8 Mar 2017 19:03:03 +0100 Subject: eventpoll.h: fix epoll event masks [resend due to me forgetting to cc: linux-api the first time around I posted these back on Feb 23] From: Greg Kroah-Hartman When userspace tries to use these defines, it complains that it needs to be an unsigned 1 that is shifted, so libc implementations have to create their own version. Fix this by defining it properly so that libcs can just use the kernel uapi header. 
Reported-by: Elliott Hughes Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/eventpoll.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/eventpoll.h b/include/uapi/linux/eventpoll.h index 1c3154913a39..a7046c227e86 100644 --- a/include/uapi/linux/eventpoll.h +++ b/include/uapi/linux/eventpoll.h @@ -27,7 +27,7 @@ #define EPOLL_CTL_MOD 3 /* Set exclusive wakeup mode for the target file descriptor */ -#define EPOLLEXCLUSIVE (1 << 28) +#define EPOLLEXCLUSIVE (1U << 28) /* * Request the handling of system wakeup events so as to prevent system suspends @@ -39,13 +39,13 @@ * * Requires CAP_BLOCK_SUSPEND */ -#define EPOLLWAKEUP (1 << 29) +#define EPOLLWAKEUP (1U << 29) /* Set the One Shot behaviour for the target file descriptor */ -#define EPOLLONESHOT (1 << 30) +#define EPOLLONESHOT (1U << 30) /* Set the Edge Triggered behaviour for the target file descriptor */ -#define EPOLLET (1 << 31) +#define EPOLLET (1U << 31) /* * On x86-64 make the 64bit structure have the same alignment as the -- cgit v1.2.3-71-gd317 From 7e040726850a106587485c21bdacc0bfc8a0cbed Mon Sep 17 00:00:00 2001 From: Greg KH Date: Wed, 8 Mar 2017 19:03:44 +0100 Subject: eventpoll.h: add missing epoll event masks [resend due to me forgetting to cc: linux-api the first time around I posted these back on Feb 23] From: Greg Kroah-Hartman For some reason these values are not in the uapi header file, so any libc has to define it themselves. To prevent them from needing to do this, just have the kernel provide the correct values. 
Reported-by: Elliott Hughes Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/eventpoll.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/eventpoll.h b/include/uapi/linux/eventpoll.h index a7046c227e86..f4d5c998cc2b 100644 --- a/include/uapi/linux/eventpoll.h +++ b/include/uapi/linux/eventpoll.h @@ -26,6 +26,19 @@ #define EPOLL_CTL_DEL 2 #define EPOLL_CTL_MOD 3 +/* Epoll event masks */ +#define EPOLLIN 0x00000001 +#define EPOLLPRI 0x00000002 +#define EPOLLOUT 0x00000004 +#define EPOLLERR 0x00000008 +#define EPOLLHUP 0x00000010 +#define EPOLLRDNORM 0x00000040 +#define EPOLLRDBAND 0x00000080 +#define EPOLLWRNORM 0x00000100 +#define EPOLLWRBAND 0x00000200 +#define EPOLLMSG 0x00000400 +#define EPOLLRDHUP 0x00002000 + /* Set exclusive wakeup mode for the target file descriptor */ #define EPOLLEXCLUSIVE (1U << 28) -- cgit v1.2.3-71-gd317 From ba2b5277dc52cc96944d9765281bdf1e12681f66 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Fri, 9 Jan 2015 11:05:13 +0100 Subject: drm: add RGB formats with separate alpha plane Some hardware can read the alpha components separately and then conditionally fetch color components only for non-zero alpha values. This patch adds fourcc definitions for two-plane RGB formats with an 8-bit alpha channel on a second plane. 
Signed-off-by: Philipp Zabel --- drivers/gpu/drm/drm_fourcc.c | 8 ++++++++ include/uapi/drm/drm_fourcc.h | 14 ++++++++++++++ 2 files changed, 22 insertions(+) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/drm_fourcc.c b/drivers/gpu/drm/drm_fourcc.c index 90d2cc8da8eb..92bf3306d4b3 100644 --- a/drivers/gpu/drm/drm_fourcc.c +++ b/drivers/gpu/drm/drm_fourcc.c @@ -132,6 +132,8 @@ const struct drm_format_info *__drm_format_info(u32 format) { .format = DRM_FORMAT_XBGR8888, .depth = 24, .num_planes = 1, .cpp = { 4, 0, 0 }, .hsub = 1, .vsub = 1 }, { .format = DRM_FORMAT_RGBX8888, .depth = 24, .num_planes = 1, .cpp = { 4, 0, 0 }, .hsub = 1, .vsub = 1 }, { .format = DRM_FORMAT_BGRX8888, .depth = 24, .num_planes = 1, .cpp = { 4, 0, 0 }, .hsub = 1, .vsub = 1 }, + { .format = DRM_FORMAT_RGB565_A8, .depth = 24, .num_planes = 2, .cpp = { 2, 1, 0 }, .hsub = 1, .vsub = 1 }, + { .format = DRM_FORMAT_BGR565_A8, .depth = 24, .num_planes = 2, .cpp = { 2, 1, 0 }, .hsub = 1, .vsub = 1 }, { .format = DRM_FORMAT_XRGB2101010, .depth = 30, .num_planes = 1, .cpp = { 4, 0, 0 }, .hsub = 1, .vsub = 1 }, { .format = DRM_FORMAT_XBGR2101010, .depth = 30, .num_planes = 1, .cpp = { 4, 0, 0 }, .hsub = 1, .vsub = 1 }, { .format = DRM_FORMAT_RGBX1010102, .depth = 30, .num_planes = 1, .cpp = { 4, 0, 0 }, .hsub = 1, .vsub = 1 }, @@ -144,6 +146,12 @@ const struct drm_format_info *__drm_format_info(u32 format) { .format = DRM_FORMAT_ABGR8888, .depth = 32, .num_planes = 1, .cpp = { 4, 0, 0 }, .hsub = 1, .vsub = 1 }, { .format = DRM_FORMAT_RGBA8888, .depth = 32, .num_planes = 1, .cpp = { 4, 0, 0 }, .hsub = 1, .vsub = 1 }, { .format = DRM_FORMAT_BGRA8888, .depth = 32, .num_planes = 1, .cpp = { 4, 0, 0 }, .hsub = 1, .vsub = 1 }, + { .format = DRM_FORMAT_RGB888_A8, .depth = 32, .num_planes = 2, .cpp = { 3, 1, 0 }, .hsub = 1, .vsub = 1 }, + { .format = DRM_FORMAT_BGR888_A8, .depth = 32, .num_planes = 2, .cpp = { 3, 1, 0 }, .hsub = 1, .vsub = 1 }, + { .format = DRM_FORMAT_XRGB8888_A8, .depth = 32, 
.num_planes = 2, .cpp = { 4, 1, 0 }, .hsub = 1, .vsub = 1 }, + { .format = DRM_FORMAT_XBGR8888_A8, .depth = 32, .num_planes = 2, .cpp = { 4, 1, 0 }, .hsub = 1, .vsub = 1 }, + { .format = DRM_FORMAT_RGBX8888_A8, .depth = 32, .num_planes = 2, .cpp = { 4, 1, 0 }, .hsub = 1, .vsub = 1 }, + { .format = DRM_FORMAT_BGRX8888_A8, .depth = 32, .num_planes = 2, .cpp = { 4, 1, 0 }, .hsub = 1, .vsub = 1 }, { .format = DRM_FORMAT_YUV410, .depth = 0, .num_planes = 3, .cpp = { 1, 1, 1 }, .hsub = 4, .vsub = 4 }, { .format = DRM_FORMAT_YVU410, .depth = 0, .num_planes = 3, .cpp = { 1, 1, 1 }, .hsub = 4, .vsub = 4 }, { .format = DRM_FORMAT_YUV411, .depth = 0, .num_planes = 3, .cpp = { 1, 1, 1 }, .hsub = 4, .vsub = 1 }, diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index ef20abb8119b..995c8f9c692f 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -113,6 +113,20 @@ extern "C" { #define DRM_FORMAT_AYUV fourcc_code('A', 'Y', 'U', 'V') /* [31:0] A:Y:Cb:Cr 8:8:8:8 little endian */ +/* + * 2 plane RGB + A + * index 0 = RGB plane, same format as the corresponding non _A8 format has + * index 1 = A plane, [7:0] A + */ +#define DRM_FORMAT_XRGB8888_A8 fourcc_code('X', 'R', 'A', '8') +#define DRM_FORMAT_XBGR8888_A8 fourcc_code('X', 'B', 'A', '8') +#define DRM_FORMAT_RGBX8888_A8 fourcc_code('R', 'X', 'A', '8') +#define DRM_FORMAT_BGRX8888_A8 fourcc_code('B', 'X', 'A', '8') +#define DRM_FORMAT_RGB888_A8 fourcc_code('R', '8', 'A', '8') +#define DRM_FORMAT_BGR888_A8 fourcc_code('B', '8', 'A', '8') +#define DRM_FORMAT_RGB565_A8 fourcc_code('R', '5', 'A', '8') +#define DRM_FORMAT_BGR565_A8 fourcc_code('B', '5', 'A', '8') + /* * 2 plane YCbCr * index 0 = Y plane, [7:0] Y -- cgit v1.2.3-71-gd317 From 2026fecf516bc04df20cb50874957cd8c364fb4e Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 15 Mar 2017 10:39:18 -0700 Subject: mqprio: Change handling of hw u8 to allow for multiple hardware offload modes This patch is meant to allow for 
support of multiple hardware offload types for a single device. There is currently no bounds checking for the hw member of the mqprio_qopt structure. This results in us being able to pass values from 1 to 255 with all being treated the same. On retrieving the value it is returned as 1 for anything 1 or greater being set. With this change we are currently adding limited bounds checking by defining an enum and using those values to limit the reported hardware offloads. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/uapi/linux/pkt_sched.h | 8 ++++++++ net/sched/sch_mqprio.c | 26 ++++++++++++++++---------- 2 files changed, 24 insertions(+), 10 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index df7451d35131..099bf5528fed 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -617,6 +617,14 @@ struct tc_drr_stats { #define TC_QOPT_BITMASK 15 #define TC_QOPT_MAX_QUEUE 16 +enum { + TC_MQPRIO_HW_OFFLOAD_NONE, /* no offload requested */ + TC_MQPRIO_HW_OFFLOAD_TCS, /* offload TCs, no queue counts */ + __TC_MQPRIO_HW_OFFLOAD_MAX +}; + +#define TC_MQPRIO_HW_OFFLOAD_MAX (__TC_MQPRIO_HW_OFFLOAD_MAX - 1) + struct tc_mqprio_qopt { __u8 num_tc; __u8 prio_tc_map[TC_QOPT_BITMASK + 1]; diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index b851e209da4d..5f55bf149d9f 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -21,7 +21,7 @@ struct mqprio_sched { struct Qdisc **qdiscs; - int hw_owned; + int hw_offload; }; static void mqprio_destroy(struct Qdisc *sch) @@ -39,7 +39,7 @@ static void mqprio_destroy(struct Qdisc *sch) kfree(priv->qdiscs); } - if (priv->hw_owned && dev->netdev_ops->ndo_setup_tc) + if (priv->hw_offload && dev->netdev_ops->ndo_setup_tc) dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, &tc); else netdev_set_num_tc(dev, 0); @@ -59,15 +59,20 @@ static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt
*qopt) return -EINVAL; } - /* net_device does not support requested operation */ - if (qopt->hw && !dev->netdev_ops->ndo_setup_tc) - return -EINVAL; + /* Limit qopt->hw to maximum supported offload value. Drivers have + * the option of overriding this later if they don't support a + * given offload type. + */ + if (qopt->hw > TC_MQPRIO_HW_OFFLOAD_MAX) + qopt->hw = TC_MQPRIO_HW_OFFLOAD_MAX; - /* if hw owned qcount and qoffset are taken from LLD so - * no reason to verify them here + /* If hardware offload is requested we will leave it to the device + * to either populate the queue counts itself or to validate the + * provided queue counts. If ndo_setup_tc is not present then + * hardware doesn't support offload and we should return an error. */ if (qopt->hw) - return 0; + return dev->netdev_ops->ndo_setup_tc ? 0 : -EINVAL; for (i = 0; i < qopt->num_tc; i++) { unsigned int last = qopt->offset[i] + qopt->count[i]; @@ -142,10 +147,11 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) struct tc_to_netdev tc = {.type = TC_SETUP_MQPRIO, { .tc = qopt->num_tc }}; - priv->hw_owned = 1; err = dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, &tc); if (err) return err; + + priv->hw_offload = qopt->hw; } else { netdev_set_num_tc(dev, qopt->num_tc); for (i = 0; i < qopt->num_tc; i++) @@ -243,7 +249,7 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb) opt.num_tc = netdev_get_num_tc(dev); memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map)); - opt.hw = priv->hw_owned; + opt.hw = priv->hw_offload; for (i = 0; i < netdev_get_num_tc(dev); i++) { opt.count[i] = dev->tc_to_txq[i].count; -- cgit v1.2.3-71-gd317 From ae0c2d995d648d5165545d5e05e2869642009b38 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Mon, 20 Feb 2017 15:33:51 +0200 Subject: perf/core: Add a flag for partial AUX records The Intel PT driver needs to be able to communicate partial AUX transactions, that is, transactions with gaps in data for reasons other than no
room left in the buffer (i.e. truncated transactions). Therefore, this condition does not imply a wakeup for the consumer. To this end, add a new "partial" AUX flag. Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Mathieu Poirier Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: vince@deater.net Link: http://lkml.kernel.org/r/20170220133352.17995-4-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- include/uapi/linux/perf_event.h | 1 + kernel/events/ring_buffer.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index bec0aad0e15c..d09a9cd021b1 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -915,6 +915,7 @@ enum perf_callchain_context { */ #define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */ #define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */ +#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */ #define PERF_FLAG_FD_NO_GROUP (1UL << 0) #define PERF_FLAG_FD_OUTPUT (1UL << 1) diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 9654e55c38d6..2831480c63a2 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -424,8 +424,8 @@ err: */ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size) { + bool wakeup = !!(handle->aux_flags & PERF_AUX_FLAG_TRUNCATED); struct ring_buffer *rb = handle->rb; - bool wakeup = !!handle->aux_flags; unsigned long aux_head; /* in overwrite mode, driver provides aux_head via handle */ -- cgit v1.2.3-71-gd317 From ea0213e0c7cc1c1b52badf27bd7db4f50a67baaa Mon Sep 17 00:00:00 2001 From: Artur Paszkiewicz Date: Thu, 9 Mar 2017 09:59:57 +0100 Subject: md: superblock changes for PPL Include information about PPL location 
and size into mdp_superblock_1 and copy it to/from rdev. Because PPL is mutually exclusive with bitmap, put it in place of 'bitmap_offset'. Add a new flag MD_FEATURE_PPL for 'feature_map', analogically to MD_FEATURE_BITMAP_OFFSET. Add MD_HAS_PPL to mddev->flags to indicate that PPL is enabled on an array. Signed-off-by: Artur Paszkiewicz Signed-off-by: Shaohua Li --- drivers/md/md.c | 19 +++++++++++++++++++ drivers/md/md.h | 8 ++++++++ drivers/md/raid0.c | 3 ++- drivers/md/raid1.c | 3 ++- include/uapi/linux/raid/md_p.h | 18 ++++++++++++++---- 5 files changed, 45 insertions(+), 6 deletions(-) (limited to 'include/uapi') diff --git a/drivers/md/md.c b/drivers/md/md.c index 72ef3f18ac9a..d57045996d35 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1507,6 +1507,12 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_ } else if (sb->bblog_offset != 0) rdev->badblocks.shift = 0; + if (le32_to_cpu(sb->feature_map) & MD_FEATURE_PPL) { + rdev->ppl.offset = (__s16)le16_to_cpu(sb->ppl.offset); + rdev->ppl.size = le16_to_cpu(sb->ppl.size); + rdev->ppl.sector = rdev->sb_start + rdev->ppl.offset; + } + if (!refdev) { ret = 1; } else { @@ -1619,6 +1625,13 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev) if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL) set_bit(MD_HAS_JOURNAL, &mddev->flags); + + if (le32_to_cpu(sb->feature_map) & MD_FEATURE_PPL) { + if (le32_to_cpu(sb->feature_map) & + (MD_FEATURE_BITMAP_OFFSET | MD_FEATURE_JOURNAL)) + return -EINVAL; + set_bit(MD_HAS_PPL, &mddev->flags); + } } else if (mddev->pers == NULL) { /* Insist of good event counter while assembling, except for * spares (which don't need an event count) */ @@ -1832,6 +1845,12 @@ retry: if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) sb->feature_map |= cpu_to_le32(MD_FEATURE_JOURNAL); + if (test_bit(MD_HAS_PPL, &mddev->flags)) { + sb->feature_map |= cpu_to_le32(MD_FEATURE_PPL); + sb->ppl.offset = cpu_to_le16(rdev->ppl.offset); + sb->ppl.size = 
cpu_to_le16(rdev->ppl.size); + } + rdev_for_each(rdev2, mddev) { i = rdev2->desc_nr; if (test_bit(Faulty, &rdev2->flags)) diff --git a/drivers/md/md.h b/drivers/md/md.h index 1c00160b09f9..a7b2f16452c4 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -122,6 +122,13 @@ struct md_rdev { * sysfs entry */ struct badblocks badblocks; + + struct { + short offset; /* Offset from superblock to start of PPL. + * Not used by external metadata. */ + unsigned int size; /* Size in sectors of the PPL space */ + sector_t sector; /* First sector of the PPL space */ + } ppl; }; enum flag_bits { Faulty, /* device is known to have a fault */ @@ -226,6 +233,7 @@ enum mddev_flags { * supported as calls to md_error() will * never cause the array to become failed. */ + MD_HAS_PPL, /* The raid array has PPL feature set */ }; enum mddev_sb_flags { diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 93347ca7c7a6..56f70c3ad37c 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -29,7 +29,8 @@ #define UNSUPPORTED_MDDEV_FLAGS \ ((1L << MD_HAS_JOURNAL) | \ (1L << MD_JOURNAL_CLEAN) | \ - (1L << MD_FAILFAST_SUPPORTED)) + (1L << MD_FAILFAST_SUPPORTED) |\ + (1L << MD_HAS_PPL)) static int raid0_congested(struct mddev *mddev, int bits) { diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index a34f58772022..730e57259af9 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -47,7 +47,8 @@ #define UNSUPPORTED_MDDEV_FLAGS \ ((1L << MD_HAS_JOURNAL) | \ - (1L << MD_JOURNAL_CLEAN)) + (1L << MD_JOURNAL_CLEAN) | \ + (1L << MD_HAS_PPL)) /* * Number of guaranteed r1bios in case of extreme VM load: diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h index 9930f3e9040f..fe2112810c43 100644 --- a/include/uapi/linux/raid/md_p.h +++ b/include/uapi/linux/raid/md_p.h @@ -242,10 +242,18 @@ struct mdp_superblock_1 { __le32 chunksize; /* in 512byte sectors */ __le32 raid_disks; - __le32 bitmap_offset; /* sectors after start of superblock that bitmap starts - * NOTE: 
signed, so bitmap can be before superblock - * only meaningful of feature_map[0] is set. - */ + union { + __le32 bitmap_offset; /* sectors after start of superblock that bitmap starts + * NOTE: signed, so bitmap can be before superblock + * only meaningful of feature_map[0] is set. + */ + + /* only meaningful when feature_map[MD_FEATURE_PPL] is set */ + struct { + __le16 offset; /* sectors from start of superblock that ppl starts (signed) */ + __le16 size; /* ppl size in sectors */ + } ppl; + }; /* These are only valid with feature bit '4' */ __le32 new_level; /* new level we are reshaping to */ @@ -318,6 +326,7 @@ struct mdp_superblock_1 { */ #define MD_FEATURE_CLUSTERED 256 /* clustered MD */ #define MD_FEATURE_JOURNAL 512 /* support write cache */ +#define MD_FEATURE_PPL 1024 /* support PPL */ #define MD_FEATURE_ALL (MD_FEATURE_BITMAP_OFFSET \ |MD_FEATURE_RECOVERY_OFFSET \ |MD_FEATURE_RESHAPE_ACTIVE \ @@ -328,6 +337,7 @@ struct mdp_superblock_1 { |MD_FEATURE_RECOVERY_BITMAP \ |MD_FEATURE_CLUSTERED \ |MD_FEATURE_JOURNAL \ + |MD_FEATURE_PPL \ ) struct r5l_payload_header { -- cgit v1.2.3-71-gd317 From 3418d036c81dcb604b7c7c71b209d5890a8418aa Mon Sep 17 00:00:00 2001 From: Artur Paszkiewicz Date: Thu, 9 Mar 2017 09:59:59 +0100 Subject: raid5-ppl: Partial Parity Log write logging implementation Implement the calculation of partial parity for a stripe and PPL write logging functionality. The description of PPL is added to the documentation. More details can be found in the comments in raid5-ppl.c. Attach a page for holding the partial parity data to stripe_head. Allocate it only if mddev has the MD_HAS_PPL flag set. Partial parity is the xor of not modified data chunks of a stripe and is calculated as follows: - reconstruct-write case: xor data from all not updated disks in a stripe - read-modify-write case: xor old data and parity from all updated disks in a stripe Implement it using the async_tx API and integrate into raid_run_ops(). 
It must be called when we still have access to old data, so do it when STRIPE_OP_BIODRAIN is set, but before ops_run_prexor5(). The result is stored into sh->ppl_page. Partial parity is not meaningful for full stripe write and is not stored in the log or used for recovery, so don't attempt to calculate it when stripe has STRIPE_FULL_WRITE. Put the PPL metadata structures to md_p.h because userspace tools (mdadm) will also need to read/write PPL. Warn about using PPL with enabled disk volatile write-back cache for now. It can be removed once disk cache flushing before writing PPL is implemented. Signed-off-by: Artur Paszkiewicz Signed-off-by: Shaohua Li --- Documentation/md/raid5-ppl.txt | 44 +++ drivers/md/Makefile | 2 +- drivers/md/raid5-log.h | 24 ++ drivers/md/raid5-ppl.c | 703 +++++++++++++++++++++++++++++++++++++++++ drivers/md/raid5.c | 64 +++- drivers/md/raid5.h | 10 +- include/uapi/linux/raid/md_p.h | 27 ++ 7 files changed, 869 insertions(+), 5 deletions(-) create mode 100644 Documentation/md/raid5-ppl.txt create mode 100644 drivers/md/raid5-ppl.c (limited to 'include/uapi') diff --git a/Documentation/md/raid5-ppl.txt b/Documentation/md/raid5-ppl.txt new file mode 100644 index 000000000000..127072b09363 --- /dev/null +++ b/Documentation/md/raid5-ppl.txt @@ -0,0 +1,44 @@ +Partial Parity Log + +Partial Parity Log (PPL) is a feature available for RAID5 arrays. The issue +addressed by PPL is that after a dirty shutdown, parity of a particular stripe +may become inconsistent with data on other member disks. If the array is also +in degraded state, there is no way to recalculate parity, because one of the +disks is missing. This can lead to silent data corruption when rebuilding the +array or using it as degraded - data calculated from parity for array blocks +that have not been touched by a write request during the unclean shutdown can +be incorrect. Such condition is known as the RAID5 Write Hole.
Because of +this, md by default does not allow starting a dirty degraded array. + +Partial parity for a write operation is the XOR of stripe data chunks not +modified by this write. It is just enough data needed for recovering from the +write hole. XORing partial parity with the modified chunks produces parity for +the stripe, consistent with its state before the write operation, regardless of +which chunk writes have completed. If one of the not modified data disks of +this stripe is missing, this updated parity can be used to recover its +contents. PPL recovery is also performed when starting an array after an +unclean shutdown and all disks are available, eliminating the need to resync +the array. Because of this, using write-intent bitmap and PPL together is not +supported. + +When handling a write request PPL writes partial parity before new data and +parity are dispatched to disks. PPL is a distributed log - it is stored on +array member drives in the metadata area, on the parity drive of a particular +stripe. It does not require a dedicated journaling drive. Write performance is +reduced by up to 30%-40% but it scales with the number of drives in the array +and the journaling drive does not become a bottleneck or a single point of +failure. + +Unlike raid5-cache, the other solution in md for closing the write hole, PPL is +not a true journal. It does not protect from losing in-flight data, only from +silent data corruption. If a dirty disk of a stripe is lost, no PPL recovery is +performed for this stripe (parity is not updated). So it is possible to have +arbitrary data in the written part of a stripe if that disk is lost. In such +case the behavior is the same as in plain raid5. + +PPL is available for md version-1 metadata and external (specifically IMSM) +metadata arrays. It can be enabled using mdadm option --consistency-policy=ppl. + +Currently, volatile write-back cache should be disabled on all member drives +when using PPL. 
Otherwise it cannot guarantee consistency in case of power +failure. diff --git a/drivers/md/Makefile b/drivers/md/Makefile index 3cbda1af87a0..4d48714ccc6b 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -18,7 +18,7 @@ dm-cache-cleaner-y += dm-cache-policy-cleaner.o dm-era-y += dm-era-target.o dm-verity-y += dm-verity-target.o md-mod-y += md.o bitmap.o -raid456-y += raid5.o raid5-cache.o +raid456-y += raid5.o raid5-cache.o raid5-ppl.o # Note: link order is important. All raid personalities # and must come before md.o, as they each initialise diff --git a/drivers/md/raid5-log.h b/drivers/md/raid5-log.h index 2da4bd3bbd79..a67fb58513b9 100644 --- a/drivers/md/raid5-log.h +++ b/drivers/md/raid5-log.h @@ -31,6 +31,20 @@ extern struct md_sysfs_entry r5c_journal_mode; extern void r5c_update_on_rdev_error(struct mddev *mddev); extern bool r5c_big_stripe_cached(struct r5conf *conf, sector_t sect); +extern struct dma_async_tx_descriptor * +ops_run_partial_parity(struct stripe_head *sh, struct raid5_percpu *percpu, + struct dma_async_tx_descriptor *tx); +extern int ppl_init_log(struct r5conf *conf); +extern void ppl_exit_log(struct r5conf *conf); +extern int ppl_write_stripe(struct r5conf *conf, struct stripe_head *sh); +extern void ppl_write_stripe_run(struct r5conf *conf); +extern void ppl_stripe_write_finished(struct stripe_head *sh); + +static inline bool raid5_has_ppl(struct r5conf *conf) +{ + return test_bit(MD_HAS_PPL, &conf->mddev->flags); +} + static inline int log_stripe(struct stripe_head *sh, struct stripe_head_state *s) { struct r5conf *conf = sh->raid_conf; @@ -45,6 +59,8 @@ static inline int log_stripe(struct stripe_head *sh, struct stripe_head_state *s /* caching phase */ return r5c_cache_data(conf->log, sh); } + } else if (raid5_has_ppl(conf)) { + return ppl_write_stripe(conf, sh); } return -EAGAIN; @@ -56,24 +72,32 @@ static inline void log_stripe_write_finished(struct stripe_head *sh) if (conf->log) r5l_stripe_write_finished(sh); + else if 
(raid5_has_ppl(conf)) + ppl_stripe_write_finished(sh); } static inline void log_write_stripe_run(struct r5conf *conf) { if (conf->log) r5l_write_stripe_run(conf->log); + else if (raid5_has_ppl(conf)) + ppl_write_stripe_run(conf); } static inline void log_exit(struct r5conf *conf) { if (conf->log) r5l_exit_log(conf); + else if (raid5_has_ppl(conf)) + ppl_exit_log(conf); } static inline int log_init(struct r5conf *conf, struct md_rdev *journal_dev) { if (journal_dev) return r5l_init_log(conf, journal_dev); + else if (raid5_has_ppl(conf)) + return ppl_init_log(conf); return 0; } diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c new file mode 100644 index 000000000000..db5b72b11594 --- /dev/null +++ b/drivers/md/raid5-ppl.c @@ -0,0 +1,703 @@ +/* + * Partial Parity Log for closing the RAID5 write hole + * Copyright (c) 2017, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include "md.h" +#include "raid5.h" + +/* + * PPL consists of a 4KB header (struct ppl_header) and at least 128KB for + * partial parity data. The header contains an array of entries + * (struct ppl_header_entry) which describe the logged write requests. + * Partial parity for the entries comes after the header, written in the same + * sequence as the entries: + * + * Header + * entry0 + * ... + * entryN + * PP data + * PP for entry0 + * ... + * PP for entryN + * + * An entry describes one or more consecutive stripe_heads, up to a full + * stripe. 
The modified raid data chunks form an m-by-n matrix, where m is the + * number of stripe_heads in the entry and n is the number of modified data + * disks. Every stripe_head in the entry must write to the same data disks. + * An example of a valid case described by a single entry (writes to the first + * stripe of a 4 disk array, 16k chunk size): + * + * sh->sector dd0 dd1 dd2 ppl + * +-----+-----+-----+ + * 0 | --- | --- | --- | +----+ + * 8 | -W- | -W- | --- | | pp | data_sector = 8 + * 16 | -W- | -W- | --- | | pp | data_size = 3 * 2 * 4k + * 24 | -W- | -W- | --- | | pp | pp_size = 3 * 4k + * +-----+-----+-----+ +----+ + * + * data_sector is the first raid sector of the modified data, data_size is the + * total size of modified data and pp_size is the size of partial parity for + * this entry. Entries for full stripe writes contain no partial parity + * (pp_size = 0), they only mark the stripes for which parity should be + * recalculated after an unclean shutdown. Every entry holds a checksum of its + * partial parity, the header also has a checksum of the header itself. + * + * A write request is always logged to the PPL instance stored on the parity + * disk of the corresponding stripe. For each member disk there is one ppl_log + * used to handle logging for this disk, independently from others. They are + * grouped in child_logs array in struct ppl_conf, which is assigned to + * r5conf->log_private. + * + * ppl_io_unit represents a full PPL write, header_page contains the ppl_header. + * PPL entries for logged stripes are added in ppl_log_stripe(). A stripe_head + * can be appended to the last entry if it meets the conditions for a valid + * entry described above, otherwise a new entry is added. Checksums of entries + * are calculated incrementally as stripes containing partial parity are being + * added.
ppl_submit_iounit() calculates the checksum of the header and submits + * a bio containing the header page and partial parity pages (sh->ppl_page) for + * all stripes of the io_unit. When the PPL write completes, the stripes + * associated with the io_unit are released and raid5d starts writing their data + * and parity. When all stripes are written, the io_unit is freed and the next + * can be submitted. + * + * An io_unit is used to gather stripes until it is submitted or becomes full + * (if the maximum number of entries or size of PPL is reached). Another io_unit + * can't be submitted until the previous has completed (PPL and stripe + * data+parity is written). The log->io_list tracks all io_units of a log + * (for a single member disk). New io_units are added to the end of the list + * and the first io_unit is submitted, if it is not submitted already. + * The current io_unit accepting new stripes is always at the end of the list. + */ + +struct ppl_conf { + struct mddev *mddev; + + /* array of child logs, one for each raid disk */ + struct ppl_log *child_logs; + int count; + + int block_size; /* the logical block size used for data_sector + * in ppl_header_entry */ + u32 signature; /* raid array identifier */ + atomic64_t seq; /* current log write sequence number */ + + struct kmem_cache *io_kc; + mempool_t *io_pool; + struct bio_set *bs; + mempool_t *meta_pool; +}; + +struct ppl_log { + struct ppl_conf *ppl_conf; /* shared between all log instances */ + + struct md_rdev *rdev; /* array member disk associated with + * this log instance */ + struct mutex io_mutex; + struct ppl_io_unit *current_io; /* current io_unit accepting new data + * always at the end of io_list */ + spinlock_t io_list_lock; + struct list_head io_list; /* all io_units of this log */ + struct list_head no_mem_stripes;/* stripes to retry if failed to + * allocate io_unit */ +}; + +#define PPL_IO_INLINE_BVECS 32 + +struct ppl_io_unit { + struct ppl_log *log; + + struct page *header_page; /* 
for ppl_header */ + + unsigned int entries_count; /* number of entries in ppl_header */ + unsigned int pp_size; /* total size current of partial parity */ + + u64 seq; /* sequence number of this log write */ + struct list_head log_sibling; /* log->io_list */ + + struct list_head stripe_list; /* stripes added to the io_unit */ + atomic_t pending_stripes; /* how many stripes not written to raid */ + + bool submitted; /* true if write to log started */ + + /* inline bio and its biovec for submitting the iounit */ + struct bio bio; + struct bio_vec biovec[PPL_IO_INLINE_BVECS]; +}; + +struct dma_async_tx_descriptor * +ops_run_partial_parity(struct stripe_head *sh, struct raid5_percpu *percpu, + struct dma_async_tx_descriptor *tx) +{ + int disks = sh->disks; + struct page **xor_srcs = flex_array_get(percpu->scribble, 0); + int count = 0, pd_idx = sh->pd_idx, i; + struct async_submit_ctl submit; + + pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector); + + /* + * Partial parity is the XOR of stripe data chunks that are not changed + * during the write request. Depending on available data + * (read-modify-write vs. reconstruct-write case) we calculate it + * differently. 
+ */ + if (sh->reconstruct_state == reconstruct_state_prexor_drain_run) { + /* rmw: xor old data and parity from updated disks */ + for (i = disks; i--;) { + struct r5dev *dev = &sh->dev[i]; + if (test_bit(R5_Wantdrain, &dev->flags) || i == pd_idx) + xor_srcs[count++] = dev->page; + } + } else if (sh->reconstruct_state == reconstruct_state_drain_run) { + /* rcw: xor data from all not updated disks */ + for (i = disks; i--;) { + struct r5dev *dev = &sh->dev[i]; + if (test_bit(R5_UPTODATE, &dev->flags)) + xor_srcs[count++] = dev->page; + } + } else { + return tx; + } + + init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, tx, + NULL, sh, flex_array_get(percpu->scribble, 0) + + sizeof(struct page *) * (sh->disks + 2)); + + if (count == 1) + tx = async_memcpy(sh->ppl_page, xor_srcs[0], 0, 0, PAGE_SIZE, + &submit); + else + tx = async_xor(sh->ppl_page, xor_srcs, 0, count, PAGE_SIZE, + &submit); + + return tx; +} + +static struct ppl_io_unit *ppl_new_iounit(struct ppl_log *log, + struct stripe_head *sh) +{ + struct ppl_conf *ppl_conf = log->ppl_conf; + struct ppl_io_unit *io; + struct ppl_header *pplhdr; + + io = mempool_alloc(ppl_conf->io_pool, GFP_ATOMIC); + if (!io) + return NULL; + + memset(io, 0, sizeof(*io)); + io->log = log; + INIT_LIST_HEAD(&io->log_sibling); + INIT_LIST_HEAD(&io->stripe_list); + atomic_set(&io->pending_stripes, 0); + bio_init(&io->bio, io->biovec, PPL_IO_INLINE_BVECS); + + io->header_page = mempool_alloc(ppl_conf->meta_pool, GFP_NOIO); + pplhdr = page_address(io->header_page); + clear_page(pplhdr); + memset(pplhdr->reserved, 0xff, PPL_HDR_RESERVED); + pplhdr->signature = cpu_to_le32(ppl_conf->signature); + + io->seq = atomic64_add_return(1, &ppl_conf->seq); + pplhdr->generation = cpu_to_le64(io->seq); + + return io; +} + +static int ppl_log_stripe(struct ppl_log *log, struct stripe_head *sh) +{ + struct ppl_io_unit *io = log->current_io; + struct ppl_header_entry *e = NULL; + struct ppl_header *pplhdr; + int i; + sector_t 
data_sector = 0; + int data_disks = 0; + unsigned int entry_space = (log->rdev->ppl.size << 9) - PPL_HEADER_SIZE; + struct r5conf *conf = sh->raid_conf; + + pr_debug("%s: stripe: %llu\n", __func__, (unsigned long long)sh->sector); + + /* check if current io_unit is full */ + if (io && (io->pp_size == entry_space || + io->entries_count == PPL_HDR_MAX_ENTRIES)) { + pr_debug("%s: add io_unit blocked by seq: %llu\n", + __func__, io->seq); + io = NULL; + } + + /* add a new unit if there is none or the current is full */ + if (!io) { + io = ppl_new_iounit(log, sh); + if (!io) + return -ENOMEM; + spin_lock_irq(&log->io_list_lock); + list_add_tail(&io->log_sibling, &log->io_list); + spin_unlock_irq(&log->io_list_lock); + + log->current_io = io; + } + + for (i = 0; i < sh->disks; i++) { + struct r5dev *dev = &sh->dev[i]; + + if (i != sh->pd_idx && test_bit(R5_Wantwrite, &dev->flags)) { + if (!data_disks || dev->sector < data_sector) + data_sector = dev->sector; + data_disks++; + } + } + BUG_ON(!data_disks); + + pr_debug("%s: seq: %llu data_sector: %llu data_disks: %d\n", __func__, + io->seq, (unsigned long long)data_sector, data_disks); + + pplhdr = page_address(io->header_page); + + if (io->entries_count > 0) { + struct ppl_header_entry *last = + &pplhdr->entries[io->entries_count - 1]; + struct stripe_head *sh_last = list_last_entry( + &io->stripe_list, struct stripe_head, log_list); + u64 data_sector_last = le64_to_cpu(last->data_sector); + u32 data_size_last = le32_to_cpu(last->data_size); + + /* + * Check if we can append the stripe to the last entry. It must + * be just after the last logged stripe and write to the same + * disks. Use bit shift and logarithm to avoid 64-bit division. 
+ */ + if ((sh->sector == sh_last->sector + STRIPE_SECTORS) && + (data_sector >> ilog2(conf->chunk_sectors) == + data_sector_last >> ilog2(conf->chunk_sectors)) && + ((data_sector - data_sector_last) * data_disks == + data_size_last >> 9)) + e = last; + } + + if (!e) { + e = &pplhdr->entries[io->entries_count++]; + e->data_sector = cpu_to_le64(data_sector); + e->parity_disk = cpu_to_le32(sh->pd_idx); + e->checksum = cpu_to_le32(~0); + } + + le32_add_cpu(&e->data_size, data_disks << PAGE_SHIFT); + + /* don't write any PP if full stripe write */ + if (!test_bit(STRIPE_FULL_WRITE, &sh->state)) { + le32_add_cpu(&e->pp_size, PAGE_SIZE); + io->pp_size += PAGE_SIZE; + e->checksum = cpu_to_le32(crc32c_le(le32_to_cpu(e->checksum), + page_address(sh->ppl_page), + PAGE_SIZE)); + } + + list_add_tail(&sh->log_list, &io->stripe_list); + atomic_inc(&io->pending_stripes); + sh->ppl_io = io; + + return 0; +} + +int ppl_write_stripe(struct r5conf *conf, struct stripe_head *sh) +{ + struct ppl_conf *ppl_conf = conf->log_private; + struct ppl_io_unit *io = sh->ppl_io; + struct ppl_log *log; + + if (io || test_bit(STRIPE_SYNCING, &sh->state) || + !test_bit(R5_Wantwrite, &sh->dev[sh->pd_idx].flags) || + !test_bit(R5_Insync, &sh->dev[sh->pd_idx].flags)) { + clear_bit(STRIPE_LOG_TRAPPED, &sh->state); + return -EAGAIN; + } + + log = &ppl_conf->child_logs[sh->pd_idx]; + + mutex_lock(&log->io_mutex); + + if (!log->rdev || test_bit(Faulty, &log->rdev->flags)) { + mutex_unlock(&log->io_mutex); + return -EAGAIN; + } + + set_bit(STRIPE_LOG_TRAPPED, &sh->state); + clear_bit(STRIPE_DELAYED, &sh->state); + atomic_inc(&sh->count); + + if (ppl_log_stripe(log, sh)) { + spin_lock_irq(&log->io_list_lock); + list_add_tail(&sh->log_list, &log->no_mem_stripes); + spin_unlock_irq(&log->io_list_lock); + } + + mutex_unlock(&log->io_mutex); + + return 0; +} + +static void ppl_log_endio(struct bio *bio) +{ + struct ppl_io_unit *io = bio->bi_private; + struct ppl_log *log = io->log; + struct ppl_conf *ppl_conf = 
log->ppl_conf; + struct stripe_head *sh, *next; + + pr_debug("%s: seq: %llu\n", __func__, io->seq); + + if (bio->bi_error) + md_error(ppl_conf->mddev, log->rdev); + + mempool_free(io->header_page, ppl_conf->meta_pool); + + list_for_each_entry_safe(sh, next, &io->stripe_list, log_list) { + list_del_init(&sh->log_list); + + set_bit(STRIPE_HANDLE, &sh->state); + raid5_release_stripe(sh); + } +} + +static void ppl_submit_iounit_bio(struct ppl_io_unit *io, struct bio *bio) +{ + char b[BDEVNAME_SIZE]; + + pr_debug("%s: seq: %llu size: %u sector: %llu dev: %s\n", + __func__, io->seq, bio->bi_iter.bi_size, + (unsigned long long)bio->bi_iter.bi_sector, + bdevname(bio->bi_bdev, b)); + + submit_bio(bio); +} + +static void ppl_submit_iounit(struct ppl_io_unit *io) +{ + struct ppl_log *log = io->log; + struct ppl_conf *ppl_conf = log->ppl_conf; + struct ppl_header *pplhdr = page_address(io->header_page); + struct bio *bio = &io->bio; + struct stripe_head *sh; + int i; + + for (i = 0; i < io->entries_count; i++) { + struct ppl_header_entry *e = &pplhdr->entries[i]; + + pr_debug("%s: seq: %llu entry: %d data_sector: %llu pp_size: %u data_size: %u\n", + __func__, io->seq, i, le64_to_cpu(e->data_sector), + le32_to_cpu(e->pp_size), le32_to_cpu(e->data_size)); + + e->data_sector = cpu_to_le64(le64_to_cpu(e->data_sector) >> + ilog2(ppl_conf->block_size >> 9)); + e->checksum = cpu_to_le32(~le32_to_cpu(e->checksum)); + } + + pplhdr->entries_count = cpu_to_le32(io->entries_count); + pplhdr->checksum = cpu_to_le32(~crc32c_le(~0, pplhdr, PPL_HEADER_SIZE)); + + bio->bi_private = io; + bio->bi_end_io = ppl_log_endio; + bio->bi_opf = REQ_OP_WRITE | REQ_FUA; + bio->bi_bdev = log->rdev->bdev; + bio->bi_iter.bi_sector = log->rdev->ppl.sector; + bio_add_page(bio, io->header_page, PAGE_SIZE, 0); + + list_for_each_entry(sh, &io->stripe_list, log_list) { + /* entries for full stripe writes have no partial parity */ + if (test_bit(STRIPE_FULL_WRITE, &sh->state)) + continue; + + if (!bio_add_page(bio, 
sh->ppl_page, PAGE_SIZE, 0)) { + struct bio *prev = bio; + + bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_PAGES, + ppl_conf->bs); + bio->bi_opf = prev->bi_opf; + bio->bi_bdev = prev->bi_bdev; + bio->bi_iter.bi_sector = bio_end_sector(prev); + bio_add_page(bio, sh->ppl_page, PAGE_SIZE, 0); + + bio_chain(bio, prev); + ppl_submit_iounit_bio(io, prev); + } + } + + ppl_submit_iounit_bio(io, bio); +} + +static void ppl_submit_current_io(struct ppl_log *log) +{ + struct ppl_io_unit *io; + + spin_lock_irq(&log->io_list_lock); + + io = list_first_entry_or_null(&log->io_list, struct ppl_io_unit, + log_sibling); + if (io && io->submitted) + io = NULL; + + spin_unlock_irq(&log->io_list_lock); + + if (io) { + io->submitted = true; + + if (io == log->current_io) + log->current_io = NULL; + + ppl_submit_iounit(io); + } +} + +void ppl_write_stripe_run(struct r5conf *conf) +{ + struct ppl_conf *ppl_conf = conf->log_private; + struct ppl_log *log; + int i; + + for (i = 0; i < ppl_conf->count; i++) { + log = &ppl_conf->child_logs[i]; + + mutex_lock(&log->io_mutex); + ppl_submit_current_io(log); + mutex_unlock(&log->io_mutex); + } +} + +static void ppl_io_unit_finished(struct ppl_io_unit *io) +{ + struct ppl_log *log = io->log; + unsigned long flags; + + pr_debug("%s: seq: %llu\n", __func__, io->seq); + + spin_lock_irqsave(&log->io_list_lock, flags); + + list_del(&io->log_sibling); + mempool_free(io, log->ppl_conf->io_pool); + + if (!list_empty(&log->no_mem_stripes)) { + struct stripe_head *sh = list_first_entry(&log->no_mem_stripes, + struct stripe_head, + log_list); + list_del_init(&sh->log_list); + set_bit(STRIPE_HANDLE, &sh->state); + raid5_release_stripe(sh); + } + + spin_unlock_irqrestore(&log->io_list_lock, flags); +} + +void ppl_stripe_write_finished(struct stripe_head *sh) +{ + struct ppl_io_unit *io; + + io = sh->ppl_io; + sh->ppl_io = NULL; + + if (io && atomic_dec_and_test(&io->pending_stripes)) + ppl_io_unit_finished(io); +} + +static void __ppl_exit_log(struct ppl_conf 
*ppl_conf) +{ + clear_bit(MD_HAS_PPL, &ppl_conf->mddev->flags); + + kfree(ppl_conf->child_logs); + + mempool_destroy(ppl_conf->meta_pool); + if (ppl_conf->bs) + bioset_free(ppl_conf->bs); + mempool_destroy(ppl_conf->io_pool); + kmem_cache_destroy(ppl_conf->io_kc); + + kfree(ppl_conf); +} + +void ppl_exit_log(struct r5conf *conf) +{ + struct ppl_conf *ppl_conf = conf->log_private; + + if (ppl_conf) { + __ppl_exit_log(ppl_conf); + conf->log_private = NULL; + } +} + +static int ppl_validate_rdev(struct md_rdev *rdev) +{ + char b[BDEVNAME_SIZE]; + int ppl_data_sectors; + int ppl_size_new; + + /* + * The configured PPL size must be enough to store + * the header and (at the very least) partial parity + * for one stripe. Round it down to ensure the data + * space is cleanly divisible by stripe size. + */ + ppl_data_sectors = rdev->ppl.size - (PPL_HEADER_SIZE >> 9); + + if (ppl_data_sectors > 0) + ppl_data_sectors = rounddown(ppl_data_sectors, STRIPE_SECTORS); + + if (ppl_data_sectors <= 0) { + pr_warn("md/raid:%s: PPL space too small on %s\n", + mdname(rdev->mddev), bdevname(rdev->bdev, b)); + return -ENOSPC; + } + + ppl_size_new = ppl_data_sectors + (PPL_HEADER_SIZE >> 9); + + if ((rdev->ppl.sector < rdev->data_offset && + rdev->ppl.sector + ppl_size_new > rdev->data_offset) || + (rdev->ppl.sector >= rdev->data_offset && + rdev->data_offset + rdev->sectors > rdev->ppl.sector)) { + pr_warn("md/raid:%s: PPL space overlaps with data on %s\n", + mdname(rdev->mddev), bdevname(rdev->bdev, b)); + return -EINVAL; + } + + if (!rdev->mddev->external && + ((rdev->ppl.offset > 0 && rdev->ppl.offset < (rdev->sb_size >> 9)) || + (rdev->ppl.offset <= 0 && rdev->ppl.offset + ppl_size_new > 0))) { + pr_warn("md/raid:%s: PPL space overlaps with superblock on %s\n", + mdname(rdev->mddev), bdevname(rdev->bdev, b)); + return -EINVAL; + } + + rdev->ppl.size = ppl_size_new; + + return 0; +} + +int ppl_init_log(struct r5conf *conf) +{ + struct ppl_conf *ppl_conf; + struct mddev *mddev = 
conf->mddev; + int ret = 0; + int i; + /* Must start out false: the per-rdev loop that follows only ever sets this flag, and it is tested unconditionally before returning. */ + bool need_cache_flush = false; + + pr_debug("md/raid:%s: enabling distributed Partial Parity Log\n", + mdname(conf->mddev)); + + if (PAGE_SIZE != 4096) + return -EINVAL; + + if (mddev->level != 5) { + pr_warn("md/raid:%s PPL is not compatible with raid level %d\n", + mdname(mddev), mddev->level); + return -EINVAL; + } + + if (mddev->bitmap_info.file || mddev->bitmap_info.offset) { + pr_warn("md/raid:%s PPL is not compatible with bitmap\n", + mdname(mddev)); + return -EINVAL; + } + + if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) { + pr_warn("md/raid:%s PPL is not compatible with journal\n", + mdname(mddev)); + return -EINVAL; + } + + ppl_conf = kzalloc(sizeof(struct ppl_conf), GFP_KERNEL); + if (!ppl_conf) + return -ENOMEM; + + ppl_conf->mddev = mddev; + + /* Failures to create the cache, pools and bioset are reported as -ENOMEM, consistent with the kzalloc/kcalloc failure paths in this function. */ + ppl_conf->io_kc = KMEM_CACHE(ppl_io_unit, 0); + if (!ppl_conf->io_kc) { + ret = -ENOMEM; + goto err; + } + + ppl_conf->io_pool = mempool_create_slab_pool(conf->raid_disks, ppl_conf->io_kc); + if (!ppl_conf->io_pool) { + ret = -ENOMEM; + goto err; + } + + ppl_conf->bs = bioset_create(conf->raid_disks, 0); + if (!ppl_conf->bs) { + ret = -ENOMEM; + goto err; + } + + ppl_conf->meta_pool = mempool_create_page_pool(conf->raid_disks, 0); + if (!ppl_conf->meta_pool) { + ret = -ENOMEM; + goto err; + } + + ppl_conf->count = conf->raid_disks; + ppl_conf->child_logs = kcalloc(ppl_conf->count, sizeof(struct ppl_log), + GFP_KERNEL); + if (!ppl_conf->child_logs) { + ret = -ENOMEM; + goto err; + } + + atomic64_set(&ppl_conf->seq, 0); + + if (!mddev->external) { + ppl_conf->signature = ~crc32c_le(~0, mddev->uuid, sizeof(mddev->uuid)); + ppl_conf->block_size = 512; + } else { + ppl_conf->block_size = queue_logical_block_size(mddev->queue); + } + + for (i = 0; i < ppl_conf->count; i++) { + struct ppl_log *log = &ppl_conf->child_logs[i]; + struct md_rdev *rdev = conf->disks[i].rdev; + + mutex_init(&log->io_mutex); + spin_lock_init(&log->io_list_lock); + INIT_LIST_HEAD(&log->io_list); +
INIT_LIST_HEAD(&log->no_mem_stripes); + + log->ppl_conf = ppl_conf; + log->rdev = rdev; + + if (rdev) { + struct request_queue *q; + + ret = ppl_validate_rdev(rdev); + if (ret) + goto err; + + q = bdev_get_queue(rdev->bdev); + if (test_bit(QUEUE_FLAG_WC, &q->queue_flags)) + need_cache_flush = true; + } + } + + if (need_cache_flush) + pr_warn("md/raid:%s: Volatile write-back cache should be disabled on all member drives when using PPL!\n", + mdname(mddev)); + + conf->log_private = ppl_conf; + + return 0; +err: + __ppl_exit_log(ppl_conf); + return ret; +} diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index f575f40d2acb..6b86e0826afe 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -482,6 +482,11 @@ static void shrink_buffers(struct stripe_head *sh) sh->dev[i].page = NULL; put_page(p); } + + if (sh->ppl_page) { + put_page(sh->ppl_page); + sh->ppl_page = NULL; + } } static int grow_buffers(struct stripe_head *sh, gfp_t gfp) @@ -498,6 +503,13 @@ static int grow_buffers(struct stripe_head *sh, gfp_t gfp) sh->dev[i].page = page; sh->dev[i].orig_page = page; } + + if (raid5_has_ppl(sh->raid_conf)) { + sh->ppl_page = alloc_page(gfp); + if (!sh->ppl_page) + return 1; + } + return 0; } @@ -746,7 +758,7 @@ static bool stripe_can_batch(struct stripe_head *sh) { struct r5conf *conf = sh->raid_conf; - if (conf->log) + if (conf->log || raid5_has_ppl(conf)) return false; return test_bit(STRIPE_BATCH_READY, &sh->state) && !test_bit(STRIPE_BITMAP_PENDING, &sh->state) && @@ -2093,6 +2105,9 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) async_tx_ack(tx); } + if (test_bit(STRIPE_OP_PARTIAL_PARITY, &ops_request)) + tx = ops_run_partial_parity(sh, percpu, tx); + if (test_bit(STRIPE_OP_PREXOR, &ops_request)) { if (level < 6) tx = ops_run_prexor5(sh, percpu, tx); @@ -3168,6 +3183,12 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s, s->locked++; } + if (raid5_has_ppl(sh->raid_conf) && + 
test_bit(STRIPE_OP_BIODRAIN, &s->ops_request) && + !test_bit(STRIPE_FULL_WRITE, &sh->state) && + test_bit(R5_Insync, &sh->dev[pd_idx].flags)) + set_bit(STRIPE_OP_PARTIAL_PARITY, &s->ops_request); + pr_debug("%s: stripe %llu locked: %d ops_request: %lx\n", __func__, (unsigned long long)sh->sector, s->locked, s->ops_request); @@ -3215,6 +3236,36 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, if (*bip && (*bip)->bi_iter.bi_sector < bio_end_sector(bi)) goto overlap; + if (forwrite && raid5_has_ppl(conf)) { + /* + * With PPL only writes to consecutive data chunks within a + * stripe are allowed because for a single stripe_head we can + * only have one PPL entry at a time, which describes one data + * range. Not really an overlap, but wait_for_overlap can be + * used to handle this. + */ + sector_t sector; + sector_t first = 0; + sector_t last = 0; + int count = 0; + int i; + + for (i = 0; i < sh->disks; i++) { + if (i != sh->pd_idx && + (i == dd_idx || sh->dev[i].towrite)) { + sector = sh->dev[i].sector; + if (count == 0 || sector < first) + first = sector; + if (sector > last) + last = sector; + count++; + } + } + + if (first + conf->chunk_sectors * (count - 1) != last) + goto overlap; + } + if (!forwrite || previous) clear_bit(STRIPE_BATCH_READY, &sh->state); @@ -7208,6 +7259,13 @@ static int raid5_run(struct mddev *mddev) BUG_ON(mddev->delta_disks != 0); } + if (test_bit(MD_HAS_JOURNAL, &mddev->flags) && + test_bit(MD_HAS_PPL, &mddev->flags)) { + pr_warn("md/raid:%s: using journal device and PPL not allowed - disabling PPL\n", + mdname(mddev)); + clear_bit(MD_HAS_PPL, &mddev->flags); + } + if (mddev->private == NULL) conf = setup_conf(mddev); else @@ -7689,7 +7747,7 @@ static int raid5_resize(struct mddev *mddev, sector_t sectors) sector_t newsize; struct r5conf *conf = mddev->private; - if (conf->log) + if (conf->log || raid5_has_ppl(conf)) return -EINVAL; sectors &= ~((sector_t)conf->chunk_sectors - 1); newsize = raid5_size(mddev, 
sectors, mddev->raid_disks); @@ -7740,7 +7798,7 @@ static int check_reshape(struct mddev *mddev) { struct r5conf *conf = mddev->private; - if (conf->log) + if (conf->log || raid5_has_ppl(conf)) return -EINVAL; if (mddev->delta_disks == 0 && mddev->new_layout == mddev->layout && diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 6dd295a80ee1..ba5b7a3790af 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -224,10 +224,16 @@ struct stripe_head { spinlock_t batch_lock; /* only header's lock is useful */ struct list_head batch_list; /* protected by head's batch lock*/ - struct r5l_io_unit *log_io; + union { + struct r5l_io_unit *log_io; + struct ppl_io_unit *ppl_io; + }; + struct list_head log_list; sector_t log_start; /* first meta block on the journal */ struct list_head r5c; /* for r5c_cache->stripe_in_journal */ + + struct page *ppl_page; /* partial parity of this stripe */ /** * struct stripe_operations * @target - STRIPE_OP_COMPUTE_BLK target @@ -400,6 +406,7 @@ enum { STRIPE_OP_BIODRAIN, STRIPE_OP_RECONSTRUCT, STRIPE_OP_CHECK, + STRIPE_OP_PARTIAL_PARITY, }; /* @@ -696,6 +703,7 @@ struct r5conf { int group_cnt; int worker_cnt_per_group; struct r5l_log *log; + void *log_private; spinlock_t pending_bios_lock; bool batch_bio_dispatch; diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h index fe2112810c43..d9a1ead867b9 100644 --- a/include/uapi/linux/raid/md_p.h +++ b/include/uapi/linux/raid/md_p.h @@ -398,4 +398,31 @@ struct r5l_meta_block { #define R5LOG_VERSION 0x1 #define R5LOG_MAGIC 0x6433c509 + +struct ppl_header_entry { + __le64 data_sector; /* raid sector of the new data */ + __le32 pp_size; /* length of partial parity */ + __le32 data_size; /* length of data */ + __le32 parity_disk; /* member disk containing parity */ + __le32 checksum; /* checksum of partial parity data for this + * entry (~crc32c) */ +} __attribute__ ((__packed__)); + +#define PPL_HEADER_SIZE 4096 +#define PPL_HDR_RESERVED 512 +#define 
PPL_HDR_ENTRY_SPACE \ + (PPL_HEADER_SIZE - PPL_HDR_RESERVED - 4 * sizeof(u32) - sizeof(u64)) +#define PPL_HDR_MAX_ENTRIES \ + (PPL_HDR_ENTRY_SPACE / sizeof(struct ppl_header_entry)) + +struct ppl_header { + __u8 reserved[PPL_HDR_RESERVED];/* reserved space, fill with 0xff */ + __le32 signature; /* signature (family number of volume) */ + __le32 padding; /* zero pad */ + __le64 generation; /* generation number of the header */ + __le32 entries_count; /* number of entries in entry array */ + __le32 checksum; /* checksum of the header (~crc32c) */ + struct ppl_header_entry entries[PPL_HDR_MAX_ENTRIES]; +} __attribute__ ((__packed__)); + #endif -- cgit v1.2.3-71-gd317 From 4396e46187ca5070219b81773c4e65088dac50cc Mon Sep 17 00:00:00 2001 From: Soheil Hassas Yeganeh Date: Wed, 15 Mar 2017 16:30:46 -0400 Subject: tcp: remove tcp_tw_recycle The tcp_tw_recycle was already broken for connections behind NAT, since the per-destination timestamp is not monotonically increasing for multiple machines behind a single destination address. After the randomization of TCP timestamp offsets in commit 8a5bd45f6616 (tcp: randomize tcp timestamp offsets for each connection), the tcp_tw_recycle is broken for all types of connections for the same reason: the timestamps received from a single machine is not monotonically increasing, anymore. Remove tcp_tw_recycle, since it is not functional. Also, remove the PAWSPassive SNMP counter since it is only used for tcp_tw_recycle, and simplify tcp_v4_route_req and tcp_v6_route_req since the strict argument is only set when tcp_tw_recycle is enabled. Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: Eric Dumazet Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Cc: Lutz Vieweg Cc: Florian Westphal Signed-off-by: David S. 
Miller --- Documentation/networking/ip-sysctl.txt | 5 ----- include/net/netns/ipv4.h | 1 - include/net/tcp.h | 3 +-- include/uapi/linux/snmp.h | 1 - net/ipv4/proc.c | 1 - net/ipv4/sysctl_net_ipv4.c | 7 ------- net/ipv4/tcp_input.c | 30 +++++------------------------- net/ipv4/tcp_ipv4.c | 15 ++------------- net/ipv6/tcp_ipv6.c | 5 +---- 9 files changed, 9 insertions(+), 59 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index ab0230461377..ed3d0791eb27 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -640,11 +640,6 @@ tcp_tso_win_divisor - INTEGER building larger TSO frames. Default: 3 -tcp_tw_recycle - BOOLEAN - Enable fast recycling TIME-WAIT sockets. Default value is 0. - It should not be changed without advice/request of technical - experts. - tcp_tw_reuse - BOOLEAN Allow to reuse TIME-WAIT sockets for new connections when it is safe from protocol viewpoint. Default value is 0. 
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 622d2da27135..2e9d649ba169 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -33,7 +33,6 @@ struct inet_timewait_death_row { atomic_t tw_count; struct inet_hashinfo *hashinfo ____cacheline_aligned_in_smp; - int sysctl_tw_recycle; int sysctl_max_tw_buckets; }; diff --git a/include/net/tcp.h b/include/net/tcp.h index c81f3b958d44..e614ad4d613e 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1810,8 +1810,7 @@ struct tcp_request_sock_ops { __u16 *mss); #endif struct dst_entry *(*route_req)(const struct sock *sk, struct flowi *fl, - const struct request_sock *req, - bool *strict); + const struct request_sock *req); __u32 (*init_seq_tsoff)(const struct sk_buff *skb, u32 *tsoff); int (*send_synack)(const struct sock *sk, struct dst_entry *dst, struct flowi *fl, struct request_sock *req, diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index 3b2bed7ca9a4..cec0e171d20c 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -177,7 +177,6 @@ enum LINUX_MIB_TIMEWAITED, /* TimeWaited */ LINUX_MIB_TIMEWAITRECYCLED, /* TimeWaitRecycled */ LINUX_MIB_TIMEWAITKILLED, /* TimeWaitKilled */ - LINUX_MIB_PAWSPASSIVEREJECTED, /* PAWSPassiveRejected */ LINUX_MIB_PAWSACTIVEREJECTED, /* PAWSActiveRejected */ LINUX_MIB_PAWSESTABREJECTED, /* PAWSEstabRejected */ LINUX_MIB_DELAYEDACKS, /* DelayedACKs */ diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 69cf49e8356d..4ccbf464d1ac 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -199,7 +199,6 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TW", LINUX_MIB_TIMEWAITED), SNMP_MIB_ITEM("TWRecycled", LINUX_MIB_TIMEWAITRECYCLED), SNMP_MIB_ITEM("TWKilled", LINUX_MIB_TIMEWAITKILLED), - SNMP_MIB_ITEM("PAWSPassive", LINUX_MIB_PAWSPASSIVEREJECTED), SNMP_MIB_ITEM("PAWSActive", LINUX_MIB_PAWSACTIVEREJECTED), SNMP_MIB_ITEM("PAWSEstab", LINUX_MIB_PAWSESTABREJECTED), SNMP_MIB_ITEM("DelayedACKs", 
LINUX_MIB_DELAYEDACKS), diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index d6880a6149ee..11aaef0939b2 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -980,13 +980,6 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "tcp_tw_recycle", - .data = &init_net.ipv4.tcp_death_row.sysctl_tw_recycle, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "tcp_max_syn_backlog", .data = &init_net.ipv4.sysctl_max_syn_backlog, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index aafec0676d3e..bb09c7095988 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6327,31 +6327,11 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, af_ops->init_seq_tsoff(skb, &tcp_rsk(req)->ts_off); if (!want_cookie && !isn) { - /* VJ's idea. We save last timestamp seen - * from the destination in peer table, when entering - * state TIME-WAIT, and check against it before - * accepting new connection request. - * - * If "isn" is not zero, this request hit alive - * timewait bucket, so that all the necessary checks - * are made in the function processing timewait state. - */ - if (net->ipv4.tcp_death_row.sysctl_tw_recycle) { - bool strict; - - dst = af_ops->route_req(sk, &fl, req, &strict); - - if (dst && strict && - !tcp_peer_is_proven(req, dst)) { - NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); - goto drop_and_release; - } - } /* Kill the following clause, if you dislike this way. 
*/ - else if (!net->ipv4.sysctl_tcp_syncookies && - (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < - (net->ipv4.sysctl_max_syn_backlog >> 2)) && - !tcp_peer_is_proven(req, dst)) { + if (!net->ipv4.sysctl_tcp_syncookies && + (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < + (net->ipv4.sysctl_max_syn_backlog >> 2)) && + !tcp_peer_is_proven(req, dst)) { /* Without syncookies last quarter of * backlog is filled with destinations, * proven to be alive. @@ -6367,7 +6347,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, isn = af_ops->init_seq_tsoff(skb, &tcp_rsk(req)->ts_off); } if (!dst) { - dst = af_ops->route_req(sk, &fl, req, NULL); + dst = af_ops->route_req(sk, &fl, req); if (!dst) goto drop_and_free; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d8b401fff9fe..7482b5d11861 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1213,19 +1213,9 @@ static void tcp_v4_init_req(struct request_sock *req, static struct dst_entry *tcp_v4_route_req(const struct sock *sk, struct flowi *fl, - const struct request_sock *req, - bool *strict) + const struct request_sock *req) { - struct dst_entry *dst = inet_csk_route_req(sk, &fl->u.ip4, req); - - if (strict) { - if (fl->u.ip4.daddr == inet_rsk(req)->ir_rmt_addr) - *strict = true; - else - *strict = false; - } - - return dst; + return inet_csk_route_req(sk, &fl->u.ip4, req); } struct request_sock_ops tcp_request_sock_ops __read_mostly = { @@ -2462,7 +2452,6 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_tw_reuse = 0; cnt = tcp_hashinfo.ehash_mask + 1; - net->ipv4.tcp_death_row.sysctl_tw_recycle = 0; net->ipv4.tcp_death_row.sysctl_max_tw_buckets = (cnt + 1) / 2; net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 853cb43e3e3c..0f08d718a002 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -722,11 +722,8 @@ static void tcp_v6_init_req(struct request_sock *req, 
static struct dst_entry *tcp_v6_route_req(const struct sock *sk, struct flowi *fl, - const struct request_sock *req, - bool *strict) + const struct request_sock *req) { - if (strict) - *strict = true; return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP); } -- cgit v1.2.3-71-gd317 From 6c4e976785011dfbe461821d0bfc58cfd60eac56 Mon Sep 17 00:00:00 2001 From: Cyril Bur Date: Fri, 17 Feb 2017 14:28:49 +1100 Subject: drivers/misc: Add Aspeed LPC control driver In order to manage server systems, there is typically another processor known as a BMC (Baseboard Management Controller) which is responsible for powering the server and other various elements, sometimes fans, often the system flash. The Aspeed BMC family which is what is used on OpenPOWER machines and a number of x86 as well is typically connected to the host via an LPC (Low Pin Count) bus (among others). The LPC bus is an ISA bus on steroids. It's generally used by the BMC chip to provide the host with access to the system flash (via MEM/FW cycles) that contains the BIOS or other host firmware along with a number of SuperIO-style IOs (via IO space) such as UARTs, IPMI controllers. On the BMC chip side, this is all configured via a bunch of registers whose content is related to a given policy of what devices are exposed at a per system level, which is system/vendor specific, so we don't want to bolt that into the BMC kernel. This started with a need to provide something nicer than /dev/mem for user space to configure these things. One important aspect of the configuration is how the MEM/FW space is exposed to the host (ie, the x86 or POWER). Some registers in that bridge can define a window remapping all or portion of the LPC MEM/FW space to a portion of the BMC internal bus, with no specific limits imposed in HW. I think it makes sense to ensure that this window is configured by a kernel driver that can apply some serious sanity checks on what it is configured to map. 
In practice, user space wants to control this by flipping the mapping between essentially two types of portions of the BMC address space: - The flash space. This is a region of the BMC MMIO space that more or less directly maps the system flash (at least for reads; writes are somewhat more complicated). - One (or more) reserved area(s) of the BMC physical memory. The latter is needed for a number of things, such as avoiding letting the host manipulate the innards of the BMC flash controller via some evil backdoor: we want to do flash updates by routing the window to a portion of memory (under control of a mailbox protocol via some separate set of registers) which the host can use to write new data in bulk and then request the BMC to flash it. There are other uses, such as allowing the host to boot from an in-memory flash image rather than the one in flash (very handy for continuous integration and test, as the BMC can just download new images). It is important to note that, due to the way the Aspeed chip lets the kernel configure the mapping between host LPC addresses and BMC ram addresses, the offset within the window must be a multiple of size. Not doing so will fragment the accessible space rather than simply moving 'zero' upwards. This is caused by the nature of HICR8 being a mask and the way host LPC addresses are translated.
Signed-off-by: Cyril Bur Reviewed-by: Joel Stanley Reviewed-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- drivers/misc/Kconfig | 8 ++ drivers/misc/Makefile | 1 + drivers/misc/aspeed-lpc-ctrl.c | 267 +++++++++++++++++++++++++++++++++++ include/uapi/linux/aspeed-lpc-ctrl.h | 60 ++++++++ 4 files changed, 336 insertions(+) create mode 100644 drivers/misc/aspeed-lpc-ctrl.c create mode 100644 include/uapi/linux/aspeed-lpc-ctrl.h (limited to 'include/uapi') diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index c290990d73ed..77b001e7cf85 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -771,6 +771,14 @@ config PANEL_BOOT_MESSAGE endif # PANEL +config ASPEED_LPC_CTRL + depends on (ARCH_ASPEED || COMPILE_TEST) && REGMAP && MFD_SYSCON + tristate "Aspeed ast2400/2500 HOST LPC to BMC bridge control" + ---help--- + Control Aspeed ast2400/2500 HOST LPC to BMC mappings through + ioctl()s, the driver also provides a read/write interface to a BMC ram + region where the host LPC read/write region can be buffered. 
+ source "drivers/misc/c2port/Kconfig" source "drivers/misc/eeprom/Kconfig" source "drivers/misc/cb710/Kconfig" diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index 7a3ea89339b4..4925ea8e1952 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -54,6 +54,7 @@ obj-$(CONFIG_ECHO) += echo/ obj-$(CONFIG_VEXPRESS_SYSCFG) += vexpress-syscfg.o obj-$(CONFIG_CXL_BASE) += cxl/ obj-$(CONFIG_PANEL) += panel.o +obj-$(CONFIG_ASPEED_LPC_CTRL) += aspeed-lpc-ctrl.o lkdtm-$(CONFIG_LKDTM) += lkdtm_core.o lkdtm-$(CONFIG_LKDTM) += lkdtm_bugs.o diff --git a/drivers/misc/aspeed-lpc-ctrl.c b/drivers/misc/aspeed-lpc-ctrl.c new file mode 100644 index 000000000000..f6acbe1d9378 --- /dev/null +++ b/drivers/misc/aspeed-lpc-ctrl.c @@ -0,0 +1,267 @@ +/* + * Copyright 2017 IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define DEVICE_NAME "aspeed-lpc-ctrl" + +#define HICR7 0x8 +#define HICR8 0xc + +struct aspeed_lpc_ctrl { + struct miscdevice miscdev; + struct regmap *regmap; + phys_addr_t mem_base; + resource_size_t mem_size; + u32 pnor_size; + u32 pnor_base; +}; + +static struct aspeed_lpc_ctrl *file_aspeed_lpc_ctrl(struct file *file) +{ + return container_of(file->private_data, struct aspeed_lpc_ctrl, + miscdev); +} + +/* + * Map the reserved BMC memory region (mem_base/mem_size) into user space. + * Returns -EINVAL when the requested range does not fit inside the region + * and -EAGAIN when remap_pfn_range() fails. + */ +static int aspeed_lpc_ctrl_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct aspeed_lpc_ctrl *lpc_ctrl = file_aspeed_lpc_ctrl(file); + unsigned long vsize = vma->vm_end - vma->vm_start; + pgprot_t prot = vma->vm_page_prot; + + /* + * Bounds-check in pages: vm_pgoff is a page offset into the region and + * remap_pfn_range() below maps vsize bytes starting at + * (mem_base >> PAGE_SHIFT) + vm_pgoff, so offset plus length must not + * exceed mem_size. Comparing against mem_base + mem_size would mix + * pages, bytes and a physical address and let mappings run past the + * end of the region. + */ + if (vma->vm_pgoff + (vsize >> PAGE_SHIFT) > + (lpc_ctrl->mem_size >> PAGE_SHIFT)) + return -EINVAL; + + /* ast2400/2500 AHB accesses are not cache coherent */ + prot = pgprot_dmacoherent(prot); + + if (remap_pfn_range(vma, vma->vm_start, + (lpc_ctrl->mem_base >> PAGE_SHIFT) + vma->vm_pgoff, + vsize, prot)) + return -EAGAIN; + + return 0; +} + +static long aspeed_lpc_ctrl_ioctl(struct file *file, unsigned int cmd, + unsigned long param) +{ + struct aspeed_lpc_ctrl *lpc_ctrl = file_aspeed_lpc_ctrl(file); + void __user *p = (void __user *)param; + struct aspeed_lpc_ctrl_mapping map; + u32 addr; + u32 size; + long rc; + + if (copy_from_user(&map, p, sizeof(map))) + return -EFAULT; + + if (map.flags != 0) + return -EINVAL; + + switch (cmd) { + case ASPEED_LPC_CTRL_IOCTL_GET_SIZE: + /* The flash windows don't report their size */ + if (map.window_type != ASPEED_LPC_CTRL_WINDOW_MEMORY) + return -EINVAL; + + /* Support more than one window id in the future */ + if (map.window_id != 0) + return -EINVAL; + + map.size = lpc_ctrl->mem_size; + + return copy_to_user(p, &map, sizeof(map)) ? -EFAULT : 0; + case ASPEED_LPC_CTRL_IOCTL_MAP: + + /* + * The top half of HICR7 is the MSB of the BMC address of the + * mapping.
+ * The bottom half of HICR7 is the MSB of the HOST LPC + * firmware space address of the mapping. + * + * The 1 bits in the top of half of HICR8 represent the bits + * (in the requested address) that should be ignored and + * replaced with those from the top half of HICR7. + * The 1 bits in the bottom half of HICR8 represent the bits + * (in the requested address) that should be kept and pass + * into the BMC address space. + */ + + /* + * It doesn't make sense to talk about a size or offset with + * low 16 bits set. Both HICR7 and HICR8 talk about the top 16 + * bits of addresses and sizes. + */ + + if ((map.size & 0x0000ffff) || (map.offset & 0x0000ffff)) + return -EINVAL; + + /* + * Because of the way the masks work in HICR8 offset has to + * be a multiple of size. + */ + if (map.offset & (map.size - 1)) + return -EINVAL; + + if (map.window_type == ASPEED_LPC_CTRL_WINDOW_FLASH) { + addr = lpc_ctrl->pnor_base; + size = lpc_ctrl->pnor_size; + } else if (map.window_type == ASPEED_LPC_CTRL_WINDOW_MEMORY) { + addr = lpc_ctrl->mem_base; + size = lpc_ctrl->mem_size; + } else { + return -EINVAL; + } + + /* Check overflow first! */ + if (map.offset + map.size < map.offset || + map.offset + map.size > size) + return -EINVAL; + + if (map.size == 0 || map.size > size) + return -EINVAL; + + addr += map.offset; + + /* + * addr (host lpc address) is safe regardless of values. This + * simply changes the address the host has to request on its + * side of the LPC bus. This cannot impact the hosts own + * memory space by surprise as LPC specific accessors are + * required. The only strange thing that could be done is + * setting the lower 16 bits but the shift takes care of that. 
+ */ + + rc = regmap_write(lpc_ctrl->regmap, HICR7, + (addr | (map.addr >> 16))); + if (rc) + return rc; + + return regmap_write(lpc_ctrl->regmap, HICR8, + (~(map.size - 1)) | ((map.size >> 16) - 1)); + } + + return -EINVAL; +} + +static const struct file_operations aspeed_lpc_ctrl_fops = { + .owner = THIS_MODULE, + .mmap = aspeed_lpc_ctrl_mmap, + .unlocked_ioctl = aspeed_lpc_ctrl_ioctl, +}; + +static int aspeed_lpc_ctrl_probe(struct platform_device *pdev) +{ + struct aspeed_lpc_ctrl *lpc_ctrl; + struct device_node *node; + struct resource resm; + struct device *dev; + int rc; + + dev = &pdev->dev; + + lpc_ctrl = devm_kzalloc(dev, sizeof(*lpc_ctrl), GFP_KERNEL); + if (!lpc_ctrl) + return -ENOMEM; + + node = of_parse_phandle(dev->of_node, "flash", 0); + if (!node) { + dev_err(dev, "Didn't find host pnor flash node\n"); + return -ENODEV; + } + + rc = of_address_to_resource(node, 1, &resm); + of_node_put(node); + if (rc) { + dev_err(dev, "Couldn't address to resource for flash\n"); + return rc; + } + + lpc_ctrl->pnor_size = resource_size(&resm); + lpc_ctrl->pnor_base = resm.start; + + dev_set_drvdata(&pdev->dev, lpc_ctrl); + + node = of_parse_phandle(dev->of_node, "memory-region", 0); + if (!node) { + dev_err(dev, "Didn't find reserved memory\n"); + return -EINVAL; + } + + rc = of_address_to_resource(node, 0, &resm); + of_node_put(node); + if (rc) { + dev_err(dev, "Couldn't address to resource for reserved memory\n"); + return -ENOMEM; + } + + lpc_ctrl->mem_size = resource_size(&resm); + lpc_ctrl->mem_base = resm.start; + + lpc_ctrl->regmap = syscon_node_to_regmap( + pdev->dev.parent->of_node); + if (IS_ERR(lpc_ctrl->regmap)) { + dev_err(dev, "Couldn't get regmap\n"); + return -ENODEV; + } + + lpc_ctrl->miscdev.minor = MISC_DYNAMIC_MINOR; + lpc_ctrl->miscdev.name = DEVICE_NAME; + lpc_ctrl->miscdev.fops = &aspeed_lpc_ctrl_fops; + lpc_ctrl->miscdev.parent = dev; + rc = misc_register(&lpc_ctrl->miscdev); + if (rc) + dev_err(dev, "Unable to register device\n"); + else + 
dev_info(dev, "Loaded at 0x%08x (0x%08x)\n", + lpc_ctrl->mem_base, lpc_ctrl->mem_size); + + return rc; +} + +static int aspeed_lpc_ctrl_remove(struct platform_device *pdev) +{ + struct aspeed_lpc_ctrl *lpc_ctrl = dev_get_drvdata(&pdev->dev); + + misc_deregister(&lpc_ctrl->miscdev); + + return 0; +} + +static const struct of_device_id aspeed_lpc_ctrl_match[] = { + { .compatible = "aspeed,ast2400-lpc-ctrl" }, + { .compatible = "aspeed,ast2500-lpc-ctrl" }, + { }, +}; + +static struct platform_driver aspeed_lpc_ctrl_driver = { + .driver = { + .name = DEVICE_NAME, + .of_match_table = aspeed_lpc_ctrl_match, + }, + .probe = aspeed_lpc_ctrl_probe, + .remove = aspeed_lpc_ctrl_remove, +}; + +module_platform_driver(aspeed_lpc_ctrl_driver); + +MODULE_DEVICE_TABLE(of, aspeed_lpc_ctrl_match); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Cyril Bur "); +MODULE_DESCRIPTION("Control for aspeed 2400/2500 LPC HOST to BMC mappings"); diff --git a/include/uapi/linux/aspeed-lpc-ctrl.h b/include/uapi/linux/aspeed-lpc-ctrl.h new file mode 100644 index 000000000000..f96fa995a3f0 --- /dev/null +++ b/include/uapi/linux/aspeed-lpc-ctrl.h @@ -0,0 +1,60 @@ +/* + * Copyright 2017 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _UAPI_LINUX_ASPEED_LPC_CTRL_H +#define _UAPI_LINUX_ASPEED_LPC_CTRL_H + +#include + +/* Window types */ +#define ASPEED_LPC_CTRL_WINDOW_FLASH 1 +#define ASPEED_LPC_CTRL_WINDOW_MEMORY 2 + +/* + * This driver provides a window for the host to access a BMC resource + * across the BMC <-> Host LPC bus. + * + * window_type: The BMC resource that the host will access through the + * window. BMC flash and BMC RAM. + * + * window_id: For each window type there may be multiple windows, + * these are referenced by ID. 
/*
 * Argument of both ioctls below.
 *
 * window_type: The BMC resource that the host will access through the
 * window: ASPEED_LPC_CTRL_WINDOW_FLASH or ASPEED_LPC_CTRL_WINDOW_MEMORY.
 *
 * window_id: For each window type there may be multiple windows,
 * these are referenced by ID.
 *
 * flags: Reserved for future use, this field is expected to be
 * zeroed.
 *
 * addr: Address on the host LPC bus that the specified window should
 * be mapped. This address must be power of two aligned.
 *
 * offset: Offset into the BMC window that should be mapped to the
 * host (at addr). This must be a multiple of size.
 *
 * size: The size of the mapping. The smallest possible size is 64K.
 * This must be power of two aligned.
 */
struct aspeed_lpc_ctrl_mapping {
	__u8	window_type;	/* one of the ASPEED_LPC_CTRL_WINDOW_* values */
	__u8	window_id;	/* window index within window_type */
	__u16	flags;		/* reserved, expected to be zero */
	__u32	addr;		/* host LPC bus address of the window */
	__u32	offset;		/* offset into the BMC window */
	__u32	size;		/* size of the mapping */
};

/* ioctl type byte shared by every request of this driver */
#define __ASPEED_LPC_CTRL_IOCTL_MAGIC	0xb2

/* Read back the window size; the struct is passed in and written back. */
#define ASPEED_LPC_CTRL_IOCTL_GET_SIZE	_IOWR(__ASPEED_LPC_CTRL_IOCTL_MAGIC, \
		0x00, struct aspeed_lpc_ctrl_mapping)

/* Establish a host <-> BMC mapping; the struct is only passed in. */
#define ASPEED_LPC_CTRL_IOCTL_MAP	_IOW(__ASPEED_LPC_CTRL_IOCTL_MAGIC, \
		0x01, struct aspeed_lpc_ctrl_mapping)

#endif /* _UAPI_LINUX_ASPEED_LPC_CTRL_H */
Acked-by: Rodolfo Giometti Signed-off-by: Matt Ranostay Signed-off-by: Greg Kroah-Hartman --- drivers/pps/pps.c | 110 ++++++++++++++++++++++++++++++++++------------- include/uapi/linux/pps.h | 19 ++++++++ 2 files changed, 98 insertions(+), 31 deletions(-) (limited to 'include/uapi') diff --git a/drivers/pps/pps.c b/drivers/pps/pps.c index 452ead5a5e52..6eb0db37dd88 100644 --- a/drivers/pps/pps.c +++ b/drivers/pps/pps.c @@ -64,6 +64,43 @@ static int pps_cdev_fasync(int fd, struct file *file, int on) return fasync_helper(fd, file, on, &pps->async_queue); } +static int pps_cdev_pps_fetch(struct pps_device *pps, struct pps_fdata *fdata) +{ + unsigned int ev = pps->last_ev; + int err = 0; + + /* Manage the timeout */ + if (fdata->timeout.flags & PPS_TIME_INVALID) + err = wait_event_interruptible(pps->queue, + ev != pps->last_ev); + else { + unsigned long ticks; + + dev_dbg(pps->dev, "timeout %lld.%09d\n", + (long long) fdata->timeout.sec, + fdata->timeout.nsec); + ticks = fdata->timeout.sec * HZ; + ticks += fdata->timeout.nsec / (NSEC_PER_SEC / HZ); + + if (ticks != 0) { + err = wait_event_interruptible_timeout( + pps->queue, + ev != pps->last_ev, + ticks); + if (err == 0) + return -ETIMEDOUT; + } + } + + /* Check for pending signals */ + if (err == -ERESTARTSYS) { + dev_dbg(pps->dev, "pending signal caught\n"); + return -EINTR; + } + + return 0; +} + static long pps_cdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -144,7 +181,6 @@ static long pps_cdev_ioctl(struct file *file, case PPS_FETCH: { struct pps_fdata fdata; - unsigned int ev; dev_dbg(pps->dev, "PPS_FETCH\n"); @@ -152,36 +188,9 @@ static long pps_cdev_ioctl(struct file *file, if (err) return -EFAULT; - ev = pps->last_ev; - - /* Manage the timeout */ - if (fdata.timeout.flags & PPS_TIME_INVALID) - err = wait_event_interruptible(pps->queue, - ev != pps->last_ev); - else { - unsigned long ticks; - - dev_dbg(pps->dev, "timeout %lld.%09d\n", - (long long) fdata.timeout.sec, - 
fdata.timeout.nsec); - ticks = fdata.timeout.sec * HZ; - ticks += fdata.timeout.nsec / (NSEC_PER_SEC / HZ); - - if (ticks != 0) { - err = wait_event_interruptible_timeout( - pps->queue, - ev != pps->last_ev, - ticks); - if (err == 0) - return -ETIMEDOUT; - } - } - - /* Check for pending signals */ - if (err == -ERESTARTSYS) { - dev_dbg(pps->dev, "pending signal caught\n"); - return -EINTR; - } + err = pps_cdev_pps_fetch(pps, &fdata); + if (err) + return err; /* Return the fetched timestamp */ spin_lock_irq(&pps->lock); @@ -246,8 +255,47 @@ static long pps_cdev_ioctl(struct file *file, static long pps_cdev_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { + struct pps_device *pps = file->private_data; + void __user *uarg = (void __user *) arg; + cmd = _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(void *)); + if (cmd == PPS_FETCH) { + struct pps_fdata_compat compat; + struct pps_fdata fdata; + int err; + + dev_dbg(pps->dev, "PPS_FETCH\n"); + + err = copy_from_user(&compat, uarg, sizeof(struct pps_fdata_compat)); + if (err) + return -EFAULT; + + memcpy(&fdata.timeout, &compat.timeout, + sizeof(struct pps_ktime_compat)); + + err = pps_cdev_pps_fetch(pps, &fdata); + if (err) + return err; + + /* Return the fetched timestamp */ + spin_lock_irq(&pps->lock); + + compat.info.assert_sequence = pps->assert_sequence; + compat.info.clear_sequence = pps->clear_sequence; + compat.info.current_mode = pps->current_mode; + + memcpy(&compat.info.assert_tu, &pps->assert_tu, + sizeof(struct pps_ktime_compat)); + memcpy(&compat.info.clear_tu, &pps->clear_tu, + sizeof(struct pps_ktime_compat)); + + spin_unlock_irq(&pps->lock); + + return copy_to_user(uarg, &compat, + sizeof(struct pps_fdata_compat)) ? 
-EFAULT : 0; + } + return pps_cdev_ioctl(file, cmd, arg); } #else diff --git a/include/uapi/linux/pps.h b/include/uapi/linux/pps.h index a9bb1d93451a..c1cb3825a8bc 100644 --- a/include/uapi/linux/pps.h +++ b/include/uapi/linux/pps.h @@ -55,6 +55,12 @@ struct pps_ktime { __s32 nsec; __u32 flags; }; + +struct pps_ktime_compat { + __s64 sec; + __s32 nsec; + __u32 flags; +} __attribute__((packed, aligned(4))); #define PPS_TIME_INVALID (1<<0) /* used to specify timeout==NULL */ struct pps_kinfo { @@ -65,6 +71,14 @@ struct pps_kinfo { int current_mode; /* current mode bits */ }; +struct pps_kinfo_compat { + __u32 assert_sequence; /* seq. num. of assert event */ + __u32 clear_sequence; /* seq. num. of clear event */ + struct pps_ktime_compat assert_tu; /* time of assert event */ + struct pps_ktime_compat clear_tu; /* time of clear event */ + int current_mode; /* current mode bits */ +}; + struct pps_kparams { int api_version; /* API version # */ int mode; /* mode bits */ @@ -114,6 +128,11 @@ struct pps_fdata { struct pps_ktime timeout; }; +struct pps_fdata_compat { + struct pps_kinfo_compat info; + struct pps_ktime_compat timeout; +}; + struct pps_bind_args { int tsformat; /* format of time stamps */ int edge; /* selected event type */ -- cgit v1.2.3-71-gd317 From 227c011b2e046dd4d36d9e00e3d9c88097b2a4c3 Mon Sep 17 00:00:00 2001 From: Roderick Colenbrander Date: Tue, 7 Mar 2017 15:45:00 -0800 Subject: HID: sony: Report DS4 motion sensors through a separate device The DS4 motion sensors are currently mapped by the hid-core driver to non-existing axes in between ABS_MISC and ABS_MT_SLOT, because the device already exhausted ABS_X-ABS_RZ. For a part the mapping by hid-core is accomplished by a fixup in hid-sony as the motion axes actually use vendor specific usage pages. This patch makes the DS4 use a separate input device for the motion sensors and reports acceleration data through ABS_X-ABS_Z and gyroscope data through ABS_RX-ABS_RZ. 
In addition it extends the event spec to allow gyroscope data through ABS_RX-ABS_RZ when INPUT_PROP_ACCELEROMETER is set. This change was suggested by Peter Hutterer during a discussion on linux-input. [jkosina@suse.cz: rebase onto slightly newer codebase] Signed-off-by: Roderick Colenbrander Signed-off-by: Jiri Kosina --- Documentation/input/event-codes.txt | 5 +- drivers/hid/hid-sony.c | 397 ++++++++++-------------------------- include/uapi/linux/input.h | 11 +- 3 files changed, 117 insertions(+), 296 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/input/event-codes.txt b/Documentation/input/event-codes.txt index 36ea940e5bb9..575415f4cef0 100644 --- a/Documentation/input/event-codes.txt +++ b/Documentation/input/event-codes.txt @@ -301,7 +301,10 @@ them as any other INPUT_PROP_BUTTONPAD device. INPUT_PROP_ACCELEROMETER ------------------------- Directional axes on this device (absolute and/or relative x, y, z) represent -accelerometer data. All other axes retain their meaning. A device must not mix +accelerometer data. Some devices also report gyroscope data, which devices +can report through the rotational axes (absolute and/or relative rx, ry, rz). + +All other axes retain their meaning. A device must not mix regular directional axes and accelerometer axes on the same event node. Guidelines: diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c index 7e2bae309671..17df165133ab 100644 --- a/drivers/hid/hid-sony.c +++ b/drivers/hid/hid-sony.c @@ -344,265 +344,6 @@ static u8 navigation_rdesc[] = { 0xC0 /* End Collection */ }; -/* - * The default descriptor doesn't provide mapping for the accelerometers - * or orientation sensors. This fixed descriptor maps the accelerometers - * to usage values 0x40, 0x41 and 0x42 and maps the orientation sensors - * to usage values 0x43, 0x44 and 0x45. 
- */ -static u8 dualshock4_usb_rdesc[] = { - 0x05, 0x01, /* Usage Page (Desktop), */ - 0x09, 0x05, /* Usage (Gamepad), */ - 0xA1, 0x01, /* Collection (Application), */ - 0x85, 0x01, /* Report ID (1), */ - 0x09, 0x30, /* Usage (X), */ - 0x09, 0x31, /* Usage (Y), */ - 0x09, 0x32, /* Usage (Z), */ - 0x09, 0x35, /* Usage (Rz), */ - 0x15, 0x00, /* Logical Minimum (0), */ - 0x26, 0xFF, 0x00, /* Logical Maximum (255), */ - 0x75, 0x08, /* Report Size (8), */ - 0x95, 0x04, /* Report Count (4), */ - 0x81, 0x02, /* Input (Variable), */ - 0x09, 0x39, /* Usage (Hat Switch), */ - 0x15, 0x00, /* Logical Minimum (0), */ - 0x25, 0x07, /* Logical Maximum (7), */ - 0x35, 0x00, /* Physical Minimum (0), */ - 0x46, 0x3B, 0x01, /* Physical Maximum (315), */ - 0x65, 0x14, /* Unit (Degrees), */ - 0x75, 0x04, /* Report Size (4), */ - 0x95, 0x01, /* Report Count (1), */ - 0x81, 0x42, /* Input (Variable, Null State), */ - 0x65, 0x00, /* Unit, */ - 0x05, 0x09, /* Usage Page (Button), */ - 0x19, 0x01, /* Usage Minimum (01h), */ - 0x29, 0x0D, /* Usage Maximum (0Dh), */ - 0x15, 0x00, /* Logical Minimum (0), */ - 0x25, 0x01, /* Logical Maximum (1), */ - 0x75, 0x01, /* Report Size (1), */ - 0x95, 0x0E, /* Report Count (14), */ - 0x81, 0x02, /* Input (Variable), */ - 0x06, 0x00, 0xFF, /* Usage Page (FF00h), */ - 0x09, 0x20, /* Usage (20h), */ - 0x75, 0x06, /* Report Size (6), */ - 0x95, 0x01, /* Report Count (1), */ - 0x15, 0x00, /* Logical Minimum (0), */ - 0x25, 0x3F, /* Logical Maximum (63), */ - 0x81, 0x02, /* Input (Variable), */ - 0x05, 0x01, /* Usage Page (Desktop), */ - 0x09, 0x33, /* Usage (Rx), */ - 0x09, 0x34, /* Usage (Ry), */ - 0x15, 0x00, /* Logical Minimum (0), */ - 0x26, 0xFF, 0x00, /* Logical Maximum (255), */ - 0x75, 0x08, /* Report Size (8), */ - 0x95, 0x02, /* Report Count (2), */ - 0x81, 0x02, /* Input (Variable), */ - 0x06, 0x00, 0xFF, /* Usage Page (FF00h), */ - 0x09, 0x21, /* Usage (21h), */ - 0x95, 0x03, /* Report Count (3), */ - 0x81, 0x02, /* Input (Variable), */ - 0x05, 
0x01, /* Usage Page (Desktop), */ - 0x19, 0x40, /* Usage Minimum (40h), */ - 0x29, 0x42, /* Usage Maximum (42h), */ - 0x16, 0x00, 0x80, /* Logical Minimum (-32768), */ - 0x26, 0xFF, 0x7F, /* Logical Maximum (32767), */ - 0x75, 0x10, /* Report Size (16), */ - 0x95, 0x03, /* Report Count (3), */ - 0x81, 0x02, /* Input (Variable), */ - 0x19, 0x43, /* Usage Minimum (43h), */ - 0x29, 0x45, /* Usage Maximum (45h), */ - 0x16, 0x00, 0x80, /* Logical Minimum (-32768), */ - 0x26, 0xFF, 0x7F, /* Logical Maximum (32767), */ - 0x95, 0x03, /* Report Count (3), */ - 0x81, 0x02, /* Input (Variable), */ - 0x06, 0x00, 0xFF, /* Usage Page (FF00h), */ - 0x09, 0x21, /* Usage (21h), */ - 0x15, 0x00, /* Logical Minimum (0), */ - 0x26, 0xFF, 0x00, /* Logical Maximum (255), */ - 0x75, 0x08, /* Report Size (8), */ - 0x95, 0x27, /* Report Count (39), */ - 0x81, 0x02, /* Input (Variable), */ - 0x85, 0x05, /* Report ID (5), */ - 0x09, 0x22, /* Usage (22h), */ - 0x95, 0x1F, /* Report Count (31), */ - 0x91, 0x02, /* Output (Variable), */ - 0x85, 0x04, /* Report ID (4), */ - 0x09, 0x23, /* Usage (23h), */ - 0x95, 0x24, /* Report Count (36), */ - 0xB1, 0x02, /* Feature (Variable), */ - 0x85, 0x02, /* Report ID (2), */ - 0x09, 0x24, /* Usage (24h), */ - 0x95, 0x24, /* Report Count (36), */ - 0xB1, 0x02, /* Feature (Variable), */ - 0x85, 0x08, /* Report ID (8), */ - 0x09, 0x25, /* Usage (25h), */ - 0x95, 0x03, /* Report Count (3), */ - 0xB1, 0x02, /* Feature (Variable), */ - 0x85, 0x10, /* Report ID (16), */ - 0x09, 0x26, /* Usage (26h), */ - 0x95, 0x04, /* Report Count (4), */ - 0xB1, 0x02, /* Feature (Variable), */ - 0x85, 0x11, /* Report ID (17), */ - 0x09, 0x27, /* Usage (27h), */ - 0x95, 0x02, /* Report Count (2), */ - 0xB1, 0x02, /* Feature (Variable), */ - 0x85, 0x12, /* Report ID (18), */ - 0x06, 0x02, 0xFF, /* Usage Page (FF02h), */ - 0x09, 0x21, /* Usage (21h), */ - 0x95, 0x0F, /* Report Count (15), */ - 0xB1, 0x02, /* Feature (Variable), */ - 0x85, 0x13, /* Report ID (19), */ - 0x09, 
0x22, /* Usage (22h), */ - 0x95, 0x16, /* Report Count (22), */ - 0xB1, 0x02, /* Feature (Variable), */ - 0x85, 0x14, /* Report ID (20), */ - 0x06, 0x05, 0xFF, /* Usage Page (FF05h), */ - 0x09, 0x20, /* Usage (20h), */ - 0x95, 0x10, /* Report Count (16), */ - 0xB1, 0x02, /* Feature (Variable), */ - 0x85, 0x15, /* Report ID (21), */ - 0x09, 0x21, /* Usage (21h), */ - 0x95, 0x2C, /* Report Count (44), */ - 0xB1, 0x02, /* Feature (Variable), */ - 0x06, 0x80, 0xFF, /* Usage Page (FF80h), */ - 0x85, 0x80, /* Report ID (128), */ - 0x09, 0x20, /* Usage (20h), */ - 0x95, 0x06, /* Report Count (6), */ - 0xB1, 0x02, /* Feature (Variable), */ - 0x85, 0x81, /* Report ID (129), */ - 0x09, 0x21, /* Usage (21h), */ - 0x95, 0x06, /* Report Count (6), */ - 0xB1, 0x02, /* Feature (Variable), */ - 0x85, 0x82, /* Report ID (130), */ - 0x09, 0x22, /* Usage (22h), */ - 0x95, 0x05, /* Report Count (5), */ - 0xB1, 0x02, /* Feature (Variable), */ - 0x85, 0x83, /* Report ID (131), */ - 0x09, 0x23, /* Usage (23h), */ - 0x95, 0x01, /* Report Count (1), */ - 0xB1, 0x02, /* Feature (Variable), */ - 0x85, 0x84, /* Report ID (132), */ - 0x09, 0x24, /* Usage (24h), */ - 0x95, 0x04, /* Report Count (4), */ - 0xB1, 0x02, /* Feature (Variable), */ - 0x85, 0x85, /* Report ID (133), */ - 0x09, 0x25, /* Usage (25h), */ - 0x95, 0x06, /* Report Count (6), */ - 0xB1, 0x02, /* Feature (Variable), */ - 0x85, 0x86, /* Report ID (134), */ - 0x09, 0x26, /* Usage (26h), */ - 0x95, 0x06, /* Report Count (6), */ - 0xB1, 0x02, /* Feature (Variable), */ - 0x85, 0x87, /* Report ID (135), */ - 0x09, 0x27, /* Usage (27h), */ - 0x95, 0x23, /* Report Count (35), */ - 0xB1, 0x02, /* Feature (Variable), */ - 0x85, 0x88, /* Report ID (136), */ - 0x09, 0x28, /* Usage (28h), */ - 0x95, 0x22, /* Report Count (34), */ - 0xB1, 0x02, /* Feature (Variable), */ - 0x85, 0x89, /* Report ID (137), */ - 0x09, 0x29, /* Usage (29h), */ - 0x95, 0x02, /* Report Count (2), */ - 0xB1, 0x02, /* Feature (Variable), */ - 0x85, 0x90, /* Report 
ID (144), */ - 0x09, 0x30, /* Usage (30h), */ - 0x95, 0x05, /* Report Count (5), */ - 0xB1, 0x02, /* Feature (Variable), */ - 0x85, 0x91, /* Report ID (145), */ - 0x09, 0x31, /* Usage (31h), */ - 0x95, 0x03, /* Report Count (3), */ - 0xB1, 0x02, /* Feature (Variable), */ - 0x85, 0x92, /* Report ID (146), */ - 0x09, 0x32, /* Usage (32h), */ - 0x95, 0x03, /* Report Count (3), */ - 0xB1, 0x02, /* Feature (Variable), */ - 0x85, 0x93, /* Report ID (147), */ - 0x09, 0x33, /* Usage (33h), */ - 0x95, 0x0C, /* Report Count (12), */ - 0xB1, 0x02, /* Feature (Variable), */ - 0x85, 0xA0, /* Report ID (160), */ - 0x09, 0x40, /* Usage (40h), */ - 0x95, 0x06, /* Report Count (6), */ - 0xB1, 0x02, /* Feature (Variable), */ - 0x85, 0xA1, /* Report ID (161), */ - 0x09, 0x41, /* Usage (41h), */ - 0x95, 0x01, /* Report Count (1), */ - 0xB1, 0x02, /* Feature (Variable), */ - 0x85, 0xA2, /* Report ID (162), */ - 0x09, 0x42, /* Usage (42h), */ - 0x95, 0x01, /* Report Count (1), */ - 0xB1, 0x02, /* Feature (Variable), */ - 0x85, 0xA3, /* Report ID (163), */ - 0x09, 0x43, /* Usage (43h), */ - 0x95, 0x30, /* Report Count (48), */ - 0xB1, 0x02, /* Feature (Variable), */ - 0x85, 0xA4, /* Report ID (164), */ - 0x09, 0x44, /* Usage (44h), */ - 0x95, 0x0D, /* Report Count (13), */ - 0xB1, 0x02, /* Feature (Variable), */ - 0x85, 0xA5, /* Report ID (165), */ - 0x09, 0x45, /* Usage (45h), */ - 0x95, 0x15, /* Report Count (21), */ - 0xB1, 0x02, /* Feature (Variable), */ - 0x85, 0xA6, /* Report ID (166), */ - 0x09, 0x46, /* Usage (46h), */ - 0x95, 0x15, /* Report Count (21), */ - 0xB1, 0x02, /* Feature (Variable), */ - 0x85, 0xF0, /* Report ID (240), */ - 0x09, 0x47, /* Usage (47h), */ - 0x95, 0x3F, /* Report Count (63), */ - 0xB1, 0x02, /* Feature (Variable), */ - 0x85, 0xF1, /* Report ID (241), */ - 0x09, 0x48, /* Usage (48h), */ - 0x95, 0x3F, /* Report Count (63), */ - 0xB1, 0x02, /* Feature (Variable), */ - 0x85, 0xF2, /* Report ID (242), */ - 0x09, 0x49, /* Usage (49h), */ - 0x95, 0x0F, /* 
Report Count (15), */ - 0xB1, 0x02, /* Feature (Variable), */ - 0x85, 0xA7, /* Report ID (167), */ - 0x09, 0x4A, /* Usage (4Ah), */ - 0x95, 0x01, /* Report Count (1), */ - 0xB1, 0x02, /* Feature (Variable), */ - 0x85, 0xA8, /* Report ID (168), */ - 0x09, 0x4B, /* Usage (4Bh), */ - 0x95, 0x01, /* Report Count (1), */ - 0xB1, 0x02, /* Feature (Variable), */ - 0x85, 0xA9, /* Report ID (169), */ - 0x09, 0x4C, /* Usage (4Ch), */ - 0x95, 0x08, /* Report Count (8), */ - 0xB1, 0x02, /* Feature (Variable), */ - 0x85, 0xAA, /* Report ID (170), */ - 0x09, 0x4E, /* Usage (4Eh), */ - 0x95, 0x01, /* Report Count (1), */ - 0xB1, 0x02, /* Feature (Variable), */ - 0x85, 0xAB, /* Report ID (171), */ - 0x09, 0x4F, /* Usage (4Fh), */ - 0x95, 0x39, /* Report Count (57), */ - 0xB1, 0x02, /* Feature (Variable), */ - 0x85, 0xAC, /* Report ID (172), */ - 0x09, 0x50, /* Usage (50h), */ - 0x95, 0x39, /* Report Count (57), */ - 0xB1, 0x02, /* Feature (Variable), */ - 0x85, 0xAD, /* Report ID (173), */ - 0x09, 0x51, /* Usage (51h), */ - 0x95, 0x0B, /* Report Count (11), */ - 0xB1, 0x02, /* Feature (Variable), */ - 0x85, 0xAE, /* Report ID (174), */ - 0x09, 0x52, /* Usage (52h), */ - 0x95, 0x01, /* Report Count (1), */ - 0xB1, 0x02, /* Feature (Variable), */ - 0x85, 0xAF, /* Report ID (175), */ - 0x09, 0x53, /* Usage (53h), */ - 0x95, 0x02, /* Report Count (2), */ - 0xB1, 0x02, /* Feature (Variable), */ - 0x85, 0xB0, /* Report ID (176), */ - 0x09, 0x54, /* Usage (54h), */ - 0x95, 0x3F, /* Report Count (63), */ - 0xB1, 0x02, /* Feature (Variable), */ - 0xC0 /* End Collection */ -}; /* * The default behavior of the Dualshock 4 is to send reports using report @@ -706,31 +447,10 @@ static u8 dualshock4_bt_rdesc[] = { 0x75, 0x08, /* Report Size (8), */ 0x95, 0x02, /* Report Count (2), */ 0x81, 0x02, /* Input (Variable), */ - 0x06, 0x00, 0xFF, /* Usage Page (FF00h), */ - 0x09, 0x20, /* Usage (20h), */ - 0x95, 0x03, /* Report Count (3), */ - 0x81, 0x02, /* Input (Variable), */ - 0x05, 0x01, /* Usage 
Page (Desktop), */ - 0x19, 0x40, /* Usage Minimum (40h), */ - 0x29, 0x42, /* Usage Maximum (42h), */ - 0x16, 0x00, 0x80, /* Logical Minimum (-32768), */ - 0x26, 0xFF, 0x7F, /* Logical Maximum (32767), */ - 0x75, 0x10, /* Report Size (16), */ - 0x95, 0x03, /* Report Count (3), */ - 0x81, 0x02, /* Input (Variable), */ - 0x19, 0x43, /* Usage Minimum (43h), */ - 0x29, 0x45, /* Usage Maximum (45h), */ - 0x16, 0x00, 0x80, /* Logical Minimum (-32768), */ - 0x26, 0xFF, 0x7F, /* Logical Maximum (32767), */ - 0x95, 0x03, /* Report Count (3), */ - 0x81, 0x02, /* Input (Variable), */ - 0x06, 0x00, 0xFF, /* Usage Page (FF00h), */ - 0x09, 0x20, /* Usage (20h), */ - 0x15, 0x00, /* Logical Minimum (0), */ - 0x26, 0xFF, 0x00, /* Logical Maximum (255), */ - 0x75, 0x08, /* Report Size (8), */ - 0x95, 0x31, /* Report Count (51), */ - 0x81, 0x02, /* Input (Variable), */ + 0x06, 0x00, 0xFF, /* Usage Page (FF00h) */ + 0x09, 0x21, /* Usage (0x21) */ + 0x95, 0x42, /* Report Count (66) */ + 0x81, 0x02, /* Input (Variable) */ 0x09, 0x21, /* Usage (21h), */ 0x75, 0x08, /* Report Size (8), */ 0x95, 0x4D, /* Report Count (77), */ @@ -1060,9 +780,11 @@ struct motion_output_report_02 { * additional +2. */ #define DS4_INPUT_REPORT_BUTTON_OFFSET 5 +#define DS4_INPUT_REPORT_GYRO_X_OFFSET 13 #define DS4_INPUT_REPORT_BATTERY_OFFSET 30 #define DS4_INPUT_REPORT_TOUCHPAD_OFFSET 33 +#define DS4_SENSOR_SUFFIX " Motion Sensors" #define DS4_TOUCHPAD_SUFFIX " Touchpad" static DEFINE_SPINLOCK(sony_dev_list_lock); @@ -1074,6 +796,7 @@ struct sony_sc { struct list_head list_node; struct hid_device *hdev; struct input_dev *touchpad; + struct input_dev *sensor_dev; struct led_classdev *leds[MAX_LEDS]; unsigned long quirks; struct work_struct state_worker; @@ -1227,15 +950,11 @@ static u8 *sony_report_fixup(struct hid_device *hdev, u8 *rdesc, } /* - * The default Dualshock 4 USB descriptor doesn't assign - * the gyroscope values to corresponding axes so we need a - * modified one. 
+ * The default Dualshock 4 BT descriptor doesn't describe report ID 17 + * which is most often used for input data. Add this mapping, so we + * use the generic hid code for parsing the buttons and axes. */ - if (sc->quirks & DUALSHOCK4_CONTROLLER_USB) { - hid_info(hdev, "Using modified Dualshock 4 report descriptor with gyroscope axes\n"); - rdesc = dualshock4_usb_rdesc; - *rsize = sizeof(dualshock4_usb_rdesc); - } else if (sc->quirks & DUALSHOCK4_CONTROLLER_BT) { + if (sc->quirks & DUALSHOCK4_CONTROLLER_BT) { hid_info(hdev, "Using modified Dualshock 4 Bluetooth report descriptor\n"); rdesc = dualshock4_bt_rdesc; *rsize = sizeof(dualshock4_bt_rdesc); @@ -1295,6 +1014,9 @@ static void dualshock4_parse_report(struct sony_sc *sc, u8 *rd, int size) int n, m, offset, num_touch_data, max_touch_data; u8 cable_state, battery_capacity, battery_charging; + /* Order of hw axes is gyro first, then accelerometer. */ + int axes[6] = {ABS_RX, ABS_RY, ABS_RZ, ABS_X, ABS_Y, ABS_Z}; + /* When using Bluetooth the header is 2 bytes longer, so skip these. */ int data_offset = (sc->quirks & DUALSHOCK4_CONTROLLER_USB) ? 0 : 2; @@ -1302,6 +1024,14 @@ static void dualshock4_parse_report(struct sony_sc *sc, u8 *rd, int size) offset = data_offset + DS4_INPUT_REPORT_BUTTON_OFFSET; input_report_key(sc->touchpad, BTN_LEFT, rd[offset+2] & 0x2); + offset = data_offset + DS4_INPUT_REPORT_GYRO_X_OFFSET; + for (n = 0; n < 6; n++, offset += 2) { + short value = get_unaligned_le16(&rd[offset]); + + input_report_abs(sc->sensor_dev, axes[n], value); + } + input_sync(sc->sensor_dev); + /* * The lower 4 bits of byte 30 (or 32 for BT) contain the battery level * and the 5th bit contains the USB cable state. 
@@ -1580,6 +1310,76 @@ static void sony_unregister_touchpad(struct sony_sc *sc) sc->touchpad = NULL; } +static int sony_register_sensors(struct sony_sc *sc) +{ + size_t name_sz; + char *name; + int ret; + + sc->sensor_dev = input_allocate_device(); + if (!sc->sensor_dev) + return -ENOMEM; + + input_set_drvdata(sc->sensor_dev, sc); + sc->sensor_dev->dev.parent = &sc->hdev->dev; + sc->sensor_dev->phys = sc->hdev->phys; + sc->sensor_dev->uniq = sc->hdev->uniq; + sc->sensor_dev->id.bustype = sc->hdev->bus; + sc->sensor_dev->id.vendor = sc->hdev->vendor; + sc->sensor_dev->id.product = sc->hdev->product; + sc->sensor_dev->id.version = sc->hdev->version; + + /* Append a suffix to the controller name as there are various + * DS4 compatible non-Sony devices with different names. + */ + name_sz = strlen(sc->hdev->name) + sizeof(DS4_SENSOR_SUFFIX); + name = kzalloc(name_sz, GFP_KERNEL); + if (!name) { + ret = -ENOMEM; + goto err; + } + snprintf(name, name_sz, "%s" DS4_SENSOR_SUFFIX, sc->hdev->name); + sc->sensor_dev->name = name; + + input_set_abs_params(sc->sensor_dev, ABS_X, -32768, 32767, 0, 0); + input_set_abs_params(sc->sensor_dev, ABS_Y, -32768, 32767, 0, 0); + input_set_abs_params(sc->sensor_dev, ABS_Z, -32768, 32767, 0, 0); + + input_set_abs_params(sc->sensor_dev, ABS_RX, -32768, 32767, 0, 0); + input_set_abs_params(sc->sensor_dev, ABS_RY, -32768, 32767, 0, 0); + input_set_abs_params(sc->sensor_dev, ABS_RZ, -32768, 32767, 0, 0); + + __set_bit(INPUT_PROP_ACCELEROMETER, sc->sensor_dev->propbit); + + ret = input_register_device(sc->sensor_dev); + if (ret < 0) + goto err; + + return 0; + +err: + kfree(sc->sensor_dev->name); + sc->sensor_dev->name = NULL; + + input_free_device(sc->sensor_dev); + sc->sensor_dev = NULL; + + return ret; +} + +static void sony_unregister_sensors(struct sony_sc *sc) +{ + if (!sc->sensor_dev) + return; + + kfree(sc->sensor_dev->name); + sc->sensor_dev->name = NULL; + + input_unregister_device(sc->sensor_dev); + sc->sensor_dev = NULL; +} + + /* * 
Sending HID_REQ_GET_REPORT changes the operation mode of the ps3 controller * to "operational". Without this, the ps3 controller will not report any @@ -2585,6 +2385,13 @@ static int sony_input_configured(struct hid_device *hdev, goto err_stop; } + ret = sony_register_sensors(sc); + if (ret) { + hid_err(sc->hdev, + "Unable to initialize motion sensors: %d\n", ret); + goto err_stop; + } + sony_init_output_report(sc, dualshock4_send_output_report); } else if (sc->quirks & MOTION_CONTROLLER) { sony_init_output_report(sc, motion_send_output_report); @@ -2719,6 +2526,12 @@ static void sony_remove(struct hid_device *hdev) if (sc->touchpad) sony_unregister_touchpad(sc); + if (sc->sensor_dev) + sony_unregister_sensors(sc); + + if (sc->sensor_dev) + sony_unregister_sensors(sc); + sony_cancel_work_sync(sc); kfree(sc->output_report_dmabuf); diff --git a/include/uapi/linux/input.h b/include/uapi/linux/input.h index e794f7bee22f..f561c0eb7d63 100644 --- a/include/uapi/linux/input.h +++ b/include/uapi/linux/input.h @@ -61,9 +61,14 @@ struct input_id { * Note that input core does not clamp reported values to the * [minimum, maximum] limits, such task is left to userspace. * - * Resolution for main axes (ABS_X, ABS_Y, ABS_Z) is reported in - * units per millimeter (units/mm), resolution for rotational axes - * (ABS_RX, ABS_RY, ABS_RZ) is reported in units per radian. + * The default resolution for main axes (ABS_X, ABS_Y, ABS_Z) + * is reported in units per millimeter (units/mm), resolution + * for rotational axes (ABS_RX, ABS_RY, ABS_RZ) is reported + * in units per radian. + * When INPUT_PROP_ACCELEROMETER is set the resolution changes. + * The main axes (ABS_X, ABS_Y, ABS_Z) are then reported in + * units per g (units/g) and in units per degree per second + * (units/deg/s) for rotational axes (ABS_RX, ABS_RY, ABS_RZ).
*/ struct input_absinfo { __s32 value; -- cgit v1.2.3-71-gd317 From 916cda1aa1b412d7cf2991c3af7479544942d121 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 26 Jan 2016 14:10:34 +0100 Subject: s390: add a system call for guarded storage This adds a new system call to enable the use of guarded storage for user space processes. The system call takes two arguments, a command and pointer to a guarded storage control block: s390_guarded_storage(int command, struct gs_cb *gs_cb); The second argument is relevant only for the GS_SET_BC_CB command. The commands in detail: 0 - GS_ENABLE Enable the guarded storage facility for the current task. The initial content of the guarded storage control block will be all zeros. After the enablement the user space code can use load-guarded-storage-controls instruction (LGSC) to load an arbitrary control block. While a task is enabled the kernel will save and restore the current content of the guarded storage registers on context switch. 1 - GS_DISABLE Disables the use of the guarded storage facility for the current task. The kernel will cease to save and restore the content of the guarded storage registers, the task specific content of these registers is lost. 2 - GS_SET_BC_CB Set a broadcast guarded storage control block. This is called per thread and stores a specific guarded storage control block in the task struct of the current task. This control block will be used for the broadcast event GS_BROADCAST. 3 - GS_CLEAR_BC_CB Clears the broadcast guarded storage control block. The guarded- storage control block is removed from the task struct that was established by GS_SET_BC_CB. 4 - GS_BROADCAST Sends a broadcast to all thread siblings of the current task. Every sibling that has established a broadcast guarded storage control block will load this control block and will be enabled for guarded storage. 
The broadcast guarded storage control block is used up, a second broadcast without a refresh of the stored control block with GS_SET_BC_CB will not have any effect. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/elf.h | 1 + arch/s390/include/asm/lowcore.h | 9 +- arch/s390/include/asm/nmi.h | 12 ++- arch/s390/include/asm/processor.h | 5 ++ arch/s390/include/asm/setup.h | 2 + arch/s390/include/asm/switch_to.h | 3 + arch/s390/include/asm/thread_info.h | 12 +-- arch/s390/include/uapi/asm/Kbuild | 1 + arch/s390/include/uapi/asm/guarded_storage.h | 77 ++++++++++++++++ arch/s390/include/uapi/asm/unistd.h | 2 +- arch/s390/kernel/Makefile | 2 +- arch/s390/kernel/asm-offsets.c | 2 +- arch/s390/kernel/compat_wrapper.c | 1 + arch/s390/kernel/early.c | 2 + arch/s390/kernel/entry.S | 26 +++++- arch/s390/kernel/entry.h | 2 + arch/s390/kernel/guarded_storage.c | 128 +++++++++++++++++++++++++++ arch/s390/kernel/machine_kexec.c | 13 ++- arch/s390/kernel/nmi.c | 19 +++- arch/s390/kernel/process.c | 7 +- arch/s390/kernel/processor.c | 2 +- arch/s390/kernel/ptrace.c | 86 +++++++++++++++--- arch/s390/kernel/setup.c | 18 +++- arch/s390/kernel/smp.c | 43 +++++++-- arch/s390/kernel/syscalls.S | 2 +- arch/s390/kvm/interrupt.c | 4 +- include/uapi/linux/elf.h | 1 + 27 files changed, 436 insertions(+), 46 deletions(-) create mode 100644 arch/s390/include/uapi/asm/guarded_storage.h create mode 100644 arch/s390/kernel/guarded_storage.c (limited to 'include/uapi') diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index 1d48880b3cc1..e8f623041769 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -105,6 +105,7 @@ #define HWCAP_S390_VXRS 2048 #define HWCAP_S390_VXRS_BCD 4096 #define HWCAP_S390_VXRS_EXT 8192 +#define HWCAP_S390_GS 16384 /* Internal bits, not exposed via elf */ #define HWCAP_INT_SIE 1UL diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 61261e0e95c0..8a5b082797f8 100644 --- 
a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -157,8 +157,8 @@ struct lowcore { __u64 stfle_fac_list[32]; /* 0x0f00 */ __u8 pad_0x1000[0x11b0-0x1000]; /* 0x1000 */ - /* Pointer to vector register save area */ - __u64 vector_save_area_addr; /* 0x11b0 */ + /* Pointer to the machine check extended save area */ + __u64 mcesad; /* 0x11b0 */ /* 64 bit extparam used for pfault/diag 250: defined by architecture */ __u64 ext_params2; /* 0x11B8 */ @@ -182,10 +182,7 @@ struct lowcore { /* Transaction abort diagnostic block */ __u8 pgm_tdb[256]; /* 0x1800 */ - __u8 pad_0x1900[0x1c00-0x1900]; /* 0x1900 */ - - /* Software defined save area for vector registers */ - __u8 vector_save_area[1024]; /* 0x1c00 */ + __u8 pad_0x1900[0x2000-0x1900]; /* 0x1900 */ } __packed; #define S390_lowcore (*((struct lowcore *) 0)) diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h index b75fd910386a..e3e8895f5d3e 100644 --- a/arch/s390/include/asm/nmi.h +++ b/arch/s390/include/asm/nmi.h @@ -58,7 +58,9 @@ union mci { u64 ie : 1; /* 32 indirect storage error */ u64 ar : 1; /* 33 access register validity */ u64 da : 1; /* 34 delayed access exception */ - u64 : 7; /* 35-41 */ + u64 : 1; /* 35 */ + u64 gs : 1; /* 36 guarded storage registers */ + u64 : 5; /* 37-41 */ u64 pr : 1; /* 42 tod programmable register validity */ u64 fc : 1; /* 43 fp control register validity */ u64 ap : 1; /* 44 ancillary report */ @@ -69,6 +71,14 @@ union mci { }; }; +#define MCESA_ORIGIN_MASK (~0x3ffUL) +#define MCESA_LC_MASK (0xfUL) + +struct mcesa { + u8 vector_save_area[1024]; + u8 guarded_storage_save_area[32]; +}; + struct pt_regs; extern void s390_handle_mcck(void); diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index e4988710aa86..cc101f9371cb 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -135,6 +135,8 @@ struct thread_struct { struct list_head list; /* cpu runtime instrumentation */ struct 
runtime_instr_cb *ri_cb; + struct gs_cb *gs_cb; /* Current guarded storage cb */ + struct gs_cb *gs_bc_cb; /* Broadcast guarded storage cb */ unsigned char trap_tdb[256]; /* Transaction abort diagnose block */ /* * Warning: 'fpu' is dynamically-sized. It *MUST* be at @@ -215,6 +217,9 @@ void show_cacheinfo(struct seq_file *m); /* Free all resources held by a thread. */ extern void release_thread(struct task_struct *); +/* Free guarded storage control block for current */ +void exit_thread_gs(void); + /* * Return saved PC of a blocked thread. */ diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 30bdb5a027f3..383bd8358a8c 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -31,6 +31,7 @@ #define MACHINE_FLAG_VX _BITUL(13) #define MACHINE_FLAG_CAD _BITUL(14) #define MACHINE_FLAG_NX _BITUL(15) +#define MACHINE_FLAG_GS _BITUL(16) #define LPP_MAGIC _BITUL(31) #define LPP_PFAULT_PID_MASK _AC(0xffffffff, UL) @@ -70,6 +71,7 @@ extern void detect_memory_memblock(void); #define MACHINE_HAS_VX (S390_lowcore.machine_flags & MACHINE_FLAG_VX) #define MACHINE_HAS_CAD (S390_lowcore.machine_flags & MACHINE_FLAG_CAD) #define MACHINE_HAS_NX (S390_lowcore.machine_flags & MACHINE_FLAG_NX) +#define MACHINE_HAS_GS (S390_lowcore.machine_flags & MACHINE_FLAG_GS) /* * Console mode. 
Override with conmode= diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h index 12d45f0cfdd9..f6c2b5814ab0 100644 --- a/arch/s390/include/asm/switch_to.h +++ b/arch/s390/include/asm/switch_to.h @@ -10,6 +10,7 @@ #include #include #include +#include extern struct task_struct *__switch_to(void *, void *); extern void update_cr_regs(struct task_struct *task); @@ -33,12 +34,14 @@ static inline void restore_access_regs(unsigned int *acrs) save_fpu_regs(); \ save_access_regs(&prev->thread.acrs[0]); \ save_ri_cb(prev->thread.ri_cb); \ + save_gs_cb(prev->thread.gs_cb); \ } \ if (next->mm) { \ update_cr_regs(next); \ set_cpu_flag(CIF_FPU); \ restore_access_regs(&next->thread.acrs[0]); \ restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \ + restore_gs_cb(next->thread.gs_cb); \ } \ prev = __switch_to(prev,next); \ } while (0) diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index a5b54a445eb8..f36e6e2b73f0 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -54,11 +54,12 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); #define TIF_NOTIFY_RESUME 0 /* callback before returning to user */ #define TIF_SIGPENDING 1 /* signal pending */ #define TIF_NEED_RESCHED 2 /* rescheduling necessary */ -#define TIF_SYSCALL_TRACE 3 /* syscall trace active */ -#define TIF_SYSCALL_AUDIT 4 /* syscall auditing active */ -#define TIF_SECCOMP 5 /* secure computing */ -#define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ -#define TIF_UPROBE 7 /* breakpointed or single-stepping */ +#define TIF_UPROBE 3 /* breakpointed or single-stepping */ +#define TIF_GUARDED_STORAGE 4 /* load guarded storage control block */ +#define TIF_SYSCALL_TRACE 8 /* syscall trace active */ +#define TIF_SYSCALL_AUDIT 9 /* syscall auditing active */ +#define TIF_SECCOMP 10 /* secure computing */ +#define TIF_SYSCALL_TRACEPOINT 11 /* syscall tracepoint 
instrumentation */ #define TIF_31BIT 16 /* 32bit process */ #define TIF_MEMDIE 17 /* is terminating due to OOM killer */ #define TIF_RESTORE_SIGMASK 18 /* restore signal mask in do_signal() */ @@ -76,5 +77,6 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); #define _TIF_UPROBE _BITUL(TIF_UPROBE) #define _TIF_31BIT _BITUL(TIF_31BIT) #define _TIF_SINGLE_STEP _BITUL(TIF_SINGLE_STEP) +#define _TIF_GUARDED_STORAGE _BITUL(TIF_GUARDED_STORAGE) #endif /* _ASM_THREAD_INFO_H */ diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild index 6848ba5c1454..86b761e583e3 100644 --- a/arch/s390/include/uapi/asm/Kbuild +++ b/arch/s390/include/uapi/asm/Kbuild @@ -12,6 +12,7 @@ header-y += dasd.h header-y += debug.h header-y += errno.h header-y += fcntl.h +header-y += guarded_storage.h header-y += hypfs.h header-y += ioctl.h header-y += ioctls.h diff --git a/arch/s390/include/uapi/asm/guarded_storage.h b/arch/s390/include/uapi/asm/guarded_storage.h new file mode 100644 index 000000000000..852850e8e17e --- /dev/null +++ b/arch/s390/include/uapi/asm/guarded_storage.h @@ -0,0 +1,77 @@ +#ifndef _GUARDED_STORAGE_H +#define _GUARDED_STORAGE_H + +#include + +struct gs_cb { + __u64 reserved; + __u64 gsd; + __u64 gssm; + __u64 gs_epl_a; +}; + +struct gs_epl { + __u8 pad1; + union { + __u8 gs_eam; + struct { + __u8 : 6; + __u8 e : 1; + __u8 b : 1; + }; + }; + union { + __u8 gs_eci; + struct { + __u8 tx : 1; + __u8 cx : 1; + __u8 : 5; + __u8 in : 1; + }; + }; + union { + __u8 gs_eai; + struct { + __u8 : 1; + __u8 t : 1; + __u8 as : 2; + __u8 ar : 4; + }; + }; + __u32 pad2; + __u64 gs_eha; + __u64 gs_eia; + __u64 gs_eoa; + __u64 gs_eir; + __u64 gs_era; +}; + +#define GS_ENABLE 0 +#define GS_DISABLE 1 +#define GS_SET_BC_CB 2 +#define GS_CLEAR_BC_CB 3 +#define GS_BROADCAST 4 + +static inline void load_gs_cb(struct gs_cb *gs_cb) +{ + asm volatile(".insn rxy,0xe3000000004d,0,%0" : : "Q" (*gs_cb)); +} + +static inline void store_gs_cb(struct 
gs_cb *gs_cb) +{ + asm volatile(".insn rxy,0xe30000000049,0,%0" : : "Q" (*gs_cb)); +} + +static inline void save_gs_cb(struct gs_cb *gs_cb) +{ + if (gs_cb) + store_gs_cb(gs_cb); +} + +static inline void restore_gs_cb(struct gs_cb *gs_cb) +{ + if (gs_cb) + load_gs_cb(gs_cb); +} + +#endif /* _GUARDED_STORAGE_H */ diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h index 152de9b796e1..ea42290e7d51 100644 --- a/arch/s390/include/uapi/asm/unistd.h +++ b/arch/s390/include/uapi/asm/unistd.h @@ -313,7 +313,7 @@ #define __NR_copy_file_range 375 #define __NR_preadv2 376 #define __NR_pwritev2 377 -/* Number 378 is reserved for guarded storage */ +#define __NR_s390_guarded_storage 378 #define __NR_statx 379 #define NR_syscalls 380 diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 060ce548fe8b..aa5adbdaf200 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -57,7 +57,7 @@ obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o als.o obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o -obj-y += runtime_instr.o cache.o fpu.o dumpstack.o +obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o obj-y += entry.o reipl.o relocate_kernel.o extra-y += head.o head64.o vmlinux.lds diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index c4b3570ded5b..6bb29633e1f1 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -175,7 +175,7 @@ int main(void) /* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */ OFFSET(__LC_DUMP_REIPL, lowcore, ipib); /* hardware defined lowcore locations 0x1000 - 0x18ff */ - OFFSET(__LC_VX_SAVE_AREA_ADDR, lowcore, vector_save_area_addr); + OFFSET(__LC_MCESAD, lowcore, mcesad); OFFSET(__LC_EXT_PARAMS2, lowcore, ext_params2); 
OFFSET(__LC_FPREGS_SAVE_AREA, lowcore, floating_pt_save_area); OFFSET(__LC_GPREGS_SAVE_AREA, lowcore, gpregs_save_area); diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c index e89cc2e71db1..986642a3543b 100644 --- a/arch/s390/kernel/compat_wrapper.c +++ b/arch/s390/kernel/compat_wrapper.c @@ -178,4 +178,5 @@ COMPAT_SYSCALL_WRAP3(getpeername, int, fd, struct sockaddr __user *, usockaddr, COMPAT_SYSCALL_WRAP6(sendto, int, fd, void __user *, buff, size_t, len, unsigned int, flags, struct sockaddr __user *, addr, int, addr_len); COMPAT_SYSCALL_WRAP3(mlock2, unsigned long, start, size_t, len, int, flags); COMPAT_SYSCALL_WRAP6(copy_file_range, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags); +COMPAT_SYSCALL_WRAP2(s390_guarded_storage, int, command, struct gs_cb *, gs_cb); COMPAT_SYSCALL_WRAP5(statx, int, dfd, const char __user *, path, unsigned, flags, unsigned, mask, struct statx __user *, buffer); diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 4e65c79cc5f2..95298a41076f 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -358,6 +358,8 @@ static __init void detect_machine_facilities(void) S390_lowcore.machine_flags |= MACHINE_FLAG_NX; __ctl_set_bit(0, 20); } + if (test_facility(133)) + S390_lowcore.machine_flags |= MACHINE_FLAG_GS; } static inline void save_vector_registers(void) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 6a7d737d514c..fa8b8f28e08b 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -47,7 +47,7 @@ STACK_SIZE = 1 << STACK_SHIFT STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE _TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ - _TIF_UPROBE) + _TIF_UPROBE | _TIF_GUARDED_STORAGE) _TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ _TIF_SYSCALL_TRACEPOINT) _CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE_PRIMARY | \ @@ -332,6 
+332,8 @@ ENTRY(system_call) TSTMSK __TI_flags(%r12),_TIF_UPROBE jo .Lsysc_uprobe_notify #endif + TSTMSK __TI_flags(%r12),_TIF_GUARDED_STORAGE + jo .Lsysc_guarded_storage TSTMSK __PT_FLAGS(%r11),_PIF_PER_TRAP jo .Lsysc_singlestep TSTMSK __TI_flags(%r12),_TIF_SIGPENDING @@ -408,6 +410,14 @@ ENTRY(system_call) jg uprobe_notify_resume #endif +# +# _TIF_GUARDED_STORAGE is set, call gs_load_bc_cb +# +.Lsysc_guarded_storage: + lgr %r2,%r11 # pass pointer to pt_regs + larl %r14,.Lsysc_return + jg gs_load_bc_cb + # # _PIF_PER_TRAP is set, call do_per_trap # @@ -663,6 +673,8 @@ ENTRY(io_int_handler) jo .Lio_sigpending TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME jo .Lio_notify_resume + TSTMSK __TI_flags(%r12),_TIF_GUARDED_STORAGE + jo .Lio_guarded_storage TSTMSK __LC_CPU_FLAGS,_CIF_FPU jo .Lio_vxrs TSTMSK __LC_CPU_FLAGS,(_CIF_ASCE_PRIMARY|_CIF_ASCE_SECONDARY) @@ -696,6 +708,18 @@ ENTRY(io_int_handler) larl %r14,.Lio_return jg load_fpu_regs +# +# _TIF_GUARDED_STORAGE is set, call gs_load_bc_cb +# +.Lio_guarded_storage: + # TRACE_IRQS_ON already done at .Lio_return + ssm __LC_SVC_NEW_PSW # reenable interrupts + lgr %r2,%r11 # pass pointer to pt_regs + brasl %r14,gs_load_bc_cb + ssm __LC_PGM_NEW_PSW # disable I/O and ext.
interrupts + TRACE_IRQS_OFF + j .Lio_return + # # _TIF_NEED_RESCHED is set, call schedule # diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 33f901865326..dbf5f7e18246 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -74,12 +74,14 @@ long sys_sigreturn(void); long sys_s390_personality(unsigned int personality); long sys_s390_runtime_instr(int command, int signum); +long sys_s390_guarded_storage(int command, struct gs_cb __user *); long sys_s390_pci_mmio_write(unsigned long, const void __user *, size_t); long sys_s390_pci_mmio_read(unsigned long, void __user *, size_t); DECLARE_PER_CPU(u64, mt_cycles[8]); void verify_facilities(void); +void gs_load_bc_cb(struct pt_regs *regs); void set_fs_fixup(void); #endif /* _ENTRY_H */ diff --git a/arch/s390/kernel/guarded_storage.c b/arch/s390/kernel/guarded_storage.c new file mode 100644 index 000000000000..6f064745c3b1 --- /dev/null +++ b/arch/s390/kernel/guarded_storage.c @@ -0,0 +1,128 @@ +/* + * Copyright IBM Corp. 
2016 + * Author(s): Martin Schwidefsky + */ + +#include +#include +#include +#include +#include +#include +#include "entry.h" + +void exit_thread_gs(void) +{ + kfree(current->thread.gs_cb); + kfree(current->thread.gs_bc_cb); + current->thread.gs_cb = current->thread.gs_bc_cb = NULL; +} + +static int gs_enable(void) +{ + struct gs_cb *gs_cb; + + if (!current->thread.gs_cb) { + gs_cb = kzalloc(sizeof(*gs_cb), GFP_KERNEL); + if (!gs_cb) + return -ENOMEM; + gs_cb->gsd = 25; + preempt_disable(); + __ctl_set_bit(2, 4); + load_gs_cb(gs_cb); + current->thread.gs_cb = gs_cb; + preempt_enable(); + } + return 0; +} + +static int gs_disable(void) +{ + if (current->thread.gs_cb) { + preempt_disable(); + kfree(current->thread.gs_cb); + current->thread.gs_cb = NULL; + __ctl_clear_bit(2, 4); + preempt_enable(); + } + return 0; +} + +static int gs_set_bc_cb(struct gs_cb __user *u_gs_cb) +{ + struct gs_cb *gs_cb; + + gs_cb = current->thread.gs_bc_cb; + if (!gs_cb) { + gs_cb = kzalloc(sizeof(*gs_cb), GFP_KERNEL); + if (!gs_cb) + return -ENOMEM; + current->thread.gs_bc_cb = gs_cb; + } + if (copy_from_user(gs_cb, u_gs_cb, sizeof(*gs_cb))) + return -EFAULT; + return 0; +} + +static int gs_clear_bc_cb(void) +{ + struct gs_cb *gs_cb; + + gs_cb = current->thread.gs_bc_cb; + current->thread.gs_bc_cb = NULL; + kfree(gs_cb); + return 0; +} + +void gs_load_bc_cb(struct pt_regs *regs) +{ + struct gs_cb *gs_cb; + + preempt_disable(); + clear_thread_flag(TIF_GUARDED_STORAGE); + gs_cb = current->thread.gs_bc_cb; + if (gs_cb) { + kfree(current->thread.gs_cb); + current->thread.gs_bc_cb = NULL; + __ctl_set_bit(2, 4); + load_gs_cb(gs_cb); + current->thread.gs_cb = gs_cb; + } + preempt_enable(); +} + +static int gs_broadcast(void) +{ + struct task_struct *sibling; + + read_lock(&tasklist_lock); + for_each_thread(current, sibling) { + if (!sibling->thread.gs_bc_cb) + continue; + if (test_and_set_tsk_thread_flag(sibling, TIF_GUARDED_STORAGE)) + kick_process(sibling); + } + read_unlock(&tasklist_lock); + 
return 0; +} + +SYSCALL_DEFINE2(s390_guarded_storage, int, command, + struct gs_cb __user *, gs_cb) +{ + if (!MACHINE_HAS_GS) + return -EOPNOTSUPP; + switch (command) { + case GS_ENABLE: + return gs_enable(); + case GS_DISABLE: + return gs_disable(); + case GS_SET_BC_CB: + return gs_set_bc_cb(gs_cb); + case GS_CLEAR_BC_CB: + return gs_clear_bc_cb(); + case GS_BROADCAST: + return gs_broadcast(); + default: + return -EINVAL; + } +} diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 3074c1d83829..db5658daf994 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -27,6 +27,7 @@ #include #include #include +#include typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long); @@ -102,6 +103,8 @@ static void __do_machine_kdump(void *image) */ static noinline void __machine_kdump(void *image) { + struct mcesa *mcesa; + unsigned long cr2_old, cr2_new; int this_cpu, cpu; lgr_info_log(); @@ -114,8 +117,16 @@ static noinline void __machine_kdump(void *image) continue; } /* Store status of the boot CPU */ + mcesa = (struct mcesa *)(S390_lowcore.mcesad & MCESA_ORIGIN_MASK); if (MACHINE_HAS_VX) - save_vx_regs((void *) &S390_lowcore.vector_save_area); + save_vx_regs((__vector128 *) mcesa->vector_save_area); + if (MACHINE_HAS_GS) { + __ctl_store(cr2_old, 2, 2); + cr2_new = cr2_old | (1UL << 4); + __ctl_load(cr2_new, 2, 2); + save_gs_cb((struct gs_cb *) mcesa->guarded_storage_save_area); + __ctl_load(cr2_old, 2, 2); + } /* * To create a good backchain for this CPU in the dump store_status * is passed the address of a function. 
The address is saved into diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 9bf8327154ee..985589523970 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -106,6 +106,7 @@ static int notrace s390_validate_registers(union mci mci, int umode) int kill_task; u64 zero; void *fpt_save_area; + struct mcesa *mcesa; kill_task = 0; zero = 0; @@ -165,6 +166,7 @@ static int notrace s390_validate_registers(union mci mci, int umode) : : "Q" (S390_lowcore.fpt_creg_save_area)); } + mcesa = (struct mcesa *)(S390_lowcore.mcesad & MCESA_ORIGIN_MASK); if (!MACHINE_HAS_VX) { /* Validate floating point registers */ asm volatile( @@ -209,8 +211,8 @@ static int notrace s390_validate_registers(union mci mci, int umode) " la 1,%0\n" " .word 0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */ " .word 0xe70f,0x1100,0x0c36\n" /* vlm 16,31,256(1) */ - : : "Q" (*(struct vx_array *) - &S390_lowcore.vector_save_area) : "1"); + : : "Q" (*(struct vx_array *) mcesa->vector_save_area) + : "1"); __ctl_load(S390_lowcore.cregs_save_area[0], 0, 0); } /* Validate access registers */ @@ -224,6 +226,19 @@ static int notrace s390_validate_registers(union mci mci, int umode) */ kill_task = 1; } + /* Validate guarded storage registers */ + if (MACHINE_HAS_GS && (S390_lowcore.cregs_save_area[2] & (1UL << 4))) { + if (!mci.gs) + /* + * Guarded storage register can't be restored and + * the current process uses guarded storage. + * It has to be terminated. + */ + kill_task = 1; + else + load_gs_cb((struct gs_cb *) + mcesa->guarded_storage_save_area); + } /* * We don't even try to validate the TOD register, since we simply * can't write something sensible into that register.
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index f29e41c5e2ec..999d7154bbdc 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -73,8 +73,10 @@ extern void kernel_thread_starter(void); */ void exit_thread(struct task_struct *tsk) { - if (tsk == current) + if (tsk == current) { exit_thread_runtime_instr(); + exit_thread_gs(); + } } void flush_thread(void) @@ -159,6 +161,9 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long new_stackp, /* Don't copy runtime instrumentation info */ p->thread.ri_cb = NULL; frame->childregs.psw.mask &= ~PSW_MASK_RI; + /* Don't copy guarded storage control block */ + p->thread.gs_cb = NULL; + p->thread.gs_bc_cb = NULL; /* Set a new TLS ? */ if (clone_flags & CLONE_SETTLS) { diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 928b929a6261..c73709869447 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -95,7 +95,7 @@ static void show_cpu_summary(struct seq_file *m, void *v) { static const char *hwcap_str[] = { "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp", - "edat", "etf3eh", "highgprs", "te", "vx", "vxd", "vxe" + "edat", "etf3eh", "highgprs", "te", "vx", "vxd", "vxe", "gs" }; static const char * const int_hwcap_str[] = { "sie" diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index c14df0a1ec3c..c933e255b5d5 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -44,30 +44,42 @@ void update_cr_regs(struct task_struct *task) struct pt_regs *regs = task_pt_regs(task); struct thread_struct *thread = &task->thread; struct per_regs old, new; - + unsigned long cr0_old, cr0_new; + unsigned long cr2_old, cr2_new; + int cr0_changed, cr2_changed; + + __ctl_store(cr0_old, 0, 0); + __ctl_store(cr2_old, 2, 2); + cr0_new = cr0_old; + cr2_new = cr2_old; /* Take care of the enable/disable of transactional execution. 
*/ if (MACHINE_HAS_TE) { - unsigned long cr, cr_new; - - __ctl_store(cr, 0, 0); /* Set or clear transaction execution TXC bit 8. */ - cr_new = cr | (1UL << 55); + cr0_new |= (1UL << 55); if (task->thread.per_flags & PER_FLAG_NO_TE) - cr_new &= ~(1UL << 55); - if (cr_new != cr) - __ctl_load(cr_new, 0, 0); + cr0_new &= ~(1UL << 55); /* Set or clear transaction execution TDC bits 62 and 63. */ - __ctl_store(cr, 2, 2); - cr_new = cr & ~3UL; + cr2_new &= ~3UL; if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) { if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND) - cr_new |= 1UL; + cr2_new |= 1UL; else - cr_new |= 2UL; + cr2_new |= 2UL; } - if (cr_new != cr) - __ctl_load(cr_new, 2, 2); } + /* Take care of enable/disable of guarded storage. */ + if (MACHINE_HAS_GS) { + cr2_new &= ~(1UL << 4); + if (task->thread.gs_cb) + cr2_new |= (1UL << 4); + } + /* Load control register 0/2 iff changed */ + cr0_changed = cr0_new != cr0_old; + cr2_changed = cr2_new != cr2_old; + if (cr0_changed) + __ctl_load(cr0_new, 0, 0); + if (cr2_changed) + __ctl_load(cr2_new, 2, 2); /* Copy user specified PER registers */ new.control = thread->per_user.control; new.start = thread->per_user.start; @@ -1137,6 +1149,36 @@ static int s390_system_call_set(struct task_struct *target, data, 0, sizeof(unsigned int)); } +static int s390_gs_cb_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + struct gs_cb *data = target->thread.gs_cb; + + if (!MACHINE_HAS_GS) + return -ENODEV; + if (!data) + return -ENODATA; + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + data, 0, sizeof(struct gs_cb)); +} + +static int s390_gs_cb_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct gs_cb *data = target->thread.gs_cb; + + if (!MACHINE_HAS_GS) + return -ENODEV; + if (!data) + return -ENODATA; + return 
user_regset_copyin(&pos, &count, &kbuf, &ubuf, + data, 0, sizeof(struct gs_cb)); +} + static const struct user_regset s390_regsets[] = { { .core_note_type = NT_PRSTATUS, @@ -1194,6 +1236,14 @@ static const struct user_regset s390_regsets[] = { .get = s390_vxrs_high_get, .set = s390_vxrs_high_set, }, + { + .core_note_type = NT_S390_GS_CB, + .n = sizeof(struct gs_cb) / sizeof(__u64), + .size = sizeof(__u64), + .align = sizeof(__u64), + .get = s390_gs_cb_get, + .set = s390_gs_cb_set, + }, }; static const struct user_regset_view user_s390_view = { @@ -1422,6 +1472,14 @@ static const struct user_regset s390_compat_regsets[] = { .get = s390_compat_regs_high_get, .set = s390_compat_regs_high_set, }, + { + .core_note_type = NT_S390_GS_CB, + .n = sizeof(struct gs_cb) / sizeof(__u64), + .size = sizeof(__u64), + .align = sizeof(__u64), + .get = s390_gs_cb_get, + .set = s390_gs_cb_set, + }, }; static const struct user_regset_view user_s390_compat_view = { diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 911dc0b49be0..3ae756c0db3d 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -339,9 +339,15 @@ static void __init setup_lowcore(void) lc->stfl_fac_list = S390_lowcore.stfl_fac_list; memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list, MAX_FACILITY_BIT/8); - if (MACHINE_HAS_VX) - lc->vector_save_area_addr = - (unsigned long) &lc->vector_save_area; + if (MACHINE_HAS_VX || MACHINE_HAS_GS) { + unsigned long bits, size; + + bits = MACHINE_HAS_GS ? 11 : 10; + size = 1UL << bits; + lc->mcesad = (__u64) memblock_virt_alloc(size, size); + if (MACHINE_HAS_GS) + lc->mcesad |= bits; + } lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0]; lc->sync_enter_timer = S390_lowcore.sync_enter_timer; lc->async_enter_timer = S390_lowcore.async_enter_timer; @@ -779,6 +785,12 @@ static int __init setup_hwcaps(void) elf_hwcap |= HWCAP_S390_VXRS_BCD; } + /* + * Guarded storage support HWCAP_S390_GS is bit 14.
+ */ + if (MACHINE_HAS_GS) + elf_hwcap |= HWCAP_S390_GS; + get_cpu_id(&cpu_id); add_device_randomness(&cpu_id, sizeof(cpu_id)); switch (cpu_id.machine) { diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 47a973b5b4f1..286bcee800f4 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -51,6 +51,7 @@ #include #include #include +#include #include "entry.h" enum { @@ -78,6 +79,8 @@ struct pcpu { static u8 boot_core_type; static struct pcpu pcpu_devices[NR_CPUS]; +static struct kmem_cache *pcpu_mcesa_cache; + unsigned int smp_cpu_mt_shift; EXPORT_SYMBOL(smp_cpu_mt_shift); @@ -188,8 +191,10 @@ static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit) static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) { unsigned long async_stack, panic_stack; + unsigned long mcesa_origin, mcesa_bits; struct lowcore *lc; + mcesa_origin = mcesa_bits = 0; if (pcpu != &pcpu_devices[0]) { pcpu->lowcore = (struct lowcore *) __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER); @@ -197,20 +202,27 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) panic_stack = __get_free_page(GFP_KERNEL); if (!pcpu->lowcore || !panic_stack || !async_stack) goto out; + if (MACHINE_HAS_VX || MACHINE_HAS_GS) { + mcesa_origin = (unsigned long) + kmem_cache_alloc(pcpu_mcesa_cache, GFP_KERNEL); + if (!mcesa_origin) + goto out; + mcesa_bits = MACHINE_HAS_GS ? 
11 : 0; + } } else { async_stack = pcpu->lowcore->async_stack - ASYNC_FRAME_OFFSET; panic_stack = pcpu->lowcore->panic_stack - PANIC_FRAME_OFFSET; + mcesa_origin = pcpu->lowcore->mcesad & MCESA_ORIGIN_MASK; + mcesa_bits = pcpu->lowcore->mcesad & MCESA_LC_MASK; } lc = pcpu->lowcore; memcpy(lc, &S390_lowcore, 512); memset((char *) lc + 512, 0, sizeof(*lc) - 512); lc->async_stack = async_stack + ASYNC_FRAME_OFFSET; lc->panic_stack = panic_stack + PANIC_FRAME_OFFSET; + lc->mcesad = mcesa_origin | mcesa_bits; lc->cpu_nr = cpu; lc->spinlock_lockval = arch_spin_lockval(cpu); - if (MACHINE_HAS_VX) - lc->vector_save_area_addr = - (unsigned long) &lc->vector_save_area; if (vdso_alloc_per_cpu(lc)) goto out; lowcore_ptr[cpu] = lc; @@ -218,6 +230,9 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) return 0; out: if (pcpu != &pcpu_devices[0]) { + if (mcesa_origin) + kmem_cache_free(pcpu_mcesa_cache, + (void *) mcesa_origin); free_page(panic_stack); free_pages(async_stack, ASYNC_ORDER); free_pages((unsigned long) pcpu->lowcore, LC_ORDER); @@ -229,11 +244,17 @@ out: static void pcpu_free_lowcore(struct pcpu *pcpu) { + unsigned long mcesa_origin; + pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0); lowcore_ptr[pcpu - pcpu_devices] = NULL; vdso_free_per_cpu(pcpu->lowcore); if (pcpu == &pcpu_devices[0]) return; + if (MACHINE_HAS_VX || MACHINE_HAS_GS) { + mcesa_origin = pcpu->lowcore->mcesad & MCESA_ORIGIN_MASK; + kmem_cache_free(pcpu_mcesa_cache, (void *) mcesa_origin); + } free_page(pcpu->lowcore->panic_stack-PANIC_FRAME_OFFSET); free_pages(pcpu->lowcore->async_stack-ASYNC_FRAME_OFFSET, ASYNC_ORDER); free_pages((unsigned long) pcpu->lowcore, LC_ORDER); @@ -550,9 +571,11 @@ int smp_store_status(int cpu) if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_STATUS_AT_ADDRESS, pa) != SIGP_CC_ORDER_CODE_ACCEPTED) return -EIO; - if (!MACHINE_HAS_VX) + if (!MACHINE_HAS_VX && !MACHINE_HAS_GS) return 0; - pa = __pa(pcpu->lowcore->vector_save_area_addr); + pa = __pa(pcpu->lowcore->mcesad & 
MCESA_ORIGIN_MASK); + if (MACHINE_HAS_GS) + pa |= pcpu->lowcore->mcesad & MCESA_LC_MASK; if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS, pa) != SIGP_CC_ORDER_CODE_ACCEPTED) return -EIO; @@ -897,12 +920,22 @@ void __init smp_fill_possible_mask(void) void __init smp_prepare_cpus(unsigned int max_cpus) { + unsigned long size; + /* request the 0x1201 emergency signal external interrupt */ if (register_external_irq(EXT_IRQ_EMERGENCY_SIG, do_ext_call_interrupt)) panic("Couldn't request external interrupt 0x1201"); /* request the 0x1202 external call external interrupt */ if (register_external_irq(EXT_IRQ_EXTERNAL_CALL, do_ext_call_interrupt)) panic("Couldn't request external interrupt 0x1202"); + /* create slab cache for the machine-check-extended-save-areas */ + if (MACHINE_HAS_VX || MACHINE_HAS_GS) { + size = 1UL << (MACHINE_HAS_GS ? 11 : 10); + pcpu_mcesa_cache = kmem_cache_create("nmi_save_areas", + size, size, 0, NULL); + if (!pcpu_mcesa_cache) + panic("Couldn't create nmi save area cache"); + } } void __init smp_prepare_boot_cpu(void) diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 2659b5cfeddb..54fce7b065de 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -386,5 +386,5 @@ SYSCALL(sys_mlock2,compat_sys_mlock2) SYSCALL(sys_copy_file_range,compat_sys_copy_file_range) /* 375 */ SYSCALL(sys_preadv2,compat_sys_preadv2) SYSCALL(sys_pwritev2,compat_sys_pwritev2) -NI_SYSCALL +SYSCALL(sys_s390_guarded_storage,compat_sys_s390_guarded_storage) /* 378 */ SYSCALL(sys_statx,compat_sys_statx) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 0f8f14199734..169558dc7daf 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -420,8 +420,8 @@ static int __write_machine_check(struct kvm_vcpu *vcpu, save_access_regs(vcpu->run->s.regs.acrs); /* Extended save area */ - rc = read_guest_lc(vcpu, __LC_VX_SAVE_AREA_ADDR, &ext_sa_addr, - sizeof(unsigned long)); + rc = 
read_guest_lc(vcpu, __LC_MCESAD, &ext_sa_addr, + sizeof(unsigned long)); /* Only bits 0-53 are used for address formation */ ext_sa_addr &= ~0x3ffUL; if (!rc && mci.vr && ext_sa_addr && test_kvm_facility(vcpu->kvm, 129)) { diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index b59ee077a596..8c6d3bdb9a00 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -409,6 +409,7 @@ typedef struct elf64_shdr { #define NT_S390_TDB 0x308 /* s390 transaction diagnostic block */ #define NT_S390_VXRS_LOW 0x309 /* s390 vector registers 0-15 upper half */ #define NT_S390_VXRS_HIGH 0x30a /* s390 vector registers 16-31 */ +#define NT_S390_GS_CB 0x30b /* s390 guarded storage registers */ #define NT_ARM_VFP 0x400 /* ARM VFP/NEON registers */ #define NT_ARM_TLS 0x401 /* ARM TLS register */ #define NT_ARM_HW_BREAK 0x402 /* ARM hardware breakpoint registers */ -- cgit v1.2.3-71-gd317 From 79e92dc0ecc64dd221383960a8bc3399c6723d03 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 1 Mar 2017 12:31:20 -0300 Subject: [media] videodev2.h: map xvYCC601/709 to limited range quantization The xvYCC601/709 encodings were mapped by default to full range quantization. This is actually wrong since these encodings use limited range quantization, but accept values outside of the limited range. This makes sense since for values within the limited range it behaves exactly the same as BT.601 or Rec. 709. The only difference is that with the xvYCC encodings the values outside of the limited range also produce value colors. Talking to people who know a lot more about this than I do made me realize that mapping xvYCC to full range quantization was wrong, so this patch corrects this and also improves the documentation. These formats are very rare, and since the formula for decoding these Y'CbCr encodings is actually fixed and independent of the quantization field value it is safe to make this change. 
The main advantage is that keeps the V4L2 specification in sync with the corresponding colorspace, HDMI and CEA861 standards when it comes to these xvYCC formats. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- Documentation/media/uapi/v4l/pixfmt-007.rst | 13 +++++++++---- include/uapi/linux/videodev2.h | 3 +-- 2 files changed, 10 insertions(+), 6 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/media/uapi/v4l/pixfmt-007.rst b/Documentation/media/uapi/v4l/pixfmt-007.rst index 95a23a28c595..0c30ee2577d3 100644 --- a/Documentation/media/uapi/v4l/pixfmt-007.rst +++ b/Documentation/media/uapi/v4l/pixfmt-007.rst @@ -174,7 +174,7 @@ this colorspace: The xvYCC 709 encoding (``V4L2_YCBCR_ENC_XV709``, :ref:`xvycc`) is similar to the Rec. 709 encoding, but it allows for R', G' and B' values that are outside the range [0…1]. The resulting Y', Cb and Cr values are -scaled and offset: +scaled and offset according to the limited range formula: .. math:: @@ -187,7 +187,7 @@ scaled and offset: The xvYCC 601 encoding (``V4L2_YCBCR_ENC_XV601``, :ref:`xvycc`) is similar to the BT.601 encoding, but it allows for R', G' and B' values that are outside the range [0…1]. The resulting Y', Cb and Cr values are -scaled and offset: +scaled and offset according to the limited range formula: .. math:: @@ -198,9 +198,14 @@ scaled and offset: Cr = \frac{224}{256} * (0.5R' - 0.4187G' - 0.0813B') Y' is clamped to the range [0…1] and Cb and Cr are clamped to the range -[-0.5…0.5]. The non-standard xvYCC 709 or xvYCC 601 encodings can be +[-0.5…0.5] and quantized without further scaling or offsets. +The non-standard xvYCC 709 or xvYCC 601 encodings can be used by selecting ``V4L2_YCBCR_ENC_XV709`` or ``V4L2_YCBCR_ENC_XV601``. -The xvYCC encodings always use full range quantization. +As seen by the xvYCC formulas these encodings always use limited range quantization, +there is no full range variant. 
The whole point of these extended gamut encodings +is that values outside the limited range are still valid, although they +map to R', G' and B' values outside the [0…1] range and are therefore outside +the Rec. 709 colorspace gamut. .. _col-srgb: diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 45184a2ef66c..316be62f3a45 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -378,8 +378,7 @@ enum v4l2_quantization { #define V4L2_MAP_QUANTIZATION_DEFAULT(is_rgb_or_hsv, colsp, ycbcr_enc) \ (((is_rgb_or_hsv) && (colsp) == V4L2_COLORSPACE_BT2020) ? \ V4L2_QUANTIZATION_LIM_RANGE : \ - (((is_rgb_or_hsv) || (ycbcr_enc) == V4L2_YCBCR_ENC_XV601 || \ - (ycbcr_enc) == V4L2_YCBCR_ENC_XV709 || (colsp) == V4L2_COLORSPACE_JPEG) ? \ + (((is_rgb_or_hsv) || (colsp) == V4L2_COLORSPACE_JPEG) ? \ V4L2_QUANTIZATION_FULL_RANGE : V4L2_QUANTIZATION_LIM_RANGE)) enum v4l2_priority { -- cgit v1.2.3-71-gd317 From 4e0b1ab72b8af961bcaca9ec1475279c1cd9579c Mon Sep 17 00:00:00 2001 From: Fan Zhang Date: Tue, 29 Nov 2016 07:17:55 +0100 Subject: KVM: s390: gs support for kvm guests This patch adds guarded storage support for KVM guest. We need to setup the necessary control blocks, the kvm_run structure for the new registers, the necessary wrappers for VSIE, as well as the machine check save areas. GS is enabled lazily and the register saving and reloading is done in KVM code. As this feature adds new content for migration, we provide a new capability for enablement (KVM_CAP_S390_GS). 
Signed-off-by: Fan Zhang Reviewed-by: Christian Borntraeger Reviewed-by: Janosch Frank Signed-off-by: Christian Borntraeger --- Documentation/virtual/kvm/api.txt | 9 ++++++ arch/s390/include/asm/kvm_host.h | 8 ++++- arch/s390/include/uapi/asm/kvm.h | 14 ++++++++- arch/s390/kvm/intercept.c | 1 + arch/s390/kvm/interrupt.c | 33 +++++++++++++++++++-- arch/s390/kvm/kvm-s390.c | 62 +++++++++++++++++++++++++++++++++++++++ arch/s390/kvm/kvm-s390.h | 1 + arch/s390/kvm/priv.c | 27 +++++++++++++++++ arch/s390/kvm/vsie.c | 39 ++++++++++++++++++++++++ include/uapi/linux/kvm.h | 1 + 10 files changed, 191 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 3c248f772ae6..725250858479 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -4101,6 +4101,15 @@ to take care of that. This capability can be enabled dynamically even if VCPUs were already created and are running. +7.9 KVM_CAP_S390_GS + +Architectures: s390 +Parameters: none +Returns: 0 on success; -EINVAL if the machine does not support + guarded storage; -EBUSY if a VCPU has already been created. + +Allows use of guarded storage for the KVM guest. + 8. Other capabilities. 
---------------------- diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 54e36e7cd201..1af090d93bf5 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -25,6 +25,7 @@ #include #include #include +#include #define KVM_S390_BSCA_CPU_SLOTS 64 #define KVM_S390_ESCA_CPU_SLOTS 248 @@ -192,6 +193,7 @@ struct kvm_s390_sie_block { __u32 ipb; /* 0x0058 */ __u32 scaoh; /* 0x005c */ __u8 reserved60; /* 0x0060 */ +#define ECB_GS 0x40 #define ECB_TE 0x10 #define ECB_SRSI 0x04 #define ECB_HOSTPROTINT 0x02 @@ -237,7 +239,9 @@ struct kvm_s390_sie_block { __u32 crycbd; /* 0x00fc */ __u64 gcr[16]; /* 0x0100 */ __u64 gbea; /* 0x0180 */ - __u8 reserved188[24]; /* 0x0188 */ + __u8 reserved188[8]; /* 0x0188 */ + __u64 sdnxo; /* 0x0190 */ + __u8 reserved198[8]; /* 0x0198 */ __u32 fac; /* 0x01a0 */ __u8 reserved1a4[20]; /* 0x01a4 */ __u64 cbrlo; /* 0x01b8 */ @@ -573,6 +577,7 @@ struct kvm_vcpu_arch { /* if vsie is active, currently executed shadow sie control block */ struct kvm_s390_sie_block *vsie_block; unsigned int host_acrs[NUM_ACRS]; + struct gs_cb *host_gscb; struct fpu host_fpregs; struct kvm_s390_local_interrupt local_int; struct hrtimer ckc_timer; @@ -593,6 +598,7 @@ struct kvm_vcpu_arch { */ seqcount_t cputm_seqcount; __u64 cputm_start; + bool gs_enabled; }; struct kvm_vm_stat { diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index a2ffec4139ad..5bd23cfd9ae5 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -197,6 +197,10 @@ struct kvm_guest_debug_arch { #define KVM_SYNC_VRS (1UL << 6) #define KVM_SYNC_RICCB (1UL << 7) #define KVM_SYNC_FPRS (1UL << 8) +#define KVM_SYNC_GSCB (1UL << 9) +/* length and alignment of the sdnx as a power of two */ +#define SDNXC 8 +#define SDNXL (1UL << SDNXC) /* definition of registers in kvm_run */ struct kvm_sync_regs { __u64 prefix; /* prefix register */ @@ -217,8 +221,16 @@ struct kvm_sync_regs { }; __u8 
reserved[512]; /* for future vector expansion */ __u32 fpc; /* valid on KVM_SYNC_VRS or KVM_SYNC_FPRS */ - __u8 padding[52]; /* riccb needs to be 64byte aligned */ + __u8 padding1[52]; /* riccb needs to be 64byte aligned */ __u8 riccb[64]; /* runtime instrumentation controls block */ + __u8 padding2[192]; /* sdnx needs to be 256byte aligned */ + union { + __u8 sdnx[SDNXL]; /* state description annex */ + struct { + __u64 reserved1[2]; + __u64 gscb[4]; + }; + }; }; #define KVM_REG_S390_TODPR (KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1) diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index e831f4b3e1c1..f5378f336127 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -35,6 +35,7 @@ static const intercept_handler_t instruction_handlers[256] = { [0xb6] = kvm_s390_handle_stctl, [0xb7] = kvm_s390_handle_lctl, [0xb9] = kvm_s390_handle_b9, + [0xe3] = kvm_s390_handle_e3, [0xe5] = kvm_s390_handle_e5, [0xeb] = kvm_s390_handle_eb, }; diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 169558dc7daf..311eef0df855 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -410,6 +410,7 @@ static int __write_machine_check(struct kvm_vcpu *vcpu, struct kvm_s390_mchk_info *mchk) { unsigned long ext_sa_addr; + unsigned long lc; freg_t fprs[NUM_FPRS]; union mci mci; int rc; @@ -422,8 +423,28 @@ static int __write_machine_check(struct kvm_vcpu *vcpu, /* Extended save area */ rc = read_guest_lc(vcpu, __LC_MCESAD, &ext_sa_addr, sizeof(unsigned long)); - /* Only bits 0-53 are used for address formation */ - ext_sa_addr &= ~0x3ffUL; + /* Only bits 0 through 63-LC are used for address formation */ + lc = ext_sa_addr & MCESA_LC_MASK; + if (test_kvm_facility(vcpu->kvm, 133)) { + switch (lc) { + case 0: + case 10: + ext_sa_addr &= ~0x3ffUL; + break; + case 11: + ext_sa_addr &= ~0x7ffUL; + break; + case 12: + ext_sa_addr &= ~0xfffUL; + break; + default: + ext_sa_addr = 0; + break; + } + } else { + ext_sa_addr &= ~0x3ffUL; + 
} + if (!rc && mci.vr && ext_sa_addr && test_kvm_facility(vcpu->kvm, 129)) { if (write_guest_abs(vcpu, ext_sa_addr, vcpu->run->s.regs.vrs, 512)) @@ -431,6 +452,14 @@ static int __write_machine_check(struct kvm_vcpu *vcpu, } else { mci.vr = 0; } + if (!rc && mci.gs && ext_sa_addr && test_kvm_facility(vcpu->kvm, 133) + && (lc == 11 || lc == 12)) { + if (write_guest_abs(vcpu, ext_sa_addr + 1024, + &vcpu->run->s.regs.gscb, 32)) + mci.gs = 0; + } else { + mci.gs = 0; + } /* General interruption information */ rc |= put_guest_lc(vcpu, 1, (u8 __user *) __LC_AR_MODE_ID); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 976373c03138..f83f18b77f3d 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -405,6 +405,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_RI: r = test_facility(64); break; + case KVM_CAP_S390_GS: + r = test_facility(133); + break; default: r = 0; } @@ -541,6 +544,20 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s", r ? "(not available)" : "(success)"); break; + case KVM_CAP_S390_GS: + r = -EINVAL; + mutex_lock(&kvm->lock); + if (atomic_read(&kvm->online_vcpus)) { + r = -EBUSY; + } else if (test_facility(133)) { + set_kvm_facility(kvm->arch.model.fac_mask, 133); + set_kvm_facility(kvm->arch.model.fac_list, 133); + r = 0; + } + mutex_unlock(&kvm->lock); + VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s", + r ? "(not available)" : "(success)"); + break; case KVM_CAP_S390_USER_STSI: VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI"); kvm->arch.user_stsi = 1; @@ -1749,6 +1766,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) kvm_s390_set_prefix(vcpu, 0); if (test_kvm_facility(vcpu->kvm, 64)) vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; + if (test_kvm_facility(vcpu->kvm, 133)) + vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB; /* fprs can be synchronized via vrs, even if the guest has no vx. 
With * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format. */ @@ -1993,6 +2012,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->eca |= ECA_VX; vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; } + vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx) + | SDNXC; vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb; vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; @@ -2720,8 +2741,10 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { struct runtime_instr_cb *riccb; + struct gs_cb *gscb; riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb; + gscb = (struct gs_cb *) &kvm_run->s.regs.gscb; vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask; vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr; if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) @@ -2756,6 +2779,19 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)"); vcpu->arch.sie_block->ecb3 |= ECB3_RI; } + /* + * If userspace sets the gscb (e.g. after migration) to non-zero, + * we should enable GS here instead of doing the lazy enablement. 
+ */ + if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) && + test_kvm_facility(vcpu->kvm, 133) && + gscb->gssm && + !vcpu->arch.gs_enabled) { + VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)"); + vcpu->arch.sie_block->ecb |= ECB_GS; + vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; + vcpu->arch.gs_enabled = 1; + } save_access_regs(vcpu->arch.host_acrs); restore_access_regs(vcpu->run->s.regs.acrs); /* save host (userspace) fprs/vrs */ @@ -2770,6 +2806,20 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (test_fp_ctl(current->thread.fpu.fpc)) /* User space provided an invalid FPC, let's clear it */ current->thread.fpu.fpc = 0; + if (MACHINE_HAS_GS) { + preempt_disable(); + __ctl_set_bit(2, 4); + if (current->thread.gs_cb) { + vcpu->arch.host_gscb = current->thread.gs_cb; + save_gs_cb(vcpu->arch.host_gscb); + } + if (vcpu->arch.gs_enabled) { + current->thread.gs_cb = (struct gs_cb *) + &vcpu->run->s.regs.gscb; + restore_gs_cb(current->thread.gs_cb); + } + preempt_enable(); + } kvm_run->kvm_dirty_regs = 0; } @@ -2796,6 +2846,18 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) /* Restore will be done lazily at return */ current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc; current->thread.fpu.regs = vcpu->arch.host_fpregs.regs; + if (MACHINE_HAS_GS) { + __ctl_set_bit(2, 4); + if (vcpu->arch.gs_enabled) + save_gs_cb(current->thread.gs_cb); + preempt_disable(); + current->thread.gs_cb = vcpu->arch.host_gscb; + restore_gs_cb(vcpu->arch.host_gscb); + preempt_enable(); + if (!vcpu->arch.host_gscb) + __ctl_clear_bit(2, 4); + vcpu->arch.host_gscb = NULL; + } } diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index dfdcde125af6..455124fe0647 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -246,6 +246,7 @@ static inline void kvm_s390_retry_instr(struct kvm_vcpu *vcpu) int is_valid_psw(psw_t *psw); int kvm_s390_handle_aa(struct kvm_vcpu *vcpu); int kvm_s390_handle_b2(struct kvm_vcpu *vcpu); 
+int kvm_s390_handle_e3(struct kvm_vcpu *vcpu); int kvm_s390_handle_e5(struct kvm_vcpu *vcpu); int kvm_s390_handle_01(struct kvm_vcpu *vcpu); int kvm_s390_handle_b9(struct kvm_vcpu *vcpu); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index d42bb2d03041..0ffe973535fa 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -53,6 +53,33 @@ int kvm_s390_handle_aa(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; } +static int handle_gs(struct kvm_vcpu *vcpu) +{ + if (test_kvm_facility(vcpu->kvm, 133)) { + VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (lazy)"); + preempt_disable(); + __ctl_set_bit(2, 4); + current->thread.gs_cb = (struct gs_cb *)&vcpu->run->s.regs.gscb; + restore_gs_cb(current->thread.gs_cb); + preempt_enable(); + vcpu->arch.sie_block->ecb |= ECB_GS; + vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; + vcpu->arch.gs_enabled = 1; + kvm_s390_retry_instr(vcpu); + return 0; + } else + return kvm_s390_inject_program_int(vcpu, PGM_OPERATION); +} + +int kvm_s390_handle_e3(struct kvm_vcpu *vcpu) +{ + int code = vcpu->arch.sie_block->ipb & 0xff; + + if (code == 0x49 || code == 0x4d) + return handle_gs(vcpu); + else + return -EOPNOTSUPP; +} /* Handle SCK (SET CLOCK) interception */ static int handle_set_clock(struct kvm_vcpu *vcpu) { diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index d91f1df5a854..2fafc2be777f 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -329,6 +329,11 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) /* Instruction Execution Prevention */ if (test_kvm_facility(vcpu->kvm, 130)) scb_s->ecb2 |= scb_o->ecb2 & ECB2_IEP; + /* Guarded Storage */ + if (test_kvm_facility(vcpu->kvm, 133)) { + scb_s->ecb |= scb_o->ecb & ECB_GS; + scb_s->ecd |= scb_o->ecd & ECD_HOSTREGMGMT; + } if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_SIIF)) scb_s->eca |= scb_o->eca & ECA_SII; if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_IB)) @@ -496,6 +501,13 @@ static void unpin_blocks(struct kvm_vcpu *vcpu, 
struct vsie_page *vsie_page) unpin_guest_page(vcpu->kvm, gpa, hpa); scb_s->riccbd = 0; } + + hpa = scb_s->sdnxo; + if (hpa) { + gpa = scb_o->sdnxo; + unpin_guest_page(vcpu->kvm, gpa, hpa); + scb_s->sdnxo = 0; + } } /* @@ -590,6 +602,33 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) goto unpin; scb_s->riccbd = hpa; } + if ((scb_s->ecb & ECB_GS) && !(scb_s->ecd & ECD_HOSTREGMGMT)) { + unsigned long sdnxc; + + gpa = scb_o->sdnxo & ~0xfUL; + sdnxc = scb_o->sdnxo & 0xfUL; + if (!gpa || !(gpa & ~0x1fffUL)) { + rc = set_validity_icpt(scb_s, 0x10b0U); + goto unpin; + } + if (sdnxc < 6 || sdnxc > 12) { + rc = set_validity_icpt(scb_s, 0x10b1U); + goto unpin; + } + if (gpa & ((1 << sdnxc) - 1)) { + rc = set_validity_icpt(scb_s, 0x10b2U); + goto unpin; + } + /* Due to alignment rules (checked above) this cannot + * cross page boundaries + */ + rc = pin_guest_page(vcpu->kvm, gpa, &hpa); + if (rc == -EINVAL) + rc = set_validity_icpt(scb_s, 0x10b0U); + if (rc) + goto unpin; + scb_s->sdnxo = hpa; + } return 0; unpin: unpin_blocks(vcpu, vsie_page); diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index f51d5082a377..c9d522765f8f 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -883,6 +883,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_PPC_MMU_RADIX 134 #define KVM_CAP_PPC_MMU_HASH_V3 135 #define KVM_CAP_IMMEDIATE_EXIT 136 +#define KVM_CAP_S390_GS 137 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3-71-gd317 From a2d133b1d465016d0d97560b11f54ba0ace56d3e Mon Sep 17 00:00:00 2001 From: Josh Hunt Date: Mon, 20 Mar 2017 15:22:03 -0400 Subject: sock: introduce SO_MEMINFO getsockopt Allows reading of SK_MEMINFO_VARS via socket option. This way an application can get all meminfo related information in single socket option call instead of multiple calls. Adds helper function, sk_get_meminfo(), and uses that for both getsockopt and sock_diag_put_meminfo(). Suggested by Eric Dumazet. 
Signed-off-by: Josh Hunt Reviewed-by: Jason Baron Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- arch/alpha/include/uapi/asm/socket.h | 2 ++ arch/avr32/include/uapi/asm/socket.h | 2 ++ arch/frv/include/uapi/asm/socket.h | 2 ++ arch/ia64/include/uapi/asm/socket.h | 2 ++ arch/m32r/include/uapi/asm/socket.h | 2 ++ arch/mips/include/uapi/asm/socket.h | 3 +++ arch/mn10300/include/uapi/asm/socket.h | 2 ++ arch/parisc/include/uapi/asm/socket.h | 2 ++ arch/powerpc/include/uapi/asm/socket.h | 2 ++ arch/s390/include/uapi/asm/socket.h | 2 ++ arch/sparc/include/uapi/asm/socket.h | 2 ++ arch/xtensa/include/uapi/asm/socket.h | 2 ++ include/net/sock.h | 2 ++ include/uapi/asm-generic/socket.h | 2 ++ net/core/sock.c | 30 ++++++++++++++++++++++++++++++ net/core/sock_diag.c | 10 +--------- 16 files changed, 60 insertions(+), 9 deletions(-) (limited to 'include/uapi') diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index afc901b7a6f6..089db42c1b40 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -99,4 +99,6 @@ #define SCM_TIMESTAMPING_OPT_STATS 54 +#define SO_MEMINFO 55 + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/avr32/include/uapi/asm/socket.h b/arch/avr32/include/uapi/asm/socket.h index 5a650426f357..6eabcbd2f82a 100644 --- a/arch/avr32/include/uapi/asm/socket.h +++ b/arch/avr32/include/uapi/asm/socket.h @@ -92,4 +92,6 @@ #define SCM_TIMESTAMPING_OPT_STATS 54 +#define SO_MEMINFO 55 + #endif /* _UAPI__ASM_AVR32_SOCKET_H */ diff --git a/arch/frv/include/uapi/asm/socket.h b/arch/frv/include/uapi/asm/socket.h index 81e03530ed39..bd497f8356b9 100644 --- a/arch/frv/include/uapi/asm/socket.h +++ b/arch/frv/include/uapi/asm/socket.h @@ -92,5 +92,7 @@ #define SCM_TIMESTAMPING_OPT_STATS 54 +#define SO_MEMINFO 55 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h index 57feb0c1f7d7..f1bb54686168 100644 --- 
a/arch/ia64/include/uapi/asm/socket.h +++ b/arch/ia64/include/uapi/asm/socket.h @@ -101,4 +101,6 @@ #define SCM_TIMESTAMPING_OPT_STATS 54 +#define SO_MEMINFO 55 + #endif /* _ASM_IA64_SOCKET_H */ diff --git a/arch/m32r/include/uapi/asm/socket.h b/arch/m32r/include/uapi/asm/socket.h index 5853f8e92c20..459c46076f6f 100644 --- a/arch/m32r/include/uapi/asm/socket.h +++ b/arch/m32r/include/uapi/asm/socket.h @@ -92,4 +92,6 @@ #define SCM_TIMESTAMPING_OPT_STATS 54 +#define SO_MEMINFO 55 + #endif /* _ASM_M32R_SOCKET_H */ diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index 566ecdcb5b4b..688c18dd62ef 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -110,4 +110,7 @@ #define SCM_TIMESTAMPING_OPT_STATS 54 +#define SO_MEMINFO 55 + + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/mn10300/include/uapi/asm/socket.h b/arch/mn10300/include/uapi/asm/socket.h index 0e12527c4b0e..312d2c457a04 100644 --- a/arch/mn10300/include/uapi/asm/socket.h +++ b/arch/mn10300/include/uapi/asm/socket.h @@ -92,4 +92,6 @@ #define SCM_TIMESTAMPING_OPT_STATS 54 +#define SO_MEMINFO 55 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index 7a109b73ddf7..b98ec38f2083 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -91,4 +91,6 @@ #define SCM_TIMESTAMPING_OPT_STATS 0x402F +#define SO_MEMINFO 0x4030 + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h index 44583a52f882..099a889240f6 100644 --- a/arch/powerpc/include/uapi/asm/socket.h +++ b/arch/powerpc/include/uapi/asm/socket.h @@ -99,4 +99,6 @@ #define SCM_TIMESTAMPING_OPT_STATS 54 +#define SO_MEMINFO 55 + #endif /* _ASM_POWERPC_SOCKET_H */ diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h index b24a64cbfeb1..6199bb34f7fa 100644 --- 
a/arch/s390/include/uapi/asm/socket.h +++ b/arch/s390/include/uapi/asm/socket.h @@ -98,4 +98,6 @@ #define SCM_TIMESTAMPING_OPT_STATS 54 +#define SO_MEMINFO 55 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index a25dc32f5d6a..12cd8c2ec422 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -88,6 +88,8 @@ #define SCM_TIMESTAMPING_OPT_STATS 0x0038 +#define SO_MEMINFO 0x0039 + /* Security levels - as per NRL IPv6 - don't actually do anything */ #define SO_SECURITY_AUTHENTICATION 0x5001 #define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002 diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h index 9fdbe1fe0473..d0b85f6c1484 100644 --- a/arch/xtensa/include/uapi/asm/socket.h +++ b/arch/xtensa/include/uapi/asm/socket.h @@ -103,4 +103,6 @@ #define SCM_TIMESTAMPING_OPT_STATS 54 +#define SO_MEMINFO 55 + #endif /* _XTENSA_SOCKET_H */ diff --git a/include/net/sock.h b/include/net/sock.h index 08142be8938e..cb241a0e8434 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2362,6 +2362,8 @@ bool sk_ns_capable(const struct sock *sk, bool sk_capable(const struct sock *sk, int cap); bool sk_net_capable(const struct sock *sk, int cap); +void sk_get_meminfo(const struct sock *sk, u32 *meminfo); + extern __u32 sysctl_wmem_max; extern __u32 sysctl_rmem_max; diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index 2c748ddad5f8..8313702c1eae 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -94,4 +94,6 @@ #define SCM_TIMESTAMPING_OPT_STATS 54 +#define SO_MEMINFO 55 + #endif /* __ASM_GENERIC_SOCKET_H */ diff --git a/net/core/sock.c b/net/core/sock.c index a83731c36761..f8c0373a3a74 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1313,6 +1313,21 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val = sk->sk_incoming_cpu; break; + case 
SO_MEMINFO: + { + u32 meminfo[SK_MEMINFO_VARS]; + + if (get_user(len, optlen)) + return -EFAULT; + + sk_get_meminfo(sk, meminfo); + + len = min_t(unsigned int, len, sizeof(meminfo)); + if (copy_to_user(optval, &meminfo, len)) + return -EFAULT; + + goto lenout; + } default: /* We implement the SO_SNDLOWAT etc to not be settable * (1003.1g 7). @@ -2861,6 +2876,21 @@ void sk_common_release(struct sock *sk) } EXPORT_SYMBOL(sk_common_release); +void sk_get_meminfo(const struct sock *sk, u32 *mem) +{ + memset(mem, 0, sizeof(*mem) * SK_MEMINFO_VARS); + + mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk); + mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf; + mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk); + mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf; + mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc; + mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued; + mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc); + mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len; + mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops); +} + #ifdef CONFIG_PROC_FS #define PROTO_INUSE_NR 64 /* should be enough for the first time */ struct prot_inuse { diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 6b10573cc9fa..8d11ee75a100 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -59,15 +59,7 @@ int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attrtype) { u32 mem[SK_MEMINFO_VARS]; - mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk); - mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf; - mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk); - mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf; - mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc; - mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued; - mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc); - mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len; - mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops); + sk_get_meminfo(sk, mem); return nla_put(skb, attrtype, sizeof(mem), &mem); } -- cgit v1.2.3-71-gd317 From 
798c166173ffb50128993641fcf791df51bed48e Mon Sep 17 00:00:00 2001 From: andy zhou Date: Mon, 20 Mar 2017 16:32:29 -0700 Subject: openvswitch: Optimize sample action for the clone use cases With the introduction of open flow 'clone' action, the OVS user space can now translate the 'clone' action into kernel datapath 'sample' action, with 100% probability, to ensure that the clone semantics, which is that the packet seen by the clone action is the same as the packet seen by the action after clone, is faithfully carried out in the datapath. While the sample action in the datapath has the matching semantics, its implementation is only optimized for its original use. Specifically, there are two limitations: First, there is a 3-level nesting restriction, enforced at the flow downloading time. This limit turns out to be too restrictive for the 'clone' use case. Second, the implementation avoids recursive calls only if the sample action list has a single userspace action. The main optimization implemented in this series removes the static nesting limit check and instead implements a run-time recursion limit check and recursion avoidance similar to that of the 'recirc' action. This optimization solves both #1 and #2 issues above. One related optimization attempts to avoid copying flow key as long as the enclosed actions do not change the flow key. The detection is performed only once at the flow downloading time. Another related optimization is to rewrite the action list at flow downloading time in order to save the fast path from parsing the sample action list in its original form repeatedly. Signed-off-by: Andy Zhou Acked-by: Pravin B Shelar Signed-off-by: David S. 
Miller --- include/uapi/linux/openvswitch.h | 15 +++++ net/openvswitch/actions.c | 107 ++++++++++++++--------------- net/openvswitch/datapath.h | 2 - net/openvswitch/flow_netlink.c | 141 +++++++++++++++++++++++++++------------ 4 files changed, 167 insertions(+), 98 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 7f41f7d0000f..66d1c3ccfd8e 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -578,10 +578,25 @@ enum ovs_sample_attr { OVS_SAMPLE_ATTR_PROBABILITY, /* u32 number */ OVS_SAMPLE_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */ __OVS_SAMPLE_ATTR_MAX, + +#ifdef __KERNEL__ + OVS_SAMPLE_ATTR_ARG /* struct sample_arg */ +#endif }; #define OVS_SAMPLE_ATTR_MAX (__OVS_SAMPLE_ATTR_MAX - 1) +#ifdef __KERNEL__ +struct sample_arg { + bool exec; /* When true, actions in sample will not + * change flow keys. False otherwise. + */ + u32 probability; /* Same value as + * 'OVS_SAMPLE_ATTR_PROBABILITY'. + */ +}; +#endif + /** * enum ovs_userspace_attr - Attributes for %OVS_ACTION_ATTR_USERSPACE action. * @OVS_USERSPACE_ATTR_PID: u32 Netlink PID to which the %OVS_PACKET_CMD_ACTION diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 8c9c60cd359f..3529f7b87a44 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -928,73 +928,70 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, return ovs_dp_upcall(dp, skb, key, &upcall, cutlen); } +/* When 'last' is true, sample() should always consume the 'skb'. + * Otherwise, sample() should keep 'skb' intact regardless what + * actions are executed within sample(). 
+ */ static int sample(struct datapath *dp, struct sk_buff *skb, struct sw_flow_key *key, const struct nlattr *attr, - const struct nlattr *actions, int actions_len) + bool last) { - const struct nlattr *acts_list = NULL; - const struct nlattr *a; - int rem; - u32 cutlen = 0; + struct nlattr *actions; + struct nlattr *sample_arg; + struct sw_flow_key *orig_key = key; + int rem = nla_len(attr); + int err = 0; + const struct sample_arg *arg; - for (a = nla_data(attr), rem = nla_len(attr); rem > 0; - a = nla_next(a, &rem)) { - u32 probability; + /* The first action is always 'OVS_SAMPLE_ATTR_ARG'. */ + sample_arg = nla_data(attr); + arg = nla_data(sample_arg); + actions = nla_next(sample_arg, &rem); - switch (nla_type(a)) { - case OVS_SAMPLE_ATTR_PROBABILITY: - probability = nla_get_u32(a); - if (!probability || prandom_u32() > probability) - return 0; - break; - - case OVS_SAMPLE_ATTR_ACTIONS: - acts_list = a; - break; - } + if ((arg->probability != U32_MAX) && + (!arg->probability || prandom_u32() > arg->probability)) { + if (last) + consume_skb(skb); + return 0; } - rem = nla_len(acts_list); - a = nla_data(acts_list); - - /* Actions list is empty, do nothing */ - if (unlikely(!rem)) + /* Unless the last action, sample works on the clone of SKB. */ + skb = last ? skb : skb_clone(skb, GFP_ATOMIC); + if (!skb) { + /* Out of memory, skip this sample action. + */ return 0; + } - /* The only known usage of sample action is having a single user-space - * action, or having a truncate action followed by a single user-space - * action. Treat this usage as a special case. - * The output_userspace() should clone the skb to be sent to the - * user space. This skb will be consumed by its caller. + /* In case the sample actions won't change 'key', + * it can be used directly to execute sample actions. + * Otherwise, allocate a new key from the + * next recursion level of 'flow_keys'. If + * successful, execute the sample actions without + * deferring. 
+ * + * Defer the sample actions if the recursion + * limit has been reached. */ - if (unlikely(nla_type(a) == OVS_ACTION_ATTR_TRUNC)) { - struct ovs_action_trunc *trunc = nla_data(a); - - if (skb->len > trunc->max_len) - cutlen = skb->len - trunc->max_len; - - a = nla_next(a, &rem); + if (!arg->exec) { + __this_cpu_inc(exec_actions_level); + key = clone_key(key); } - if (likely(nla_type(a) == OVS_ACTION_ATTR_USERSPACE && - nla_is_last(a, rem))) - return output_userspace(dp, skb, key, a, actions, - actions_len, cutlen); + if (key) { + err = do_execute_actions(dp, skb, key, actions, rem); + } else if (!add_deferred_actions(skb, orig_key, actions, rem)) { - skb = skb_clone(skb, GFP_ATOMIC); - if (!skb) - /* Skip the sample action when out of memory. */ - return 0; - - if (!add_deferred_actions(skb, key, nla_data(acts_list), - nla_len(acts_list))) { if (net_ratelimit()) - pr_warn("%s: deferred actions limit reached, dropping sample action\n", + pr_warn("%s: deferred action limit reached, drop sample action\n", ovs_dp_name(dp)); - kfree_skb(skb); } - return 0; + + if (!arg->exec) + __this_cpu_dec(exec_actions_level); + + return err; } static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key, @@ -1244,9 +1241,15 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, err = execute_masked_set_action(skb, key, nla_data(a)); break; - case OVS_ACTION_ATTR_SAMPLE: - err = sample(dp, skb, key, a, attr, len); + case OVS_ACTION_ATTR_SAMPLE: { + bool last = nla_is_last(a, rem); + + err = sample(dp, skb, key, a, last); + if (last) + return err; + break; + } case OVS_ACTION_ATTR_CT: if (!is_flow_key_valid(key)) { diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 1c6e9377436d..da931bdef8a7 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -34,8 +34,6 @@ #define DP_MAX_PORTS USHRT_MAX #define DP_VPORT_HASH_BUCKETS 1024 -#define SAMPLE_ACTION_DEPTH 3 - /** * struct dp_stats_percpu - per-cpu packet 
processing statistics for a given * datapath. diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 6f5fa50f716d..2acfb5af2c45 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2014 Nicira, Inc. + * Copyright (c) 2007-2017 Nicira, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -59,6 +59,39 @@ struct ovs_len_tbl { #define OVS_ATTR_NESTED -1 #define OVS_ATTR_VARIABLE -2 +static bool actions_may_change_flow(const struct nlattr *actions) +{ + struct nlattr *nla; + int rem; + + nla_for_each_nested(nla, actions, rem) { + u16 action = nla_type(nla); + + switch (action) { + case OVS_ACTION_ATTR_OUTPUT: + case OVS_ACTION_ATTR_RECIRC: + case OVS_ACTION_ATTR_TRUNC: + case OVS_ACTION_ATTR_USERSPACE: + break; + + case OVS_ACTION_ATTR_CT: + case OVS_ACTION_ATTR_HASH: + case OVS_ACTION_ATTR_POP_ETH: + case OVS_ACTION_ATTR_POP_MPLS: + case OVS_ACTION_ATTR_POP_VLAN: + case OVS_ACTION_ATTR_PUSH_ETH: + case OVS_ACTION_ATTR_PUSH_MPLS: + case OVS_ACTION_ATTR_PUSH_VLAN: + case OVS_ACTION_ATTR_SAMPLE: + case OVS_ACTION_ATTR_SET: + case OVS_ACTION_ATTR_SET_MASKED: + default: + return true; + } + } + return false; +} + static void update_range(struct sw_flow_match *match, size_t offset, size_t size, bool is_mask) { @@ -2021,18 +2054,20 @@ static inline void add_nested_action_end(struct sw_flow_actions *sfa, static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, - int depth, struct sw_flow_actions **sfa, + struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, bool log); static int validate_and_copy_sample(struct net *net, const struct nlattr *attr, - const struct sw_flow_key *key, int depth, + const struct sw_flow_key *key, struct sw_flow_actions **sfa, - __be16 eth_type, __be16 vlan_tci, bool log) + __be16 eth_type, __be16 vlan_tci, + 
bool log, bool last) { const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; const struct nlattr *probability, *actions; const struct nlattr *a; - int rem, start, err, st_acts; + int rem, start, err; + struct sample_arg arg; memset(attrs, 0, sizeof(attrs)); nla_for_each_nested(a, attr, rem) { @@ -2056,20 +2091,32 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr, start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE, log); if (start < 0) return start; - err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY, - nla_data(probability), sizeof(u32), log); + + /* When both skb and flow may be changed, put the sample + * into a deferred fifo. On the other hand, if only skb + * may be modified, the actions can be executed in place. + * + * Do this analysis at the flow installation time. + * Set 'clone_action->exec' to true if the actions can be + * executed without being deferred. + * + * If the sample is the last action, it can always be excuted + * rather than deferred. 
+ */ + arg.exec = last || !actions_may_change_flow(actions); + arg.probability = nla_get_u32(probability); + + err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_ARG, &arg, sizeof(arg), + log); if (err) return err; - st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS, log); - if (st_acts < 0) - return st_acts; - err = __ovs_nla_copy_actions(net, actions, key, depth + 1, sfa, + err = __ovs_nla_copy_actions(net, actions, key, sfa, eth_type, vlan_tci, log); + if (err) return err; - add_nested_action_end(*sfa, st_acts); add_nested_action_end(*sfa, start); return 0; @@ -2406,16 +2453,13 @@ static int copy_action(const struct nlattr *from, static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, - int depth, struct sw_flow_actions **sfa, + struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, bool log) { u8 mac_proto = ovs_key_mac_proto(key); const struct nlattr *a; int rem, err; - if (depth >= SAMPLE_ACTION_DEPTH) - return -EOVERFLOW; - nla_for_each_nested(a, attr, rem) { /* Expected argument lengths, (u32)-1 for variable length. 
*/ static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = { @@ -2553,13 +2597,17 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, return err; break; - case OVS_ACTION_ATTR_SAMPLE: - err = validate_and_copy_sample(net, a, key, depth, sfa, - eth_type, vlan_tci, log); + case OVS_ACTION_ATTR_SAMPLE: { + bool last = nla_is_last(a, rem); + + err = validate_and_copy_sample(net, a, key, sfa, + eth_type, vlan_tci, + log, last); if (err) return err; skip_copy = true; break; + } case OVS_ACTION_ATTR_CT: err = ovs_ct_copy_action(net, a, key, sfa, log); @@ -2613,7 +2661,7 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, return PTR_ERR(*sfa); (*sfa)->orig_len = nla_len(attr); - err = __ovs_nla_copy_actions(net, attr, key, 0, sfa, key->eth.type, + err = __ovs_nla_copy_actions(net, attr, key, sfa, key->eth.type, key->eth.vlan.tci, log); if (err) ovs_nla_free_flow_actions(*sfa); @@ -2621,39 +2669,44 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, return err; } -static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb) +static int sample_action_to_attr(const struct nlattr *attr, + struct sk_buff *skb) { - const struct nlattr *a; - struct nlattr *start; - int err = 0, rem; + struct nlattr *start, *ac_start = NULL, *sample_arg; + int err = 0, rem = nla_len(attr); + const struct sample_arg *arg; + struct nlattr *actions; start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE); if (!start) return -EMSGSIZE; - nla_for_each_nested(a, attr, rem) { - int type = nla_type(a); - struct nlattr *st_sample; + sample_arg = nla_data(attr); + arg = nla_data(sample_arg); + actions = nla_next(sample_arg, &rem); - switch (type) { - case OVS_SAMPLE_ATTR_PROBABILITY: - if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY, - sizeof(u32), nla_data(a))) - return -EMSGSIZE; - break; - case OVS_SAMPLE_ATTR_ACTIONS: - st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS); - if (!st_sample) - return -EMSGSIZE; - err = 
ovs_nla_put_actions(nla_data(a), nla_len(a), skb); - if (err) - return err; - nla_nest_end(skb, st_sample); - break; - } + if (nla_put_u32(skb, OVS_SAMPLE_ATTR_PROBABILITY, arg->probability)) { + err = -EMSGSIZE; + goto out; + } + + ac_start = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS); + if (!ac_start) { + err = -EMSGSIZE; + goto out; + } + + err = ovs_nla_put_actions(actions, rem, skb); + +out: + if (err) { + nla_nest_cancel(skb, ac_start); + nla_nest_cancel(skb, start); + } else { + nla_nest_end(skb, ac_start); + nla_nest_end(skb, start); } - nla_nest_end(skb, start); return err; } -- cgit v1.2.3-71-gd317 From bbea124bc99df968011e76eba105fe964a4eceab Mon Sep 17 00:00:00 2001 From: Joel Scherpelz Date: Wed, 22 Mar 2017 18:19:04 +0900 Subject: net: ipv6: Add sysctl for minimum prefix len acceptable in RIOs. This commit adds a new sysctl accept_ra_rt_info_min_plen that defines the minimum acceptable prefix length of Route Information Options. The new sysctl is intended to be used together with accept_ra_rt_info_max_plen to configure a range of acceptable prefix lengths. It is useful to prevent misconfigurations from unintentionally blackholing too much of the IPv6 address space (e.g., home routers announcing RIOs for fc00::/7, which is incorrect). Signed-off-by: Joel Scherpelz Acked-by: Lorenzo Colitti Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 13 +++++++++++-- include/linux/ipv6.h | 1 + include/uapi/linux/ipv6.h | 1 + include/uapi/linux/sysctl.h | 1 + net/ipv6/addrconf.c | 10 ++++++++++ net/ipv6/ndisc.c | 2 ++ 6 files changed, 26 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index b57308e76b1d..eaee2c8d4c00 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1461,11 +1461,20 @@ accept_ra_pinfo - BOOLEAN Functional default: enabled if accept_ra is enabled. 
disabled if accept_ra is disabled. +accept_ra_rt_info_min_plen - INTEGER + Minimum prefix length of Route Information in RA. + + Route Information w/ prefix smaller than this variable shall + be ignored. + + Functional default: 0 if accept_ra_rtr_pref is enabled. + -1 if accept_ra_rtr_pref is disabled. + accept_ra_rt_info_max_plen - INTEGER Maximum prefix length of Route Information in RA. - Route Information w/ prefix larger than or equal to this - variable shall be ignored. + Route Information w/ prefix larger than this variable shall + be ignored. Functional default: 0 if accept_ra_rtr_pref is enabled. -1 if accept_ra_rtr_pref is disabled. diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index f0d79bd054ca..e1b442996f81 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -37,6 +37,7 @@ struct ipv6_devconf { __s32 accept_ra_rtr_pref; __s32 rtr_probe_interval; #ifdef CONFIG_IPV6_ROUTE_INFO + __s32 accept_ra_rt_info_min_plen; __s32 accept_ra_rt_info_max_plen; #endif #endif diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index d8f6a1ac9af4..2ae59178189d 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -184,6 +184,7 @@ enum { DEVCONF_ENHANCED_DAD, DEVCONF_ADDR_GEN_MODE, DEVCONF_DISABLE_POLICY, + DEVCONF_ACCEPT_RA_RT_INFO_MIN_PLEN, DEVCONF_MAX }; diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h index d2b12152e358..e13d48058b8d 100644 --- a/include/uapi/linux/sysctl.h +++ b/include/uapi/linux/sysctl.h @@ -568,6 +568,7 @@ enum { NET_IPV6_PROXY_NDP=23, NET_IPV6_ACCEPT_SOURCE_ROUTE=25, NET_IPV6_ACCEPT_RA_FROM_LOCAL=26, + NET_IPV6_ACCEPT_RA_RT_INFO_MIN_PLEN=27, __NET_IPV6_MAX }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 8c69768a5c46..dff5beb26a01 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -224,6 +224,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .accept_ra_rtr_pref = 1, .rtr_probe_interval = 60 * HZ, #ifdef CONFIG_IPV6_ROUTE_INFO + 
.accept_ra_rt_info_min_plen = 0, .accept_ra_rt_info_max_plen = 0, #endif #endif @@ -277,6 +278,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .accept_ra_rtr_pref = 1, .rtr_probe_interval = 60 * HZ, #ifdef CONFIG_IPV6_ROUTE_INFO + .accept_ra_rt_info_min_plen = 0, .accept_ra_rt_info_max_plen = 0, #endif #endif @@ -4979,6 +4981,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_RTR_PROBE_INTERVAL] = jiffies_to_msecs(cnf->rtr_probe_interval); #ifdef CONFIG_IPV6_ROUTE_INFO + array[DEVCONF_ACCEPT_RA_RT_INFO_MIN_PLEN] = cnf->accept_ra_rt_info_min_plen; array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen; #endif #endif @@ -6121,6 +6124,13 @@ static const struct ctl_table addrconf_sysctl[] = { .proc_handler = proc_dointvec_jiffies, }, #ifdef CONFIG_IPV6_ROUTE_INFO + { + .procname = "accept_ra_rt_info_min_plen", + .data = &ipv6_devconf.accept_ra_rt_info_min_plen, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { .procname = "accept_ra_rt_info_max_plen", .data = &ipv6_devconf.accept_ra_rt_info_max_plen, diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 112ccbc0a8ac..b5812b3f7539 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1418,6 +1418,8 @@ skip_linkparms: if (ri->prefix_len == 0 && !in6_dev->cnf.accept_ra_defrtr) continue; + if (ri->prefix_len < in6_dev->cnf.accept_ra_rt_info_min_plen) + continue; if (ri->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen) continue; rt6_route_rcv(skb->dev, (u8 *)p, (p->nd_opt_len) << 3, -- cgit v1.2.3-71-gd317 From 56f668dfe00dcf086734f1c42ea999398fad6572 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 22 Mar 2017 10:00:33 -0700 Subject: bpf: Add array of maps support This patch adds a few helper funcs to enable map-in-map support (i.e. outer_map->inner_map). The first outer_map type BPF_MAP_TYPE_ARRAY_OF_MAPS is also added in this patch. The next patch will introduce a hash of maps type. 
Any bpf map type can act as an inner_map. The exception is BPF_MAP_TYPE_PROG_ARRAY because the extra level of indirection makes it harder to verify the owner_prog_type and owner_jited. Multi-level map-in-map is not supported (i.e. map->map is ok but not map->map->map). When adding an inner_map to an outer_map, it currently checks the map_type, key_size, value_size, map_flags, max_entries and ops. The verifier also uses those map properties to do static analysis. map_flags is needed because we need to ensure BPF_PROG_TYPE_PERF_EVENT is using a preallocated hashtab for the inner_hash also. ops and max_entries are needed to generate inlined map-lookup instructions. For simplicity, a simple '==' test is used for both map_flags and max_entries. The equality of ops is implied by the equality of map_type. During outer_map creation time, an inner_map_fd is needed to create an outer_map. However, the inner_map_fd's lifetime does not depend on the outer_map. The inner_map_fd is merely used to initialize the inner_map_meta of the outer_map. Also, for the outer_map: * It allows element update and delete from syscall * It allows element lookup from bpf_prog The above is similar to the current fd_array pattern. Signed-off-by: Martin KaFai Lau Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 2 + kernel/bpf/Makefile | 2 +- kernel/bpf/arraymap.c | 63 +++++++++++++++++++++++++++++++ kernel/bpf/map_in_map.c | 97 ++++++++++++++++++++++++++++++++++++++++++++++++ kernel/bpf/map_in_map.h | 23 ++++++++++++ kernel/bpf/syscall.c | 7 +++- kernel/bpf/verifier.c | 42 ++++++++++++++++----- 8 files changed, 225 insertions(+), 12 deletions(-) create mode 100644 kernel/bpf/map_in_map.c create mode 100644 kernel/bpf/map_in_map.h (limited to 'include/uapi') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index da8c64ca8dc9..3f3cdf9b15e8 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -50,6 +50,7 @@ struct bpf_map { const struct bpf_map_ops *ops; struct work_struct work; atomic_t usercnt; + struct bpf_map *inner_map_meta; }; struct bpf_map_type_list { diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0539a0ceef38..1701ec1e7de3 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -96,6 +96,7 @@ enum bpf_map_type { BPF_MAP_TYPE_LRU_HASH, BPF_MAP_TYPE_LRU_PERCPU_HASH, BPF_MAP_TYPE_LPM_TRIE, + BPF_MAP_TYPE_ARRAY_OF_MAPS, }; enum bpf_prog_type { @@ -152,6 +153,7 @@ union bpf_attr { __u32 value_size; /* size of value in bytes */ __u32 max_entries; /* max number of entries in a map */ __u32 map_flags; /* prealloc or not */ + __u32 inner_map_fd; /* fd pointing to the inner map */ }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index e1ce4f4fd7fd..e1e5e658f2db 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -1,7 +1,7 @@ obj-y := core.o obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o -obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o +obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o ifeq ($(CONFIG_PERF_EVENTS),y) obj-$(CONFIG_BPF_SYSCALL) += stackmap.o 
endif diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 4d7d5d0ed76a..bc9da93db403 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -17,6 +17,8 @@ #include #include +#include "map_in_map.h" + static void bpf_array_free_percpu(struct bpf_array *array) { int i; @@ -602,3 +604,64 @@ static int __init register_cgroup_array_map(void) } late_initcall(register_cgroup_array_map); #endif + +static struct bpf_map *array_of_map_alloc(union bpf_attr *attr) +{ + struct bpf_map *map, *inner_map_meta; + + inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd); + if (IS_ERR(inner_map_meta)) + return inner_map_meta; + + map = fd_array_map_alloc(attr); + if (IS_ERR(map)) { + bpf_map_meta_free(inner_map_meta); + return map; + } + + map->inner_map_meta = inner_map_meta; + + return map; +} + +static void array_of_map_free(struct bpf_map *map) +{ + /* map->inner_map_meta is only accessed by syscall which + * is protected by fdget/fdput. + */ + bpf_map_meta_free(map->inner_map_meta); + bpf_fd_array_map_clear(map); + fd_array_map_free(map); +} + +static void *array_of_map_lookup_elem(struct bpf_map *map, void *key) +{ + struct bpf_map **inner_map = array_map_lookup_elem(map, key); + + if (!inner_map) + return NULL; + + return READ_ONCE(*inner_map); +} + +static const struct bpf_map_ops array_of_map_ops = { + .map_alloc = array_of_map_alloc, + .map_free = array_of_map_free, + .map_get_next_key = array_map_get_next_key, + .map_lookup_elem = array_of_map_lookup_elem, + .map_delete_elem = fd_array_map_delete_elem, + .map_fd_get_ptr = bpf_map_fd_get_ptr, + .map_fd_put_ptr = bpf_map_fd_put_ptr, +}; + +static struct bpf_map_type_list array_of_map_type __ro_after_init = { + .ops = &array_of_map_ops, + .type = BPF_MAP_TYPE_ARRAY_OF_MAPS, +}; + +static int __init register_array_of_map(void) +{ + bpf_register_map_type(&array_of_map_type); + return 0; +} +late_initcall(register_array_of_map); diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c new file mode 
100644 index 000000000000..59bcdf821ae4 --- /dev/null +++ b/kernel/bpf/map_in_map.c @@ -0,0 +1,97 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include +#include + +#include "map_in_map.h" + +struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) +{ + struct bpf_map *inner_map, *inner_map_meta; + struct fd f; + + f = fdget(inner_map_ufd); + inner_map = __bpf_map_get(f); + if (IS_ERR(inner_map)) + return inner_map; + + /* prog_array->owner_prog_type and owner_jited + * is a runtime binding. Doing static check alone + * in the verifier is not enough. + */ + if (inner_map->map_type == BPF_MAP_TYPE_PROG_ARRAY) { + fdput(f); + return ERR_PTR(-ENOTSUPP); + } + + /* Does not support >1 level map-in-map */ + if (inner_map->inner_map_meta) { + fdput(f); + return ERR_PTR(-EINVAL); + } + + inner_map_meta = kzalloc(sizeof(*inner_map_meta), GFP_USER); + if (!inner_map_meta) { + fdput(f); + return ERR_PTR(-ENOMEM); + } + + inner_map_meta->map_type = inner_map->map_type; + inner_map_meta->key_size = inner_map->key_size; + inner_map_meta->value_size = inner_map->value_size; + inner_map_meta->map_flags = inner_map->map_flags; + inner_map_meta->ops = inner_map->ops; + inner_map_meta->max_entries = inner_map->max_entries; + + fdput(f); + return inner_map_meta; +} + +void bpf_map_meta_free(struct bpf_map *map_meta) +{ + kfree(map_meta); +} + +bool bpf_map_meta_equal(const struct bpf_map *meta0, + const struct bpf_map *meta1) +{ + /* No need to compare ops because it is covered by map_type */ + return meta0->map_type == meta1->map_type && + meta0->key_size == meta1->key_size && + meta0->value_size == meta1->value_size && + meta0->map_flags == meta1->map_flags && + meta0->max_entries == meta1->max_entries; +} + +void *bpf_map_fd_get_ptr(struct bpf_map *map, + struct file *map_file /* not used */, + 
int ufd) +{ + struct bpf_map *inner_map; + struct fd f; + + f = fdget(ufd); + inner_map = __bpf_map_get(f); + if (IS_ERR(inner_map)) + return inner_map; + + if (bpf_map_meta_equal(map->inner_map_meta, inner_map)) + inner_map = bpf_map_inc(inner_map, false); + else + inner_map = ERR_PTR(-EINVAL); + + fdput(f); + return inner_map; +} + +void bpf_map_fd_put_ptr(void *ptr) +{ + /* ptr->ops->map_free() has to go through one + * rcu grace period by itself. + */ + bpf_map_put(ptr); +} diff --git a/kernel/bpf/map_in_map.h b/kernel/bpf/map_in_map.h new file mode 100644 index 000000000000..177fadb689dc --- /dev/null +++ b/kernel/bpf/map_in_map.h @@ -0,0 +1,23 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#ifndef __MAP_IN_MAP_H__ +#define __MAP_IN_MAP_H__ + +#include + +struct file; +struct bpf_map; + +struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd); +void bpf_map_meta_free(struct bpf_map *map_meta); +bool bpf_map_meta_equal(const struct bpf_map *meta0, + const struct bpf_map *meta1); +void *bpf_map_fd_get_ptr(struct bpf_map *map, struct file *map_file, + int ufd); +void bpf_map_fd_put_ptr(void *ptr); + +#endif diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 48c914b983bd..6e24fdf1f373 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -215,7 +215,7 @@ int bpf_map_new_fd(struct bpf_map *map) offsetof(union bpf_attr, CMD##_LAST_FIELD) - \ sizeof(attr->CMD##_LAST_FIELD)) != NULL -#define BPF_MAP_CREATE_LAST_FIELD map_flags +#define BPF_MAP_CREATE_LAST_FIELD inner_map_fd /* called via syscall */ static int map_create(union bpf_attr *attr) { @@ -352,6 +352,8 @@ static int map_lookup_elem(union bpf_attr *attr) err = bpf_percpu_array_copy(map, key, value); } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) { err = bpf_stackmap_copy(map, key, value); + } else 
if (map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) { + err = -ENOTSUPP; } else { rcu_read_lock(); ptr = map->ops->map_lookup_elem(map, key); @@ -438,7 +440,8 @@ static int map_update_elem(union bpf_attr *attr) err = bpf_percpu_array_update(map, key, value, attr->flags); } else if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || map->map_type == BPF_MAP_TYPE_PROG_ARRAY || - map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY) { + map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || + map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) { rcu_read_lock(); err = bpf_fd_array_map_update_elem(map, f.file, key, value, attr->flags); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 9bf82267f2f9..3b8f528c5473 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1199,6 +1199,9 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id) func_id != BPF_FUNC_current_task_under_cgroup) goto error; break; + case BPF_MAP_TYPE_ARRAY_OF_MAPS: + if (func_id != BPF_FUNC_map_lookup_elem) + goto error; default: break; } @@ -2101,14 +2104,19 @@ static void mark_map_reg(struct bpf_reg_state *regs, u32 regno, u32 id, struct bpf_reg_state *reg = &regs[regno]; if (reg->type == PTR_TO_MAP_VALUE_OR_NULL && reg->id == id) { - reg->type = type; + if (type == UNKNOWN_VALUE) { + __mark_reg_unknown_value(regs, regno); + } else if (reg->map_ptr->inner_map_meta) { + reg->type = CONST_PTR_TO_MAP; + reg->map_ptr = reg->map_ptr->inner_map_meta; + } else { + reg->type = type; + } /* We don't need id from this point onwards anymore, thus we * should better reset it, so that state pruning has chances * to take effect.
*/ reg->id = 0; - if (type == UNKNOWN_VALUE) - __mark_reg_unknown_value(regs, regno); } } @@ -3033,16 +3041,32 @@ process_bpf_exit: return 0; } +static int check_map_prealloc(struct bpf_map *map) +{ + return (map->map_type != BPF_MAP_TYPE_HASH && + map->map_type != BPF_MAP_TYPE_PERCPU_HASH) || + !(map->map_flags & BPF_F_NO_PREALLOC); +} + static int check_map_prog_compatibility(struct bpf_map *map, struct bpf_prog *prog) { - if (prog->type == BPF_PROG_TYPE_PERF_EVENT && - (map->map_type == BPF_MAP_TYPE_HASH || - map->map_type == BPF_MAP_TYPE_PERCPU_HASH) && - (map->map_flags & BPF_F_NO_PREALLOC)) { - verbose("perf_event programs can only use preallocated hash map\n"); - return -EINVAL; + /* Make sure that BPF_PROG_TYPE_PERF_EVENT programs only use + * preallocated hash maps, since doing memory allocation + * in overflow_handler can crash depending on where nmi got + * triggered. + */ + if (prog->type == BPF_PROG_TYPE_PERF_EVENT) { + if (!check_map_prealloc(map)) { + verbose("perf_event programs can only use preallocated hash map\n"); + return -EINVAL; + } + if (map->inner_map_meta && + !check_map_prealloc(map->inner_map_meta)) { + verbose("perf_event programs can only use preallocated inner hash map\n"); + return -EINVAL; + } } return 0; } -- cgit v1.2.3-71-gd317 From bcc6b1b7ebf857a9fe56202e2be3361131588c15 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 22 Mar 2017 10:00:34 -0700 Subject: bpf: Add hash of maps support This patch adds hash of maps support (hashmap->bpf_map). BPF_MAP_TYPE_HASH_OF_MAPS is added. A map-in-map contains a pointer to another map and lets call this pointer 'inner_map_ptr'. Notes on deleting inner_map_ptr from a hash map: 1. For BPF_F_NO_PREALLOC map-in-map, when deleting an inner_map_ptr, the htab_elem itself will go through a rcu grace period and the inner_map_ptr resides in the htab_elem. 2. For pre-allocated htab_elem (!BPF_F_NO_PREALLOC), when deleting an inner_map_ptr, the htab_elem may get reused immediately. 
This situation is similar to the existing prealloc-ated use cases. However, the bpf_map_fd_put_ptr() calls bpf_map_put() which calls inner_map->ops->map_free(inner_map) which will go through a rcu grace period (i.e. all bpf_map's map_free currently goes through a rcu grace period). Hence, the inner_map_ptr is still safe for the rcu reader side. This patch also includes BPF_MAP_TYPE_HASH_OF_MAPS to the check_map_prealloc() in the verifier. preallocation is a must for BPF_PROG_TYPE_PERF_EVENT. Hence, even we don't expect heavy updates to map-in-map, enforcing BPF_F_NO_PREALLOC for map-in-map is impossible without disallowing BPF_PROG_TYPE_PERF_EVENT from using map-in-map first. Signed-off-by: Martin KaFai Lau Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf.h | 2 + include/uapi/linux/bpf.h | 1 + kernel/bpf/hashtab.c | 121 +++++++++++++++++++++++++++++++++++++++++++++++ kernel/bpf/syscall.c | 8 +++- kernel/bpf/verifier.c | 4 +- 5 files changed, 134 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 3f3cdf9b15e8..2ae39a3e9ead 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -277,6 +277,8 @@ int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value); int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file, void *key, void *value, u64 map_flags); void bpf_fd_array_map_clear(struct bpf_map *map); +int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file, + void *key, void *value, u64 map_flags); /* memcpy that is used with 8-byte aligned pointers, power-of-8 size and * forced to use 'long' read/writes to try to atomically copy long counters. 
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 1701ec1e7de3..ce6f029ac368 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -97,6 +97,7 @@ enum bpf_map_type { BPF_MAP_TYPE_LRU_PERCPU_HASH, BPF_MAP_TYPE_LPM_TRIE, BPF_MAP_TYPE_ARRAY_OF_MAPS, + BPF_MAP_TYPE_HASH_OF_MAPS, }; enum bpf_prog_type { diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 000153acb6d5..343fb5394c95 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -16,6 +16,7 @@ #include #include "percpu_freelist.h" #include "bpf_lru_list.h" +#include "map_in_map.h" struct bucket { struct hlist_nulls_head head; @@ -88,6 +89,11 @@ static inline void __percpu *htab_elem_get_ptr(struct htab_elem *l, u32 key_size return *(void __percpu **)(l->key + key_size); } +static void *fd_htab_map_get_ptr(const struct bpf_map *map, struct htab_elem *l) +{ + return *(void **)(l->key + roundup(map->key_size, 8)); +} + static struct htab_elem *get_htab_elem(struct bpf_htab *htab, int i) { return (struct htab_elem *) (htab->elems + i * htab->elem_size); @@ -603,6 +609,14 @@ static void htab_elem_free_rcu(struct rcu_head *head) static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l) { + struct bpf_map *map = &htab->map; + + if (map->ops->map_fd_put_ptr) { + void *ptr = fd_htab_map_get_ptr(map, l); + + map->ops->map_fd_put_ptr(ptr); + } + if (l->state == HTAB_EXTRA_ELEM_USED) { l->state = HTAB_EXTRA_ELEM_FREE; return; @@ -1057,6 +1071,7 @@ static void delete_all_elements(struct bpf_htab *htab) } } } + /* Called when map->refcnt goes to zero, either from workqueue or from syscall */ static void htab_map_free(struct bpf_map *map) { @@ -1213,12 +1228,118 @@ static struct bpf_map_type_list htab_lru_percpu_type __ro_after_init = { .type = BPF_MAP_TYPE_LRU_PERCPU_HASH, }; +static struct bpf_map *fd_htab_map_alloc(union bpf_attr *attr) +{ + struct bpf_map *map; + + if (attr->value_size != sizeof(u32)) + return ERR_PTR(-EINVAL); + + /* pointer is stored 
internally */ + attr->value_size = sizeof(void *); + map = htab_map_alloc(attr); + attr->value_size = sizeof(u32); + + return map; +} + +static void fd_htab_map_free(struct bpf_map *map) +{ + struct bpf_htab *htab = container_of(map, struct bpf_htab, map); + struct hlist_nulls_node *n; + struct hlist_nulls_head *head; + struct htab_elem *l; + int i; + + for (i = 0; i < htab->n_buckets; i++) { + head = select_bucket(htab, i); + + hlist_nulls_for_each_entry_safe(l, n, head, hash_node) { + void *ptr = fd_htab_map_get_ptr(map, l); + + map->ops->map_fd_put_ptr(ptr); + } + } + + htab_map_free(map); +} + +/* only called from syscall */ +int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file, + void *key, void *value, u64 map_flags) +{ + void *ptr; + int ret; + u32 ufd = *(u32 *)value; + + ptr = map->ops->map_fd_get_ptr(map, map_file, ufd); + if (IS_ERR(ptr)) + return PTR_ERR(ptr); + + ret = htab_map_update_elem(map, key, &ptr, map_flags); + if (ret) + map->ops->map_fd_put_ptr(ptr); + + return ret; +} + +static struct bpf_map *htab_of_map_alloc(union bpf_attr *attr) +{ + struct bpf_map *map, *inner_map_meta; + + inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd); + if (IS_ERR(inner_map_meta)) + return inner_map_meta; + + map = fd_htab_map_alloc(attr); + if (IS_ERR(map)) { + bpf_map_meta_free(inner_map_meta); + return map; + } + + map->inner_map_meta = inner_map_meta; + + return map; +} + +static void *htab_of_map_lookup_elem(struct bpf_map *map, void *key) +{ + struct bpf_map **inner_map = htab_map_lookup_elem(map, key); + + if (!inner_map) + return NULL; + + return READ_ONCE(*inner_map); +} + +static void htab_of_map_free(struct bpf_map *map) +{ + bpf_map_meta_free(map->inner_map_meta); + fd_htab_map_free(map); +} + +static const struct bpf_map_ops htab_of_map_ops = { + .map_alloc = htab_of_map_alloc, + .map_free = htab_of_map_free, + .map_get_next_key = htab_map_get_next_key, + .map_lookup_elem = htab_of_map_lookup_elem, + .map_delete_elem = 
htab_map_delete_elem, + .map_fd_get_ptr = bpf_map_fd_get_ptr, + .map_fd_put_ptr = bpf_map_fd_put_ptr, +}; + +static struct bpf_map_type_list htab_of_map_type __ro_after_init = { + .ops = &htab_of_map_ops, + .type = BPF_MAP_TYPE_HASH_OF_MAPS, +}; + static int __init register_htab_map(void) { bpf_register_map_type(&htab_type); bpf_register_map_type(&htab_percpu_type); bpf_register_map_type(&htab_lru_type); bpf_register_map_type(&htab_lru_percpu_type); + bpf_register_map_type(&htab_of_map_type); return 0; } late_initcall(register_htab_map); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 6e24fdf1f373..c35ebfe6d84d 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -352,7 +352,8 @@ static int map_lookup_elem(union bpf_attr *attr) err = bpf_percpu_array_copy(map, key, value); } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) { err = bpf_stackmap_copy(map, key, value); - } else if (map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) { + } else if (map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS || + map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { err = -ENOTSUPP; } else { rcu_read_lock(); @@ -446,6 +447,11 @@ static int map_update_elem(union bpf_attr *attr) err = bpf_fd_array_map_update_elem(map, f.file, key, value, attr->flags); rcu_read_unlock(); + } else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { + rcu_read_lock(); + err = bpf_fd_htab_map_update_elem(map, f.file, key, value, + attr->flags); + rcu_read_unlock(); } else { rcu_read_lock(); err = map->ops->map_update_elem(map, key, value, attr->flags); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 3b8f528c5473..09923cc5c7c7 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1200,6 +1200,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id) goto error; break; case BPF_MAP_TYPE_ARRAY_OF_MAPS: + case BPF_MAP_TYPE_HASH_OF_MAPS: if (func_id != BPF_FUNC_map_lookup_elem) goto error; default: @@ -3044,7 +3045,8 @@ process_bpf_exit: static int 
check_map_prealloc(struct bpf_map *map) { return (map->map_type != BPF_MAP_TYPE_HASH && - map->map_type != BPF_MAP_TYPE_PERCPU_HASH) || + map->map_type != BPF_MAP_TYPE_PERCPU_HASH && + map->map_type != BPF_MAP_TYPE_HASH_OF_MAPS) || !(map->map_flags & BPF_F_NO_PREALLOC); } -- cgit v1.2.3-71-gd317 From 91b8270f2a4d1d9b268de90451cdca63a70052d6 Mon Sep 17 00:00:00 2001 From: Chenbo Feng Date: Wed, 22 Mar 2017 17:27:34 -0700 Subject: Add a helper function to get socket cookie in eBPF Retrieve the socket cookie generated by sock_gen_cookie() from a sk_buff with a known socket. Generates a new cookie if one was not yet set. If the socket pointer inside sk_buff is NULL, 0 is returned. The helper function could be useful in monitoring per socket networking traffic statistics and provide a unique socket identifier per namespace. Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: Willem de Bruijn Signed-off-by: Chenbo Feng Signed-off-by: David S. Miller --- include/linux/sock_diag.h | 1 + include/uapi/linux/bpf.h | 9 ++++++++- net/core/filter.c | 17 +++++++++++++++++ net/core/sock_diag.c | 2 +- tools/include/uapi/linux/bpf.h | 3 ++- 5 files changed, 29 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h index a0596ca0e80a..a2f8109bb215 100644 --- a/include/linux/sock_diag.h +++ b/include/linux/sock_diag.h @@ -24,6 +24,7 @@ void sock_diag_unregister(const struct sock_diag_handler *h); void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)); void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)); +u64 sock_gen_cookie(struct sock *sk); int sock_diag_check_cookie(struct sock *sk, const __u32 *cookie); void sock_diag_save_cookie(struct sock *sk, __u32 *cookie); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index ce6f029ac368..cdfc5595fbc1 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@
-459,6 +459,12 @@ union bpf_attr { * Return: * > 0 length of the string including the trailing NUL on success * < 0 error + * + * u64 bpf_get_socket_cookie(skb) + * Get the cookie for the socket stored inside sk_buff. + * @skb: pointer to skb + * Return: 8 Bytes non-decreasing number on success or 0 if the socket + * field is missing inside sk_buff */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -506,7 +512,8 @@ union bpf_attr { FN(get_numa_node_id), \ FN(skb_change_head), \ FN(xdp_adjust_head), \ - FN(probe_read_str), + FN(probe_read_str), \ + FN(get_socket_cookie), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/net/core/filter.c b/net/core/filter.c index c7f0ccd1c0d3..35b0f97c3fdf 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -2606,6 +2607,18 @@ static const struct bpf_func_proto bpf_xdp_event_output_proto = { .arg5_type = ARG_CONST_SIZE, }; +BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb) +{ + return skb->sk ?
sock_gen_cookie(skb->sk) : 0; +} + +static const struct bpf_func_proto bpf_get_socket_cookie_proto = { + .func = bpf_get_socket_cookie, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +}; + static const struct bpf_func_proto * bpf_base_func_proto(enum bpf_func_id func_id) { @@ -2640,6 +2653,8 @@ sk_filter_func_proto(enum bpf_func_id func_id) switch (func_id) { case BPF_FUNC_skb_load_bytes: return &bpf_skb_load_bytes_proto; + case BPF_FUNC_get_socket_cookie: + return &bpf_get_socket_cookie_proto; default: return bpf_base_func_proto(func_id); } @@ -2699,6 +2714,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return &bpf_get_smp_processor_id_proto; case BPF_FUNC_skb_under_cgroup: return &bpf_skb_under_cgroup_proto; + case BPF_FUNC_get_socket_cookie: + return &bpf_get_socket_cookie_proto; default: return bpf_base_func_proto(func_id); } diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 8d11ee75a100..fb9d0e2fd148 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -19,7 +19,7 @@ static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh); static DEFINE_MUTEX(sock_diag_table_mutex); static struct workqueue_struct *broadcast_wq; -static u64 sock_gen_cookie(struct sock *sk) +u64 sock_gen_cookie(struct sock *sk) { while (1) { u64 res = atomic64_read(&sk->sk_cookie); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index ce6f029ac368..a3851859e5f3 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -506,7 +506,8 @@ union bpf_attr { FN(get_numa_node_id), \ FN(skb_change_head), \ FN(xdp_adjust_head), \ - FN(probe_read_str), + FN(probe_read_str), \ + FN(get_socket_cookie), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit v1.2.3-71-gd317 From 6acc5c2910689fc6ee181bf63085c5efff6a42bd Mon Sep 17 00:00:00 2001 From: Chenbo Feng Date: Wed, 22 Mar 2017 17:27:35 -0700 Subject: 
Add a eBPF helper function to retrieve socket uid Returns the owner uid of the socket inside a sk_buff. This is useful to perform per-UID accounting of network traffic or per-UID packet filtering. The socket needs to be a fullsock; otherwise overflowuid is returned. Signed-off-by: Chenbo Feng Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 9 ++++++++- net/core/filter.c | 22 ++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 3 ++- 3 files changed, 32 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index cdfc5595fbc1..28317a04c34d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -465,6 +465,12 @@ union bpf_attr { * @skb: pointer to skb * Return: 8 Bytes non-decreasing number on success or 0 if the socket * field is missing inside sk_buff + * + * u32 bpf_get_socket_uid(skb) + * Get the owner uid of the socket stored inside sk_buff. + * @skb: pointer to skb + * Return: uid of the socket owner on success or overflowuid if the socket + * inside sk_buff is NULL or is not a full socket */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -513,7 +519,8 @@ union bpf_attr { FN(skb_change_head), \ FN(xdp_adjust_head), \ FN(probe_read_str), \ - FN(get_socket_cookie), + FN(get_socket_cookie), \ + FN(get_socket_uid), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/net/core/filter.c b/net/core/filter.c index 35b0f97c3fdf..dfb9f61a2fd5 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2619,6 +2619,24 @@ static const struct bpf_func_proto bpf_get_socket_cookie_proto = { .arg1_type = ARG_PTR_TO_CTX, }; +BPF_CALL_1(bpf_get_socket_uid, struct sk_buff *, skb) +{ + struct sock *sk = sk_to_full_sk(skb->sk); + kuid_t kuid; + + if (!sk || !sk_fullsock(sk)) + return overflowuid; + kuid = sock_net_uid(sock_net(sk), sk); + return from_kuid_munged(sock_net(sk)->user_ns, kuid); +} + +static const struct bpf_func_proto
bpf_get_socket_uid_proto = { + .func = bpf_get_socket_uid, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +}; + static const struct bpf_func_proto * bpf_base_func_proto(enum bpf_func_id func_id) { @@ -2655,6 +2673,8 @@ sk_filter_func_proto(enum bpf_func_id func_id) return &bpf_skb_load_bytes_proto; case BPF_FUNC_get_socket_cookie: return &bpf_get_socket_cookie_proto; + case BPF_FUNC_get_socket_uid: + return &bpf_get_socket_uid_proto; default: return bpf_base_func_proto(func_id); } @@ -2716,6 +2736,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return &bpf_skb_under_cgroup_proto; case BPF_FUNC_get_socket_cookie: return &bpf_get_socket_cookie_proto; + case BPF_FUNC_get_socket_uid: + return &bpf_get_socket_uid_proto; default: return bpf_base_func_proto(func_id); } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index a3851859e5f3..1ea08ce35567 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -507,7 +507,8 @@ union bpf_attr { FN(skb_change_head), \ FN(xdp_adjust_head), \ FN(probe_read_str), \ - FN(get_socket_cookie), + FN(get_socket_cookie), \ + FN(get_socket_uid), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit v1.2.3-71-gd317 From 6d4339028b350efbf87c61e6d9e113e5373545c9 Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Fri, 24 Mar 2017 10:08:36 -0700 Subject: net: Introduce SO_INCOMING_NAPI_ID This socket option returns the NAPI ID associated with the queue on which the last frame is received. This information can be used by the apps to split the incoming flows among the threads based on the Rx queue on which they are received. If the NAPI ID actually represents a sender_cpu then the value is ignored and 0 is returned. Signed-off-by: Sridhar Samudrala Signed-off-by: Alexander Duyck Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- arch/alpha/include/uapi/asm/socket.h | 2 ++ arch/avr32/include/uapi/asm/socket.h | 2 ++ arch/frv/include/uapi/asm/socket.h | 2 ++ arch/ia64/include/uapi/asm/socket.h | 2 ++ arch/m32r/include/uapi/asm/socket.h | 2 ++ arch/mips/include/uapi/asm/socket.h | 1 + arch/mn10300/include/uapi/asm/socket.h | 2 ++ arch/parisc/include/uapi/asm/socket.h | 2 ++ arch/powerpc/include/uapi/asm/socket.h | 2 ++ arch/s390/include/uapi/asm/socket.h | 2 ++ arch/sparc/include/uapi/asm/socket.h | 2 ++ arch/xtensa/include/uapi/asm/socket.h | 2 ++ include/uapi/asm-generic/socket.h | 2 ++ net/core/sock.c | 12 ++++++++++++ 14 files changed, 37 insertions(+) (limited to 'include/uapi') diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index 089db42c1b40..1bb8cac61a28 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -101,4 +101,6 @@ #define SO_MEMINFO 55 +#define SO_INCOMING_NAPI_ID 56 + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/avr32/include/uapi/asm/socket.h b/arch/avr32/include/uapi/asm/socket.h index 6eabcbd2f82a..f824eeb0f2e4 100644 --- a/arch/avr32/include/uapi/asm/socket.h +++ b/arch/avr32/include/uapi/asm/socket.h @@ -94,4 +94,6 @@ #define SO_MEMINFO 55 +#define SO_INCOMING_NAPI_ID 56 + #endif /* _UAPI__ASM_AVR32_SOCKET_H */ diff --git a/arch/frv/include/uapi/asm/socket.h b/arch/frv/include/uapi/asm/socket.h index bd497f8356b9..a8ad9bebfc47 100644 --- a/arch/frv/include/uapi/asm/socket.h +++ b/arch/frv/include/uapi/asm/socket.h @@ -94,5 +94,7 @@ #define SO_MEMINFO 55 +#define SO_INCOMING_NAPI_ID 56 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h index f1bb54686168..6af3253e4209 100644 --- a/arch/ia64/include/uapi/asm/socket.h +++ b/arch/ia64/include/uapi/asm/socket.h @@ -103,4 +103,6 @@ #define SO_MEMINFO 55 +#define SO_INCOMING_NAPI_ID 56 + #endif /* _ASM_IA64_SOCKET_H */ diff --git 
a/arch/m32r/include/uapi/asm/socket.h b/arch/m32r/include/uapi/asm/socket.h index 459c46076f6f..e98b6bb897c0 100644 --- a/arch/m32r/include/uapi/asm/socket.h +++ b/arch/m32r/include/uapi/asm/socket.h @@ -94,4 +94,6 @@ #define SO_MEMINFO 55 +#define SO_INCOMING_NAPI_ID 56 + #endif /* _ASM_M32R_SOCKET_H */ diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index 688c18dd62ef..ae2b62e39d4d 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -112,5 +112,6 @@ #define SO_MEMINFO 55 +#define SO_INCOMING_NAPI_ID 56 #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/mn10300/include/uapi/asm/socket.h b/arch/mn10300/include/uapi/asm/socket.h index 312d2c457a04..e4ac1843ee01 100644 --- a/arch/mn10300/include/uapi/asm/socket.h +++ b/arch/mn10300/include/uapi/asm/socket.h @@ -94,4 +94,6 @@ #define SO_MEMINFO 55 +#define SO_INCOMING_NAPI_ID 56 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index b98ec38f2083..f754c793e82a 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -93,4 +93,6 @@ #define SO_MEMINFO 0x4030 +#define SO_INCOMING_NAPI_ID 0x4031 + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h index 099a889240f6..5f84af7dcb2e 100644 --- a/arch/powerpc/include/uapi/asm/socket.h +++ b/arch/powerpc/include/uapi/asm/socket.h @@ -101,4 +101,6 @@ #define SO_MEMINFO 55 +#define SO_INCOMING_NAPI_ID 56 + #endif /* _ASM_POWERPC_SOCKET_H */ diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h index 6199bb34f7fa..25ac4960e707 100644 --- a/arch/s390/include/uapi/asm/socket.h +++ b/arch/s390/include/uapi/asm/socket.h @@ -100,4 +100,6 @@ #define SO_MEMINFO 55 +#define SO_INCOMING_NAPI_ID 56 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/sparc/include/uapi/asm/socket.h 
b/arch/sparc/include/uapi/asm/socket.h index 12cd8c2ec422..b05513acd589 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -90,6 +90,8 @@ #define SO_MEMINFO 0x0039 +#define SO_INCOMING_NAPI_ID 0x003a + /* Security levels - as per NRL IPv6 - don't actually do anything */ #define SO_SECURITY_AUTHENTICATION 0x5001 #define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002 diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h index d0b85f6c1484..786606c81edd 100644 --- a/arch/xtensa/include/uapi/asm/socket.h +++ b/arch/xtensa/include/uapi/asm/socket.h @@ -105,4 +105,6 @@ #define SO_MEMINFO 55 +#define SO_INCOMING_NAPI_ID 56 + #endif /* _XTENSA_SOCKET_H */ diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index 8313702c1eae..c98a52fb572a 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -96,4 +96,6 @@ #define SO_MEMINFO 55 +#define SO_INCOMING_NAPI_ID 56 + #endif /* __ASM_GENERIC_SOCKET_H */ diff --git a/net/core/sock.c b/net/core/sock.c index 4b762f2a3552..1a58a9dc6888 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1328,6 +1328,18 @@ int sock_getsockopt(struct socket *sock, int level, int optname, goto lenout; } + +#ifdef CONFIG_NET_RX_BUSY_POLL + case SO_INCOMING_NAPI_ID: + v.val = READ_ONCE(sk->sk_napi_id); + + /* aggregate non-NAPI IDs down to 0 */ + if (v.val < MIN_NAPI_ID) + v.val = 0; + + break; +#endif + default: /* We implement the SO_SNDLOWAT etc to not be settable * (1003.1g 7). -- cgit v1.2.3-71-gd317 From 5659495a7a1455665ce1466d156597ad1bda8772 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Fri, 24 Mar 2017 23:04:44 -0700 Subject: uapi: add missing install of userio.h While commit 5523662edd4f ("Input: add userio module") added userio.h under the uapi/ directory, it forgot to add the header file to Kbuild. Thus, the file was missing from header installation. 
Signed-off-by: Naohiro Aota Reviewed-by: Lyude Paul Signed-off-by: Dmitry Torokhov --- include/uapi/linux/Kbuild | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index f330ba4547cf..b4a9a1891db6 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -437,6 +437,7 @@ header-y += unistd.h header-y += unix_diag.h header-y += usbdevice_fs.h header-y += usbip.h +header-y += userio.h header-y += utime.h header-y += utsname.h header-y += uuid.h -- cgit v1.2.3-71-gd317 From ae6336b57ede8cdf801b04e6d943617bb945e3f9 Mon Sep 17 00:00:00 2001 From: Jonas Bonn Date: Fri, 24 Mar 2017 23:23:20 +0100 Subject: gtp: rename SGSN netlink attribute This is a mostly cosmetic rename of the SGSN netlink attribute to the GTP link. The justification for this is that we will be making the module support decapsulation of "downstream" SGSN packets, in which case the netlink parameter actually refers to the upstream GGSN peer. Renaming the parameter makes the relationship clearer. The legacy name is maintained as a define in the header file in order to not break existing code. Signed-off-by: Jonas Bonn Acked-by: Pablo Neira Ayuso Acked-by: Harald Welte Signed-off-by: David S. 
Miller --- drivers/net/gtp.c | 22 +++++++++++----------- include/uapi/linux/gtp.h | 3 ++- 2 files changed, 13 insertions(+), 12 deletions(-) (limited to 'include/uapi') diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 3e1854f34420..1f6d911e77c7 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -56,7 +56,7 @@ struct pdp_ctx { u16 af; struct in_addr ms_addr_ip4; - struct in_addr sgsn_addr_ip4; + struct in_addr peer_addr_ip4; struct sock *sk; struct net_device *dev; @@ -489,17 +489,17 @@ static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev, } netdev_dbg(dev, "found PDP context %p\n", pctx); - rt = ip4_route_output_gtp(&fl4, pctx->sk, pctx->sgsn_addr_ip4.s_addr); + rt = ip4_route_output_gtp(&fl4, pctx->sk, pctx->peer_addr_ip4.s_addr); if (IS_ERR(rt)) { netdev_dbg(dev, "no route to SSGN %pI4\n", - &pctx->sgsn_addr_ip4.s_addr); + &pctx->peer_addr_ip4.s_addr); dev->stats.tx_carrier_errors++; goto err; } if (rt->dst.dev == dev) { netdev_dbg(dev, "circular route to SSGN %pI4\n", - &pctx->sgsn_addr_ip4.s_addr); + &pctx->peer_addr_ip4.s_addr); dev->stats.collisions++; goto err_rt; } @@ -866,8 +866,8 @@ static void ipv4_pdp_fill(struct pdp_ctx *pctx, struct genl_info *info) { pctx->gtp_version = nla_get_u32(info->attrs[GTPA_VERSION]); pctx->af = AF_INET; - pctx->sgsn_addr_ip4.s_addr = - nla_get_be32(info->attrs[GTPA_SGSN_ADDRESS]); + pctx->peer_addr_ip4.s_addr = + nla_get_be32(info->attrs[GTPA_PEER_ADDRESS]); pctx->ms_addr_ip4.s_addr = nla_get_be32(info->attrs[GTPA_MS_ADDRESS]); @@ -957,13 +957,13 @@ static int ipv4_pdp_add(struct gtp_dev *gtp, struct sock *sk, switch (pctx->gtp_version) { case GTP_V0: netdev_dbg(dev, "GTPv0-U: new PDP ctx id=%llx ssgn=%pI4 ms=%pI4 (pdp=%p)\n", - pctx->u.v0.tid, &pctx->sgsn_addr_ip4, + pctx->u.v0.tid, &pctx->peer_addr_ip4, &pctx->ms_addr_ip4, pctx); break; case GTP_V1: netdev_dbg(dev, "GTPv1-U: new PDP ctx id=%x/%x ssgn=%pI4 ms=%pI4 (pdp=%p)\n", pctx->u.v1.i_tei, pctx->u.v1.o_tei, - &pctx->sgsn_addr_ip4, 
&pctx->ms_addr_ip4, pctx); + &pctx->peer_addr_ip4, &pctx->ms_addr_ip4, pctx); break; } @@ -994,7 +994,7 @@ static int gtp_genl_new_pdp(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[GTPA_VERSION] || !info->attrs[GTPA_LINK] || - !info->attrs[GTPA_SGSN_ADDRESS] || + !info->attrs[GTPA_PEER_ADDRESS] || !info->attrs[GTPA_MS_ADDRESS]) return -EINVAL; @@ -1126,7 +1126,7 @@ static int gtp_genl_fill_info(struct sk_buff *skb, u32 snd_portid, u32 snd_seq, goto nlmsg_failure; if (nla_put_u32(skb, GTPA_VERSION, pctx->gtp_version) || - nla_put_be32(skb, GTPA_SGSN_ADDRESS, pctx->sgsn_addr_ip4.s_addr) || + nla_put_be32(skb, GTPA_PEER_ADDRESS, pctx->peer_addr_ip4.s_addr) || nla_put_be32(skb, GTPA_MS_ADDRESS, pctx->ms_addr_ip4.s_addr)) goto nla_put_failure; @@ -1237,7 +1237,7 @@ static struct nla_policy gtp_genl_policy[GTPA_MAX + 1] = { [GTPA_LINK] = { .type = NLA_U32, }, [GTPA_VERSION] = { .type = NLA_U32, }, [GTPA_TID] = { .type = NLA_U64, }, - [GTPA_SGSN_ADDRESS] = { .type = NLA_U32, }, + [GTPA_PEER_ADDRESS] = { .type = NLA_U32, }, [GTPA_MS_ADDRESS] = { .type = NLA_U32, }, [GTPA_FLOW] = { .type = NLA_U16, }, [GTPA_NET_NS_FD] = { .type = NLA_U32, }, diff --git a/include/uapi/linux/gtp.h b/include/uapi/linux/gtp.h index 72a04a0e8cce..57d1edb8efd9 100644 --- a/include/uapi/linux/gtp.h +++ b/include/uapi/linux/gtp.h @@ -19,7 +19,8 @@ enum gtp_attrs { GTPA_LINK, GTPA_VERSION, GTPA_TID, /* for GTPv0 only */ - GTPA_SGSN_ADDRESS, + GTPA_PEER_ADDRESS, /* Remote GSN peer, either SGSN or GGSN */ +#define GTPA_SGSN_ADDRESS GTPA_PEER_ADDRESS /* maintain legacy attr name */ GTPA_MS_ADDRESS, GTPA_FLOW, GTPA_NET_NS_FD, -- cgit v1.2.3-71-gd317 From 91ed81f9abc76d5a61b07cb8286c680c9330b7a1 Mon Sep 17 00:00:00 2001 From: Jonas Bonn Date: Fri, 24 Mar 2017 23:23:21 +0100 Subject: gtp: support SGSN-side tunnels The GTP-tunnel driver is explicitly GGSN-side as it searches for PDP contexts based on the incoming packets _destination_ address. 
If we want to place ourselves on the SGSN side of the tunnel, then we want to be identifying PDP contexts based on _source_ address. Let it be noted that in a "real" configuration this module would never be used: the SGSN normally does not see IP packets as input. The justification for this functionality is for PGW load-testing applications where the input to the SGSN is locally generated IP traffic. This patch adds a "role" argument at GTP-link creation time to specify whether we are on the GGSN or SGSN side of the tunnel; this flag is then used to determine which part of the IP packet to use in determining the PDP context. Signed-off-by: Jonas Bonn Acked-by: Pablo Neira Ayuso Acked-by: Harald Welte Signed-off-by: David S. Miller --- drivers/net/gtp.c | 42 ++++++++++++++++++++++++++++++------------ include/uapi/linux/if_link.h | 7 +++++++ 2 files changed, 37 insertions(+), 12 deletions(-) (limited to 'include/uapi') diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 1f6d911e77c7..4fea1b3dfbb4 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -74,6 +74,7 @@ struct gtp_dev { struct net_device *dev; + unsigned int role; unsigned int hash_size; struct hlist_head *tid_hash; struct hlist_head *addr_hash; @@ -154,8 +155,8 @@ static struct pdp_ctx *ipv4_pdp_find(struct gtp_dev *gtp, __be32 ms_addr) return NULL; } -static bool gtp_check_src_ms_ipv4(struct sk_buff *skb, struct pdp_ctx *pctx, - unsigned int hdrlen) +static bool gtp_check_ms_ipv4(struct sk_buff *skb, struct pdp_ctx *pctx, + unsigned int hdrlen, unsigned int role) { struct iphdr *iph; @@ -164,27 +165,31 @@ static bool gtp_check_src_ms_ipv4(struct sk_buff *skb, struct pdp_ctx *pctx, iph = (struct iphdr *)(skb->data + hdrlen); - return iph->saddr == pctx->ms_addr_ip4.s_addr; + if (role == GTP_ROLE_SGSN) + return iph->daddr == pctx->ms_addr_ip4.s_addr; + else + return iph->saddr == pctx->ms_addr_ip4.s_addr; } -/* Check if the inner IP source address in this packet is assigned to any +/* Check if the 
inner IP address in this packet is assigned to any * existing mobile subscriber. */ -static bool gtp_check_src_ms(struct sk_buff *skb, struct pdp_ctx *pctx, - unsigned int hdrlen) +static bool gtp_check_ms(struct sk_buff *skb, struct pdp_ctx *pctx, + unsigned int hdrlen, unsigned int role) { switch (ntohs(skb->protocol)) { case ETH_P_IP: - return gtp_check_src_ms_ipv4(skb, pctx, hdrlen); + return gtp_check_ms_ipv4(skb, pctx, hdrlen, role); } return false; } -static int gtp_rx(struct pdp_ctx *pctx, struct sk_buff *skb, unsigned int hdrlen) +static int gtp_rx(struct pdp_ctx *pctx, struct sk_buff *skb, + unsigned int hdrlen, unsigned int role) { struct pcpu_sw_netstats *stats; - if (!gtp_check_src_ms(skb, pctx, hdrlen)) { + if (!gtp_check_ms(skb, pctx, hdrlen, role)) { netdev_dbg(pctx->dev, "No PDP ctx for this MS\n"); return 1; } @@ -239,7 +244,7 @@ static int gtp0_udp_encap_recv(struct gtp_dev *gtp, struct sk_buff *skb) return 1; } - return gtp_rx(pctx, skb, hdrlen); + return gtp_rx(pctx, skb, hdrlen, gtp->role); } static int gtp1u_udp_encap_recv(struct gtp_dev *gtp, struct sk_buff *skb) @@ -281,7 +286,7 @@ static int gtp1u_udp_encap_recv(struct gtp_dev *gtp, struct sk_buff *skb) return 1; } - return gtp_rx(pctx, skb, hdrlen); + return gtp_rx(pctx, skb, hdrlen, gtp->role); } static void gtp_encap_destroy(struct sock *sk) @@ -481,7 +486,11 @@ static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev, * Prepend PDP header with TEI/TID from PDP ctx. 
*/ iph = ip_hdr(skb); - pctx = ipv4_pdp_find(gtp, iph->daddr); + if (gtp->role == GTP_ROLE_SGSN) + pctx = ipv4_pdp_find(gtp, iph->saddr); + else + pctx = ipv4_pdp_find(gtp, iph->daddr); + if (!pctx) { netdev_dbg(dev, "no PDP ctx found for %pI4, skip\n", &iph->daddr); @@ -685,6 +694,7 @@ static const struct nla_policy gtp_policy[IFLA_GTP_MAX + 1] = { [IFLA_GTP_FD0] = { .type = NLA_U32 }, [IFLA_GTP_FD1] = { .type = NLA_U32 }, [IFLA_GTP_PDP_HASHSIZE] = { .type = NLA_U32 }, + [IFLA_GTP_ROLE] = { .type = NLA_U32 }, }; static int gtp_validate(struct nlattr *tb[], struct nlattr *data[]) @@ -810,6 +820,7 @@ static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[]) { struct sock *sk1u = NULL; struct sock *sk0 = NULL; + unsigned int role = GTP_ROLE_GGSN; if (data[IFLA_GTP_FD0]) { u32 fd0 = nla_get_u32(data[IFLA_GTP_FD0]); @@ -830,8 +841,15 @@ static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[]) } } + if (data[IFLA_GTP_ROLE]) { + role = nla_get_u32(data[IFLA_GTP_ROLE]); + if (role > GTP_ROLE_SGSN) + return -EINVAL; + } + gtp->sk0 = sk0; gtp->sk1u = sk1u; + gtp->role = role; return 0; } diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 320fc1e747ee..8b405afb2376 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -538,11 +538,18 @@ enum { #define IFLA_PPP_MAX (__IFLA_PPP_MAX - 1) /* GTP section */ + +enum ifla_gtp_role { + GTP_ROLE_GGSN = 0, + GTP_ROLE_SGSN, +}; + enum { IFLA_GTP_UNSPEC, IFLA_GTP_FD0, IFLA_GTP_FD1, IFLA_GTP_PDP_HASHSIZE, + IFLA_GTP_ROLE, __IFLA_GTP_MAX, }; #define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1) -- cgit v1.2.3-71-gd317 From 71c3797779d3cd8378767f5b2d8cfd3b2f88c5c1 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Wed, 22 Mar 2017 21:30:24 +0900 Subject: ALSA: firewire-motu: add hwdep interface This commit adds hwdep interface so as the other sound drivers for units on IEEE 1394 bus have. This interface is designed for mixer/control applications. 
By using this interface, an application can get information about firewire node, can lock/unlock kernel streaming and can get notification at starting/stopping kernel streaming. Signed-off-by: Takashi Sakamoto Signed-off-by: Takashi Iwai --- include/uapi/sound/asound.h | 3 +- include/uapi/sound/firewire.h | 3 +- sound/firewire/motu/Makefile | 2 +- sound/firewire/motu/motu-hwdep.c | 192 ++++++++++++++++++++++++++++++++++++++ sound/firewire/motu/motu-midi.c | 16 ++++ sound/firewire/motu/motu-pcm.c | 20 +++- sound/firewire/motu/motu-stream.c | 38 ++++++++ sound/firewire/motu/motu.c | 5 + sound/firewire/motu/motu.h | 13 +++ 9 files changed, 285 insertions(+), 7 deletions(-) create mode 100644 sound/firewire/motu/motu-hwdep.c (limited to 'include/uapi') diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h index be353a78c303..fd7b561af768 100644 --- a/include/uapi/sound/asound.h +++ b/include/uapi/sound/asound.h @@ -107,9 +107,10 @@ enum { SNDRV_HWDEP_IFACE_FW_DIGI00X, /* Digidesign Digi 002/003 family */ SNDRV_HWDEP_IFACE_FW_TASCAM, /* TASCAM FireWire series */ SNDRV_HWDEP_IFACE_LINE6, /* Line6 USB processors */ + SNDRV_HWDEP_IFACE_FW_MOTU, /* MOTU FireWire series */ /* Don't forget to change the following: */ - SNDRV_HWDEP_IFACE_LAST = SNDRV_HWDEP_IFACE_LINE6 + SNDRV_HWDEP_IFACE_LAST = SNDRV_HWDEP_IFACE_FW_MOTU }; struct snd_hwdep_info { diff --git a/include/uapi/sound/firewire.h b/include/uapi/sound/firewire.h index db79a12fcc78..59c6d81f5364 100644 --- a/include/uapi/sound/firewire.h +++ b/include/uapi/sound/firewire.h @@ -65,7 +65,8 @@ union snd_firewire_event { #define SNDRV_FIREWIRE_TYPE_OXFW 4 #define SNDRV_FIREWIRE_TYPE_DIGI00X 5 #define SNDRV_FIREWIRE_TYPE_TASCAM 6 -/* RME, MOTU, ... */ +#define SNDRV_FIREWIRE_TYPE_MOTU 7 +/* RME... 
*/ struct snd_firewire_get_info { unsigned int type; /* SNDRV_FIREWIRE_TYPE_xxx */ diff --git a/sound/firewire/motu/Makefile b/sound/firewire/motu/Makefile index a512c1e0f49c..cc195d5a5a6e 100644 --- a/sound/firewire/motu/Makefile +++ b/sound/firewire/motu/Makefile @@ -1,3 +1,3 @@ snd-firewire-motu-objs := motu.o amdtp-motu.o motu-transaction.o motu-stream.o \ - motu-proc.o motu-pcm.o motu-midi.o + motu-proc.o motu-pcm.o motu-midi.o motu-hwdep.o obj-$(CONFIG_SND_FIREWIRE_MOTU) += snd-firewire-motu.o diff --git a/sound/firewire/motu/motu-hwdep.c b/sound/firewire/motu/motu-hwdep.c new file mode 100644 index 000000000000..e795a5219a21 --- /dev/null +++ b/sound/firewire/motu/motu-hwdep.c @@ -0,0 +1,192 @@ +/* + * motu-hwdep.c - a part of driver for MOTU FireWire series + * + * Copyright (c) 2015-2017 Takashi Sakamoto + * + * Licensed under the terms of the GNU General Public License, version 2. + */ + +/* + * This codes have five functionalities. + * + * 1.get information about firewire node + * 2.get notification about starting/stopping stream + * 3.lock/unlock streaming + * + */ + +#include "motu.h" + +static long hwdep_read(struct snd_hwdep *hwdep, char __user *buf, long count, + loff_t *offset) +{ + struct snd_motu *motu = hwdep->private_data; + DEFINE_WAIT(wait); + union snd_firewire_event event; + + spin_lock_irq(&motu->lock); + + while (!motu->dev_lock_changed) { + prepare_to_wait(&motu->hwdep_wait, &wait, TASK_INTERRUPTIBLE); + spin_unlock_irq(&motu->lock); + schedule(); + finish_wait(&motu->hwdep_wait, &wait); + if (signal_pending(current)) + return -ERESTARTSYS; + spin_lock_irq(&motu->lock); + } + + memset(&event, 0, sizeof(event)); + if (motu->dev_lock_changed) { + event.lock_status.type = SNDRV_FIREWIRE_EVENT_LOCK_STATUS; + event.lock_status.status = (motu->dev_lock_count > 0); + motu->dev_lock_changed = false; + + count = min_t(long, count, sizeof(event.lock_status)); + } + + spin_unlock_irq(&motu->lock); + + if (copy_to_user(buf, &event, count)) + return 
-EFAULT; + + return count; +} + +static unsigned int hwdep_poll(struct snd_hwdep *hwdep, struct file *file, + poll_table *wait) +{ + struct snd_motu *motu = hwdep->private_data; + unsigned int events; + + poll_wait(file, &motu->hwdep_wait, wait); + + spin_lock_irq(&motu->lock); + if (motu->dev_lock_changed) + events = POLLIN | POLLRDNORM; + else + events = 0; + spin_unlock_irq(&motu->lock); + + return events | POLLOUT; +} + +static int hwdep_get_info(struct snd_motu *motu, void __user *arg) +{ + struct fw_device *dev = fw_parent_device(motu->unit); + struct snd_firewire_get_info info; + + memset(&info, 0, sizeof(info)); + info.type = SNDRV_FIREWIRE_TYPE_MOTU; + info.card = dev->card->index; + *(__be32 *)&info.guid[0] = cpu_to_be32(dev->config_rom[3]); + *(__be32 *)&info.guid[4] = cpu_to_be32(dev->config_rom[4]); + strlcpy(info.device_name, dev_name(&dev->device), + sizeof(info.device_name)); + + if (copy_to_user(arg, &info, sizeof(info))) + return -EFAULT; + + return 0; +} + +static int hwdep_lock(struct snd_motu *motu) +{ + int err; + + spin_lock_irq(&motu->lock); + + if (motu->dev_lock_count == 0) { + motu->dev_lock_count = -1; + err = 0; + } else { + err = -EBUSY; + } + + spin_unlock_irq(&motu->lock); + + return err; +} + +static int hwdep_unlock(struct snd_motu *motu) +{ + int err; + + spin_lock_irq(&motu->lock); + + if (motu->dev_lock_count == -1) { + motu->dev_lock_count = 0; + err = 0; + } else { + err = -EBADFD; + } + + spin_unlock_irq(&motu->lock); + + return err; +} + +static int hwdep_release(struct snd_hwdep *hwdep, struct file *file) +{ + struct snd_motu *motu = hwdep->private_data; + + spin_lock_irq(&motu->lock); + if (motu->dev_lock_count == -1) + motu->dev_lock_count = 0; + spin_unlock_irq(&motu->lock); + + return 0; +} + +static int hwdep_ioctl(struct snd_hwdep *hwdep, struct file *file, + unsigned int cmd, unsigned long arg) +{ + struct snd_motu *motu = hwdep->private_data; + + switch (cmd) { + case SNDRV_FIREWIRE_IOCTL_GET_INFO: + return 
hwdep_get_info(motu, (void __user *)arg); + case SNDRV_FIREWIRE_IOCTL_LOCK: + return hwdep_lock(motu); + case SNDRV_FIREWIRE_IOCTL_UNLOCK: + return hwdep_unlock(motu); + default: + return -ENOIOCTLCMD; + } +} + +#ifdef CONFIG_COMPAT +static int hwdep_compat_ioctl(struct snd_hwdep *hwdep, struct file *file, + unsigned int cmd, unsigned long arg) +{ + return hwdep_ioctl(hwdep, file, cmd, + (unsigned long)compat_ptr(arg)); +} +#else +#define hwdep_compat_ioctl NULL +#endif + +int snd_motu_create_hwdep_device(struct snd_motu *motu) +{ + static const struct snd_hwdep_ops ops = { + .read = hwdep_read, + .release = hwdep_release, + .poll = hwdep_poll, + .ioctl = hwdep_ioctl, + .ioctl_compat = hwdep_compat_ioctl, + }; + struct snd_hwdep *hwdep; + int err; + + err = snd_hwdep_new(motu->card, motu->card->driver, 0, &hwdep); + if (err < 0) + return err; + + strcpy(hwdep->name, "MOTU"); + hwdep->iface = SNDRV_HWDEP_IFACE_FW_MOTU; + hwdep->ops = ops; + hwdep->private_data = motu; + hwdep->exclusive = true; + + return 0; +} diff --git a/sound/firewire/motu/motu-midi.c b/sound/firewire/motu/motu-midi.c index f232f29589d0..e3acfcc53f4e 100644 --- a/sound/firewire/motu/motu-midi.c +++ b/sound/firewire/motu/motu-midi.c @@ -12,6 +12,10 @@ static int midi_capture_open(struct snd_rawmidi_substream *substream) struct snd_motu *motu = substream->rmidi->private_data; int err; + err = snd_motu_stream_lock_try(motu); + if (err < 0) + return err; + mutex_lock(&motu->mutex); motu->capture_substreams++; @@ -19,6 +23,9 @@ static int midi_capture_open(struct snd_rawmidi_substream *substream) mutex_unlock(&motu->mutex); + if (err < 0) + snd_motu_stream_lock_release(motu); + return err; } @@ -27,6 +34,10 @@ static int midi_playback_open(struct snd_rawmidi_substream *substream) struct snd_motu *motu = substream->rmidi->private_data; int err; + err = snd_motu_stream_lock_try(motu); + if (err < 0) + return err; + mutex_lock(&motu->mutex); motu->playback_substreams++; @@ -34,6 +45,9 @@ static int 
midi_playback_open(struct snd_rawmidi_substream *substream) mutex_unlock(&motu->mutex); + if (err < 0) + snd_motu_stream_lock_release(motu); + return err; } @@ -48,6 +62,7 @@ static int midi_capture_close(struct snd_rawmidi_substream *substream) mutex_unlock(&motu->mutex); + snd_motu_stream_lock_release(motu); return 0; } @@ -62,6 +77,7 @@ static int midi_playback_close(struct snd_rawmidi_substream *substream) mutex_unlock(&motu->mutex); + snd_motu_stream_lock_release(motu); return 0; } diff --git a/sound/firewire/motu/motu-pcm.c b/sound/firewire/motu/motu-pcm.c index a50bcd6f4a63..94558f3d218b 100644 --- a/sound/firewire/motu/motu-pcm.c +++ b/sound/firewire/motu/motu-pcm.c @@ -159,15 +159,19 @@ static int pcm_open(struct snd_pcm_substream *substream) unsigned int rate; int err; + err = snd_motu_stream_lock_try(motu); + if (err < 0) + return err; + mutex_lock(&motu->mutex); err = protocol->cache_packet_formats(motu); if (err < 0) - return err; + goto err_locked; err = init_hw_info(motu, substream); if (err < 0) - return err; + goto err_locked; /* * When source of clock is not internal or any PCM streams are running, @@ -175,13 +179,13 @@ static int pcm_open(struct snd_pcm_substream *substream) */ err = protocol->get_clock_source(motu, &src); if (err < 0) - return err; + goto err_locked; if (src != SND_MOTU_CLOCK_SOURCE_INTERNAL || amdtp_stream_pcm_running(&motu->tx_stream) || amdtp_stream_pcm_running(&motu->rx_stream)) { err = protocol->get_clock_rate(motu, &rate); if (err < 0) - return err; + goto err_locked; substream->runtime->hw.rate_min = rate; substream->runtime->hw.rate_max = rate; } @@ -190,11 +194,19 @@ static int pcm_open(struct snd_pcm_substream *substream) mutex_unlock(&motu->mutex); + return err; +err_locked: + mutex_unlock(&motu->mutex); + snd_motu_stream_lock_release(motu); return err; } static int pcm_close(struct snd_pcm_substream *substream) { + struct snd_motu *motu = substream->private_data; + + snd_motu_stream_lock_release(motu); + return 0; } 
diff --git a/sound/firewire/motu/motu-stream.c b/sound/firewire/motu/motu-stream.c index 911d3487f775..bd458029099e 100644 --- a/sound/firewire/motu/motu-stream.c +++ b/sound/firewire/motu/motu-stream.c @@ -341,3 +341,41 @@ void snd_motu_stream_destroy_duplex(struct snd_motu *motu) motu->playback_substreams = 0; motu->capture_substreams = 0; } + +static void motu_lock_changed(struct snd_motu *motu) +{ + motu->dev_lock_changed = true; + wake_up(&motu->hwdep_wait); +} + +int snd_motu_stream_lock_try(struct snd_motu *motu) +{ + int err; + + spin_lock_irq(&motu->lock); + + if (motu->dev_lock_count < 0) { + err = -EBUSY; + goto out; + } + + if (motu->dev_lock_count++ == 0) + motu_lock_changed(motu); + err = 0; +out: + spin_unlock_irq(&motu->lock); + return err; +} + +void snd_motu_stream_lock_release(struct snd_motu *motu) +{ + spin_lock_irq(&motu->lock); + + if (WARN_ON(motu->dev_lock_count <= 0)) + goto out; + + if (--motu->dev_lock_count == 0) + motu_lock_changed(motu); +out: + spin_unlock_irq(&motu->lock); +} diff --git a/sound/firewire/motu/motu.c b/sound/firewire/motu/motu.c index d4da1377fa50..619554b9dbef 100644 --- a/sound/firewire/motu/motu.c +++ b/sound/firewire/motu/motu.c @@ -109,6 +109,10 @@ static void do_registration(struct work_struct *work) goto error; } + err = snd_motu_create_hwdep_device(motu); + if (err < 0) + goto error; + err = snd_card_register(motu->card); if (err < 0) goto error; @@ -145,6 +149,7 @@ static int motu_probe(struct fw_unit *unit, mutex_init(&motu->mutex); spin_lock_init(&motu->lock); + init_waitqueue_head(&motu->hwdep_wait); /* Allocate and register this sound card later. 
*/ INIT_DEFERRABLE_WORK(&motu->dwork, do_registration); diff --git a/sound/firewire/motu/motu.h b/sound/firewire/motu/motu.h index 338b35193001..7b1d85f29b49 100644 --- a/sound/firewire/motu/motu.h +++ b/sound/firewire/motu/motu.h @@ -16,12 +16,16 @@ #include #include #include +#include +#include #include #include #include #include #include +#include +#include #include "../lib.h" #include "../amdtp-stream.h" @@ -62,6 +66,11 @@ struct snd_motu { /* For notification. */ struct fw_address_handler async_handler; u32 msg; + + /* For uapi */ + int dev_lock_count; + bool dev_lock_changed; + wait_queue_head_t hwdep_wait; }; enum snd_motu_spec_flags { @@ -136,10 +145,14 @@ int snd_motu_stream_init_duplex(struct snd_motu *motu); void snd_motu_stream_destroy_duplex(struct snd_motu *motu); int snd_motu_stream_start_duplex(struct snd_motu *motu, unsigned int rate); void snd_motu_stream_stop_duplex(struct snd_motu *motu); +int snd_motu_stream_lock_try(struct snd_motu *motu); +void snd_motu_stream_lock_release(struct snd_motu *motu); void snd_motu_proc_init(struct snd_motu *motu); int snd_motu_create_pcm_devices(struct snd_motu *motu); int snd_motu_create_midi_devices(struct snd_motu *motu); + +int snd_motu_create_hwdep_device(struct snd_motu *motu); #endif -- cgit v1.2.3-71-gd317 From 5aaab1bf37ede45df4f5d13d735faf824edf3ec8 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Wed, 22 Mar 2017 21:30:25 +0900 Subject: ALSA: firewire-motu: enable to read transaction cache via hwdep interface MOTU FireWire series can transfer messages to registered address. These messages are transferred for the status of internal clock synchronization just after starting streams. When the synchronization is stable, it's 0x01ffffff. Else, it's 0x05ffffff. This commit adds a functionality for user space applications to receive content of the message. 
Signed-off-by: Takashi Sakamoto Signed-off-by: Takashi Iwai --- include/uapi/sound/firewire.h | 7 +++++++ sound/firewire/motu/motu-hwdep.c | 10 ++++++++-- sound/firewire/motu/motu-transaction.c | 20 ++++++++++++++++++++ 3 files changed, 35 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/sound/firewire.h b/include/uapi/sound/firewire.h index 59c6d81f5364..29afc5eab42d 100644 --- a/include/uapi/sound/firewire.h +++ b/include/uapi/sound/firewire.h @@ -10,6 +10,7 @@ #define SNDRV_FIREWIRE_EVENT_DICE_NOTIFICATION 0xd1ce004e #define SNDRV_FIREWIRE_EVENT_EFW_RESPONSE 0x4e617475 #define SNDRV_FIREWIRE_EVENT_DIGI00X_MESSAGE 0x746e736c +#define SNDRV_FIREWIRE_EVENT_MOTU_NOTIFICATION 0x64776479 struct snd_firewire_event_common { unsigned int type; /* SNDRV_FIREWIRE_EVENT_xxx */ @@ -46,12 +47,18 @@ struct snd_firewire_event_digi00x_message { __u32 message; /* Digi00x-specific message */ }; +struct snd_firewire_event_motu_notification { + unsigned int type; + __u32 message; /* MOTU-specific bits. 
*/ +}; + union snd_firewire_event { struct snd_firewire_event_common common; struct snd_firewire_event_lock_status lock_status; struct snd_firewire_event_dice_notification dice_notification; struct snd_firewire_event_efw_response efw_response; struct snd_firewire_event_digi00x_message digi00x_message; + struct snd_firewire_event_motu_notification motu_notification; }; diff --git a/sound/firewire/motu/motu-hwdep.c b/sound/firewire/motu/motu-hwdep.c index e795a5219a21..b87ccb69d597 100644 --- a/sound/firewire/motu/motu-hwdep.c +++ b/sound/firewire/motu/motu-hwdep.c @@ -26,7 +26,7 @@ static long hwdep_read(struct snd_hwdep *hwdep, char __user *buf, long count, spin_lock_irq(&motu->lock); - while (!motu->dev_lock_changed) { + while (!motu->dev_lock_changed && motu->msg == 0) { prepare_to_wait(&motu->hwdep_wait, &wait, TASK_INTERRUPTIBLE); spin_unlock_irq(&motu->lock); schedule(); @@ -43,6 +43,12 @@ static long hwdep_read(struct snd_hwdep *hwdep, char __user *buf, long count, motu->dev_lock_changed = false; count = min_t(long, count, sizeof(event.lock_status)); + } else { + event.motu_notification.type = SNDRV_FIREWIRE_EVENT_MOTU_NOTIFICATION; + event.motu_notification.message = motu->msg; + motu->msg = 0; + + count = min_t(long, count, sizeof(event.motu_notification)); } spin_unlock_irq(&motu->lock); @@ -62,7 +68,7 @@ static unsigned int hwdep_poll(struct snd_hwdep *hwdep, struct file *file, poll_wait(file, &motu->hwdep_wait, wait); spin_lock_irq(&motu->lock); - if (motu->dev_lock_changed) + if (motu->dev_lock_changed || motu->msg) events = POLLIN | POLLRDNORM; else events = 0; diff --git a/sound/firewire/motu/motu-transaction.c b/sound/firewire/motu/motu-transaction.c index 416dd9833896..7fc30091e0de 100644 --- a/sound/firewire/motu/motu-transaction.c +++ b/sound/firewire/motu/motu-transaction.c @@ -50,7 +50,27 @@ static void handle_message(struct fw_card *card, struct fw_request *request, int generation, unsigned long long offset, void *data, size_t length, void 
*callback_data) { + struct snd_motu *motu = callback_data; + __be32 *buf = (__be32 *)data; + unsigned long flags; + + if (tcode != TCODE_WRITE_QUADLET_REQUEST) { + fw_send_response(card, request, RCODE_COMPLETE); + return; + } + + if (offset != motu->async_handler.offset || length != 4) { + fw_send_response(card, request, RCODE_ADDRESS_ERROR); + return; + } + + spin_lock_irqsave(&motu->lock, flags); + motu->msg = be32_to_cpu(*buf); + spin_unlock_irqrestore(&motu->lock, flags); + fw_send_response(card, request, RCODE_COMPLETE); + + wake_up(&motu->hwdep_wait); } int snd_motu_transaction_reregister(struct snd_motu *motu) -- cgit v1.2.3-71-gd317 From a8a3c426772e55ae9c3209f061cb6317268f932c Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 14 Mar 2017 10:15:19 +0000 Subject: KVM: MIPS: Add VZ & TE capabilities MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add new KVM_CAP_MIPS_VZ and KVM_CAP_MIPS_TE capabilities, and in order to allow MIPS KVM to support VZ without confusing old users (which expect the trap & emulate implementation), define and start checking KVM_CREATE_VM type codes. The codes available are: - KVM_VM_MIPS_TE = 0 This is the current value expected from the user, and will create a VM using trap & emulate in user mode, confined to the user mode address space. This may in future become unavailable if the kernel is only configured to support VZ, in which case the EINVAL error will be returned and KVM_CAP_MIPS_TE won't be available even though KVM_CAP_MIPS_VZ is. - KVM_VM_MIPS_VZ = 1 This can be provided when the KVM_CAP_MIPS_VZ capability is available to create a VM using VZ, with a fully virtualized guest virtual address space. If VZ support is unavailable in the kernel, the EINVAL error will be returned (although old kernels without the KVM_CAP_MIPS_VZ capability may well succeed and create a trap & emulate VM). 
This is designed to allow the desired implementation (T&E vs VZ) to be potentially chosen at runtime rather than being fixed in the kernel configuration. Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: "Radim Krčmář" Cc: Ralf Baechle Cc: Jonathan Corbet Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org Cc: linux-doc@vger.kernel.org --- Documentation/virtual/kvm/api.txt | 47 ++++++++++++++++++++++++++++++++++++++- arch/mips/kvm/mips.c | 9 ++++++++ include/uapi/linux/kvm.h | 6 +++++ 3 files changed, 61 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 3c248f772ae6..4b5fa2571efa 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -115,12 +115,17 @@ will access the virtual machine's physical address space; offset zero corresponds to guest physical address zero. Use of mmap() on a VM fd is discouraged if userspace memory allocation (KVM_CAP_USER_MEMORY) is available. -You most certainly want to use 0 as machine type. +You probably want to use 0 as machine type. In order to create user controlled virtual machines on S390, check KVM_CAP_S390_UCONTROL and use the flag KVM_VM_S390_UCONTROL as privileged user (CAP_SYS_ADMIN). +To use hardware assisted virtualization on MIPS (VZ ASE) rather than +the default trap & emulate implementation (which changes the virtual +memory layout to fit in user mode), check KVM_CAP_MIPS_VZ and use the +flag KVM_VM_MIPS_VZ. + 4.3 KVM_GET_MSR_INDEX_LIST @@ -4147,3 +4152,43 @@ This capability, if KVM_CHECK_EXTENSION indicates that it is available, means that that the kernel can support guests using the hashed page table MMU defined in Power ISA V3.00 (as implemented in the POWER9 processor), including in-memory segment tables. 
+ +8.5 KVM_CAP_MIPS_VZ + +Architectures: mips + +This capability, if KVM_CHECK_EXTENSION on the main kvm handle indicates that +it is available, means that full hardware assisted virtualization capabilities +of the hardware are available for use through KVM. An appropriate +KVM_VM_MIPS_* type must be passed to KVM_CREATE_VM to create a VM which +utilises it. + +If KVM_CHECK_EXTENSION on a kvm VM handle indicates that this capability is +available, it means that the VM is using full hardware assisted virtualization +capabilities of the hardware. This is useful to check after creating a VM with +KVM_VM_MIPS_DEFAULT. + +The value returned by KVM_CHECK_EXTENSION should be compared against known +values (see below). All other values are reserved. This is to allow for the +possibility of other hardware assisted virtualization implementations which +may be incompatible with the MIPS VZ ASE. + + 0: The trap & emulate implementation is in use to run guest code in user + mode. Guest virtual memory segments are rearranged to fit the guest in the + user mode address space. + + 1: The MIPS VZ ASE is in use, providing full hardware assisted + virtualization, including standard guest virtual memory segments. + +8.6 KVM_CAP_MIPS_TE + +Architectures: mips + +This capability, if KVM_CHECK_EXTENSION on the main kvm handle indicates that +it is available, means that the trap & emulate implementation is available to +run guest code in user mode, even if KVM_CAP_MIPS_VZ indicates that hardware +assisted virtualisation is also available. KVM_VM_MIPS_TE (0) must be passed +to KVM_CREATE_VM to create a VM which utilises it. + +If KVM_CHECK_EXTENSION on a kvm VM handle indicates that this capability is +available, it means that the VM is using trap & emulate. 
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index c507533ef6ea..476ece99bf3b 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -107,6 +107,14 @@ void kvm_arch_check_processor_compat(void *rtn) int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { + switch (type) { + case KVM_VM_MIPS_TE: + break; + default: + /* Unsupported KVM type */ + return -EINVAL; + }; + /* Allocate page table to map GPA -> RPA */ kvm->arch.gpa_mm.pgd = kvm_pgd_alloc(); if (!kvm->arch.gpa_mm.pgd) @@ -1038,6 +1046,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_READONLY_MEM: case KVM_CAP_SYNC_MMU: case KVM_CAP_IMMEDIATE_EXIT: + case KVM_CAP_MIPS_TE: r = 1; break; case KVM_CAP_COALESCED_MMIO: diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index f51d5082a377..58ddedce4235 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -702,6 +702,10 @@ struct kvm_ppc_resize_hpt { #define KVM_VM_PPC_HV 1 #define KVM_VM_PPC_PR 2 +/* on MIPS, 0 forces trap & emulate, 1 forces VZ ASE */ +#define KVM_VM_MIPS_TE 0 +#define KVM_VM_MIPS_VZ 1 + #define KVM_S390_SIE_PAGE_OFFSET 1 /* @@ -883,6 +887,8 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_PPC_MMU_RADIX 134 #define KVM_CAP_PPC_MMU_HASH_V3 135 #define KVM_CAP_IMMEDIATE_EXIT 136 +#define KVM_CAP_MIPS_VZ 137 +#define KVM_CAP_MIPS_TE 138 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3-71-gd317 From 578fd61d2d210a3b58dc107f5382b965922ac253 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 14 Mar 2017 10:15:20 +0000 Subject: KVM: MIPS: Add 64BIT capability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new KVM_CAP_MIPS_64BIT capability to indicate that 64-bit MIPS guests are available and supported. In this case it should still be possible to run 32-bit guest code. 
If not available it won't be possible to run 64-bit guest code and the instructions may not be available, or the kernel may not support full context switching of 64-bit registers. Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: "Radim Krčmář" Cc: Ralf Baechle Cc: Jonathan Corbet Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org Cc: linux-doc@vger.kernel.org --- Documentation/virtual/kvm/api.txt | 25 +++++++++++++++++++++++++ include/uapi/linux/kvm.h | 1 + 2 files changed, 26 insertions(+) (limited to 'include/uapi') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 4b5fa2571efa..1b8486c094b4 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -4192,3 +4192,28 @@ to KVM_CREATE_VM to create a VM which utilises it. If KVM_CHECK_EXTENSION on a kvm VM handle indicates that this capability is available, it means that the VM is using trap & emulate. + +8.7 KVM_CAP_MIPS_64BIT + +Architectures: mips + +This capability indicates the supported architecture type of the guest, i.e. the +supported register and address width. + +The values returned when this capability is checked by KVM_CHECK_EXTENSION on a +kvm VM handle correspond roughly to the CP0_Config.AT register field, and should +be checked specifically against known values (see below). All other values are +reserved. + + 0: MIPS32 or microMIPS32. + Both registers and addresses are 32-bits wide. + It will only be possible to run 32-bit guest code. + + 1: MIPS64 or microMIPS64 with access only to 32-bit compatibility segments. + Registers are 64-bits wide, but addresses are 32-bits wide. + 64-bit guest code may run but cannot access MIPS64 memory segments. + It will also be possible to run 32-bit guest code. + + 2: MIPS64 or microMIPS64 with access to all address segments. + Both registers and addresses are 64-bits wide. + It will be possible to run 64-bit or 32-bit guest code. 
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 58ddedce4235..1e1a6c728a18 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -889,6 +889,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_IMMEDIATE_EXIT 136 #define KVM_CAP_MIPS_VZ 137 #define KVM_CAP_MIPS_TE 138 +#define KVM_CAP_MIPS_64BIT 139 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3-71-gd317 From 1555d204e743b6956d2be294a317121f6112238d Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Tue, 28 Mar 2017 17:24:10 +0200 Subject: devlink: Support for pipeline debug (dpipe) The pipeline debug is used to export the pipeline abstractions for the main objects - tables, headers and entries. The only support for set is for changing the counter parameter on specific table. The basic structures: Header - can represent a real protocol header information or internal metadata. Generic protocol headers like IPv4 can be shared between drivers. Each driver can add local headers. Field - part of a header. Can represent protocol field or specific ASIC metadata field. Hardware special metadata fields can be mapped to different resources, for example switch ASIC ports can have internal number which from the system's point of view is mapped to netdevice ifindex. Match - represents specific match rule. Can describe match on specific field or header. The header index should be specified as well in order to support several header instances of the same type (tunneling). Action - represents specific action rule. Actions can describe operations on specific field values for example like set, increment, etc. And header operation like add and delete. Value - represents value which can be associated with specific match or action. Table - represents a hardware block which can be described with match/ action behavior. 
The match/action can be done on the packet's data or on the internal metadata that it gathered along the packet's traversal through the pipeline, which is vendor specific and should be exported in order to provide understanding of the ASIC's behavior. Entry - represents a single record in a specific table. The entry is identified by specific combination of values for match/action. Prior to accessing the tables/entries the drivers provide the header/ field data base which is used by driver to user-space. The data base is split between the shared headers and unique headers. Signed-off-by: Arkadi Sharshevsky Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/devlink.h | 259 ++++++++++++++ include/uapi/linux/devlink.h | 67 +++- net/core/devlink.c | 836 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 1161 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/net/devlink.h b/include/net/devlink.h index d29e5fc82582..24de13f8c94f 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -25,6 +25,8 @@ struct devlink { struct list_head list; struct list_head port_list; struct list_head sb_list; + struct list_head dpipe_table_list; + struct devlink_dpipe_headers *dpipe_headers; const struct devlink_ops *ops; struct device *dev; possible_net_t _net; @@ -49,6 +51,178 @@ struct devlink_sb_pool_info { enum devlink_sb_threshold_type threshold_type; }; +/** + * struct devlink_dpipe_field - dpipe field object + * @name: field name + * @id: index inside the headers field array + * @bitwidth: bitwidth + * @mapping_type: mapping type + */ +struct devlink_dpipe_field { + const char *name; + unsigned int id; + unsigned int bitwidth; + enum devlink_dpipe_field_mapping_type mapping_type; +}; + +/** + * struct devlink_dpipe_header - dpipe header object + * @name: header name + * @id: index, global/local determined by global bit + * @fields: fields + * @fields_count: number of fields + * @global: indicates if header is shared like most 
protocol header + * or driver specific + */ +struct devlink_dpipe_header { + const char *name; + unsigned int id; + struct devlink_dpipe_field *fields; + unsigned int fields_count; + bool global; +}; + +/** + * struct devlink_dpipe_match - represents match operation + * @type: type of match + * @header_index: header index (packets can have several headers of same + * type like in case of tunnels) + * @header: header + * @field_id: field index + */ +struct devlink_dpipe_match { + enum devlink_dpipe_match_type type; + unsigned int header_index; + struct devlink_dpipe_header *header; + unsigned int field_id; +}; + +/** + * struct devlink_dpipe_action - represents action operation + * @type: type of action + * @header_index: header index (packets can have several headers of same + * type like in case of tunnels) + * @header: header + * @field_id: field index + */ +struct devlink_dpipe_action { + enum devlink_dpipe_action_type type; + unsigned int header_index; + struct devlink_dpipe_header *header; + unsigned int field_id; +}; + +/** + * struct devlink_dpipe_value - represents value of match/action + * @action: action + * @match: match + * @mapping_value: in case the field has some mapping this value + * specifies the mapping value + * @mapping_valid: specify if mapping value is valid + * @value_size: value size + * @value: value + * @mask: bit mask + */ +struct devlink_dpipe_value { + union { + struct devlink_dpipe_action *action; + struct devlink_dpipe_match *match; + }; + unsigned int mapping_value; + bool mapping_valid; + unsigned int value_size; + void *value; + void *mask; +}; + +/** + * struct devlink_dpipe_entry - table entry object + * @index: index of the entry in the table + * @match_values: match values + * @match_values_count: count of matches tuples + * @action_values: actions values + * @action_values_count: count of actions values + * @counter: value of counter + * @counter_valid: Specify if value is valid from hardware + */ +struct 
devlink_dpipe_entry { + u64 index; + struct devlink_dpipe_value *match_values; + unsigned int match_values_count; + struct devlink_dpipe_value *action_values; + unsigned int action_values_count; + u64 counter; + bool counter_valid; +}; + +/** + * struct devlink_dpipe_dump_ctx - context provided to driver in order + * to dump + * @info: info + * @cmd: devlink command + * @skb: skb + * @nest: top attribute + * @hdr: hdr + */ +struct devlink_dpipe_dump_ctx { + struct genl_info *info; + enum devlink_command cmd; + struct sk_buff *skb; + struct nlattr *nest; + void *hdr; +}; + +struct devlink_dpipe_table_ops; + +/** + * struct devlink_dpipe_table - table object + * @priv: private + * @name: table name + * @size: maximum number of entries + * @counters_enabled: indicates if counters are active + * @counter_control_extern: indicates if counter control is in dpipe or + * external tool + * @table_ops: table operations + * @rcu: rcu + */ +struct devlink_dpipe_table { + void *priv; + struct list_head list; + const char *name; + u64 size; + bool counters_enabled; + bool counter_control_extern; + struct devlink_dpipe_table_ops *table_ops; + struct rcu_head rcu; +}; + +/** + * struct devlink_dpipe_table_ops - dpipe_table ops + * @actions_dump - dumps all tables actions + * @matches_dump - dumps all tables matches + * @entries_dump - dumps all active entries in the table + * @counters_set_update - when changing the counter status hardware sync + * maybe needed to allocate/free counter related + * resources + */ +struct devlink_dpipe_table_ops { + int (*actions_dump)(void *priv, struct sk_buff *skb); + int (*matches_dump)(void *priv, struct sk_buff *skb); + int (*entries_dump)(void *priv, bool counters_enabled, + struct devlink_dpipe_dump_ctx *dump_ctx); + int (*counters_set_update)(void *priv, bool enable); +}; + +/** + * struct devlink_dpipe_headers - dpipe headers + * @headers - header array can be shared (global bit) or driver specific + * @headers_count - count of headers + 
*/ +struct devlink_dpipe_headers { + struct devlink_dpipe_header **headers; + unsigned int headers_count; +}; + struct devlink_ops { int (*port_type_set)(struct devlink_port *devlink_port, enum devlink_port_type port_type); @@ -132,6 +306,26 @@ int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, u16 egress_pools_count, u16 ingress_tc_count, u16 egress_tc_count); void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index); +int devlink_dpipe_table_register(struct devlink *devlink, + const char *table_name, + struct devlink_dpipe_table_ops *table_ops, + void *priv, u64 size, + bool counter_control_extern); +void devlink_dpipe_table_unregister(struct devlink *devlink, + const char *table_name); +int devlink_dpipe_headers_register(struct devlink *devlink, + struct devlink_dpipe_headers *dpipe_headers); +void devlink_dpipe_headers_unregister(struct devlink *devlink); +bool devlink_dpipe_table_counter_enabled(struct devlink *devlink, + const char *table_name); +int devlink_dpipe_entry_ctx_prepare(struct devlink_dpipe_dump_ctx *dump_ctx); +int devlink_dpipe_entry_ctx_append(struct devlink_dpipe_dump_ctx *dump_ctx, + struct devlink_dpipe_entry *entry); +int devlink_dpipe_entry_ctx_close(struct devlink_dpipe_dump_ctx *dump_ctx); +int devlink_dpipe_action_put(struct sk_buff *skb, + struct devlink_dpipe_action *action); +int devlink_dpipe_match_put(struct sk_buff *skb, + struct devlink_dpipe_match *match); #else @@ -200,6 +394,71 @@ static inline void devlink_sb_unregister(struct devlink *devlink, { } +static inline int +devlink_dpipe_table_register(struct devlink *devlink, + const char *table_name, + struct devlink_dpipe_table_ops *table_ops, + void *priv, u64 size, + bool counter_control_extern) +{ + return 0; +} + +static inline void devlink_dpipe_table_unregister(struct devlink *devlink, + const char *table_name) +{ +} + +static inline int devlink_dpipe_headers_register(struct devlink *devlink, + struct devlink_dpipe_headers * + 
dpipe_headers) +{ + return 0; +} + +static inline void devlink_dpipe_headers_unregister(struct devlink *devlink) +{ +} + +static inline bool devlink_dpipe_table_counter_enabled(struct devlink *devlink, + const char *table_name) +{ + return false; +} + +static inline int +devlink_dpipe_entry_ctx_prepare(struct devlink_dpipe_dump_ctx *dump_ctx) +{ + return 0; +} + +static inline int +devlink_dpipe_entry_ctx_append(struct devlink_dpipe_dump_ctx *dump_ctx, + struct devlink_dpipe_entry *entry) +{ + return 0; +} + +static inline int +devlink_dpipe_entry_ctx_close(struct devlink_dpipe_dump_ctx *dump_ctx) +{ + return 0; +} + +static inline int +devlink_dpipe_action_put(struct sk_buff *skb, + struct devlink_dpipe_action *action) +{ + return 0; +} + +static inline int +devlink_dpipe_match_put(struct sk_buff *skb, + struct devlink_dpipe_match *match) +{ + return 0; +} + #endif #endif /* _NET_DEVLINK_H_ */ diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 0f1f3a12e23c..b47bee277347 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -65,8 +65,12 @@ enum devlink_command { #define DEVLINK_CMD_ESWITCH_MODE_SET /* obsolete, never use this! 
*/ \ DEVLINK_CMD_ESWITCH_SET - /* add new commands above here */ + DEVLINK_CMD_DPIPE_TABLE_GET, + DEVLINK_CMD_DPIPE_ENTRIES_GET, + DEVLINK_CMD_DPIPE_HEADERS_GET, + DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET, + /* add new commands above here */ __DEVLINK_CMD_MAX, DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1 }; @@ -148,10 +152,71 @@ enum devlink_attr { DEVLINK_ATTR_ESWITCH_MODE, /* u16 */ DEVLINK_ATTR_ESWITCH_INLINE_MODE, /* u8 */ + DEVLINK_ATTR_DPIPE_TABLES, /* nested */ + DEVLINK_ATTR_DPIPE_TABLE, /* nested */ + DEVLINK_ATTR_DPIPE_TABLE_NAME, /* string */ + DEVLINK_ATTR_DPIPE_TABLE_SIZE, /* u64 */ + DEVLINK_ATTR_DPIPE_TABLE_MATCHES, /* nested */ + DEVLINK_ATTR_DPIPE_TABLE_ACTIONS, /* nested */ + DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED, /* u8 */ + + DEVLINK_ATTR_DPIPE_ENTRIES, /* nested */ + DEVLINK_ATTR_DPIPE_ENTRY, /* nested */ + DEVLINK_ATTR_DPIPE_ENTRY_INDEX, /* u64 */ + DEVLINK_ATTR_DPIPE_ENTRY_MATCH_VALUES, /* nested */ + DEVLINK_ATTR_DPIPE_ENTRY_ACTION_VALUES, /* nested */ + DEVLINK_ATTR_DPIPE_ENTRY_COUNTER, /* u64 */ + + DEVLINK_ATTR_DPIPE_MATCH, /* nested */ + DEVLINK_ATTR_DPIPE_MATCH_VALUE, /* nested */ + DEVLINK_ATTR_DPIPE_MATCH_TYPE, /* u32 */ + + DEVLINK_ATTR_DPIPE_ACTION, /* nested */ + DEVLINK_ATTR_DPIPE_ACTION_VALUE, /* nested */ + DEVLINK_ATTR_DPIPE_ACTION_TYPE, /* u32 */ + + DEVLINK_ATTR_DPIPE_VALUE, + DEVLINK_ATTR_DPIPE_VALUE_MASK, + DEVLINK_ATTR_DPIPE_VALUE_MAPPING, /* u32 */ + + DEVLINK_ATTR_DPIPE_HEADERS, /* nested */ + DEVLINK_ATTR_DPIPE_HEADER, /* nested */ + DEVLINK_ATTR_DPIPE_HEADER_NAME, /* string */ + DEVLINK_ATTR_DPIPE_HEADER_ID, /* u32 */ + DEVLINK_ATTR_DPIPE_HEADER_FIELDS, /* nested */ + DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, /* u8 */ + DEVLINK_ATTR_DPIPE_HEADER_INDEX, /* u32 */ + + DEVLINK_ATTR_DPIPE_FIELD, /* nested */ + DEVLINK_ATTR_DPIPE_FIELD_NAME, /* string */ + DEVLINK_ATTR_DPIPE_FIELD_ID, /* u32 */ + DEVLINK_ATTR_DPIPE_FIELD_BITWIDTH, /* u32 */ + DEVLINK_ATTR_DPIPE_FIELD_MAPPING_TYPE, /* u32 */ + + DEVLINK_ATTR_PAD, + /* add new attributes 
above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, DEVLINK_ATTR_MAX = __DEVLINK_ATTR_MAX - 1 }; +/* Mapping between internal resource described by the field and system + * structure + */ +enum devlink_dpipe_field_mapping_type { + DEVLINK_DPIPE_FIELD_MAPPING_TYPE_NONE, + DEVLINK_DPIPE_FIELD_MAPPING_TYPE_IFINDEX, +}; + +/* Match type - specify the type of the match */ +enum devlink_dpipe_match_type { + DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT, +}; + +/* Action type - specify the action type */ +enum devlink_dpipe_action_type { + DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY, +}; + #endif /* _UAPI_LINUX_DEVLINK_H_ */ diff --git a/net/core/devlink.c b/net/core/devlink.c index e9c1e6acfb6d..24b766003a61 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -1493,8 +1493,686 @@ static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb, if (err) return err; } + return 0; +} + +int devlink_dpipe_match_put(struct sk_buff *skb, + struct devlink_dpipe_match *match) +{ + struct devlink_dpipe_header *header = match->header; + struct devlink_dpipe_field *field = &header->fields[match->field_id]; + struct nlattr *match_attr; + + match_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_MATCH); + if (!match_attr) + return -EMSGSIZE; + + if (nla_put_u32(skb, DEVLINK_ATTR_DPIPE_MATCH_TYPE, match->type) || + nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_INDEX, match->header_index) || + nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_ID, header->id) || + nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_ID, field->id) || + nla_put_u8(skb, DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, header->global)) + goto nla_put_failure; + + nla_nest_end(skb, match_attr); + return 0; + +nla_put_failure: + nla_nest_cancel(skb, match_attr); + return -EMSGSIZE; +} +EXPORT_SYMBOL_GPL(devlink_dpipe_match_put); + +static int devlink_dpipe_matches_put(struct devlink_dpipe_table *table, + struct sk_buff *skb) +{ + struct nlattr *matches_attr; + + matches_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLE_MATCHES); + if 
(!matches_attr) + return -EMSGSIZE; + + if (table->table_ops->matches_dump(table->priv, skb)) + goto nla_put_failure; + + nla_nest_end(skb, matches_attr); + return 0; + +nla_put_failure: + nla_nest_cancel(skb, matches_attr); + return -EMSGSIZE; +} + +int devlink_dpipe_action_put(struct sk_buff *skb, + struct devlink_dpipe_action *action) +{ + struct devlink_dpipe_header *header = action->header; + struct devlink_dpipe_field *field = &header->fields[action->field_id]; + struct nlattr *action_attr; + + action_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_ACTION); + if (!action_attr) + return -EMSGSIZE; + + if (nla_put_u32(skb, DEVLINK_ATTR_DPIPE_ACTION_TYPE, action->type) || + nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_INDEX, action->header_index) || + nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_ID, header->id) || + nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_ID, field->id) || + nla_put_u8(skb, DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, header->global)) + goto nla_put_failure; + + nla_nest_end(skb, action_attr); + return 0; + +nla_put_failure: + nla_nest_cancel(skb, action_attr); + return -EMSGSIZE; +} +EXPORT_SYMBOL_GPL(devlink_dpipe_action_put); + +static int devlink_dpipe_actions_put(struct devlink_dpipe_table *table, + struct sk_buff *skb) +{ + struct nlattr *actions_attr; + + actions_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLE_ACTIONS); + if (!actions_attr) + return -EMSGSIZE; + + if (table->table_ops->actions_dump(table->priv, skb)) + goto nla_put_failure; + + nla_nest_end(skb, actions_attr); + return 0; + +nla_put_failure: + nla_nest_cancel(skb, actions_attr); + return -EMSGSIZE; +} + +static int devlink_dpipe_table_put(struct sk_buff *skb, + struct devlink_dpipe_table *table) +{ + struct nlattr *table_attr; + + table_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLE); + if (!table_attr) + return -EMSGSIZE; + + if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_TABLE_NAME, table->name) || + nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_SIZE, table->size, + 
DEVLINK_ATTR_PAD)) + goto nla_put_failure; + if (nla_put_u8(skb, DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED, + table->counters_enabled)) + goto nla_put_failure; + + if (devlink_dpipe_matches_put(table, skb)) + goto nla_put_failure; + + if (devlink_dpipe_actions_put(table, skb)) + goto nla_put_failure; + + nla_nest_end(skb, table_attr); + return 0; + +nla_put_failure: + nla_nest_cancel(skb, table_attr); + return -EMSGSIZE; +} + +static int devlink_dpipe_send_and_alloc_skb(struct sk_buff **pskb, + struct genl_info *info) +{ + int err; + + if (*pskb) { + err = genlmsg_reply(*pskb, info); + if (err) + return err; + } + *pskb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!*pskb) + return -ENOMEM; + return 0; +} + +static int devlink_dpipe_tables_fill(struct genl_info *info, + enum devlink_command cmd, int flags, + struct list_head *dpipe_tables, + const char *table_name) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_dpipe_table *table; + struct nlattr *tables_attr; + struct sk_buff *skb = NULL; + struct nlmsghdr *nlh; + bool incomplete; + void *hdr; + int i; + int err; + + table = list_first_entry(dpipe_tables, + struct devlink_dpipe_table, list); +start_again: + err = devlink_dpipe_send_and_alloc_skb(&skb, info); + if (err) + return err; + + hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq, + &devlink_nl_family, NLM_F_MULTI, cmd); + if (!hdr) + return -EMSGSIZE; + + if (devlink_nl_put_handle(skb, devlink)) + goto nla_put_failure; + tables_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLES); + if (!tables_attr) + goto nla_put_failure; + + i = 0; + incomplete = false; + list_for_each_entry_from(table, dpipe_tables, list) { + if (!table_name) { + err = devlink_dpipe_table_put(skb, table); + if (err) { + if (!i) + goto err_table_put; + incomplete = true; + break; + } + } else { + if (!strcmp(table->name, table_name)) { + err = devlink_dpipe_table_put(skb, table); + if (err) + break; + } + } + i++; + } + + nla_nest_end(skb, tables_attr); 
+ genlmsg_end(skb, hdr); + if (incomplete) + goto start_again; + +send_done: + nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq, + NLMSG_DONE, 0, flags | NLM_F_MULTI); + if (!nlh) { + err = devlink_dpipe_send_and_alloc_skb(&skb, info); + if (err) + goto err_skb_send_alloc; + goto send_done; + } + + return genlmsg_reply(skb, info); + +nla_put_failure: + err = -EMSGSIZE; +err_table_put: +err_skb_send_alloc: + genlmsg_cancel(skb, hdr); + nlmsg_free(skb); + return err; +} + +static int devlink_nl_cmd_dpipe_table_get(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + const char *table_name = NULL; + + if (info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]) + table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]); + + return devlink_dpipe_tables_fill(info, DEVLINK_CMD_DPIPE_TABLE_GET, 0, + &devlink->dpipe_table_list, + table_name); +} + +static int devlink_dpipe_value_put(struct sk_buff *skb, + struct devlink_dpipe_value *value) +{ + if (nla_put(skb, DEVLINK_ATTR_DPIPE_VALUE, + value->value_size, value->value)) + return -EMSGSIZE; + if (value->mask) + if (nla_put(skb, DEVLINK_ATTR_DPIPE_VALUE_MASK, + value->value_size, value->mask)) + return -EMSGSIZE; + if (value->mapping_valid) + if (nla_put_u32(skb, DEVLINK_ATTR_DPIPE_VALUE_MAPPING, + value->mapping_value)) + return -EMSGSIZE; + return 0; +} + +static int devlink_dpipe_action_value_put(struct sk_buff *skb, + struct devlink_dpipe_value *value) +{ + if (!value->action) + return -EINVAL; + if (devlink_dpipe_action_put(skb, value->action)) + return -EMSGSIZE; + if (devlink_dpipe_value_put(skb, value)) + return -EMSGSIZE; + return 0; +} + +static int devlink_dpipe_action_values_put(struct sk_buff *skb, + struct devlink_dpipe_value *values, + unsigned int values_count) +{ + struct nlattr *action_attr; + int i; + int err; + + for (i = 0; i < values_count; i++) { + action_attr = nla_nest_start(skb, + DEVLINK_ATTR_DPIPE_ACTION_VALUE); + if (!action_attr) + return 
-EMSGSIZE; + err = devlink_dpipe_action_value_put(skb, &values[i]); + if (err) + goto err_action_value_put; + nla_nest_end(skb, action_attr); + } + return 0; + +err_action_value_put: + nla_nest_cancel(skb, action_attr); + return err; +} + +static int devlink_dpipe_match_value_put(struct sk_buff *skb, + struct devlink_dpipe_value *value) +{ + if (!value->match) + return -EINVAL; + if (devlink_dpipe_match_put(skb, value->match)) + return -EMSGSIZE; + if (devlink_dpipe_value_put(skb, value)) + return -EMSGSIZE; + return 0; +} + +static int devlink_dpipe_match_values_put(struct sk_buff *skb, + struct devlink_dpipe_value *values, + unsigned int values_count) +{ + struct nlattr *match_attr; + int i; + int err; + + for (i = 0; i < values_count; i++) { + match_attr = nla_nest_start(skb, + DEVLINK_ATTR_DPIPE_MATCH_VALUE); + if (!match_attr) + return -EMSGSIZE; + err = devlink_dpipe_match_value_put(skb, &values[i]); + if (err) + goto err_match_value_put; + nla_nest_end(skb, match_attr); + } + return 0; + +err_match_value_put: + nla_nest_cancel(skb, match_attr); + return err; +} + +static int devlink_dpipe_entry_put(struct sk_buff *skb, + struct devlink_dpipe_entry *entry) +{ + struct nlattr *entry_attr, *matches_attr, *actions_attr; + int err; + + entry_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_ENTRY); + if (!entry_attr) + return -EMSGSIZE; + + if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_ENTRY_INDEX, entry->index, + DEVLINK_ATTR_PAD)) + goto nla_put_failure; + if (entry->counter_valid) + if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_ENTRY_COUNTER, + entry->counter, DEVLINK_ATTR_PAD)) + goto nla_put_failure; + + matches_attr = nla_nest_start(skb, + DEVLINK_ATTR_DPIPE_ENTRY_MATCH_VALUES); + if (!matches_attr) + goto nla_put_failure; + + err = devlink_dpipe_match_values_put(skb, entry->match_values, + entry->match_values_count); + if (err) { + nla_nest_cancel(skb, matches_attr); + goto err_match_values_put; + } + nla_nest_end(skb, matches_attr); + + actions_attr = 
nla_nest_start(skb, + DEVLINK_ATTR_DPIPE_ENTRY_ACTION_VALUES); + if (!actions_attr) + goto nla_put_failure; + + err = devlink_dpipe_action_values_put(skb, entry->action_values, + entry->action_values_count); + if (err) { + nla_nest_cancel(skb, actions_attr); + goto err_action_values_put; + } + nla_nest_end(skb, actions_attr); + nla_nest_end(skb, entry_attr); return 0; + +nla_put_failure: + err = -EMSGSIZE; +err_match_values_put: +err_action_values_put: + nla_nest_cancel(skb, entry_attr); + return err; +} + +static struct devlink_dpipe_table * +devlink_dpipe_table_find(struct list_head *dpipe_tables, + const char *table_name) +{ + struct devlink_dpipe_table *table; + + list_for_each_entry_rcu(table, dpipe_tables, list) { + if (!strcmp(table->name, table_name)) + return table; + } + return NULL; +} + +int devlink_dpipe_entry_ctx_prepare(struct devlink_dpipe_dump_ctx *dump_ctx) +{ + struct devlink *devlink; + int err; + + err = devlink_dpipe_send_and_alloc_skb(&dump_ctx->skb, + dump_ctx->info); + if (err) + return err; + + dump_ctx->hdr = genlmsg_put(dump_ctx->skb, + dump_ctx->info->snd_portid, + dump_ctx->info->snd_seq, + &devlink_nl_family, NLM_F_MULTI, + dump_ctx->cmd); + if (!dump_ctx->hdr) + goto nla_put_failure; + + devlink = dump_ctx->info->user_ptr[0]; + if (devlink_nl_put_handle(dump_ctx->skb, devlink)) + goto nla_put_failure; + dump_ctx->nest = nla_nest_start(dump_ctx->skb, + DEVLINK_ATTR_DPIPE_ENTRIES); + if (!dump_ctx->nest) + goto nla_put_failure; + return 0; + +nla_put_failure: + genlmsg_cancel(dump_ctx->skb, dump_ctx->hdr); + nlmsg_free(dump_ctx->skb); + return -EMSGSIZE; +} +EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_prepare); + +int devlink_dpipe_entry_ctx_append(struct devlink_dpipe_dump_ctx *dump_ctx, + struct devlink_dpipe_entry *entry) +{ + return devlink_dpipe_entry_put(dump_ctx->skb, entry); +} +EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_append); + +int devlink_dpipe_entry_ctx_close(struct devlink_dpipe_dump_ctx *dump_ctx) +{ + 
nla_nest_end(dump_ctx->skb, dump_ctx->nest); + genlmsg_end(dump_ctx->skb, dump_ctx->hdr); + return 0; +} +EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_close); + +static int devlink_dpipe_entries_fill(struct genl_info *info, + enum devlink_command cmd, int flags, + struct devlink_dpipe_table *table) +{ + struct devlink_dpipe_dump_ctx dump_ctx; + struct nlmsghdr *nlh; + int err; + + dump_ctx.skb = NULL; + dump_ctx.cmd = cmd; + dump_ctx.info = info; + + err = table->table_ops->entries_dump(table->priv, + table->counters_enabled, + &dump_ctx); + if (err) + goto err_entries_dump; + +send_done: + nlh = nlmsg_put(dump_ctx.skb, info->snd_portid, info->snd_seq, + NLMSG_DONE, 0, flags | NLM_F_MULTI); + if (!nlh) { + err = devlink_dpipe_send_and_alloc_skb(&dump_ctx.skb, info); + if (err) + goto err_skb_send_alloc; + goto send_done; + } + return genlmsg_reply(dump_ctx.skb, info); + +err_entries_dump: +err_skb_send_alloc: + genlmsg_cancel(dump_ctx.skb, dump_ctx.hdr); + nlmsg_free(dump_ctx.skb); + return err; +} + +static int devlink_nl_cmd_dpipe_entries_get(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_dpipe_table *table; + const char *table_name; + + if (!info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]) + return -EINVAL; + + table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]); + table = devlink_dpipe_table_find(&devlink->dpipe_table_list, + table_name); + if (!table) + return -EINVAL; + + if (!table->table_ops->entries_dump) + return -EINVAL; + + return devlink_dpipe_entries_fill(info, DEVLINK_CMD_DPIPE_ENTRIES_GET, + 0, table); +} + +static int devlink_dpipe_fields_put(struct sk_buff *skb, + const struct devlink_dpipe_header *header) +{ + struct devlink_dpipe_field *field; + struct nlattr *field_attr; + int i; + + for (i = 0; i < header->fields_count; i++) { + field = &header->fields[i]; + field_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_FIELD); + if (!field_attr) + return -EMSGSIZE; + if 
(nla_put_string(skb, DEVLINK_ATTR_DPIPE_FIELD_NAME, field->name) || + nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_ID, field->id) || + nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_BITWIDTH, field->bitwidth) || + nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_MAPPING_TYPE, field->mapping_type)) + goto nla_put_failure; + nla_nest_end(skb, field_attr); + } + return 0; + +nla_put_failure: + nla_nest_cancel(skb, field_attr); + return -EMSGSIZE; +} + +static int devlink_dpipe_header_put(struct sk_buff *skb, + struct devlink_dpipe_header *header) +{ + struct nlattr *fields_attr, *header_attr; + int err; + + header_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_HEADER); + if (!header) + return -EMSGSIZE; + + if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_HEADER_NAME, header->name) || + nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_ID, header->id) || + nla_put_u8(skb, DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, header->global)) + goto nla_put_failure; + + fields_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_HEADER_FIELDS); + if (!fields_attr) + goto nla_put_failure; + + err = devlink_dpipe_fields_put(skb, header); + if (err) { + nla_nest_cancel(skb, fields_attr); + goto nla_put_failure; + } + nla_nest_end(skb, fields_attr); + nla_nest_end(skb, header_attr); + return 0; + +nla_put_failure: + err = -EMSGSIZE; + nla_nest_cancel(skb, header_attr); + return err; +} + +static int devlink_dpipe_headers_fill(struct genl_info *info, + enum devlink_command cmd, int flags, + struct devlink_dpipe_headers * + dpipe_headers) +{ + struct devlink *devlink = info->user_ptr[0]; + struct nlattr *headers_attr; + struct sk_buff *skb = NULL; + struct nlmsghdr *nlh; + void *hdr; + int i, j; + int err; + + i = 0; +start_again: + err = devlink_dpipe_send_and_alloc_skb(&skb, info); + if (err) + return err; + + hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq, + &devlink_nl_family, NLM_F_MULTI, cmd); + if (!hdr) + return -EMSGSIZE; + + if (devlink_nl_put_handle(skb, devlink)) + goto nla_put_failure; + headers_attr = 
nla_nest_start(skb, DEVLINK_ATTR_DPIPE_HEADERS); + if (!headers_attr) + goto nla_put_failure; + + j = 0; + for (; i < dpipe_headers->headers_count; i++) { + err = devlink_dpipe_header_put(skb, dpipe_headers->headers[i]); + if (err) { + if (!j) + goto err_table_put; + break; + } + j++; + } + nla_nest_end(skb, headers_attr); + genlmsg_end(skb, hdr); + if (i != dpipe_headers->headers_count) + goto start_again; + +send_done: + nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq, + NLMSG_DONE, 0, flags | NLM_F_MULTI); + if (!nlh) { + err = devlink_dpipe_send_and_alloc_skb(&skb, info); + if (err) + goto err_skb_send_alloc; + goto send_done; + } + return genlmsg_reply(skb, info); + +nla_put_failure: + err = -EMSGSIZE; +err_table_put: +err_skb_send_alloc: + genlmsg_cancel(skb, hdr); + nlmsg_free(skb); + return err; +} + +static int devlink_nl_cmd_dpipe_headers_get(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + + if (!devlink->dpipe_headers) + return -EOPNOTSUPP; + return devlink_dpipe_headers_fill(info, DEVLINK_CMD_DPIPE_HEADERS_GET, + 0, devlink->dpipe_headers); +} + +static int devlink_dpipe_table_counters_set(struct devlink *devlink, + const char *table_name, + bool enable) +{ + struct devlink_dpipe_table *table; + + table = devlink_dpipe_table_find(&devlink->dpipe_table_list, + table_name); + if (!table) + return -EINVAL; + + if (table->counter_control_extern) + return -EOPNOTSUPP; + + if (!(table->counters_enabled ^ enable)) + return 0; + + table->counters_enabled = enable; + if (table->table_ops->counters_set_update) + table->table_ops->counters_set_update(table->priv, enable); + return 0; +} + +static int devlink_nl_cmd_dpipe_table_counters_set(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + const char *table_name; + bool counters_enable; + + if (!info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME] || + !info->attrs[DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED]) + return 
-EINVAL; + + table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]); + counters_enable = !!nla_get_u8(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED]); + + return devlink_dpipe_table_counters_set(devlink, table_name, + counters_enable); } static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { @@ -1512,6 +2190,8 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_SB_TC_INDEX] = { .type = NLA_U16 }, [DEVLINK_ATTR_ESWITCH_MODE] = { .type = NLA_U16 }, [DEVLINK_ATTR_ESWITCH_INLINE_MODE] = { .type = NLA_U8 }, + [DEVLINK_ATTR_DPIPE_TABLE_NAME] = { .type = NLA_NUL_STRING }, + [DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED] = { .type = NLA_U8 }, }; static const struct genl_ops devlink_nl_ops[] = { @@ -1644,6 +2324,34 @@ static const struct genl_ops devlink_nl_ops[] = { .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, + { + .cmd = DEVLINK_CMD_DPIPE_TABLE_GET, + .doit = devlink_nl_cmd_dpipe_table_get, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + }, + { + .cmd = DEVLINK_CMD_DPIPE_ENTRIES_GET, + .doit = devlink_nl_cmd_dpipe_entries_get, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + }, + { + .cmd = DEVLINK_CMD_DPIPE_HEADERS_GET, + .doit = devlink_nl_cmd_dpipe_headers_get, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + }, + { + .cmd = DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET, + .doit = devlink_nl_cmd_dpipe_table_counters_set, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + }, }; static struct genl_family devlink_nl_family __ro_after_init = { @@ -1680,6 +2388,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size) devlink_net_set(devlink, &init_net); INIT_LIST_HEAD(&devlink->port_list); 
INIT_LIST_HEAD(&devlink->sb_list); + INIT_LIST_HEAD_RCU(&devlink->dpipe_table_list); return devlink; } EXPORT_SYMBOL_GPL(devlink_alloc); @@ -1880,6 +2589,133 @@ void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index) } EXPORT_SYMBOL_GPL(devlink_sb_unregister); +/** + * devlink_dpipe_headers_register - register dpipe headers + * + * @devlink: devlink + * @dpipe_headers: dpipe header array + * + * Register the headers supported by hardware. + */ +int devlink_dpipe_headers_register(struct devlink *devlink, + struct devlink_dpipe_headers *dpipe_headers) +{ + mutex_lock(&devlink_mutex); + devlink->dpipe_headers = dpipe_headers; + mutex_unlock(&devlink_mutex); + return 0; +} +EXPORT_SYMBOL_GPL(devlink_dpipe_headers_register); + +/** + * devlink_dpipe_headers_unregister - unregister dpipe headers + * + * @devlink: devlink + * + * Unregister the headers supported by hardware. + */ +void devlink_dpipe_headers_unregister(struct devlink *devlink) +{ + mutex_lock(&devlink_mutex); + devlink->dpipe_headers = NULL; + mutex_unlock(&devlink_mutex); +} +EXPORT_SYMBOL_GPL(devlink_dpipe_headers_unregister); + +/** + * devlink_dpipe_table_counter_enabled - check if counter allocation + * required + * @devlink: devlink + * @table_name: tables name + * + * Used by driver to check if counter allocation is required. + * After counter allocation is turned on the table entries + * are updated to include counter statistics. + * + * After that point on the driver must respect the counter + * state so that each entry added to the table is added + * with a counter. 
+ */ +bool devlink_dpipe_table_counter_enabled(struct devlink *devlink, + const char *table_name) +{ + struct devlink_dpipe_table *table; + bool enabled; + + rcu_read_lock(); + table = devlink_dpipe_table_find(&devlink->dpipe_table_list, + table_name); + enabled = false; + if (table) + enabled = table->counters_enabled; + rcu_read_unlock(); + return enabled; +} +EXPORT_SYMBOL_GPL(devlink_dpipe_table_counter_enabled); + +/** + * devlink_dpipe_table_register - register dpipe table + * + * @devlink: devlink + * @table_name: table name + * @table_ops: table ops + * @priv: priv + * @size: size + * @counter_control_extern: external control for counters + */ +int devlink_dpipe_table_register(struct devlink *devlink, + const char *table_name, + struct devlink_dpipe_table_ops *table_ops, + void *priv, u64 size, + bool counter_control_extern) +{ + struct devlink_dpipe_table *table; + + if (devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name)) + return -EEXIST; + + table = kzalloc(sizeof(*table), GFP_KERNEL); + if (!table) + return -ENOMEM; + + table->name = table_name; + table->table_ops = table_ops; + table->priv = priv; + table->size = size; + table->counter_control_extern = counter_control_extern; + + mutex_lock(&devlink_mutex); + list_add_tail_rcu(&table->list, &devlink->dpipe_table_list); + mutex_unlock(&devlink_mutex); + return 0; +} +EXPORT_SYMBOL_GPL(devlink_dpipe_table_register); + +/** + * devlink_dpipe_table_unregister - unregister dpipe table + * + * @devlink: devlink + * @table_name: table name + */ +void devlink_dpipe_table_unregister(struct devlink *devlink, + const char *table_name) +{ + struct devlink_dpipe_table *table; + + mutex_lock(&devlink_mutex); + table = devlink_dpipe_table_find(&devlink->dpipe_table_list, + table_name); + if (!table) + goto unlock; + list_del_rcu(&table->list); + mutex_unlock(&devlink_mutex); + kfree_rcu(table, rcu); + return; +unlock: + mutex_unlock(&devlink_mutex); +} +EXPORT_SYMBOL_GPL(devlink_dpipe_table_unregister); 
+ static int __init devlink_module_init(void) { return genl_register_family(&devlink_nl_family); -- cgit v1.2.3-71-gd317 From 983701eb0676db96c604db8a3f7ae936a7029ff5 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 28 Mar 2017 14:28:01 -0700 Subject: rtnetlink: Add RTM_DELNETCONF Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/uapi/linux/rtnetlink.h | 2 ++ security/selinux/nlmsgtab.c | 1 + 2 files changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 3dd72aee4d32..cce061382e40 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -122,6 +122,8 @@ enum { RTM_NEWNETCONF = 80, #define RTM_NEWNETCONF RTM_NEWNETCONF + RTM_DELNETCONF, +#define RTM_DELNETCONF RTM_DELNETCONF RTM_GETNETCONF = 82, #define RTM_GETNETCONF RTM_GETNETCONF diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c index 2ca9cde939d4..8e67bb4c9cab 100644 --- a/security/selinux/nlmsgtab.c +++ b/security/selinux/nlmsgtab.c @@ -69,6 +69,7 @@ static struct nlmsg_perm nlmsg_route_perms[] = { RTM_GETDCB, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_SETDCB, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_NEWNETCONF, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, + { RTM_DELNETCONF, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_GETNETCONF, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_NEWMDB, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_DELMDB, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, -- cgit v1.2.3-71-gd317 From 9ad59fea162c139f62335f0ca0ce1fdf4f82bd91 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Thu, 2 Mar 2017 16:05:45 +0100 Subject: drm/etnaviv: submit support for in-fences Loosely based on commit f0a42bb5423a ("drm/msm: submit support for in-fences"). Unfortunately, struct drm_etnaviv_gem_submit doesn't have a flags field yet, so we have to extend the structure and trust that drm_ioctl will clear the flags for us if an older userspace only submits part of the struct. 
Signed-off-by: Philipp Zabel Reviewed-by: Gustavo Padovan Reviewed-by: Sumit Semwal Reviewed-by: Lucas Stach Signed-off-by: Lucas Stach --- drivers/gpu/drm/etnaviv/Kconfig | 1 + drivers/gpu/drm/etnaviv/etnaviv_gem.h | 1 + drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c | 34 +++++++++++++++++++++++++++- drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 5 +++- drivers/gpu/drm/etnaviv/etnaviv_gpu.h | 2 +- include/uapi/drm/etnaviv_drm.h | 6 +++++ 6 files changed, 46 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/etnaviv/Kconfig b/drivers/gpu/drm/etnaviv/Kconfig index cc1731c5289c..71cee4e9fefb 100644 --- a/drivers/gpu/drm/etnaviv/Kconfig +++ b/drivers/gpu/drm/etnaviv/Kconfig @@ -5,6 +5,7 @@ config DRM_ETNAVIV depends on ARCH_MXC || ARCH_DOVE || (ARM && COMPILE_TEST) depends on MMU select SHMEM + select SYNC_FILE select TMPFS select IOMMU_API select IOMMU_SUPPORT diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.h b/drivers/gpu/drm/etnaviv/etnaviv_gem.h index e63ff116a3b3..120410d67eb5 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.h @@ -107,6 +107,7 @@ struct etnaviv_gem_submit { u32 fence; unsigned int nr_bos; struct etnaviv_gem_submit_bo bos[0]; + u32 flags; }; int etnaviv_gem_wait_bo(struct etnaviv_gpu *gpu, struct drm_gem_object *obj, diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c index 726090d7a6ac..fb8d5befbf4f 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c @@ -14,7 +14,9 @@ * this program. If not, see . 
*/ +#include #include +#include #include "etnaviv_cmdbuf.h" #include "etnaviv_drv.h" #include "etnaviv_gpu.h" @@ -169,8 +171,10 @@ static int submit_fence_sync(const struct etnaviv_gem_submit *submit) for (i = 0; i < submit->nr_bos; i++) { struct etnaviv_gem_object *etnaviv_obj = submit->bos[i].obj; bool write = submit->bos[i].flags & ETNA_SUBMIT_BO_WRITE; + bool explicit = !(submit->flags & ETNA_SUBMIT_NO_IMPLICIT); - ret = etnaviv_gpu_fence_sync_obj(etnaviv_obj, context, write); + ret = etnaviv_gpu_fence_sync_obj(etnaviv_obj, context, write, + explicit); if (ret) break; } @@ -303,6 +307,7 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data, struct etnaviv_gem_submit *submit; struct etnaviv_cmdbuf *cmdbuf; struct etnaviv_gpu *gpu; + struct dma_fence *in_fence = NULL; void *stream; int ret; @@ -326,6 +331,11 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data, return -EINVAL; } + if (args->flags & ~ETNA_SUBMIT_FLAGS) { + DRM_ERROR("invalid flags: 0x%x\n", args->flags); + return -EINVAL; + } + /* * Copy the command submission and bo array to kernel space in * one go, and do this outside of any locks. @@ -371,6 +381,8 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data, goto err_submit_cmds; } + submit->flags = args->flags; + ret = submit_lookup_objects(submit, file, bos, args->nr_bos); if (ret) goto err_submit_objects; @@ -385,6 +397,24 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data, goto err_submit_objects; } + if (args->flags & ETNA_SUBMIT_FENCE_FD_IN) { + in_fence = sync_file_get_fence(args->fence_fd); + if (!in_fence) { + ret = -EINVAL; + goto err_submit_objects; + } + + /* + * Wait if the fence is from a foreign context, or if the fence + * array contains any fence from a foreign context. 
+ */ + if (!dma_fence_match_context(in_fence, gpu->fence_context)) { + ret = dma_fence_wait(in_fence, true); + if (ret) + goto err_submit_objects; + } + } + ret = submit_fence_sync(submit); if (ret) goto err_submit_objects; @@ -419,6 +449,8 @@ out: flush_workqueue(priv->wq); err_submit_objects: + if (in_fence) + dma_fence_put(in_fence); submit_cleanup(submit); err_submit_cmds: diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index cab4cf546c17..68a4f59e4c22 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -1077,7 +1077,7 @@ static struct dma_fence *etnaviv_gpu_fence_alloc(struct etnaviv_gpu *gpu) } int etnaviv_gpu_fence_sync_obj(struct etnaviv_gem_object *etnaviv_obj, - unsigned int context, bool exclusive) + unsigned int context, bool exclusive, bool explicit) { struct reservation_object *robj = etnaviv_obj->resv; struct reservation_object_list *fobj; @@ -1090,6 +1090,9 @@ int etnaviv_gpu_fence_sync_obj(struct etnaviv_gem_object *etnaviv_obj, return ret; } + if (explicit) + return 0; + /* * If we have any shared fences, then the exclusive fence * should be ignored as it will already have been signalled. 
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h index 6a1e68eec24c..9227a9740447 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h @@ -183,7 +183,7 @@ int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m); #endif int etnaviv_gpu_fence_sync_obj(struct etnaviv_gem_object *etnaviv_obj, - unsigned int context, bool exclusive); + unsigned int context, bool exclusive, bool implicit); void etnaviv_gpu_retire(struct etnaviv_gpu *gpu); int etnaviv_gpu_wait_fence_interruptible(struct etnaviv_gpu *gpu, diff --git a/include/uapi/drm/etnaviv_drm.h b/include/uapi/drm/etnaviv_drm.h index 2584c1cca42f..e9c388a1d8eb 100644 --- a/include/uapi/drm/etnaviv_drm.h +++ b/include/uapi/drm/etnaviv_drm.h @@ -154,6 +154,10 @@ struct drm_etnaviv_gem_submit_bo { * one or more cmdstream buffers. This allows for conditional execution * (context-restore), and IB buffers needed for per tile/bin draw cmds. */ +#define ETNA_SUBMIT_NO_IMPLICIT 0x0001 +#define ETNA_SUBMIT_FENCE_FD_IN 0x0002 +#define ETNA_SUBMIT_FLAGS (ETNA_SUBMIT_NO_IMPLICIT | \ + ETNA_SUBMIT_FENCE_FD_IN) #define ETNA_PIPE_3D 0x00 #define ETNA_PIPE_2D 0x01 #define ETNA_PIPE_VG 0x02 @@ -167,6 +171,8 @@ struct drm_etnaviv_gem_submit { __u64 bos; /* in, ptr to array of submit_bo's */ __u64 relocs; /* in, ptr to array of submit_reloc's */ __u64 stream; /* in, ptr to cmdstream */ + __u32 flags; /* in, mask of ETNA_SUBMIT_x */ + __s32 fence_fd; /* in, fence fd (see ETNA_SUBMIT_FENCE_FD_IN) */ }; /* The normal way to synchronize with the GPU is just to CPU_PREP on -- cgit v1.2.3-71-gd317 From 78ec187f64fa5d8f837b8fc5bbbad88a89b63ab4 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Thu, 2 Mar 2017 16:14:43 +0100 Subject: drm/etnaviv: submit support for out-fences Based on commit 4cd0945901a6 ("drm/msm: submit support for out-fences"). We increment the minor driver version so userspace can detect explicit fence support. 
Signed-off-by: Philipp Zabel Signed-off-by: Lucas Stach --- v3: Changed to work with fence returned from GPU submit. --- drivers/gpu/drm/etnaviv/etnaviv_drv.c | 2 +- drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c | 28 ++++++++++++++++++++++++++++ include/uapi/drm/etnaviv_drm.h | 6 ++++-- 3 files changed, 33 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c index 289a9f8c6671..5255278dde56 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c @@ -512,7 +512,7 @@ static struct drm_driver etnaviv_drm_driver = { .desc = "etnaviv DRM", .date = "20151214", .major = 1, - .minor = 0, + .minor = 1, }; /* diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c index 1b6f9b843815..e1909429837e 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c @@ -309,6 +309,8 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data, struct etnaviv_cmdbuf *cmdbuf; struct etnaviv_gpu *gpu; struct dma_fence *in_fence = NULL; + struct sync_file *sync_file = NULL; + int out_fence_fd = -1; void *stream; int ret; @@ -376,6 +378,14 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data, goto err_submit_cmds; } + if (args->flags & ETNA_SUBMIT_FENCE_FD_OUT) { + out_fence_fd = get_unused_fd_flags(O_CLOEXEC); + if (out_fence_fd < 0) { + ret = out_fence_fd; + goto err_submit_cmds; + } + } + submit = submit_create(dev, gpu, args->nr_bos); if (!submit) { ret = -ENOMEM; @@ -436,6 +446,22 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data, if (ret == 0) cmdbuf = NULL; + if (args->flags & ETNA_SUBMIT_FENCE_FD_OUT) { + /* + * This can be improved: ideally we want to allocate the sync + * file before kicking off the GPU job and just attach the + * fence to the sync file here, eliminating the ENOMEM + * possibility at this stage. 
+ */ + sync_file = sync_file_create(submit->fence); + if (!sync_file) { + ret = -ENOMEM; + goto out; + } + fd_install(out_fence_fd, sync_file->file); + } + + args->fence_fd = out_fence_fd; args->fence = submit->fence->seqno; out: @@ -455,6 +481,8 @@ err_submit_objects: submit_cleanup(submit); err_submit_cmds: + if (ret && (out_fence_fd >= 0)) + put_unused_fd(out_fence_fd); /* if we still own the cmdbuf */ if (cmdbuf) etnaviv_cmdbuf_free(cmdbuf); diff --git a/include/uapi/drm/etnaviv_drm.h b/include/uapi/drm/etnaviv_drm.h index e9c388a1d8eb..76f6f78a352b 100644 --- a/include/uapi/drm/etnaviv_drm.h +++ b/include/uapi/drm/etnaviv_drm.h @@ -156,8 +156,10 @@ struct drm_etnaviv_gem_submit_bo { */ #define ETNA_SUBMIT_NO_IMPLICIT 0x0001 #define ETNA_SUBMIT_FENCE_FD_IN 0x0002 +#define ETNA_SUBMIT_FENCE_FD_OUT 0x0004 #define ETNA_SUBMIT_FLAGS (ETNA_SUBMIT_NO_IMPLICIT | \ - ETNA_SUBMIT_FENCE_FD_IN) + ETNA_SUBMIT_FENCE_FD_IN | \ + ETNA_SUBMIT_FENCE_FD_OUT) #define ETNA_PIPE_3D 0x00 #define ETNA_PIPE_2D 0x01 #define ETNA_PIPE_VG 0x02 @@ -172,7 +174,7 @@ struct drm_etnaviv_gem_submit { __u64 relocs; /* in, ptr to array of submit_reloc's */ __u64 stream; /* in, ptr to cmdstream */ __u32 flags; /* in, mask of ETNA_SUBMIT_x */ - __s32 fence_fd; /* in, fence fd (see ETNA_SUBMIT_FENCE_FD_IN) */ + __s32 fence_fd; /* in/out, fence fd (see ETNA_SUBMIT_FENCE_FD_x) */ }; /* The normal way to synchronize with the GPU is just to CPU_PREP on -- cgit v1.2.3-71-gd317 From b85891bd6d1bf887b3398f4c44b7a30b37f4485e Mon Sep 17 00:00:00 2001 From: Junwei Zhang Date: Mon, 16 Jan 2017 13:59:01 +0800 Subject: drm/amdgpu: IOCTL interface for PRT support v4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Till GFX8 we can only enable PRT support globally, but with the next hardware generation we can do this on a per page basis. Keep the interface consistent by adding PRT mappings and enable support globally on current hardware when the first mapping is made. 
v2: disable PRT support delayed and on all error paths v3: PRT and other permissions are mutually exclusive, PRT mappings don't need a BO. v4: update PRT mappings during CS as well, make va_flags 64bit Signed-off-by: Junwei Zhang Signed-off-by: Christian König Reviewed-by: Nicolai Hähnle Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 16 +++++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 62 ++++++++++++++++++++------------- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 10 ++++++ include/uapi/drm/amdgpu_drm.h | 2 ++ 5 files changed, 64 insertions(+), 27 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 618f12884eed..b9212537b17d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -701,6 +701,7 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); struct amdgpu_fpriv { struct amdgpu_vm vm; + struct amdgpu_bo_va *prt_va; struct mutex bo_list_lock; struct idr bo_list_handles; struct amdgpu_ctx_mgr ctx_mgr; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 99424cb8020b..89dcb07ab213 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -759,10 +759,11 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo amdgpu_bo_unref(&parser->uf_entry.robj); } -static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p, - struct amdgpu_vm *vm) +static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p) { struct amdgpu_device *adev = p->adev; + struct amdgpu_fpriv *fpriv = p->filp->driver_priv; + struct amdgpu_vm *vm = &fpriv->vm; struct amdgpu_bo_va *bo_va; struct amdgpu_bo *bo; int i, r; @@ -779,6 +780,15 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p, if (r) return r; + r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false); + if (r) + return r; + + r
= amdgpu_sync_fence(adev, &p->job->sync, + fpriv->prt_va->last_pt_update); + if (r) + return r; + if (amdgpu_sriov_vf(adev)) { struct dma_fence *f; bo_va = vm->csa_bo_va; @@ -855,7 +865,7 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev, if (p->job->vm) { p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->page_directory); - r = amdgpu_bo_vm_update_pte(p, vm); + r = amdgpu_bo_vm_update_pte(p); if (r) return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 106cf83c2e6b..3c22656aa1bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -553,6 +553,12 @@ error: int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { + const uint32_t valid_flags = AMDGPU_VM_DELAY_UPDATE | + AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE | + AMDGPU_VM_PAGE_EXECUTABLE; + const uint32_t prt_flags = AMDGPU_VM_DELAY_UPDATE | + AMDGPU_VM_PAGE_PRT; + struct drm_amdgpu_gem_va *args = data; struct drm_gem_object *gobj; struct amdgpu_device *adev = dev->dev_private; @@ -563,7 +569,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, struct ttm_validate_buffer tv; struct ww_acquire_ctx ticket; struct list_head list; - uint32_t invalid_flags, va_flags = 0; + uint64_t va_flags = 0; int r = 0; if (!adev->vm_manager.enabled) @@ -577,11 +583,9 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, return -EINVAL; } - invalid_flags = ~(AMDGPU_VM_DELAY_UPDATE | AMDGPU_VM_PAGE_READABLE | - AMDGPU_VM_PAGE_WRITEABLE | AMDGPU_VM_PAGE_EXECUTABLE); - if ((args->flags & invalid_flags)) { - dev_err(&dev->pdev->dev, "invalid flags 0x%08X vs 0x%08X\n", - args->flags, invalid_flags); + if ((args->flags & ~valid_flags) && (args->flags & ~prt_flags)) { + dev_err(&dev->pdev->dev, "invalid flags combination 0x%08X\n", + args->flags); return -EINVAL; } @@ -595,28 +599,34 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, return -EINVAL; } - 
gobj = drm_gem_object_lookup(filp, args->handle); - if (gobj == NULL) - return -ENOENT; - abo = gem_to_amdgpu_bo(gobj); INIT_LIST_HEAD(&list); - tv.bo = &abo->tbo; - tv.shared = false; - list_add(&tv.head, &list); + if (!(args->flags & AMDGPU_VM_PAGE_PRT)) { + gobj = drm_gem_object_lookup(filp, args->handle); + if (gobj == NULL) + return -ENOENT; + abo = gem_to_amdgpu_bo(gobj); + tv.bo = &abo->tbo; + tv.shared = false; + list_add(&tv.head, &list); + } else { + gobj = NULL; + abo = NULL; + } amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd); r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL); - if (r) { - drm_gem_object_unreference_unlocked(gobj); - return r; - } + if (r) + goto error_unref; - bo_va = amdgpu_vm_bo_find(&fpriv->vm, abo); - if (!bo_va) { - ttm_eu_backoff_reservation(&ticket, &list); - drm_gem_object_unreference_unlocked(gobj); - return -ENOENT; + if (abo) { + bo_va = amdgpu_vm_bo_find(&fpriv->vm, abo); + if (!bo_va) { + r = -ENOENT; + goto error_backoff; + } + } else { + bo_va = fpriv->prt_va; } switch (args->operation) { @@ -627,6 +637,8 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, va_flags |= AMDGPU_PTE_WRITEABLE; if (args->flags & AMDGPU_VM_PAGE_EXECUTABLE) va_flags |= AMDGPU_PTE_EXECUTABLE; + if (args->flags & AMDGPU_VM_PAGE_PRT) + va_flags |= AMDGPU_PTE_PRT; r = amdgpu_vm_bo_map(adev, bo_va, args->va_address, args->offset_in_bo, args->map_size, va_flags); @@ -637,11 +649,13 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, default: break; } - if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && - !amdgpu_vm_debug) + if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !amdgpu_vm_debug) amdgpu_gem_va_update_vm(adev, bo_va, &list, args->operation); + +error_backoff: ttm_eu_backoff_reservation(&ticket, &list); +error_unref: drm_gem_object_unreference_unlocked(gobj); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 61d94c745672..49f93ee019e3 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -655,6 +655,14 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) goto out_suspend; } + fpriv->prt_va = amdgpu_vm_bo_add(adev, &fpriv->vm, NULL); + if (!fpriv->prt_va) { + r = -ENOMEM; + amdgpu_vm_fini(adev, &fpriv->vm); + kfree(fpriv); + goto out_suspend; + } + if (amdgpu_sriov_vf(adev)) { r = amdgpu_map_static_csa(adev, &fpriv->vm); if (r) @@ -699,6 +707,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, amdgpu_uvd_free_handles(adev, file_priv); amdgpu_vce_free_handles(adev, file_priv); + amdgpu_vm_bo_rmv(adev, fpriv->prt_va); + if (amdgpu_sriov_vf(adev)) { /* TODO: how to handle reserve failure */ BUG_ON(amdgpu_bo_reserve(adev->virt.csa_obj, false)); diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 5797283c2d79..1c0ddf71193e 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -361,6 +361,8 @@ struct drm_amdgpu_gem_op { #define AMDGPU_VM_PAGE_WRITEABLE (1 << 2) /* executable mapping, new for VI */ #define AMDGPU_VM_PAGE_EXECUTABLE (1 << 3) +/* partially resident texture */ +#define AMDGPU_VM_PAGE_PRT (1 << 4) struct drm_amdgpu_gem_va { /** GEM object handle */ -- cgit v1.2.3-71-gd317 From 5ebbac4b5c9159130046bf7c56b7f4c71ca7d1b7 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 8 Mar 2017 18:25:15 -0500 Subject: drm/amdgpu: expose GPU sensor related information This includes shader/memory clocks, temperature, GPU load, etc. 
v2: - add sub-queries for AMDGPU_INFO_GPU_SENSOR_* - do not break the ABI v3: - return -ENOENT when amdgpu_dpm == 0 - expose more sensor queries v4: - s/GPU_POWER/GPU_AVG_POWER/ - improve VDDNB/VDDGFX query description - fix amdgpu_dpm check v5: - agd: fix warning v6: - agd: bump version Signed-off-by: Samuel Pitoiset Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 75 +++++++++++++++++++++++++++++++++ include/uapi/drm/amdgpu_drm.h | 20 +++++++++ 3 files changed, 97 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index b76cd699eb0d..6d5d0a74ad2c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -60,9 +60,10 @@ * - 3.8.0 - Add support raster config init in the kernel * - 3.9.0 - Add support for memory query info about VRAM and GTT. * - 3.10.0 - Add support for new fences ioctl, new gem ioctl flags + * - 3.11.0 - Add support for sensor query info (clocks, temp, etc).
*/ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 10 +#define KMS_DRIVER_MINOR 11 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 49f93ee019e3..027692bf8457 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -240,6 +240,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file uint32_t ui32 = 0; uint64_t ui64 = 0; int i, found; + int ui32_size = sizeof(ui32); if (!info->return_size || !info->return_pointer) return -EINVAL; @@ -596,6 +597,80 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file return -EINVAL; } } + case AMDGPU_INFO_SENSOR: { + struct pp_gpu_power query = {0}; + int query_size = sizeof(query); + + if (amdgpu_dpm == 0) + return -ENOENT; + + switch (info->sensor_info.type) { + case AMDGPU_INFO_SENSOR_GFX_SCLK: + /* get sclk in Mhz */ + if (amdgpu_dpm_read_sensor(adev, + AMDGPU_PP_SENSOR_GFX_SCLK, + (void *)&ui32, &ui32_size)) { + return -EINVAL; + } + ui32 /= 100; + break; + case AMDGPU_INFO_SENSOR_GFX_MCLK: + /* get mclk in Mhz */ + if (amdgpu_dpm_read_sensor(adev, + AMDGPU_PP_SENSOR_GFX_MCLK, + (void *)&ui32, &ui32_size)) { + return -EINVAL; + } + ui32 /= 100; + break; + case AMDGPU_INFO_SENSOR_GPU_TEMP: + /* get temperature in millidegrees C */ + if (amdgpu_dpm_read_sensor(adev, + AMDGPU_PP_SENSOR_GPU_TEMP, + (void *)&ui32, &ui32_size)) { + return -EINVAL; + } + break; + case AMDGPU_INFO_SENSOR_GPU_LOAD: + /* get GPU load */ + if (amdgpu_dpm_read_sensor(adev, + AMDGPU_PP_SENSOR_GPU_LOAD, + (void *)&ui32, &ui32_size)) { + return -EINVAL; + } + break; + case AMDGPU_INFO_SENSOR_GPU_AVG_POWER: + /* get average GPU power */ + if (amdgpu_dpm_read_sensor(adev, + AMDGPU_PP_SENSOR_GPU_POWER, + (void *)&query, &query_size)) { + return -EINVAL; + } + ui32 = query.average_gpu_power >> 8; + break; + case AMDGPU_INFO_SENSOR_VDDNB: + /* get 
VDDNB in millivolts */ + if (amdgpu_dpm_read_sensor(adev, + AMDGPU_PP_SENSOR_VDDNB, + (void *)&ui32, &ui32_size)) { + return -EINVAL; + } + break; + case AMDGPU_INFO_SENSOR_VDDGFX: + /* get VDDGFX in millivolts */ + if (amdgpu_dpm_read_sensor(adev, + AMDGPU_PP_SENSOR_VDDGFX, + (void *)&ui32, &ui32_size)) { + return -EINVAL; + } + break; + default: + DRM_DEBUG_KMS("Invalid request %d\n", + info->sensor_info.type); + return -EINVAL; + } + return copy_to_user(out, &ui32, min(size, 4u)) ? -EFAULT : 0; + } default: DRM_DEBUG_KMS("Invalid request %d\n", info->query); return -EINVAL; diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 1c0ddf71193e..a30fe693175f 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -532,6 +532,22 @@ struct drm_amdgpu_cs_chunk_data { #define AMDGPU_INFO_VBIOS_IMAGE 0x2 /* Query UVD handles */ #define AMDGPU_INFO_NUM_HANDLES 0x1C +/* Query sensor related information */ +#define AMDGPU_INFO_SENSOR 0x1D + /* Subquery id: Query GPU shader clock */ + #define AMDGPU_INFO_SENSOR_GFX_SCLK 0x1 + /* Subquery id: Query GPU memory clock */ + #define AMDGPU_INFO_SENSOR_GFX_MCLK 0x2 + /* Subquery id: Query GPU temperature */ + #define AMDGPU_INFO_SENSOR_GPU_TEMP 0x3 + /* Subquery id: Query GPU load */ + #define AMDGPU_INFO_SENSOR_GPU_LOAD 0x4 + /* Subquery id: Query average GPU power */ + #define AMDGPU_INFO_SENSOR_GPU_AVG_POWER 0x5 + /* Subquery id: Query northbridge voltage */ + #define AMDGPU_INFO_SENSOR_VDDNB 0x6 + /* Subquery id: Query graphics voltage */ + #define AMDGPU_INFO_SENSOR_VDDGFX 0x7 #define AMDGPU_INFO_MMR_SE_INDEX_SHIFT 0 #define AMDGPU_INFO_MMR_SE_INDEX_MASK 0xff @@ -595,6 +611,10 @@ struct drm_amdgpu_info { __u32 type; __u32 offset; } vbios_info; + + struct { + __u32 type; + } sensor_info; }; }; -- cgit v1.2.3-71-gd317 From df6e2c4aeb263f9b9b904c1a087411ddf25c5e94 Mon Sep 17 00:00:00 2001 From: Junwei Zhang Date: Fri, 17 Feb 2017 11:05:49 +0800 Subject: drm/amdgpu: export gfx 
config double offchip LDS buffers (v3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v2: move the config struct to drm_amdgpu_info_device v3: move the config feature to amdgpu_gca_config Signed-off-by: Junwei Zhang Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 ++ drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 6 ++++++ drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 6 ++++++ drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 14 ++++++++++++++ include/uapi/drm/amdgpu_drm.h | 2 ++ 6 files changed, 33 insertions(+) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 80ab8516db7c..ef72f52a6a20 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -845,6 +845,9 @@ struct amdgpu_gca_config { uint32_t macrotile_mode_array[16]; struct amdgpu_rb_config rb_config[AMDGPU_GFX_MAX_SE][AMDGPU_GFX_MAX_SH_PER_SE]; + + /* gfx configure feature */ + uint32_t double_offchip_lds_buf; }; struct amdgpu_cu_info { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 027692bf8457..096386515f2f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -528,6 +528,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file dev_info.vram_type = adev->mc.vram_type; dev_info.vram_bit_width = adev->mc.vram_width; dev_info.vce_harvest_config = adev->vce.harvest_config; + dev_info.gc_double_offchip_lds_buf = + adev->gfx.config.double_offchip_lds_buf; return copy_to_user(out, &dev_info, min((size_t)size, sizeof(dev_info))) ? 
-EFAULT : 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 2086e7e68de4..e78433799a6d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -1579,6 +1579,11 @@ static void gfx_v6_0_setup_spi(struct amdgpu_device *adev) mutex_unlock(&adev->grbm_idx_mutex); } +static void gfx_v6_0_config_init(struct amdgpu_device *adev) +{ + adev->gfx.config.double_offchip_lds_buf = 1; +} + static void gfx_v6_0_gpu_init(struct amdgpu_device *adev) { u32 gb_addr_config = 0; @@ -1736,6 +1741,7 @@ static void gfx_v6_0_gpu_init(struct amdgpu_device *adev) gfx_v6_0_setup_spi(adev); gfx_v6_0_get_cu_info(adev); + gfx_v6_0_config_init(adev); WREG32(mmCP_QUEUE_THRESHOLDS, ((0x16 << CP_QUEUE_THRESHOLDS__ROQ_IB1_START__SHIFT) | (0x2b << CP_QUEUE_THRESHOLDS__ROQ_IB2_START__SHIFT))); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 1f9354541f29..286d6763afa7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -1876,6 +1876,11 @@ static void gmc_v7_0_init_compute_vmid(struct amdgpu_device *adev) mutex_unlock(&adev->srbm_mutex); } +static void gfx_v7_0_config_init(struct amdgpu_device *adev) +{ + adev->gfx.config.double_offchip_lds_buf = 1; +} + /** * gfx_v7_0_gpu_init - setup the 3D engine * @@ -1899,6 +1904,7 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev) gfx_v7_0_setup_rb(adev); gfx_v7_0_get_cu_info(adev); + gfx_v7_0_config_init(adev); /* set HW defaults for 3D engine */ WREG32(mmCP_MEQ_THRESHOLDS, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index c2a325185753..5682d945e588 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -3846,6 +3846,19 @@ static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev) mutex_unlock(&adev->srbm_mutex); } +static void gfx_v8_0_config_init(struct amdgpu_device *adev) 
+{ + switch (adev->asic_type) { + default: + adev->gfx.config.double_offchip_lds_buf = 1; + break; + case CHIP_CARRIZO: + case CHIP_STONEY: + adev->gfx.config.double_offchip_lds_buf = 0; + break; + } +} + static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) { u32 tmp; @@ -3859,6 +3872,7 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) gfx_v8_0_tiling_mode_table_init(adev); gfx_v8_0_setup_rb(adev); gfx_v8_0_get_cu_info(adev); + gfx_v8_0_config_init(adev); /* XXX SH_MEM regs */ /* where to put LDS, scratch, GPUVM in FSA64 space */ diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index a30fe693175f..732c662fad79 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -726,6 +726,8 @@ struct drm_amdgpu_info_device { __u32 vram_bit_width; /* vce harvesting instance */ __u32 vce_harvest_config; + /* gfx double offchip LDS buffers */ + __u32 gc_double_offchip_lds_buf; }; struct drm_amdgpu_info_hw_ip { -- cgit v1.2.3-71-gd317 From a50798b6c724354b80d7c5e3de2ceaf8441a831e Mon Sep 17 00:00:00 2001 From: Leo Liu Date: Tue, 10 Jan 2017 11:49:08 -0500 Subject: uapi/drm: add AMDGPU_HW_IP_UVD_ENC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Christian König Signed-off-by: Leo Liu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 732c662fad79..2c30e324cb12 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -385,7 +385,8 @@ struct drm_amdgpu_gem_va { #define AMDGPU_HW_IP_DMA 2 #define AMDGPU_HW_IP_UVD 3 #define AMDGPU_HW_IP_VCE 4 -#define AMDGPU_HW_IP_NUM 5 +#define AMDGPU_HW_IP_UVD_ENC 5 +#define AMDGPU_HW_IP_NUM 6 #define AMDGPU_HW_IP_INSTANCE_MAX_COUNT 1 -- cgit v1.2.3-71-gd317 From dc54d3d1744d23ed0b345fd8bc1c493b74e8df44 Mon 
Sep 17 00:00:00 2001 From: Christian König Date: Mon, 13 Mar 2017 10:13:38 +0100 Subject: drm/amdgpu: implement AMDGPU_VA_OP_CLEAR v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A new VM operation to remove all mappings in a range. v2: limit unmapped area as noted by Jerry Signed-off-by: Christian König Reviewed-by: Junwei Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 27 ++++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 99 +++++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 3 + include/uapi/drm/amdgpu_drm.h | 1 + 5 files changed, 124 insertions(+), 8 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 0240f108f90e..b311b389bd5a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -507,14 +507,16 @@ static int amdgpu_gem_va_check(void *param, struct amdgpu_bo *bo) * amdgpu_gem_va_update_vm -update the bo_va in its VM * * @adev: amdgpu_device pointer + * @vm: vm to update * @bo_va: bo_va to update * @list: validation list - * @operation: map or unmap + * @operation: map, unmap or clear * * Update the bo_va directly after setting its address. Errors are not * vital here, so they are not reported back to userspace. 
*/ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, + struct amdgpu_vm *vm, struct amdgpu_bo_va *bo_va, struct list_head *list, uint32_t operation) @@ -529,16 +531,16 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, goto error; } - r = amdgpu_vm_validate_pt_bos(adev, bo_va->vm, amdgpu_gem_va_check, + r = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_gem_va_check, NULL); if (r) goto error; - r = amdgpu_vm_update_page_directory(adev, bo_va->vm); + r = amdgpu_vm_update_page_directory(adev, vm); if (r) goto error; - r = amdgpu_vm_clear_freed(adev, bo_va->vm); + r = amdgpu_vm_clear_freed(adev, vm); if (r) goto error; @@ -592,6 +594,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, switch (args->operation) { case AMDGPU_VA_OP_MAP: case AMDGPU_VA_OP_UNMAP: + case AMDGPU_VA_OP_CLEAR: break; default: dev_err(&dev->pdev->dev, "unsupported operation %d\n", @@ -600,7 +603,8 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, } INIT_LIST_HEAD(&list); - if (!(args->flags & AMDGPU_VM_PAGE_PRT)) { + if ((args->operation != AMDGPU_VA_OP_CLEAR) && + !(args->flags & AMDGPU_VM_PAGE_PRT)) { gobj = drm_gem_object_lookup(filp, args->handle); if (gobj == NULL) return -ENOENT; @@ -625,8 +629,10 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, r = -ENOENT; goto error_backoff; } - } else { + } else if (args->operation != AMDGPU_VA_OP_CLEAR) { bo_va = fpriv->prt_va; + } else { + bo_va = NULL; } switch (args->operation) { @@ -644,11 +650,18 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, case AMDGPU_VA_OP_UNMAP: r = amdgpu_vm_bo_unmap(adev, bo_va, args->va_address); break; + + case AMDGPU_VA_OP_CLEAR: + r = amdgpu_vm_bo_clear_mappings(adev, &fpriv->vm, + args->va_address, + args->map_size); + break; default: break; } if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !amdgpu_vm_debug) - amdgpu_gem_va_update_vm(adev, bo_va, &list, args->operation); + amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va, &list, + 
args->operation); error_backoff: ttm_eu_backoff_reservation(&ticket, &list); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 08ccb3d34b21..3e955190f013 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -188,7 +188,7 @@ TRACE_EVENT(amdgpu_vm_bo_map, ), TP_fast_assign( - __entry->bo = bo_va->bo; + __entry->bo = bo_va ? bo_va->bo : NULL; __entry->start = mapping->it.start; __entry->last = mapping->it.last; __entry->offset = mapping->offset; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 296e985d0b65..b67e94e25cfc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1612,6 +1612,105 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, return 0; } +/** + * amdgpu_vm_bo_clear_mappings - remove all mappings in a specific range + * + * @adev: amdgpu_device pointer + * @vm: VM structure to use + * @saddr: start of the range + * @size: size of the range + * + * Remove all mappings in a range, split them as appropriate. + * Returns 0 for success, error for failure. 
+ */ +int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + uint64_t saddr, uint64_t size) +{ + struct amdgpu_bo_va_mapping *before, *after, *tmp, *next; + struct interval_tree_node *it; + LIST_HEAD(removed); + uint64_t eaddr; + + eaddr = saddr + size - 1; + saddr /= AMDGPU_GPU_PAGE_SIZE; + eaddr /= AMDGPU_GPU_PAGE_SIZE; + + /* Allocate all the needed memory */ + before = kzalloc(sizeof(*before), GFP_KERNEL); + if (!before) + return -ENOMEM; + + after = kzalloc(sizeof(*after), GFP_KERNEL); + if (!after) { + kfree(before); + return -ENOMEM; + } + + /* Now gather all removed mappings */ + it = interval_tree_iter_first(&vm->va, saddr, eaddr); + while (it) { + tmp = container_of(it, struct amdgpu_bo_va_mapping, it); + it = interval_tree_iter_next(it, saddr, eaddr); + + /* Remember mapping split at the start */ + if (tmp->it.start < saddr) { + before->it.start = tmp->it.start;; + before->it.last = saddr - 1; + before->offset = tmp->offset; + before->flags = tmp->flags; + list_add(&before->list, &tmp->list); + } + + /* Remember mapping split at the end */ + if (tmp->it.last > eaddr) { + after->it.start = eaddr + 1; + after->it.last = tmp->it.last; + after->offset = tmp->offset; + after->offset += after->it.start - tmp->it.start; + after->flags = tmp->flags; + list_add(&after->list, &tmp->list); + } + + list_del(&tmp->list); + list_add(&tmp->list, &removed); + } + + /* And free them up */ + list_for_each_entry_safe(tmp, next, &removed, list) { + interval_tree_remove(&tmp->it, &vm->va); + list_del(&tmp->list); + + if (tmp->it.start < saddr) + tmp->it.start = saddr; + if (tmp->it.last > eaddr) + tmp->it.last = eaddr; + + list_add(&tmp->list, &vm->freed); + trace_amdgpu_vm_bo_unmap(NULL, tmp); + } + + /* Insert partial mapping before the range*/ + if (before->it.start != before->it.last) { + interval_tree_insert(&before->it, &vm->va); + if (before->flags & AMDGPU_PTE_PRT) + amdgpu_vm_prt_get(adev); + } else { + kfree(before); + } + + /* Insert 
partial mapping after the range */ + if (after->it.start != after->it.last) { + interval_tree_insert(&after->it, &vm->va); + if (after->flags & AMDGPU_PTE_PRT) + amdgpu_vm_prt_get(adev); + } else { + kfree(after); + } + + return 0; +} + /** * amdgpu_vm_bo_rmv - remove a bo to a specific vm * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 1e5a3b2c7927..95fe47733b7f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -210,6 +210,9 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, uint64_t addr); +int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + uint64_t saddr, uint64_t size); void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va); diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 2c30e324cb12..199f1b46fd2c 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -350,6 +350,7 @@ struct drm_amdgpu_gem_op { #define AMDGPU_VA_OP_MAP 1 #define AMDGPU_VA_OP_UNMAP 2 +#define AMDGPU_VA_OP_CLEAR 3 /* Delay the page table update till the next CS */ #define AMDGPU_VM_DELAY_UPDATE (1 << 0) -- cgit v1.2.3-71-gd317 From 80f95c579d800fa22e9e57ecb3d50b9e93bc1f82 Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 13 Mar 2017 10:13:39 +0100 Subject: drm/amdgpu: add a VM mapping replace operation v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new operation to replace mappings in a VM with a new one. v2: Fix Jerry's comment, separate out clear operation. 
Signed-off-by: Christian König Reviewed-by: Junwei Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 15 +++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 64 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 4 +++ include/uapi/drm/amdgpu_drm.h | 1 + 4 files changed, 83 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index b311b389bd5a..c71c087727b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -544,7 +544,8 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, if (r) goto error; - if (operation == AMDGPU_VA_OP_MAP) + if (operation == AMDGPU_VA_OP_MAP || + operation == AMDGPU_VA_OP_REPLACE) r = amdgpu_vm_bo_update(adev, bo_va, false); error: @@ -595,6 +596,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, case AMDGPU_VA_OP_MAP: case AMDGPU_VA_OP_UNMAP: case AMDGPU_VA_OP_CLEAR: + case AMDGPU_VA_OP_REPLACE: break; default: dev_err(&dev->pdev->dev, "unsupported operation %d\n", @@ -656,6 +658,17 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, args->va_address, args->map_size); break; + case AMDGPU_VA_OP_REPLACE: + r = amdgpu_vm_alloc_pts(adev, bo_va->vm, args->va_address, + args->map_size); + if (r) + goto error_backoff; + + va_flags = amdgpu_vm_get_pte_flags(adev, args->flags); + r = amdgpu_vm_bo_replace_map(adev, bo_va, args->va_address, + args->offset_in_bo, args->map_size, + va_flags); + break; default: break; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index b67e94e25cfc..2da08027ff29 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1560,6 +1560,70 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, return 0; } +/** + * amdgpu_vm_bo_replace_map - map bo inside a vm, replacing existing mappings + * + * @adev: 
amdgpu_device pointer + * @bo_va: bo_va to store the address + * @saddr: where to map the BO + * @offset: requested offset in the BO + * @flags: attributes of pages (read/write/valid/etc.) + * + * Add a mapping of the BO at the specified addr into the VM. Replace existing + * mappings as we do so. + * Returns 0 for success, error for failure. + * + * Object has to be reserved and unreserved outside! + */ +int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, + struct amdgpu_bo_va *bo_va, + uint64_t saddr, uint64_t offset, + uint64_t size, uint64_t flags) +{ + struct amdgpu_bo_va_mapping *mapping; + struct amdgpu_vm *vm = bo_va->vm; + uint64_t eaddr; + int r; + + /* validate the parameters */ + if (saddr & AMDGPU_GPU_PAGE_MASK || offset & AMDGPU_GPU_PAGE_MASK || + size == 0 || size & AMDGPU_GPU_PAGE_MASK) + return -EINVAL; + + /* make sure object fit at this offset */ + eaddr = saddr + size - 1; + if (saddr >= eaddr || + (bo_va->bo && offset + size > amdgpu_bo_size(bo_va->bo))) + return -EINVAL; + + /* Allocate all the needed memory */ + mapping = kmalloc(sizeof(*mapping), GFP_KERNEL); + if (!mapping) + return -ENOMEM; + + r = amdgpu_vm_bo_clear_mappings(adev, bo_va->vm, saddr, size); + if (r) { + kfree(mapping); + return r; + } + + saddr /= AMDGPU_GPU_PAGE_SIZE; + eaddr /= AMDGPU_GPU_PAGE_SIZE; + + mapping->it.start = saddr; + mapping->it.last = eaddr; + mapping->offset = offset; + mapping->flags = flags; + + list_add(&mapping->list, &bo_va->invalids); + interval_tree_insert(&mapping->it, &vm->va); + + if (flags & AMDGPU_PTE_PRT) + amdgpu_vm_prt_get(adev); + + return 0; +} + /** * amdgpu_vm_bo_unmap - remove bo mapping from vm * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 95fe47733b7f..ab0429d12992 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -207,6 +207,10 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, uint64_t addr, 
uint64_t offset, uint64_t size, uint64_t flags); +int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, + struct amdgpu_bo_va *bo_va, + uint64_t addr, uint64_t offset, + uint64_t size, uint64_t flags); int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, uint64_t addr); diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 199f1b46fd2c..37e2c0da01fb 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -351,6 +351,7 @@ struct drm_amdgpu_gem_op { #define AMDGPU_VA_OP_MAP 1 #define AMDGPU_VA_OP_UNMAP 2 #define AMDGPU_VA_OP_CLEAR 3 +#define AMDGPU_VA_OP_REPLACE 4 /* Delay the page table update till the next CS */ #define AMDGPU_VM_DELAY_UPDATE (1 << 0) -- cgit v1.2.3-71-gd317 From 66e02bc343bafac589875a72607a1ae6c4d25bcb Mon Sep 17 00:00:00 2001 From: Alex Xie Date: Tue, 14 Feb 2017 12:04:52 -0500 Subject: drm/amdgpu: Add MTYPE flags to GPU VM IOCTL interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Alex Xie Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 2 +- include/uapi/drm/amdgpu_drm.h | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 5e068eefcaec..b4f52fd7e237 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -558,7 +558,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, { const uint32_t valid_flags = AMDGPU_VM_DELAY_UPDATE | AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE | - AMDGPU_VM_PAGE_EXECUTABLE; + AMDGPU_VM_PAGE_EXECUTABLE | AMDGPU_VM_MTYPE_MASK; const uint32_t prt_flags = AMDGPU_VM_DELAY_UPDATE | AMDGPU_VM_PAGE_PRT; diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 
37e2c0da01fb..5d8e7090fe9e 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -365,6 +365,18 @@ struct drm_amdgpu_gem_op { #define AMDGPU_VM_PAGE_EXECUTABLE (1 << 3) /* partially resident texture */ #define AMDGPU_VM_PAGE_PRT (1 << 4) +/* MTYPE flags use bit 5 to 8 */ +#define AMDGPU_VM_MTYPE_MASK (0xf << 5) +/* Default MTYPE. Pre-AI must use this. Recommended for newer ASICs. */ +#define AMDGPU_VM_MTYPE_DEFAULT (0 << 5) +/* Use NC MTYPE instead of default MTYPE */ +#define AMDGPU_VM_MTYPE_NC (1 << 5) +/* Use WC MTYPE instead of default MTYPE */ +#define AMDGPU_VM_MTYPE_WC (2 << 5) +/* Use CC MTYPE instead of default MTYPE */ +#define AMDGPU_VM_MTYPE_CC (3 << 5) +/* Use UC MTYPE instead of default MTYPE */ +#define AMDGPU_VM_MTYPE_UC (4 << 5) struct drm_amdgpu_gem_va { /** GEM object handle */ -- cgit v1.2.3-71-gd317 From bce23e00f3369ce8c32c90f087e37c01f83002d1 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 28 Mar 2017 12:52:08 -0400 Subject: drm/amdgpu: add NGG parameters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NGG (Next Generation Graphics) is a new feature in GFX9.0. This adds the relevant parameters. 
Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 29 +++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 21 +++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 7 +++++++ include/uapi/drm/amdgpu_drm.h | 8 ++++++++ 4 files changed, 65 insertions(+) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 75525980a5a2..886f105958a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -103,6 +103,11 @@ extern char *amdgpu_disable_cu; extern char *amdgpu_virtual_display; extern unsigned amdgpu_pp_feature_mask; extern int amdgpu_vram_page_split; +extern int amdgpu_ngg; +extern int amdgpu_prim_buf_per_se; +extern int amdgpu_pos_buf_per_se; +extern int amdgpu_cntl_sb_buf_per_se; +extern int amdgpu_param_buf_per_se; #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 #define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */ @@ -957,6 +962,28 @@ struct amdgpu_gfx_funcs { void (*read_wave_sgprs)(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t start, uint32_t size, uint32_t *dst); }; +struct amdgpu_ngg_buf { + struct amdgpu_bo *bo; + uint64_t gpu_addr; + uint32_t size; + uint32_t bo_size; +}; + +enum { + PRIM = 0, + POS, + CNTL, + PARAM, + NGG_BUF_MAX +}; + +struct amdgpu_ngg { + struct amdgpu_ngg_buf buf[NGG_BUF_MAX]; + uint32_t gds_reserve_addr; + uint32_t gds_reserve_size; + bool init; +}; + struct amdgpu_gfx { struct mutex gpu_clock_mutex; struct amdgpu_gfx_config config; @@ -1000,6 +1027,8 @@ struct amdgpu_gfx { uint32_t grbm_soft_reset; uint32_t srbm_soft_reset; bool in_reset; + /* NGG */ + struct amdgpu_ngg ngg; }; int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 7292f4e7bb1a..2b05c891747b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -103,6 +103,11 @@ unsigned amdgpu_pg_mask = 0xffffffff; char *amdgpu_disable_cu = NULL; char *amdgpu_virtual_display = NULL; unsigned amdgpu_pp_feature_mask = 0xffffffff; +int amdgpu_ngg = 0; +int amdgpu_prim_buf_per_se = 0; +int amdgpu_pos_buf_per_se = 0; +int amdgpu_cntl_sb_buf_per_se = 0; +int amdgpu_param_buf_per_se = 0; MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes"); module_param_named(vramlimit, amdgpu_vram_limit, int, 0600); @@ -210,6 +215,22 @@ MODULE_PARM_DESC(virtual_display, "Enable virtual display feature (the virtual_display will be set like xxxx:xx:xx.x,x;xxxx:xx:xx.x,x)"); module_param_named(virtual_display, amdgpu_virtual_display, charp, 0444); +MODULE_PARM_DESC(ngg, "Next Generation Graphics (1 = enable, 0 = disable(default depending on gfx))"); +module_param_named(ngg, amdgpu_ngg, int, 0444); + +MODULE_PARM_DESC(prim_buf_per_se, "the size of Primitive Buffer per Shader Engine (default depending on gfx)"); +module_param_named(prim_buf_per_se, amdgpu_prim_buf_per_se, int, 0444); + +MODULE_PARM_DESC(pos_buf_per_se, "the size of Position Buffer per Shader Engine (default depending on gfx)"); +module_param_named(pos_buf_per_se, amdgpu_pos_buf_per_se, int, 0444); + +MODULE_PARM_DESC(cntl_sb_buf_per_se, "the size of Control Sideband per Shader Engine (default depending on gfx)"); +module_param_named(cntl_sb_buf_per_se, amdgpu_cntl_sb_buf_per_se, int, 0444); + +MODULE_PARM_DESC(param_buf_per_se, "the size of Off-Chip Pramater Cache per Shader Engine (default depending on gfx)"); +module_param_named(param_buf_per_se, amdgpu_param_buf_per_se, int, 0444); + + static const struct pci_device_id pciidlist[] = { #ifdef CONFIG_DRM_AMDGPU_SI {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index ef91c8e2b8e3..a6d15978d821 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -541,6 +541,13 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file dev_info.gc_double_offchip_lds_buf = adev->gfx.config.double_offchip_lds_buf; + if (amdgpu_ngg) { + dev_info.prim_buf_gpu_addr = adev->gfx.ngg.buf[PRIM].gpu_addr; + dev_info.pos_buf_gpu_addr = adev->gfx.ngg.buf[POS].gpu_addr; + dev_info.cntl_sb_buf_gpu_addr = adev->gfx.ngg.buf[CNTL].gpu_addr; + dev_info.param_buf_gpu_addr = adev->gfx.ngg.buf[PARAM].gpu_addr; + } + return copy_to_user(out, &dev_info, min((size_t)size, sizeof(dev_info))) ? -EFAULT : 0; } diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 5d8e7090fe9e..d3f121a02bed 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -743,6 +743,14 @@ struct drm_amdgpu_info_device { __u32 vce_harvest_config; /* gfx double offchip LDS buffers */ __u32 gc_double_offchip_lds_buf; + /* NGG Primitive Buffer */ + __u64 prim_buf_gpu_addr; + /* NGG Position Buffer */ + __u64 pos_buf_gpu_addr; + /* NGG Control Sideband */ + __u64 cntl_sb_buf_gpu_addr; + /* NGG Parameter Cache */ + __u64 param_buf_gpu_addr; }; struct drm_amdgpu_info_hw_ip { -- cgit v1.2.3-71-gd317 From a8f1f1ce286a94e8c3cb9cc8b4b21560793d4518 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 3 Mar 2017 15:54:06 -0500 Subject: drm/amdgpu: Add asic family for vega10 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Acked-by: Christian König Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index d3f121a02bed..08e90d8ea52a 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -802,6 +802,7 @@ struct drm_amdgpu_info_vce_clock_table { #define AMDGPU_FAMILY_KV 125 /* Kaveri, Kabini, Mullins */ #define AMDGPU_FAMILY_VI 130 /* Iceland, Tonga */ 
#define AMDGPU_FAMILY_CZ 135 /* Carrizo, Stoney */ +#define AMDGPU_FAMILY_AI 141 /* Vega10 */ #if defined(__cplusplus) } -- cgit v1.2.3-71-gd317 From 00ac6f6be1c875495320c257a70c82768fffedb3 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 3 Mar 2017 16:00:11 -0500 Subject: drm/amdgpu: add tiling flags for GFX9 (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v2: Marek: allow shifts >32 in AMDGPU_TILING_SET/GET Acked-by: Christian König Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 08e90d8ea52a..770dc56df3dd 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -209,6 +209,7 @@ struct drm_amdgpu_gem_userptr { __u32 handle; }; +/* SI-CI-VI: */ /* same meaning as the GB_TILE_MODE and GL_MACRO_TILE_MODE fields */ #define AMDGPU_TILING_ARRAY_MODE_SHIFT 0 #define AMDGPU_TILING_ARRAY_MODE_MASK 0xf @@ -227,10 +228,15 @@ struct drm_amdgpu_gem_userptr { #define AMDGPU_TILING_NUM_BANKS_SHIFT 21 #define AMDGPU_TILING_NUM_BANKS_MASK 0x3 +/* GFX9 and later: */ +#define AMDGPU_TILING_SWIZZLE_MODE_SHIFT 0 +#define AMDGPU_TILING_SWIZZLE_MODE_MASK 0x1f + +/* Set/Get helpers for tiling flags. 
*/ #define AMDGPU_TILING_SET(field, value) \ - (((value) & AMDGPU_TILING_##field##_MASK) << AMDGPU_TILING_##field##_SHIFT) + (((__u64)(value) & AMDGPU_TILING_##field##_MASK) << AMDGPU_TILING_##field##_SHIFT) #define AMDGPU_TILING_GET(value, field) \ - (((value) >> AMDGPU_TILING_##field##_SHIFT) & AMDGPU_TILING_##field##_MASK) + (((__u64)(value) >> AMDGPU_TILING_##field##_SHIFT) & AMDGPU_TILING_##field##_MASK) #define AMDGPU_GEM_METADATA_OP_SET_METADATA 1 #define AMDGPU_GEM_METADATA_OP_GET_METADATA 2 -- cgit v1.2.3-71-gd317 From 6a7ed07e272d058a72910571a0d189bd04f92b49 Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Fri, 3 Mar 2017 19:15:26 -0500 Subject: drm/amdgpu: add psp firmware info into info query and debugfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Acked-by: Christian König Signed-off-by: Huang Rui Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 25 +++++++++++++++++++++++++ include/uapi/drm/amdgpu_drm.h | 4 ++++ 2 files changed, 29 insertions(+) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index a6d15978d821..5ded370a4b35 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -208,6 +208,14 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, fw_info->ver = adev->sdma.instance[query_fw->index].fw_version; fw_info->feature = adev->sdma.instance[query_fw->index].feature_version; break; + case AMDGPU_INFO_FW_SOS: + fw_info->ver = adev->psp.sos_fw_version; + fw_info->feature = adev->psp.sos_feature_version; + break; + case AMDGPU_INFO_FW_ASD: + fw_info->ver = adev->psp.asd_fw_version; + fw_info->feature = adev->psp.asd_feature_version; + break; default: return -EINVAL; } @@ -1080,6 +1088,23 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data) fw_info.feature, fw_info.ver); } + /* PSP SOS */ + 
query_fw.fw_type = AMDGPU_INFO_FW_SOS; + ret = amdgpu_firmware_info(&fw_info, &query_fw, adev); + if (ret) + return ret; + seq_printf(m, "SOS feature version: %u, firmware version: 0x%08x\n", + fw_info.feature, fw_info.ver); + + + /* PSP ASD */ + query_fw.fw_type = AMDGPU_INFO_FW_ASD; + ret = amdgpu_firmware_info(&fw_info, &query_fw, adev); + if (ret) + return ret; + seq_printf(m, "ASD feature version: %u, firmware version: 0x%08x\n", + fw_info.feature, fw_info.ver); + /* SMC */ query_fw.fw_type = AMDGPU_INFO_FW_SMC; ret = amdgpu_firmware_info(&fw_info, &query_fw, adev); diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 770dc56df3dd..c03bae906787 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -523,6 +523,10 @@ struct drm_amdgpu_cs_chunk_data { #define AMDGPU_INFO_FW_SMC 0x0a /* Subquery id: Query SDMA firmware version */ #define AMDGPU_INFO_FW_SDMA 0x0b + /* Subquery id: Query PSP SOS firmware version */ + #define AMDGPU_INFO_FW_SOS 0x0c + /* Subquery id: Query PSP ASD firmware version */ + #define AMDGPU_INFO_FW_ASD 0x0d /* number of bytes moved for TTM migration */ #define AMDGPU_INFO_NUM_BYTES_MOVED 0x0f /* the used VRAM size */ -- cgit v1.2.3-71-gd317 From ed834af243d04637fbb9ca63c8e58677edf266bc Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Wed, 8 Mar 2017 15:37:00 +0800 Subject: uapi/drm:change Preamble Description MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Preamble in linux doesn't mean it is CE PREAMBLE IB, instead it means this IB could be dropped if no ctx switch happens. 
Signed-off-by: Monk Liu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index c03bae906787..4179241e5e7c 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -445,7 +445,7 @@ union drm_amdgpu_cs { /* This IB should be submitted to CE */ #define AMDGPU_IB_FLAG_CE (1<<0) -/* CE Preamble */ +/* Preamble flag, which means the IB could be dropped if no context switch */ #define AMDGPU_IB_FLAG_PREAMBLE (1<<1) struct drm_amdgpu_cs_chunk_ib { -- cgit v1.2.3-71-gd317 From 71aec257ec52caadf8ea3c297e6cff39fec49a2d Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Wed, 8 Mar 2017 15:38:54 +0800 Subject: uapi/drm:add new flag for Preemption MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit when MCBP supported, we will set pre_enb bit for those IBs with PREEMPT flag tagged Signed-off-by: Monk Liu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 4179241e5e7c..516a9f285730 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -448,6 +448,9 @@ union drm_amdgpu_cs { /* Preamble flag, which means the IB could be dropped if no context switch */ #define AMDGPU_IB_FLAG_PREAMBLE (1<<1) +/* Preempt flag, IB should set Pre_enb bit if PREEMPT flag detected */ +#define AMDGPU_IB_FLAG_PREEMPT (1<<2) + struct drm_amdgpu_cs_chunk_ib { __u32 _pad; /** AMDGPU_IB_FLAG_* */ -- cgit v1.2.3-71-gd317 From a3caf7440dedd2399f90f27ff11ac390bf03e6c4 Mon Sep 17 00:00:00 2001 From: Vidyullatha Kanchanapally Date: Fri, 31 Mar 2017 00:22:34 +0300 Subject: cfg80211: Add support for FILS shared key authentication offload Enhance nl80211 
and cfg80211 connect request and response APIs to support FILS shared key authentication offload. The new nl80211 attributes can be used to provide additional information to the driver to establish a FILS connection. Also enhance the set/del PMKSA to allow support for adding and deleting PMKSA based on FILS cache identifier. Add a new feature flag that drivers can use to advertize support for FILS shared key authentication and association in station mode when using their own SME. Signed-off-by: Vidyullatha Kanchanapally Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 13 +++++++ include/net/cfg80211.h | 57 +++++++++++++++++++++++++++- include/uapi/linux/nl80211.h | 86 ++++++++++++++++++++++++++++++++++++++++-- net/wireless/nl80211.c | 90 +++++++++++++++++++++++++++++++++++++++++--- net/wireless/sme.c | 25 +++++++++++- 5 files changed, 259 insertions(+), 12 deletions(-) (limited to 'include/uapi') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 22bf0676d928..294fa6273a62 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1723,6 +1723,9 @@ enum ieee80211_statuscode { WLAN_STATUS_REJECT_DSE_BAND = 96, WLAN_STATUS_DENIED_WITH_SUGGESTED_BAND_AND_CHANNEL = 99, WLAN_STATUS_DENIED_DUE_TO_SPECTRUM_MANAGEMENT = 103, + /* 802.11ai */ + WLAN_STATUS_FILS_AUTHENTICATION_FAILURE = 108, + WLAN_STATUS_UNKNOWN_AUTHENTICATION_SERVER = 109, }; @@ -2104,6 +2107,12 @@ enum ieee80211_key_len { #define FILS_NONCE_LEN 16 #define FILS_MAX_KEK_LEN 64 +#define FILS_ERP_MAX_USERNAME_LEN 16 +#define FILS_ERP_MAX_REALM_LEN 253 +#define FILS_ERP_MAX_RRK_LEN 64 + +#define PMK_MAX_LEN 48 + /* Public action codes */ enum ieee80211_pub_actioncode { WLAN_PUB_ACTION_EXT_CHANSW_ANN = 4, @@ -2355,6 +2364,10 @@ enum ieee80211_sa_query_action { #define WLAN_AKM_SUITE_TDLS SUITE(0x000FAC, 7) #define WLAN_AKM_SUITE_SAE SUITE(0x000FAC, 8) #define WLAN_AKM_SUITE_FT_OVER_SAE SUITE(0x000FAC, 9) +#define 
WLAN_AKM_SUITE_FILS_SHA256 SUITE(0x000FAC, 14) +#define WLAN_AKM_SUITE_FILS_SHA384 SUITE(0x000FAC, 15) +#define WLAN_AKM_SUITE_FT_FILS_SHA256 SUITE(0x000FAC, 16) +#define WLAN_AKM_SUITE_FT_FILS_SHA384 SUITE(0x000FAC, 17) #define WLAN_MAX_KEY_LEN 32 diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index da12d5b86e1b..042137d7d226 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2073,6 +2073,19 @@ struct cfg80211_bss_selection { * the BSSID of the current association, i.e., to the value that is * included in the Current AP address field of the Reassociation Request * frame. + * @fils_erp_username: EAP re-authentication protocol (ERP) username part of the + * NAI or %NULL if not specified. This is used to construct FILS wrapped + * data IE. + * @fils_erp_username_len: Length of @fils_erp_username in octets. + * @fils_erp_realm: EAP re-authentication protocol (ERP) realm part of NAI or + * %NULL if not specified. This specifies the domain name of ER server and + * is used to construct FILS wrapped data IE. + * @fils_erp_realm_len: Length of @fils_erp_realm in octets. + * @fils_erp_next_seq_num: The next sequence number to use in the FILS ERP + * messages. This is also used to construct FILS wrapped data IE. + * @fils_erp_rrk: ERP re-authentication Root Key (rRK) used to derive additional + * keys in FILS or %NULL if not specified. + * @fils_erp_rrk_len: Length of @fils_erp_rrk in octets. */ struct cfg80211_connect_params { struct ieee80211_channel *channel; @@ -2098,6 +2111,13 @@ struct cfg80211_connect_params { bool pbss; struct cfg80211_bss_selection bss_select; const u8 *prev_bssid; + const u8 *fils_erp_username; + size_t fils_erp_username_len; + const u8 *fils_erp_realm; + size_t fils_erp_realm_len; + u16 fils_erp_next_seq_num; + const u8 *fils_erp_rrk; + size_t fils_erp_rrk_len; }; /** @@ -2136,12 +2156,27 @@ enum wiphy_params_flags { * This structure is passed to the set/del_pmksa() method for PMKSA * caching. 
* - * @bssid: The AP's BSSID. - * @pmkid: The PMK material itself. + * @bssid: The AP's BSSID (may be %NULL). + * @pmkid: The identifier to refer a PMKSA. + * @pmk: The PMK for the PMKSA identified by @pmkid. This is used for key + * derivation by a FILS STA. Otherwise, %NULL. + * @pmk_len: Length of the @pmk. The length of @pmk can differ depending on + * the hash algorithm used to generate this. + * @ssid: SSID to specify the ESS within which a PMKSA is valid when using FILS + * cache identifier (may be %NULL). + * @ssid_len: Length of the @ssid in octets. + * @cache_id: 2-octet cache identifier advertized by a FILS AP identifying the + * scope of PMKSA. This is valid only if @ssid_len is non-zero (may be + * %NULL). */ struct cfg80211_pmksa { const u8 *bssid; const u8 *pmkid; + const u8 *pmk; + size_t pmk_len; + const u8 *ssid; + size_t ssid_len; + const u8 *cache_id; }; /** @@ -5153,6 +5188,17 @@ static inline void cfg80211_testmode_event(struct sk_buff *skb, gfp_t gfp) * @req_ie_len: Association request IEs length * @resp_ie: Association response IEs (may be %NULL) * @resp_ie_len: Association response IEs length + * @fils_kek: KEK derived from a successful FILS connection (may be %NULL) + * @fils_kek_len: Length of @fils_kek in octets + * @update_erp_next_seq_num: Boolean value to specify whether the value in + * @fils_erp_next_seq_num is valid. + * @fils_erp_next_seq_num: The next sequence number to use in ERP message in + * FILS Authentication. This value should be specified irrespective of the + * status for a FILS connection. + * @pmk: A new PMK if derived from a successful FILS connection (may be %NULL). + * @pmk_len: Length of @pmk in octets + * @pmkid: A new PMKID if derived from a successful FILS connection or the PMKID + * used for this FILS connection (may be %NULL). * @timeout_reason: Reason for connection timeout. This is used when the * connection fails due to a timeout instead of an explicit rejection from * the AP. 
%NL80211_TIMEOUT_UNSPECIFIED is used when the timeout reason is @@ -5168,6 +5214,13 @@ struct cfg80211_connect_resp_params { size_t req_ie_len; const u8 *resp_ie; size_t resp_ie_len; + const u8 *fils_kek; + size_t fils_kek_len; + bool update_erp_next_seq_num; + u16 fils_erp_next_seq_num; + const u8 *pmk; + size_t pmk_len; + const u8 *pmkid; enum nl80211_timeout_reason timeout_reason; }; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index cd4dfef58fab..6095a6c4c412 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -172,6 +172,42 @@ * Multiple such rules can be created. */ +/** + * DOC: FILS shared key authentication offload + * + * FILS shared key authentication offload can be advertized by drivers by + * setting @NL80211_EXT_FEATURE_FILS_SK_OFFLOAD flag. The drivers that support + * FILS shared key authentication offload should be able to construct the + * authentication and association frames for FILS shared key authentication and + * eventually do a key derivation as per IEEE 802.11ai. The below additional + * parameters should be given to driver in %NL80211_CMD_CONNECT. + * %NL80211_ATTR_FILS_ERP_USERNAME - used to construct keyname_nai + * %NL80211_ATTR_FILS_ERP_REALM - used to construct keyname_nai + * %NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM - used to construct erp message + * %NL80211_ATTR_FILS_ERP_RRK - used to generate the rIK and rMSK + * rIK should be used to generate an authentication tag on the ERP message and + * rMSK should be used to derive a PMKSA. + * rIK, rMSK should be generated and keyname_nai, sequence number should be used + * as specified in IETF RFC 6696. + * + * When FILS shared key authentication is completed, driver needs to provide the + * below additional parameters to userspace. 
+ * %NL80211_ATTR_FILS_KEK - used for key renewal + * %NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM - used in further EAP-RP exchanges + * %NL80211_ATTR_PMKID - used to identify the PMKSA used/generated + * %NL80211_ATTR_PMK - used to update PMKSA cache in userspace + * The PMKSA can be maintained in userspace persistently so that it can be used + * later after reboots or wifi turn off/on also. + * + * %NL80211_ATTR_FILS_CACHE_ID is the cache identifier advertized by a FILS + * capable AP supporting PMK caching. It specifies the scope within which the + * PMKSAs are cached in an ESS. %NL80211_CMD_SET_PMKSA and + * %NL80211_CMD_DEL_PMKSA are enhanced to allow support for PMKSA caching based + * on FILS cache identifier. Additionally %NL80211_ATTR_PMK is used with + * %NL80211_CMD_SET_PMKSA to specify the PMK corresponding to a PMKSA for driver to + * use in a FILS shared key connection with PMKSA caching. + */ + /** * enum nl80211_commands - supported nl80211 commands * @@ -370,10 +406,18 @@ * @NL80211_CMD_NEW_SURVEY_RESULTS: survey data notification (as a reply to * NL80211_CMD_GET_SURVEY and on the "scan" multicast group) * - * @NL80211_CMD_SET_PMKSA: Add a PMKSA cache entry, using %NL80211_ATTR_MAC - * (for the BSSID) and %NL80211_ATTR_PMKID. + * @NL80211_CMD_SET_PMKSA: Add a PMKSA cache entry using %NL80211_ATTR_MAC + * (for the BSSID), %NL80211_ATTR_PMKID, and optionally %NL80211_ATTR_PMK + * (PMK is used for PTKSA derivation in case of FILS shared key offload) or + * using %NL80211_ATTR_SSID, %NL80211_ATTR_FILS_CACHE_ID, + * %NL80211_ATTR_PMKID, and %NL80211_ATTR_PMK in case of FILS + * authentication where %NL80211_ATTR_FILS_CACHE_ID is the identifier + * advertized by a FILS capable AP identifying the scope of PMKSA in an + * ESS. * @NL80211_CMD_DEL_PMKSA: Delete a PMKSA cache entry, using %NL80211_ATTR_MAC - * (for the BSSID) and %NL80211_ATTR_PMKID. 
+ * (for the BSSID) and %NL80211_ATTR_PMKID or using %NL80211_ATTR_SSID, + * %NL80211_ATTR_FILS_CACHE_ID, and %NL80211_ATTR_PMKID in case of FILS + * authentication. * @NL80211_CMD_FLUSH_PMKSA: Flush all PMKSA cache entries. * * @NL80211_CMD_REG_CHANGE: indicates to userspace the regulatory domain @@ -2012,6 +2056,31 @@ enum nl80211_commands { * u32 attribute with an &enum nl80211_timeout_reason value. This is used, * e.g., with %NL80211_CMD_CONNECT event. * + * @NL80211_ATTR_FILS_ERP_USERNAME: EAP Re-authentication Protocol (ERP) + * username part of NAI used to refer keys rRK and rIK. This is used with + * %NL80211_CMD_CONNECT. + * + * @NL80211_ATTR_FILS_ERP_REALM: EAP Re-authentication Protocol (ERP) realm part + * of NAI specifying the domain name of the ER server. This is used with + * %NL80211_CMD_CONNECT. + * + * @NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM: Unsigned 16-bit ERP next sequence number + * to use in ERP messages. This is used in generating the FILS wrapped data + * for FILS authentication and is used with %NL80211_CMD_CONNECT. + * + * @NL80211_ATTR_FILS_ERP_RRK: ERP re-authentication Root Key (rRK) for the + * NAI specified by %NL80211_ATTR_FILS_ERP_USERNAME and + * %NL80211_ATTR_FILS_ERP_REALM. This is used for generating rIK and rMSK + * from successful FILS authentication and is used with + * %NL80211_CMD_CONNECT. + * + * @NL80211_ATTR_FILS_CACHE_ID: A 2-octet identifier advertized by a FILS AP + * identifying the scope of PMKSAs. This is used with + * @NL80211_CMD_SET_PMKSA and @NL80211_CMD_DEL_PMKSA. + * + * @NL80211_ATTR_PMK: PMK for the PMKSA identified by %NL80211_ATTR_PMKID. + * This is used with @NL80211_CMD_SET_PMKSA. 
+ * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2423,6 +2492,14 @@ enum nl80211_attrs { NL80211_ATTR_TIMEOUT_REASON, + NL80211_ATTR_FILS_ERP_USERNAME, + NL80211_ATTR_FILS_ERP_REALM, + NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM, + NL80211_ATTR_FILS_ERP_RRK, + NL80211_ATTR_FILS_CACHE_ID, + + NL80211_ATTR_PMK, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -4759,6 +4836,8 @@ enum nl80211_feature_flags { * @NL80211_EXT_FEATURE_CQM_RSSI_LIST: With this driver the * %NL80211_ATTR_CQM_RSSI_THOLD attribute accepts a list of zero or more * RSSI threshold values to monitor rather than exactly one threshold. + * @NL80211_EXT_FEATURE_FILS_SK_OFFLOAD: Driver SME supports FILS shared key + * authentication with %NL80211_CMD_CONNECT. * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. 
@@ -4778,6 +4857,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_MGMT_TX_RANDOM_TA_CONNECTED, NL80211_EXT_FEATURE_SCHED_SCAN_RELATIVE_RSSI, NL80211_EXT_FEATURE_CQM_RSSI_LIST, + NL80211_EXT_FEATURE_FILS_SK_OFFLOAD, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 3d635c865281..9910aae08f1a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -410,6 +410,15 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { .len = sizeof(struct nl80211_bss_select_rssi_adjust) }, [NL80211_ATTR_TIMEOUT_REASON] = { .type = NLA_U32 }, + [NL80211_ATTR_FILS_ERP_USERNAME] = { .type = NLA_BINARY, + .len = FILS_ERP_MAX_USERNAME_LEN }, + [NL80211_ATTR_FILS_ERP_REALM] = { .type = NLA_BINARY, + .len = FILS_ERP_MAX_REALM_LEN }, + [NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM] = { .type = NLA_U16 }, + [NL80211_ATTR_FILS_ERP_RRK] = { .type = NLA_BINARY, + .len = FILS_ERP_MAX_RRK_LEN }, + [NL80211_ATTR_FILS_CACHE_ID] = { .len = 2 }, + [NL80211_ATTR_PMK] = { .type = NLA_BINARY, .len = PMK_MAX_LEN }, }; /* policy for the key attributes */ @@ -3832,6 +3841,19 @@ static bool nl80211_valid_auth_type(struct cfg80211_registered_device *rdev, return false; return true; case NL80211_CMD_CONNECT: + /* SAE not supported yet */ + if (auth_type == NL80211_AUTHTYPE_SAE) + return false; + /* FILS with SK PFS or PK not supported yet */ + if (auth_type == NL80211_AUTHTYPE_FILS_SK_PFS || + auth_type == NL80211_AUTHTYPE_FILS_PK) + return false; + if (!wiphy_ext_feature_isset( + &rdev->wiphy, + NL80211_EXT_FEATURE_FILS_SK_OFFLOAD) && + auth_type == NL80211_AUTHTYPE_FILS_SK) + return false; + return true; case NL80211_CMD_START_AP: /* SAE not supported yet */ if (auth_type == NL80211_AUTHTYPE_SAE) @@ -8906,6 +8928,35 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) } } + if (wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_FILS_SK_OFFLOAD) && + 
info->attrs[NL80211_ATTR_FILS_ERP_USERNAME] && + info->attrs[NL80211_ATTR_FILS_ERP_REALM] && + info->attrs[NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM] && + info->attrs[NL80211_ATTR_FILS_ERP_RRK]) { + connect.fils_erp_username = + nla_data(info->attrs[NL80211_ATTR_FILS_ERP_USERNAME]); + connect.fils_erp_username_len = + nla_len(info->attrs[NL80211_ATTR_FILS_ERP_USERNAME]); + connect.fils_erp_realm = + nla_data(info->attrs[NL80211_ATTR_FILS_ERP_REALM]); + connect.fils_erp_realm_len = + nla_len(info->attrs[NL80211_ATTR_FILS_ERP_REALM]); + connect.fils_erp_next_seq_num = + nla_get_u16( + info->attrs[NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM]); + connect.fils_erp_rrk = + nla_data(info->attrs[NL80211_ATTR_FILS_ERP_RRK]); + connect.fils_erp_rrk_len = + nla_len(info->attrs[NL80211_ATTR_FILS_ERP_RRK]); + } else if (info->attrs[NL80211_ATTR_FILS_ERP_USERNAME] || + info->attrs[NL80211_ATTR_FILS_ERP_REALM] || + info->attrs[NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM] || + info->attrs[NL80211_ATTR_FILS_ERP_RRK]) { + kzfree(connkeys); + return -EINVAL; + } + wdev_lock(dev->ieee80211_ptr); err = cfg80211_connect(rdev, dev, &connect, connkeys, @@ -9025,14 +9076,28 @@ static int nl80211_setdel_pmksa(struct sk_buff *skb, struct genl_info *info) memset(&pmksa, 0, sizeof(struct cfg80211_pmksa)); - if (!info->attrs[NL80211_ATTR_MAC]) - return -EINVAL; - if (!info->attrs[NL80211_ATTR_PMKID]) return -EINVAL; pmksa.pmkid = nla_data(info->attrs[NL80211_ATTR_PMKID]); - pmksa.bssid = nla_data(info->attrs[NL80211_ATTR_MAC]); + + if (info->attrs[NL80211_ATTR_MAC]) { + pmksa.bssid = nla_data(info->attrs[NL80211_ATTR_MAC]); + } else if (info->attrs[NL80211_ATTR_SSID] && + info->attrs[NL80211_ATTR_FILS_CACHE_ID] && + (info->genlhdr->cmd == NL80211_CMD_DEL_PMKSA || + info->attrs[NL80211_ATTR_PMK])) { + pmksa.ssid = nla_data(info->attrs[NL80211_ATTR_SSID]); + pmksa.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]); + pmksa.cache_id = + nla_data(info->attrs[NL80211_ATTR_FILS_CACHE_ID]); + } else { + return -EINVAL; + } + 
if (info->attrs[NL80211_ATTR_PMK]) { + pmksa.pmk = nla_data(info->attrs[NL80211_ATTR_PMK]); + pmksa.pmk_len = nla_len(info->attrs[NL80211_ATTR_PMK]); + } if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION && dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT) @@ -13471,7 +13536,9 @@ void nl80211_send_connect_result(struct cfg80211_registered_device *rdev, struct sk_buff *msg; void *hdr; - msg = nlmsg_new(100 + cr->req_ie_len + cr->resp_ie_len, gfp); + msg = nlmsg_new(100 + cr->req_ie_len + cr->resp_ie_len + + cr->fils_kek_len + cr->pmk_len + + (cr->pmkid ? WLAN_PMKID_LEN : 0), gfp); if (!msg) return; @@ -13496,7 +13563,18 @@ void nl80211_send_connect_result(struct cfg80211_registered_device *rdev, nla_put(msg, NL80211_ATTR_REQ_IE, cr->req_ie_len, cr->req_ie)) || (cr->resp_ie && nla_put(msg, NL80211_ATTR_RESP_IE, cr->resp_ie_len, - cr->resp_ie))) + cr->resp_ie)) || + (cr->update_erp_next_seq_num && + nla_put_u16(msg, NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM, + cr->fils_erp_next_seq_num)) || + (cr->status == WLAN_STATUS_SUCCESS && + ((cr->fils_kek && + nla_put(msg, NL80211_ATTR_FILS_KEK, cr->fils_kek_len, + cr->fils_kek)) || + (cr->pmk && + nla_put(msg, NL80211_ATTR_PMK, cr->pmk_len, cr->pmk)) || + (cr->pmkid && + nla_put(msg, NL80211_ATTR_PMKID, WLAN_PMKID_LEN, cr->pmkid))))) goto nla_put_failure; genlmsg_end(msg, hdr); diff --git a/net/wireless/sme.c b/net/wireless/sme.c index ebd7adc27246..6459bb7c21f7 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -805,7 +805,9 @@ void cfg80211_connect_done(struct net_device *dev, } ev = kzalloc(sizeof(*ev) + (params->bssid ? ETH_ALEN : 0) + - params->req_ie_len + params->resp_ie_len, gfp); + params->req_ie_len + params->resp_ie_len + + params->fils_kek_len + params->pmk_len + + (params->pmkid ? 
WLAN_PMKID_LEN : 0), gfp); if (!ev) { cfg80211_put_bss(wdev->wiphy, params->bss); return; @@ -832,6 +834,27 @@ void cfg80211_connect_done(struct net_device *dev, params->resp_ie_len); next += params->resp_ie_len; } + if (params->fils_kek_len) { + ev->cr.fils_kek = next; + ev->cr.fils_kek_len = params->fils_kek_len; + memcpy((void *)ev->cr.fils_kek, params->fils_kek, + params->fils_kek_len); + next += params->fils_kek_len; + } + if (params->pmk_len) { + ev->cr.pmk = next; + ev->cr.pmk_len = params->pmk_len; + memcpy((void *)ev->cr.pmk, params->pmk, params->pmk_len); + next += params->pmk_len; + } + if (params->pmkid) { + ev->cr.pmkid = next; + memcpy((void *)ev->cr.pmkid, params->pmkid, WLAN_PMKID_LEN); + next += WLAN_PMKID_LEN; + } + ev->cr.update_erp_next_seq_num = params->update_erp_next_seq_num; + if (params->update_erp_next_seq_num) + ev->cr.fils_erp_next_seq_num = params->fils_erp_next_seq_num; if (params->bss) cfg80211_hold_bss(bss_from_pub(params->bss)); ev->cr.bss = params->bss; -- cgit v1.2.3-71-gd317 From aec390b937dcaf11c01483b8c348a2b004119aa7 Mon Sep 17 00:00:00 2001 From: Dong Jia Shi Date: Fri, 17 Mar 2017 04:17:30 +0100 Subject: vfio: ccw: define device_api strings Define vfio-ccw device API strings. CCW vendor driver using mediated device framework should use this string for device_api attribute. 
Reviewed-by: Pierre Morel Signed-off-by: Dong Jia Shi Acked-by: Alex Williamson Message-Id: <20170317031743.40128-4-bjsdjshi@linux.vnet.ibm.com> Signed-off-by: Cornelia Huck --- include/uapi/linux/vfio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 519eff362c1c..61837890c132 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -212,6 +212,7 @@ struct vfio_device_info { #define VFIO_DEVICE_API_PCI_STRING "vfio-pci" #define VFIO_DEVICE_API_PLATFORM_STRING "vfio-platform" #define VFIO_DEVICE_API_AMBA_STRING "vfio-amba" +#define VFIO_DEVICE_API_CCW_STRING "vfio-ccw" /** * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8, -- cgit v1.2.3-71-gd317 From 060d2b5afcc4f9e2d61e2b059e648f569b8dba9a Mon Sep 17 00:00:00 2001 From: Dong Jia Shi Date: Fri, 17 Mar 2017 04:17:34 +0100 Subject: vfio: ccw: introduce ccw_io_region To provide user-space a set of interfaces to: 1. pass in a ccw program to perform an I/O operation. 2. read back I/O results of the completed I/O operations. We introduce an MMIO region for the vfio-ccw device here. This region is defined to contain: 1. areas to store arguments that an ssch requires. 2. areas to store the I/O results. Using pwrite/pread to the device on this region, a user-space program could write/read data to/from the vfio-ccw device. 
Reviewed-by: Pierre Morel Signed-off-by: Dong Jia Shi Message-Id: <20170317031743.40128-8-bjsdjshi@linux.vnet.ibm.com> Signed-off-by: Cornelia Huck --- drivers/s390/cio/vfio_ccw_ops.c | 47 +++++++++++++++++++++++++++++++++++++ drivers/s390/cio/vfio_ccw_private.h | 4 ++++ include/uapi/linux/vfio_ccw.h | 24 +++++++++++++++++++ 3 files changed, 75 insertions(+) create mode 100644 include/uapi/linux/vfio_ccw.h (limited to 'include/uapi') diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c index b8a2fed58f02..6c06805839d8 100644 --- a/drivers/s390/cio/vfio_ccw_ops.c +++ b/drivers/s390/cio/vfio_ccw_ops.c @@ -127,6 +127,51 @@ void vfio_ccw_mdev_release(struct mdev_device *mdev) &private->nb); } +static ssize_t vfio_ccw_mdev_read(struct mdev_device *mdev, + char __user *buf, + size_t count, + loff_t *ppos) +{ + struct vfio_ccw_private *private; + struct ccw_io_region *region; + + if (*ppos + count > sizeof(*region)) + return -EINVAL; + + private = dev_get_drvdata(mdev_parent_dev(mdev)); + if (!private) + return -ENODEV; + + region = &private->io_region; + if (copy_to_user(buf, (void *)region + *ppos, count)) + return -EFAULT; + + return count; +} + +static ssize_t vfio_ccw_mdev_write(struct mdev_device *mdev, + const char __user *buf, + size_t count, + loff_t *ppos) +{ + struct vfio_ccw_private *private; + struct ccw_io_region *region; + + if (*ppos + count > sizeof(*region)) + return -EINVAL; + + private = dev_get_drvdata(mdev_parent_dev(mdev)); + if (!private) + return -ENODEV; + + region = &private->io_region; + if (copy_from_user((void *)region + *ppos, buf, count)) + return -EFAULT; + region->ret_code = 0; + + return count; +} + static const struct mdev_parent_ops vfio_ccw_mdev_ops = { .owner = THIS_MODULE, .supported_type_groups = mdev_type_groups, @@ -134,6 +179,8 @@ static const struct mdev_parent_ops vfio_ccw_mdev_ops = { .remove = vfio_ccw_mdev_remove, .open = vfio_ccw_mdev_open, .release = vfio_ccw_mdev_release, + .read = 
vfio_ccw_mdev_read, + .write = vfio_ccw_mdev_write, }; int vfio_ccw_mdev_reg(struct subchannel *sch) diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h index 5afb3ba5c7b5..359e96ba9c6c 100644 --- a/drivers/s390/cio/vfio_ccw_private.h +++ b/drivers/s390/cio/vfio_ccw_private.h @@ -10,6 +10,8 @@ #ifndef _VFIO_CCW_PRIVATE_H_ #define _VFIO_CCW_PRIVATE_H_ +#include + #include "css.h" /** @@ -19,6 +21,7 @@ * @avail: available for creating a mediated device * @mdev: pointer to the mediated device * @nb: notifier for vfio events + * @io_region: MMIO region to input/output I/O arguments/results */ struct vfio_ccw_private { struct subchannel *sch; @@ -26,6 +29,7 @@ struct vfio_ccw_private { atomic_t avail; struct mdev_device *mdev; struct notifier_block nb; + struct ccw_io_region io_region; } __aligned(8); extern int vfio_ccw_mdev_reg(struct subchannel *sch); diff --git a/include/uapi/linux/vfio_ccw.h b/include/uapi/linux/vfio_ccw.h new file mode 100644 index 000000000000..34a7f6f9e065 --- /dev/null +++ b/include/uapi/linux/vfio_ccw.h @@ -0,0 +1,24 @@ +/* + * Interfaces for vfio-ccw + * + * Copyright IBM Corp. 2017 + * + * Author(s): Dong Jia Shi + */ + +#ifndef _VFIO_CCW_H_ +#define _VFIO_CCW_H_ + +#include + +struct ccw_io_region { +#define ORB_AREA_SIZE 12 + __u8 orb_area[ORB_AREA_SIZE]; +#define SCSW_AREA_SIZE 12 + __u8 scsw_area[SCSW_AREA_SIZE]; +#define IRB_AREA_SIZE 96 + __u8 irb_area[IRB_AREA_SIZE]; + __u32 ret_code; +} __packed; + +#endif -- cgit v1.2.3-71-gd317 From e01bcdd61320c91c826376e0a7dd96ef8e85dd18 Mon Sep 17 00:00:00 2001 From: Dong Jia Shi Date: Fri, 17 Mar 2017 04:17:36 +0100 Subject: vfio: ccw: realize VFIO_DEVICE_GET_REGION_INFO ioctl Introduce device information about vfio-ccw: VFIO_DEVICE_FLAGS_CCW. Realize VFIO_DEVICE_GET_REGION_INFO ioctl for vfio-ccw. 
Reviewed-by: Pierre Morel Signed-off-by: Dong Jia Shi Acked-by: Alex Williamson Message-Id: <20170317031743.40128-10-bjsdjshi@linux.vnet.ibm.com> Signed-off-by: Cornelia Huck --- drivers/s390/cio/vfio_ccw_ops.c | 78 +++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/vfio.h | 11 ++++++ 2 files changed, 89 insertions(+) (limited to 'include/uapi') diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c index 878c88239fc8..f3300ddded3f 100644 --- a/drivers/s390/cio/vfio_ccw_ops.c +++ b/drivers/s390/cio/vfio_ccw_ops.c @@ -188,6 +188,83 @@ static ssize_t vfio_ccw_mdev_write(struct mdev_device *mdev, return count; } +static int vfio_ccw_mdev_get_device_info(struct vfio_device_info *info) +{ + info->flags = VFIO_DEVICE_FLAGS_CCW; + info->num_regions = VFIO_CCW_NUM_REGIONS; + info->num_irqs = 0; + + return 0; +} + +static int vfio_ccw_mdev_get_region_info(struct vfio_region_info *info, + u16 *cap_type_id, + void **cap_type) +{ + switch (info->index) { + case VFIO_CCW_CONFIG_REGION_INDEX: + info->offset = 0; + info->size = sizeof(struct ccw_io_region); + info->flags = VFIO_REGION_INFO_FLAG_READ + | VFIO_REGION_INFO_FLAG_WRITE; + return 0; + default: + return -EINVAL; + } +} + +static ssize_t vfio_ccw_mdev_ioctl(struct mdev_device *mdev, + unsigned int cmd, + unsigned long arg) +{ + int ret = 0; + unsigned long minsz; + + switch (cmd) { + case VFIO_DEVICE_GET_INFO: + { + struct vfio_device_info info; + + minsz = offsetofend(struct vfio_device_info, num_irqs); + + if (copy_from_user(&info, (void __user *)arg, minsz)) + return -EFAULT; + + if (info.argsz < minsz) + return -EINVAL; + + ret = vfio_ccw_mdev_get_device_info(&info); + if (ret) + return ret; + + return copy_to_user((void __user *)arg, &info, minsz); + } + case VFIO_DEVICE_GET_REGION_INFO: + { + struct vfio_region_info info; + u16 cap_type_id = 0; + void *cap_type = NULL; + + minsz = offsetofend(struct vfio_region_info, offset); + + if (copy_from_user(&info, (void __user *)arg, 
minsz)) + return -EFAULT; + + if (info.argsz < minsz) + return -EINVAL; + + ret = vfio_ccw_mdev_get_region_info(&info, &cap_type_id, + &cap_type); + if (ret) + return ret; + + return copy_to_user((void __user *)arg, &info, minsz); + } + default: + return -ENOTTY; + } +} + static const struct mdev_parent_ops vfio_ccw_mdev_ops = { .owner = THIS_MODULE, .supported_type_groups = mdev_type_groups, @@ -197,6 +274,7 @@ static const struct mdev_parent_ops vfio_ccw_mdev_ops = { .release = vfio_ccw_mdev_release, .read = vfio_ccw_mdev_read, .write = vfio_ccw_mdev_write, + .ioctl = vfio_ccw_mdev_ioctl, }; int vfio_ccw_mdev_reg(struct subchannel *sch) diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 61837890c132..3fd70ffe9d78 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -198,6 +198,7 @@ struct vfio_device_info { #define VFIO_DEVICE_FLAGS_PCI (1 << 1) /* vfio-pci device */ #define VFIO_DEVICE_FLAGS_PLATFORM (1 << 2) /* vfio-platform device */ #define VFIO_DEVICE_FLAGS_AMBA (1 << 3) /* vfio-amba device */ +#define VFIO_DEVICE_FLAGS_CCW (1 << 4) /* vfio-ccw device */ __u32 num_regions; /* Max region index + 1 */ __u32 num_irqs; /* Max IRQ index + 1 */ }; @@ -447,6 +448,16 @@ enum { VFIO_PCI_NUM_IRQS }; +/* + * The vfio-ccw bus driver makes use of the following fixed region. + * Unimplemented regions return a size of zero. + */ + +enum { + VFIO_CCW_CONFIG_REGION_INDEX, + VFIO_CCW_NUM_REGIONS +}; + /** * VFIO_DEVICE_GET_PCI_HOT_RESET_INFO - _IORW(VFIO_TYPE, VFIO_BASE + 12, * struct vfio_pci_hot_reset_info) -- cgit v1.2.3-71-gd317 From 120e214e504fd6d3e33ec4b661193600b2faab95 Mon Sep 17 00:00:00 2001 From: Dong Jia Shi Date: Fri, 17 Mar 2017 04:17:38 +0100 Subject: vfio: ccw: realize VFIO_DEVICE_G(S)ET_IRQ_INFO ioctls Realize VFIO_DEVICE_GET_IRQ_INFO ioctl to retrieve VFIO_CCW_IO_IRQ information. Realize VFIO_DEVICE_SET_IRQS ioctl to set an eventfd fd for VFIO_CCW_IO_IRQ. 
Once a write operation to the ccw_io_region was performed, trigger a signal on this fd. Reviewed-by: Pierre Morel Signed-off-by: Dong Jia Shi Acked-by: Alex Williamson Message-Id: <20170317031743.40128-12-bjsdjshi@linux.vnet.ibm.com> Signed-off-by: Cornelia Huck --- drivers/s390/cio/vfio_ccw_ops.c | 123 +++++++++++++++++++++++++++++++++++- drivers/s390/cio/vfio_ccw_private.h | 4 ++ include/uapi/linux/vfio.h | 10 ++- 3 files changed, 134 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c index 125818cdf305..1294c5347410 100644 --- a/drivers/s390/cio/vfio_ccw_ops.c +++ b/drivers/s390/cio/vfio_ccw_ops.c @@ -202,6 +202,9 @@ static ssize_t vfio_ccw_mdev_write(struct mdev_device *mdev, if (region->ret_code != 0) return region->ret_code; + if (private->io_trigger) + eventfd_signal(private->io_trigger, 1); + return count; } @@ -209,7 +212,7 @@ static int vfio_ccw_mdev_get_device_info(struct vfio_device_info *info) { info->flags = VFIO_DEVICE_FLAGS_CCW | VFIO_DEVICE_FLAGS_RESET; info->num_regions = VFIO_CCW_NUM_REGIONS; - info->num_irqs = 0; + info->num_irqs = VFIO_CCW_NUM_IRQS; return 0; } @@ -230,6 +233,83 @@ static int vfio_ccw_mdev_get_region_info(struct vfio_region_info *info, } } +int vfio_ccw_mdev_get_irq_info(struct vfio_irq_info *info) +{ + if (info->index != VFIO_CCW_IO_IRQ_INDEX) + return -EINVAL; + + info->count = 1; + info->flags = VFIO_IRQ_INFO_EVENTFD; + + return 0; +} + +static int vfio_ccw_mdev_set_irqs(struct mdev_device *mdev, + uint32_t flags, + void __user *data) +{ + struct vfio_ccw_private *private; + struct eventfd_ctx **ctx; + + if (!(flags & VFIO_IRQ_SET_ACTION_TRIGGER)) + return -EINVAL; + + private = dev_get_drvdata(mdev_parent_dev(mdev)); + if (!private) + return -ENODEV; + + ctx = &private->io_trigger; + + switch (flags & VFIO_IRQ_SET_DATA_TYPE_MASK) { + case VFIO_IRQ_SET_DATA_NONE: + { + if (*ctx) + eventfd_signal(*ctx, 1); + return 0; + } + case 
VFIO_IRQ_SET_DATA_BOOL: + { + uint8_t trigger; + + if (get_user(trigger, (uint8_t __user *)data)) + return -EFAULT; + + if (trigger && *ctx) + eventfd_signal(*ctx, 1); + return 0; + } + case VFIO_IRQ_SET_DATA_EVENTFD: + { + int32_t fd; + + if (get_user(fd, (int32_t __user *)data)) + return -EFAULT; + + if (fd == -1) { + if (*ctx) + eventfd_ctx_put(*ctx); + *ctx = NULL; + } else if (fd >= 0) { + struct eventfd_ctx *efdctx; + + efdctx = eventfd_ctx_fdget(fd); + if (IS_ERR(efdctx)) + return PTR_ERR(efdctx); + + if (*ctx) + eventfd_ctx_put(*ctx); + + *ctx = efdctx; + } else + return -EINVAL; + + return 0; + } + default: + return -EINVAL; + } +} + static ssize_t vfio_ccw_mdev_ioctl(struct mdev_device *mdev, unsigned int cmd, unsigned long arg) @@ -277,6 +357,47 @@ static ssize_t vfio_ccw_mdev_ioctl(struct mdev_device *mdev, return copy_to_user((void __user *)arg, &info, minsz); } + case VFIO_DEVICE_GET_IRQ_INFO: + { + struct vfio_irq_info info; + + minsz = offsetofend(struct vfio_irq_info, count); + + if (copy_from_user(&info, (void __user *)arg, minsz)) + return -EFAULT; + + if (info.argsz < minsz || info.index >= VFIO_CCW_NUM_IRQS) + return -EINVAL; + + ret = vfio_ccw_mdev_get_irq_info(&info); + if (ret) + return ret; + + if (info.count == -1) + return -EINVAL; + + return copy_to_user((void __user *)arg, &info, minsz); + } + case VFIO_DEVICE_SET_IRQS: + { + struct vfio_irq_set hdr; + size_t data_size; + void __user *data; + + minsz = offsetofend(struct vfio_irq_set, count); + + if (copy_from_user(&hdr, (void __user *)arg, minsz)) + return -EFAULT; + + ret = vfio_set_irqs_validate_and_prepare(&hdr, 1, + VFIO_CCW_NUM_IRQS, + &data_size); + if (ret) + return ret; + + data = (void __user *)(arg + minsz); + return vfio_ccw_mdev_set_irqs(mdev, hdr.flags, data); + } case VFIO_DEVICE_RESET: return vfio_ccw_mdev_reset(mdev); default: diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h index 79e53378f212..dddab52913ed 100644 --- 
a/drivers/s390/cio/vfio_ccw_private.h +++ b/drivers/s390/cio/vfio_ccw_private.h @@ -11,6 +11,7 @@ #define _VFIO_CCW_PRIVATE_H_ #include +#include #include #include "css.h" @@ -29,6 +30,7 @@ * @cp: channel program for the current I/O operation * @irb: irb info received from interrupt * @scsw: scsw info + * @io_trigger: eventfd ctx for signaling userspace I/O results */ struct vfio_ccw_private { struct subchannel *sch; @@ -43,6 +45,8 @@ struct vfio_ccw_private { struct channel_program cp; struct irb irb; union scsw scsw; + + struct eventfd_ctx *io_trigger; } __aligned(8); extern int vfio_ccw_mdev_reg(struct subchannel *sch); diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 3fd70ffe9d78..ae461050661a 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -449,8 +449,9 @@ enum { }; /* - * The vfio-ccw bus driver makes use of the following fixed region. - * Unimplemented regions return a size of zero. + * The vfio-ccw bus driver makes use of the following fixed region and + * IRQ index mapping. Unimplemented regions return a size of zero. + * Unimplemented IRQ types return a count of zero. */ enum { @@ -458,6 +459,11 @@ enum { VFIO_CCW_NUM_REGIONS }; +enum { + VFIO_CCW_IO_IRQ_INDEX, + VFIO_CCW_NUM_IRQS +}; + /** * VFIO_DEVICE_GET_PCI_HOT_RESET_INFO - _IORW(VFIO_TYPE, VFIO_BASE + 12, * struct vfio_pci_hot_reset_info) -- cgit v1.2.3-71-gd317 From 2f9545cec6ffd490b08c5675c94c249f169a7e87 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Thu, 23 Mar 2017 15:26:16 -0700 Subject: drm/vmwgfx: Define an overlaid handle_close ioctl. Instead of providing an ioctl for each handle type, provide a single handle_close ioctl, and reuse the UNREF_DMABUF ioctl. 
Signed-off-by: Thomas Hellstrom Reviewed-by: Sinclair Yeh --- include/uapi/drm/vmwgfx_drm.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/vmwgfx_drm.h b/include/uapi/drm/vmwgfx_drm.h index d325a4107916..d9dfde9aa757 100644 --- a/include/uapi/drm/vmwgfx_drm.h +++ b/include/uapi/drm/vmwgfx_drm.h @@ -41,6 +41,7 @@ extern "C" { #define DRM_VMW_GET_PARAM 0 #define DRM_VMW_ALLOC_DMABUF 1 #define DRM_VMW_UNREF_DMABUF 2 +#define DRM_VMW_HANDLE_CLOSE 2 #define DRM_VMW_CURSOR_BYPASS 3 /* guarded by DRM_VMW_PARAM_NUM_STREAMS != 0*/ #define DRM_VMW_CONTROL_STREAM 4 @@ -1092,6 +1093,29 @@ union drm_vmw_extended_context_arg { struct drm_vmw_context_arg rep; }; +/*************************************************************************/ +/* + * DRM_VMW_HANDLE_CLOSE - Close a user-space handle and release its + * underlying resource. + * + * Note that this ioctl is overlaid on the DRM_VMW_UNREF_DMABUF Ioctl. + * The ioctl arguments therefore need to be identical in layout. + * + */ + +/** + * struct drm_vmw_handle_close_arg + * + * @handle: Handle to close. + * + * Argument to the DRM_VMW_HANDLE_CLOSE Ioctl. + */ +struct drm_vmw_handle_close_arg { + __u32 handle; + __u32 pad64; +}; + + #if defined(__cplusplus) } #endif -- cgit v1.2.3-71-gd317 From 1cf1cae963c2e6032aebe1637e995bc2f5d330f4 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 30 Mar 2017 21:45:38 -0700 Subject: bpf: introduce BPF_PROG_TEST_RUN command development and testing of networking bpf programs is quite cumbersome. Despite availability of user space bpf interpreters the kernel is the ultimate authority and execution environment. Current test frameworks for TC include creation of netns, veth, qdiscs and use of various packet generators just to test functionality of a bpf program. 
XDP testing is even more complicated, since qemu needs to be started with gro/gso disabled and precise queue configuration, transferring of xdp program from host into guest, attaching to virtio/eth0 and generating traffic from the host while capturing the results from the guest. Moreover analyzing performance bottlenecks in XDP program is impossible in virtio environment, since cost of running the program is tiny comparing to the overhead of virtio packet processing, so performance testing can only be done on physical nic with another server generating traffic. Furthermore ongoing changes to user space control plane of production applications cannot be run on the test servers leaving bpf programs stubbed out for testing. Last but not least, the upstream llvm changes are validated by the bpf backend testsuite which has no ability to test the code generated. To improve this situation introduce BPF_PROG_TEST_RUN command to test and performance benchmark bpf programs. Joint work with Daniel Borkmann. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Acked-by: Martin KaFai Lau Signed-off-by: David S. 
Miller --- include/linux/bpf.h | 7 ++ include/uapi/linux/bpf.h | 12 ++++ kernel/bpf/syscall.c | 27 +++++++- net/Makefile | 2 +- net/bpf/Makefile | 1 + net/bpf/test_run.c | 172 +++++++++++++++++++++++++++++++++++++++++++++++ net/core/filter.c | 5 ++ 7 files changed, 223 insertions(+), 3 deletions(-) create mode 100644 net/bpf/Makefile create mode 100644 net/bpf/test_run.c (limited to 'include/uapi') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 2ae39a3e9ead..bbb513da5075 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -169,6 +169,8 @@ struct bpf_verifier_ops { const struct bpf_insn *src, struct bpf_insn *dst, struct bpf_prog *prog); + int (*test_run)(struct bpf_prog *prog, const union bpf_attr *kattr, + union bpf_attr __user *uattr); }; struct bpf_prog_type_list { @@ -233,6 +235,11 @@ typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src, u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy); +int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, + union bpf_attr __user *uattr); +int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, + union bpf_attr __user *uattr); + #ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 28317a04c34d..a1d95386f562 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -81,6 +81,7 @@ enum bpf_cmd { BPF_OBJ_GET, BPF_PROG_ATTACH, BPF_PROG_DETACH, + BPF_PROG_TEST_RUN, }; enum bpf_map_type { @@ -189,6 +190,17 @@ union bpf_attr { __u32 attach_type; __u32 attach_flags; }; + + struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */ + __u32 prog_fd; + __u32 retval; + __u32 data_size_in; + __u32 data_size_out; + __aligned_u64 data_in; + __aligned_u64 data_out; + __u32 repeat; + __u32 duration; + } test; } __attribute__((aligned(8))); /* BPF helper function descriptions: 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index c35ebfe6d84d..ab0cf4c43690 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -973,6 +973,28 @@ static int bpf_prog_detach(const union bpf_attr *attr) } #endif /* CONFIG_CGROUP_BPF */ +#define BPF_PROG_TEST_RUN_LAST_FIELD test.duration + +static int bpf_prog_test_run(const union bpf_attr *attr, + union bpf_attr __user *uattr) +{ + struct bpf_prog *prog; + int ret = -ENOTSUPP; + + if (CHECK_ATTR(BPF_PROG_TEST_RUN)) + return -EINVAL; + + prog = bpf_prog_get(attr->test.prog_fd); + if (IS_ERR(prog)) + return PTR_ERR(prog); + + if (prog->aux->ops->test_run) + ret = prog->aux->ops->test_run(prog, attr, uattr); + + bpf_prog_put(prog); + return ret; +} + SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) { union bpf_attr attr = {}; @@ -1039,7 +1061,6 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz case BPF_OBJ_GET: err = bpf_obj_get(&attr); break; - #ifdef CONFIG_CGROUP_BPF case BPF_PROG_ATTACH: err = bpf_prog_attach(&attr); @@ -1048,7 +1069,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz err = bpf_prog_detach(&attr); break; #endif - + case BPF_PROG_TEST_RUN: + err = bpf_prog_test_run(&attr, uattr); + break; default: err = -EINVAL; break; diff --git a/net/Makefile b/net/Makefile index 9b681550e3a3..9086ffbb5085 100644 --- a/net/Makefile +++ b/net/Makefile @@ -12,7 +12,7 @@ obj-$(CONFIG_NET) += $(tmp-y) # LLC has to be linked before the files in net/802/ obj-$(CONFIG_LLC) += llc/ -obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/ +obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/ bpf/ obj-$(CONFIG_NETFILTER) += netfilter/ obj-$(CONFIG_INET) += ipv4/ obj-$(CONFIG_XFRM) += xfrm/ diff --git a/net/bpf/Makefile b/net/bpf/Makefile new file mode 100644 index 000000000000..27b2992a0692 --- /dev/null +++ b/net/bpf/Makefile @@ -0,0 +1 @@ +obj-y := test_run.o diff --git a/net/bpf/test_run.c 
b/net/bpf/test_run.c new file mode 100644 index 000000000000..8a6d0a37c30c --- /dev/null +++ b/net/bpf/test_run.c @@ -0,0 +1,172 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include + +static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx) +{ + u32 ret; + + preempt_disable(); + rcu_read_lock(); + ret = BPF_PROG_RUN(prog, ctx); + rcu_read_unlock(); + preempt_enable(); + + return ret; +} + +static u32 bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *time) +{ + u64 time_start, time_spent = 0; + u32 ret = 0, i; + + if (!repeat) + repeat = 1; + time_start = ktime_get_ns(); + for (i = 0; i < repeat; i++) { + ret = bpf_test_run_one(prog, ctx); + if (need_resched()) { + if (signal_pending(current)) + break; + time_spent += ktime_get_ns() - time_start; + cond_resched(); + time_start = ktime_get_ns(); + } + } + time_spent += ktime_get_ns() - time_start; + do_div(time_spent, repeat); + *time = time_spent > U32_MAX ? 
U32_MAX : (u32)time_spent; + + return ret; +} + +static int bpf_test_finish(union bpf_attr __user *uattr, const void *data, + u32 size, u32 retval, u32 duration) +{ + void __user *data_out = u64_to_user_ptr(uattr->test.data_out); + int err = -EFAULT; + + if (data_out && copy_to_user(data_out, data, size)) + goto out; + if (copy_to_user(&uattr->test.data_size_out, &size, sizeof(size))) + goto out; + if (copy_to_user(&uattr->test.retval, &retval, sizeof(retval))) + goto out; + if (copy_to_user(&uattr->test.duration, &duration, sizeof(duration))) + goto out; + err = 0; +out: + return err; +} + +static void *bpf_test_init(const union bpf_attr *kattr, u32 size, + u32 headroom, u32 tailroom) +{ + void __user *data_in = u64_to_user_ptr(kattr->test.data_in); + void *data; + + if (size < ETH_HLEN || size > PAGE_SIZE - headroom - tailroom) + return ERR_PTR(-EINVAL); + + data = kzalloc(size + headroom + tailroom, GFP_USER); + if (!data) + return ERR_PTR(-ENOMEM); + + if (copy_from_user(data + headroom, data_in, size)) { + kfree(data); + return ERR_PTR(-EFAULT); + } + return data; +} + +int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, + union bpf_attr __user *uattr) +{ + bool is_l2 = false, is_direct_pkt_access = false; + u32 size = kattr->test.data_size_in; + u32 repeat = kattr->test.repeat; + u32 retval, duration; + struct sk_buff *skb; + void *data; + int ret; + + data = bpf_test_init(kattr, size, NET_SKB_PAD, + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))); + if (IS_ERR(data)) + return PTR_ERR(data); + + switch (prog->type) { + case BPF_PROG_TYPE_SCHED_CLS: + case BPF_PROG_TYPE_SCHED_ACT: + is_l2 = true; + /* fall through */ + case BPF_PROG_TYPE_LWT_IN: + case BPF_PROG_TYPE_LWT_OUT: + case BPF_PROG_TYPE_LWT_XMIT: + is_direct_pkt_access = true; + break; + default: + break; + } + + skb = build_skb(data, 0); + if (!skb) { + kfree(data); + return -ENOMEM; + } + + skb_reserve(skb, NET_SKB_PAD); + __skb_put(skb, size); + skb->protocol = 
eth_type_trans(skb, current->nsproxy->net_ns->loopback_dev); + skb_reset_network_header(skb); + + if (is_l2) + __skb_push(skb, ETH_HLEN); + if (is_direct_pkt_access) + bpf_compute_data_end(skb); + retval = bpf_test_run(prog, skb, repeat, &duration); + if (!is_l2) + __skb_push(skb, ETH_HLEN); + size = skb->len; + /* bpf program can never convert linear skb to non-linear */ + if (WARN_ON_ONCE(skb_is_nonlinear(skb))) + size = skb_headlen(skb); + ret = bpf_test_finish(uattr, skb->data, size, retval, duration); + kfree_skb(skb); + return ret; +} + +int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, + union bpf_attr __user *uattr) +{ + u32 size = kattr->test.data_size_in; + u32 repeat = kattr->test.repeat; + struct xdp_buff xdp = {}; + u32 retval, duration; + void *data; + int ret; + + data = bpf_test_init(kattr, size, XDP_PACKET_HEADROOM, 0); + if (IS_ERR(data)) + return PTR_ERR(data); + + xdp.data_hard_start = data; + xdp.data = data + XDP_PACKET_HEADROOM; + xdp.data_end = xdp.data + size; + + retval = bpf_test_run(prog, &xdp, repeat, &duration); + if (xdp.data != data + XDP_PACKET_HEADROOM) + size = xdp.data_end - xdp.data; + ret = bpf_test_finish(uattr, xdp.data, size, retval, duration); + kfree(data); + return ret; +} diff --git a/net/core/filter.c b/net/core/filter.c index dfb9f61a2fd5..15e9a81ffebe 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3309,24 +3309,28 @@ static const struct bpf_verifier_ops tc_cls_act_ops = { .is_valid_access = tc_cls_act_is_valid_access, .convert_ctx_access = tc_cls_act_convert_ctx_access, .gen_prologue = tc_cls_act_prologue, + .test_run = bpf_prog_test_run_skb, }; static const struct bpf_verifier_ops xdp_ops = { .get_func_proto = xdp_func_proto, .is_valid_access = xdp_is_valid_access, .convert_ctx_access = xdp_convert_ctx_access, + .test_run = bpf_prog_test_run_xdp, }; static const struct bpf_verifier_ops cg_skb_ops = { .get_func_proto = cg_skb_func_proto, .is_valid_access = 
sk_filter_is_valid_access, .convert_ctx_access = bpf_convert_ctx_access, + .test_run = bpf_prog_test_run_skb, }; static const struct bpf_verifier_ops lwt_inout_ops = { .get_func_proto = lwt_inout_func_proto, .is_valid_access = lwt_is_valid_access, .convert_ctx_access = bpf_convert_ctx_access, + .test_run = bpf_prog_test_run_skb, }; static const struct bpf_verifier_ops lwt_xmit_ops = { @@ -3334,6 +3338,7 @@ static const struct bpf_verifier_ops lwt_xmit_ops = { .is_valid_access = lwt_is_valid_access, .convert_ctx_access = bpf_convert_ctx_access, .gen_prologue = tc_cls_act_prologue, + .test_run = bpf_prog_test_run_skb, }; static const struct bpf_verifier_ops cg_sock_ops = { -- cgit v1.2.3-71-gd317 From 47071aee6a1956524b9929b3b821f6d2f8cae23c Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 31 Mar 2017 18:32:10 +0100 Subject: statx: Reserve the top bit of the mask for future struct expansion Reserve the top bit of the mask for future expansion of the statx struct and give an error if statx() sees it set. All the other bits are ignored if we see them set but don't support the bit; we just clear the bit in the returned mask. 
Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/stat.c | 2 ++ include/uapi/linux/stat.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/fs/stat.c b/fs/stat.c index ab27f2868588..0c7e6cdc435c 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -562,6 +562,8 @@ SYSCALL_DEFINE5(statx, struct kstat stat; int error; + if (mask & STATX__RESERVED) + return -EINVAL; if ((flags & AT_STATX_SYNC_TYPE) == AT_STATX_SYNC_TYPE) return -EINVAL; diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h index 51a6b86e3700..0869b9eaa8ce 100644 --- a/include/uapi/linux/stat.h +++ b/include/uapi/linux/stat.h @@ -152,6 +152,7 @@ struct statx { #define STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */ #define STATX_BTIME 0x00000800U /* Want/got stx_btime */ #define STATX_ALL 0x00000fffU /* All currently supported flags */ +#define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ /* * Attributes to be found in stx_attributes -- cgit v1.2.3-71-gd317 From 3209f68b3ca4667069923a325c88b21131bfdf9f Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 31 Mar 2017 18:32:17 +0100 Subject: statx: Include a mask for stx_attributes in struct statx Include a mask in struct stat to indicate which bits of stx_attributes the filesystem actually supports. This would also be useful if we add another system call that allows you to do a 'bulk attribute set' and pass in a statx struct with the masks appropriately set to say what you want to set. 
Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/ext4/inode.c | 6 ++++++ fs/stat.c | 1 + include/linux/stat.h | 1 + include/uapi/linux/stat.h | 4 ++-- samples/statx/test-statx.c | 12 ++++++++---- 5 files changed, 18 insertions(+), 6 deletions(-) (limited to 'include/uapi') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 5d02b922afa3..b9ffa9f4191f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5413,6 +5413,12 @@ int ext4_getattr(const struct path *path, struct kstat *stat, if (flags & EXT4_NODUMP_FL) stat->attributes |= STATX_ATTR_NODUMP; + stat->attributes_mask |= (STATX_ATTR_APPEND | + STATX_ATTR_COMPRESSED | + STATX_ATTR_ENCRYPTED | + STATX_ATTR_IMMUTABLE | + STATX_ATTR_NODUMP); + generic_fillattr(inode, stat); return 0; } diff --git a/fs/stat.c b/fs/stat.c index 0c7e6cdc435c..c6c963b2546b 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -527,6 +527,7 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer) tmp.stx_ino = stat->ino; tmp.stx_size = stat->size; tmp.stx_blocks = stat->blocks; + tmp.stx_attributes_mask = stat->attributes_mask; tmp.stx_atime.tv_sec = stat->atime.tv_sec; tmp.stx_atime.tv_nsec = stat->atime.tv_nsec; tmp.stx_btime.tv_sec = stat->btime.tv_sec; diff --git a/include/linux/stat.h b/include/linux/stat.h index c76e524fb34b..64b6b3aece21 100644 --- a/include/linux/stat.h +++ b/include/linux/stat.h @@ -26,6 +26,7 @@ struct kstat { unsigned int nlink; uint32_t blksize; /* Preferred I/O size */ u64 attributes; + u64 attributes_mask; #define KSTAT_ATTR_FS_IOC_FLAGS \ (STATX_ATTR_COMPRESSED | \ STATX_ATTR_IMMUTABLE | \ diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h index 0869b9eaa8ce..d538897b8e08 100644 --- a/include/uapi/linux/stat.h +++ b/include/uapi/linux/stat.h @@ -114,7 +114,7 @@ struct statx { __u64 stx_ino; /* Inode number */ __u64 stx_size; /* File size */ __u64 stx_blocks; /* Number of 512-byte blocks allocated */ - __u64 __spare1[1]; + __u64 stx_attributes_mask; /* Mask to show what's 
supported in stx_attributes */ /* 0x40 */ struct statx_timestamp stx_atime; /* Last access time */ struct statx_timestamp stx_btime; /* File creation time */ @@ -155,7 +155,7 @@ struct statx { #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ /* - * Attributes to be found in stx_attributes + * Attributes to be found in stx_attributes and masked in stx_attributes_mask. * * These give information about the features or the state of a file that might * be of use to ordinary userspace programs such as GUIs or ls rather than diff --git a/samples/statx/test-statx.c b/samples/statx/test-statx.c index 8571d766331d..d4d77b09412c 100644 --- a/samples/statx/test-statx.c +++ b/samples/statx/test-statx.c @@ -141,8 +141,8 @@ static void dump_statx(struct statx *stx) if (stx->stx_mask & STATX_BTIME) print_time(" Birth: ", &stx->stx_btime); - if (stx->stx_attributes) { - unsigned char bits; + if (stx->stx_attributes_mask) { + unsigned char bits, mbits; int loop, byte; static char attr_representation[64 + 1] = @@ -160,14 +160,18 @@ static void dump_statx(struct statx *stx) printf("Attributes: %016llx (", stx->stx_attributes); for (byte = 64 - 8; byte >= 0; byte -= 8) { bits = stx->stx_attributes >> byte; + mbits = stx->stx_attributes_mask >> byte; for (loop = 7; loop >= 0; loop--) { int bit = byte + loop; - if (bits & 0x80) + if (!(mbits & 0x80)) + putchar('.'); /* Not supported */ + else if (bits & 0x80) putchar(attr_representation[63 - bit]); else - putchar('-'); + putchar('-'); /* Not set */ bits <<= 1; + mbits <<= 1; } if (byte) putchar(' '); -- cgit v1.2.3-71-gd317 From d0353118fd589c127875290017c7fdd266937bee Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Mon, 3 Apr 2017 16:42:34 +0200 Subject: media: uapi: Add RGB and YUV bus formats for Synopsys HDMI TX Controller In order to describe the RGB and YUV bus formats used to feed the Synopsys DesignWare HDMI TX Controller, add missing formats to the list of Bus Formats. 
Documentation for these formats is added in a separate patch. Reviewed-by: Archit Taneja Reviewed-by: Jose Abreu Acked-by: Hans Verkuil Acked-by: Mauro Carvalho Chehab Signed-off-by: Neil Armstrong Signed-off-by: Sean Paul Link: http://patchwork.freedesktop.org/patch/msgid/1491230558-10804-3-git-send-email-narmstrong@baylibre.com --- include/uapi/linux/media-bus-format.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/media-bus-format.h b/include/uapi/linux/media-bus-format.h index 2168759c1287..ef6fb307d2ce 100644 --- a/include/uapi/linux/media-bus-format.h +++ b/include/uapi/linux/media-bus-format.h @@ -33,7 +33,7 @@ #define MEDIA_BUS_FMT_FIXED 0x0001 -/* RGB - next is 0x1018 */ +/* RGB - next is 0x101b */ #define MEDIA_BUS_FMT_RGB444_1X12 0x1016 #define MEDIA_BUS_FMT_RGB444_2X8_PADHI_BE 0x1001 #define MEDIA_BUS_FMT_RGB444_2X8_PADHI_LE 0x1002 @@ -57,8 +57,11 @@ #define MEDIA_BUS_FMT_RGB888_1X7X4_JEIDA 0x1012 #define MEDIA_BUS_FMT_ARGB8888_1X32 0x100d #define MEDIA_BUS_FMT_RGB888_1X32_PADHI 0x100f +#define MEDIA_BUS_FMT_RGB101010_1X30 0x1018 +#define MEDIA_BUS_FMT_RGB121212_1X36 0x1019 +#define MEDIA_BUS_FMT_RGB161616_1X48 0x101a -/* YUV (including grey) - next is 0x2026 */ +/* YUV (including grey) - next is 0x202c */ #define MEDIA_BUS_FMT_Y8_1X8 0x2001 #define MEDIA_BUS_FMT_UV8_1X8 0x2015 #define MEDIA_BUS_FMT_UYVY8_1_5X8 0x2002 @@ -90,12 +93,18 @@ #define MEDIA_BUS_FMT_YVYU10_1X20 0x200e #define MEDIA_BUS_FMT_VUY8_1X24 0x2024 #define MEDIA_BUS_FMT_YUV8_1X24 0x2025 +#define MEDIA_BUS_FMT_UYYVYY8_0_5X24 0x2026 #define MEDIA_BUS_FMT_UYVY12_1X24 0x2020 #define MEDIA_BUS_FMT_VYUY12_1X24 0x2021 #define MEDIA_BUS_FMT_YUYV12_1X24 0x2022 #define MEDIA_BUS_FMT_YVYU12_1X24 0x2023 #define MEDIA_BUS_FMT_YUV10_1X30 0x2016 +#define MEDIA_BUS_FMT_UYYVYY10_0_5X30 0x2027 #define MEDIA_BUS_FMT_AYUV8_1X32 0x2017 +#define MEDIA_BUS_FMT_UYYVYY12_0_5X36 0x2028 +#define MEDIA_BUS_FMT_YUV12_1X36 0x2029 
+#define MEDIA_BUS_FMT_YUV16_1X48 0x202a +#define MEDIA_BUS_FMT_UYYVYY16_0_5X48 0x202b /* Bayer - next is 0x3021 */ #define MEDIA_BUS_FMT_SBGGR8_1X8 0x3001 -- cgit v1.2.3-71-gd317 From d229d48d183fbc1391908decc7d2bcf09ca2f38f Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 1 Apr 2017 17:07:46 +0800 Subject: sctp: add SCTP_PR_STREAM_STATUS sockopt for prsctp Before when implementing sctp prsctp, SCTP_PR_STREAM_STATUS wasn't added, as it needs to save abandoned_(un)sent for every stream. After sctp stream reconf is added in sctp, assoc has structure sctp_stream_out to save per stream info. This patch is to add SCTP_PR_STREAM_STATUS by putting the prsctp per stream statistics into sctp_stream_out. v1->v2: fix an indent issue. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 2 ++ include/uapi/linux/sctp.h | 1 + net/sctp/chunk.c | 14 +++++++++-- net/sctp/outqueue.c | 10 ++++++++ net/sctp/socket.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 84 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 592decebac75..3e61a54424a1 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1315,6 +1315,8 @@ struct sctp_inithdr_host { struct sctp_stream_out { __u16 ssn; __u8 state; + __u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1]; + __u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1]; }; struct sctp_stream_in { diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index 7212870ef5d7..ced9d8b97426 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -115,6 +115,7 @@ typedef __s32 sctp_assoc_t; #define SCTP_PR_SUPPORTED 113 #define SCTP_DEFAULT_PRINFO 114 #define SCTP_PR_ASSOC_STATUS 115 +#define SCTP_PR_STREAM_STATUS 116 #define SCTP_RECONFIG_SUPPORTED 117 #define SCTP_ENABLE_STREAM_RESET 118 #define SCTP_RESET_STREAMS 119 diff --git a/net/sctp/chunk.c 
b/net/sctp/chunk.c index e3621cb4827f..697721a7a3f1 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -306,14 +306,24 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk) if (SCTP_PR_TTL_ENABLED(chunk->sinfo.sinfo_flags) && time_after(jiffies, chunk->msg->expires_at)) { - if (chunk->sent_count) + struct sctp_stream_out *streamout = + &chunk->asoc->stream->out[chunk->sinfo.sinfo_stream]; + + if (chunk->sent_count) { chunk->asoc->abandoned_sent[SCTP_PR_INDEX(TTL)]++; - else + streamout->abandoned_sent[SCTP_PR_INDEX(TTL)]++; + } else { chunk->asoc->abandoned_unsent[SCTP_PR_INDEX(TTL)]++; + streamout->abandoned_unsent[SCTP_PR_INDEX(TTL)]++; + } return 1; } else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) && chunk->sent_count > chunk->sinfo.sinfo_timetolive) { + struct sctp_stream_out *streamout = + &chunk->asoc->stream->out[chunk->sinfo.sinfo_stream]; + chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++; + streamout->abandoned_sent[SCTP_PR_INDEX(RTX)]++; return 1; } else if (!SCTP_PR_POLICY(chunk->sinfo.sinfo_flags) && chunk->msg->expires_at && diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 025ccff67072..3f78d7f06e14 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -353,6 +353,8 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc, struct sctp_chunk *chk, *temp; list_for_each_entry_safe(chk, temp, queue, transmitted_list) { + struct sctp_stream_out *streamout; + if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) || chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive) continue; @@ -361,8 +363,10 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc, sctp_insert_list(&asoc->outqueue.abandoned, &chk->transmitted_list); + streamout = &asoc->stream->out[chk->sinfo.sinfo_stream]; asoc->sent_cnt_removable--; asoc->abandoned_sent[SCTP_PR_INDEX(PRIO)]++; + streamout->abandoned_sent[SCTP_PR_INDEX(PRIO)]++; if (!chk->tsn_gap_acked) { if (chk->transport) @@ -396,6 +400,12 @@ static int 
sctp_prsctp_prune_unsent(struct sctp_association *asoc, q->out_qlen -= chk->skb->len; asoc->sent_cnt_removable--; asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; + if (chk->sinfo.sinfo_stream < asoc->stream->outcnt) { + struct sctp_stream_out *streamout = + &asoc->stream->out[chk->sinfo.sinfo_stream]; + + streamout->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; + } msg_len -= SCTP_DATA_SNDSIZE(chk) + sizeof(struct sk_buff) + diff --git a/net/sctp/socket.c b/net/sctp/socket.c index ccc08fc39722..6489446925e6 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -6576,6 +6576,61 @@ out: return retval; } +static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len, + char __user *optval, + int __user *optlen) +{ + struct sctp_stream_out *streamout; + struct sctp_association *asoc; + struct sctp_prstatus params; + int retval = -EINVAL; + int policy; + + if (len < sizeof(params)) + goto out; + + len = sizeof(params); + if (copy_from_user(&params, optval, len)) { + retval = -EFAULT; + goto out; + } + + policy = params.sprstat_policy; + if (policy & ~SCTP_PR_SCTP_MASK) + goto out; + + asoc = sctp_id2assoc(sk, params.sprstat_assoc_id); + if (!asoc || params.sprstat_sid >= asoc->stream->outcnt) + goto out; + + streamout = &asoc->stream->out[params.sprstat_sid]; + if (policy == SCTP_PR_SCTP_NONE) { + params.sprstat_abandoned_unsent = 0; + params.sprstat_abandoned_sent = 0; + for (policy = 0; policy <= SCTP_PR_INDEX(MAX); policy++) { + params.sprstat_abandoned_unsent += + streamout->abandoned_unsent[policy]; + params.sprstat_abandoned_sent += + streamout->abandoned_sent[policy]; + } + } else { + params.sprstat_abandoned_unsent = + streamout->abandoned_unsent[__SCTP_PR_INDEX(policy)]; + params.sprstat_abandoned_sent = + streamout->abandoned_sent[__SCTP_PR_INDEX(policy)]; + } + + if (put_user(len, optlen) || copy_to_user(optval, &params, len)) { + retval = -EFAULT; + goto out; + } + + retval = 0; + +out: + return retval; +} + static int sctp_getsockopt_reconfig_supported(struct 
sock *sk, int len, char __user *optval, int __user *optlen) @@ -6825,6 +6880,10 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname, retval = sctp_getsockopt_pr_assocstatus(sk, len, optval, optlen); break; + case SCTP_PR_STREAM_STATUS: + retval = sctp_getsockopt_pr_streamstatus(sk, len, optval, + optlen); + break; case SCTP_RECONFIG_SUPPORTED: retval = sctp_getsockopt_reconfig_supported(sk, len, optval, optlen); -- cgit v1.2.3-71-gd317 From 80c9f490f344be7999f57fc31a8ed956f8c65f3b Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 28 Mar 2017 14:56:34 -0700 Subject: vfs: add common GETFSMAP ioctl definitions Add the GETFSMAP headers to the VFS kernel headers Signed-off-by: Darrick J. Wong --- include/uapi/linux/fsmap.h | 112 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 include/uapi/linux/fsmap.h (limited to 'include/uapi') diff --git a/include/uapi/linux/fsmap.h b/include/uapi/linux/fsmap.h new file mode 100644 index 000000000000..7e8e5f0bd6d2 --- /dev/null +++ b/include/uapi/linux/fsmap.h @@ -0,0 +1,112 @@ +/* + * FS_IOC_GETFSMAP ioctl infrastructure. + * + * Copyright (C) 2017 Oracle. All Rights Reserved. + * + * Author: Darrick J. Wong + */ +#ifndef _LINUX_FSMAP_H +#define _LINUX_FSMAP_H + +#include + +/* + * Structure for FS_IOC_GETFSMAP. + * + * The memory layout for this call are the scalar values defined in + * struct fsmap_head, followed by two struct fsmap that describe + * the lower and upper bound of mappings to return, followed by an + * array of struct fsmap mappings. + * + * fmh_iflags control the output of the call, whereas fmh_oflags report + * on the overall record output. fmh_count should be set to the + * length of the fmh_recs array, and fmh_entries will be set to the + * number of entries filled out during each call. If fmh_count is + * zero, the number of reverse mappings will be returned in + * fmh_entries, though no mappings will be returned. 
fmh_reserved + * must be set to zero. + * + * The two elements in the fmh_keys array are used to constrain the + * output. The first element in the array should represent the + * lowest disk mapping ("low key") that the user wants to learn + * about. If this value is all zeroes, the filesystem will return + * the first entry it knows about. For a subsequent call, the + * contents of fsmap_head.fmh_recs[fsmap_head.fmh_count - 1] should be + * copied into fmh_keys[0] to have the kernel start where it left off. + * + * The second element in the fmh_keys array should represent the + * highest disk mapping ("high key") that the user wants to learn + * about. If this value is all ones, the filesystem will not stop + * until it runs out of mapping to return or runs out of space in + * fmh_recs. + * + * fmr_device can be either a 32-bit cookie representing a device, or + * a 32-bit dev_t if the FMH_OF_DEV_T flag is set. fmr_physical, + * fmr_offset, and fmr_length are expressed in units of bytes. + * fmr_owner is either an inode number, or a special value if + * FMR_OF_SPECIAL_OWNER is set in fmr_flags. + */ +struct fsmap { + __u32 fmr_device; /* device id */ + __u32 fmr_flags; /* mapping flags */ + __u64 fmr_physical; /* device offset of segment */ + __u64 fmr_owner; /* owner id */ + __u64 fmr_offset; /* file offset of segment */ + __u64 fmr_length; /* length of segment */ + __u64 fmr_reserved[3]; /* must be zero */ +}; + +struct fsmap_head { + __u32 fmh_iflags; /* control flags */ + __u32 fmh_oflags; /* output flags */ + __u32 fmh_count; /* # of entries in array incl. input */ + __u32 fmh_entries; /* # of entries filled in (output). */ + __u64 fmh_reserved[6]; /* must be zero */ + + struct fsmap fmh_keys[2]; /* low and high keys for the mapping search */ + struct fsmap fmh_recs[]; /* returned records */ +}; + +/* Size of an fsmap_head with room for nr records. 
*/ +static inline size_t +fsmap_sizeof( + unsigned int nr) +{ + return sizeof(struct fsmap_head) + nr * sizeof(struct fsmap); +} + +/* Start the next fsmap query at the end of the current query results. */ +static inline void +fsmap_advance( + struct fsmap_head *head) +{ + head->fmh_keys[0] = head->fmh_recs[head->fmh_entries - 1]; +} + +/* fmh_iflags values - set by FS_IOC_GETFSMAP caller in the header. */ +/* no flags defined yet */ +#define FMH_IF_VALID 0 + +/* fmh_oflags values - returned in the header segment only. */ +#define FMH_OF_DEV_T 0x1 /* fmr_device values will be dev_t */ + +/* fmr_flags values - returned for each non-header segment */ +#define FMR_OF_PREALLOC 0x1 /* segment = unwritten pre-allocation */ +#define FMR_OF_ATTR_FORK 0x2 /* segment = attribute fork */ +#define FMR_OF_EXTENT_MAP 0x4 /* segment = extent map */ +#define FMR_OF_SHARED 0x8 /* segment = shared with another file */ +#define FMR_OF_SPECIAL_OWNER 0x10 /* owner is a special value */ +#define FMR_OF_LAST 0x20 /* segment is the last in the FS */ + +/* Each FS gets to define its own special owner codes. */ +#define FMR_OWNER(type, code) (((__u64)type << 32) | \ + ((__u64)code & 0xFFFFFFFFULL)) +#define FMR_OWNER_TYPE(owner) ((__u32)((__u64)owner >> 32)) +#define FMR_OWNER_CODE(owner) ((__u32)(((__u64)owner & 0xFFFFFFFFULL))) +#define FMR_OWN_FREE FMR_OWNER(0, 1) /* free space */ +#define FMR_OWN_UNKNOWN FMR_OWNER(0, 2) /* unknown owner */ +#define FMR_OWN_METADATA FMR_OWNER(0, 3) /* metadata */ + +#define FS_IOC_GETFSMAP _IOWR('X', 59, struct fsmap_head) + +#endif /* _LINUX_FSMAP_H */ -- cgit v1.2.3-71-gd317 From 5db06a8a98f515f67446a69c57577c4c363ec65d Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Tue, 4 Apr 2017 17:52:21 +0100 Subject: drm: Pass CRTC ID in userspace vblank events With the atomic API, it is possible that a single commit affects multiple crtcs. 
If the user requests an event with that commit, one event will be sent for each CRTC, but it is not possible to distinguish which crtc an event is for in user space. To solve this, the reserved field in struct drm_vblank_event is repurposed to include the crtc_id which the event is for. The DRM_CAP_CRTC_IN_VBLANK_EVENT is added to allow userspace to query if the crtc field will be set properly. [daniels: Rebased, using Maarten's forward-port.] Signed-off-by: Ander Conselvan de Oliveira Signed-off-by: Daniel Stone Cc: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170404165221.28240-2-daniels@collabora.com --- drivers/gpu/drm/drm_ioctl.c | 3 +++ drivers/gpu/drm/drm_irq.c | 2 ++ include/uapi/drm/drm.h | 3 ++- 3 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index 9f4241f0dd9c..865e3ee4d743 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -286,6 +286,9 @@ static int drm_getcap(struct drm_device *dev, void *data, struct drm_file *file_ case DRM_CAP_ADDFB2_MODIFIERS: req->value = dev->mode_config.allow_fb_modifiers; break; + case DRM_CAP_CRTC_IN_VBLANK_EVENT: + req->value = 1; + break; default: return -EINVAL; } diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index dac1b2593cb1..8c866cac62dd 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -1026,6 +1026,7 @@ void drm_crtc_arm_vblank_event(struct drm_crtc *crtc, e->pipe = pipe; e->event.sequence = drm_vblank_count(dev, pipe); + e->event.crtc_id = crtc->base.id; list_add_tail(&e->base.link, &dev->vblank_event_list); } EXPORT_SYMBOL(drm_crtc_arm_vblank_event); @@ -1056,6 +1057,7 @@ void drm_crtc_send_vblank_event(struct drm_crtc *crtc, now = get_drm_timestamp(); } e->pipe = pipe; + e->event.crtc_id = crtc->base.id; send_vblank_event(dev, e, seq, &now); } EXPORT_SYMBOL(drm_crtc_send_vblank_event); diff --git a/include/uapi/drm/drm.h 
b/include/uapi/drm/drm.h index b2c52843bc70..42d9f64ce416 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -647,6 +647,7 @@ struct drm_gem_open { #define DRM_CAP_CURSOR_HEIGHT 0x9 #define DRM_CAP_ADDFB2_MODIFIERS 0x10 #define DRM_CAP_PAGE_FLIP_TARGET 0x11 +#define DRM_CAP_CRTC_IN_VBLANK_EVENT 0x12 /** DRM_IOCTL_GET_CAP ioctl argument type */ struct drm_get_cap { @@ -851,7 +852,7 @@ struct drm_event_vblank { __u32 tv_sec; __u32 tv_usec; __u32 sequence; - __u32 reserved; + __u32 crtc_id; /* 0 on older kernels that do not support this */ }; /* typedef area */ -- cgit v1.2.3-71-gd317 From 6563c91fd645556c7801748f15bc727c77fcd311 Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Wed, 1 Mar 2017 16:44:09 -0800 Subject: KEYS: Add KEYCTL_RESTRICT_KEYRING Keyrings recently gained restrict_link capabilities that allow individual keys to be validated prior to linking. This functionality was only available using internal kernel APIs. With the KEYCTL_RESTRICT_KEYRING command existing keyrings can be configured to check the content of keys before they are linked, and then allow or disallow linkage of that key to the keyring. To restrict a keyring, call: keyctl(KEYCTL_RESTRICT_KEYRING, key_serial_t keyring, const char *type, const char *restriction) where 'type' is the name of a registered key type and 'restriction' is a string describing how key linkage is to be restricted. The restriction option syntax is specific to each key type. 
Signed-off-by: Mat Martineau --- Documentation/security/keys.txt | 25 ++++++++++ include/linux/key.h | 6 ++- include/uapi/linux/keyctl.h | 1 + security/keys/compat.c | 4 ++ security/keys/internal.h | 3 ++ security/keys/keyctl.c | 58 ++++++++++++++++++++++ security/keys/keyring.c | 105 ++++++++++++++++++++++++++++++++++++++++ 7 files changed, 201 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/Documentation/security/keys.txt b/Documentation/security/keys.txt index 5fe04a7cc03d..5f554aab8751 100644 --- a/Documentation/security/keys.txt +++ b/Documentation/security/keys.txt @@ -857,6 +857,31 @@ The keyctl syscall functions are: supported, error ENOKEY if the key could not be found, or error EACCES if the key is not readable by the caller. + (*) Restrict keyring linkage + + long keyctl(KEYCTL_RESTRICT_KEYRING, key_serial_t keyring, + const char *type, const char *restriction); + + An existing keyring can restrict linkage of additional keys by evaluating + the contents of the key according to a restriction scheme. + + "keyring" is the key ID for an existing keyring to apply a restriction + to. It may be empty or may already have keys linked. Existing linked keys + will remain in the keyring even if the new restriction would reject them. + + "type" is a registered key type. + + "restriction" is a string describing how key linkage is to be restricted. + The format varies depending on the key type, and the string is passed to + the lookup_restriction() function for the requested type. It may specify + a method and relevant data for the restriction such as signature + verification or constraints on key payload. If the requested key type is + later unregistered, no keys may be added to the keyring after the key type + is removed. + + To apply a keyring restriction the process must have Set Attribute + permission and the keyring must not be previously restricted. 
+ =============== KERNEL SERVICES =============== diff --git a/include/linux/key.h b/include/linux/key.h index d2916363689c..0c9b93b0d1f7 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -219,7 +219,8 @@ struct key { /* This is set on a keyring to restrict the addition of a link to a key * to it. If this structure isn't provided then it is assumed that the * keyring is open to any addition. It is ignored for non-keyring - * keys. + * keys. Only set this value using keyring_restrict(), keyring_alloc(), + * or key_alloc(). * * This is intended for use with rings of trusted keys whereby addition * to the keyring needs to be controlled. KEY_ALLOC_BYPASS_RESTRICTION @@ -328,6 +329,9 @@ extern key_ref_t keyring_search(key_ref_t keyring, extern int keyring_add_key(struct key *keyring, struct key *key); +extern int keyring_restrict(key_ref_t keyring, const char *type, + const char *restriction); + extern struct key *key_lookup(key_serial_t id); static inline key_serial_t key_serial(const struct key *key) diff --git a/include/uapi/linux/keyctl.h b/include/uapi/linux/keyctl.h index 86eddd6241f3..ff79c44e49a3 100644 --- a/include/uapi/linux/keyctl.h +++ b/include/uapi/linux/keyctl.h @@ -60,6 +60,7 @@ #define KEYCTL_INVALIDATE 21 /* invalidate a key */ #define KEYCTL_GET_PERSISTENT 22 /* get a user's persistent keyring */ #define KEYCTL_DH_COMPUTE 23 /* Compute Diffie-Hellman values */ +#define KEYCTL_RESTRICT_KEYRING 29 /* Restrict keys allowed to link to a keyring */ /* keyctl structures */ struct keyctl_dh_params { diff --git a/security/keys/compat.c b/security/keys/compat.c index 36c80bf5b89c..bb98f2b8dd7d 100644 --- a/security/keys/compat.c +++ b/security/keys/compat.c @@ -136,6 +136,10 @@ COMPAT_SYSCALL_DEFINE5(keyctl, u32, option, return keyctl_dh_compute(compat_ptr(arg2), compat_ptr(arg3), arg4, compat_ptr(arg5)); + case KEYCTL_RESTRICT_KEYRING: + return keyctl_restrict_keyring(arg2, compat_ptr(arg3), + compat_ptr(arg4)); + default: return -EOPNOTSUPP; } 
diff --git a/security/keys/internal.h b/security/keys/internal.h index 24762ae9a198..6ce016314897 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -252,6 +252,9 @@ struct iov_iter; extern long keyctl_instantiate_key_common(key_serial_t, struct iov_iter *, key_serial_t); +extern long keyctl_restrict_keyring(key_serial_t id, + const char __user *_type, + const char __user *_restriction); #ifdef CONFIG_PERSISTENT_KEYRINGS extern long keyctl_get_persistent(uid_t, key_serial_t); extern unsigned persistent_keyring_expiry; diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 52c34532c785..6ee2826a2d06 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -1582,6 +1582,59 @@ error_keyring: return ret; } +/* + * Apply a restriction to a given keyring. + * + * The caller must have Setattr permission to change keyring restrictions. + * + * The requested type name may be a NULL pointer to reject all attempts + * to link to the keyring. If _type is non-NULL, _restriction can be + * NULL or a pointer to a string describing the restriction. If _type is + * NULL, _restriction must also be NULL. + * + * Returns 0 if successful. + */ +long keyctl_restrict_keyring(key_serial_t id, const char __user *_type, + const char __user *_restriction) +{ + key_ref_t key_ref; + bool link_reject = !_type; + char type[32]; + char *restriction = NULL; + long ret; + + key_ref = lookup_user_key(id, 0, KEY_NEED_SETATTR); + if (IS_ERR(key_ref)) + return PTR_ERR(key_ref); + + if (_type) { + ret = key_get_type_from_user(type, _type, sizeof(type)); + if (ret < 0) + goto error; + } + + if (_restriction) { + if (!_type) { + ret = -EINVAL; + goto error; + } + + restriction = strndup_user(_restriction, PAGE_SIZE); + if (IS_ERR(restriction)) { + ret = PTR_ERR(restriction); + goto error; + } + } + + ret = keyring_restrict(key_ref, link_reject ? 
NULL : type, restriction); + kfree(restriction); + +error: + key_ref_put(key_ref); + + return ret; +} + /* * The key control system call */ @@ -1693,6 +1746,11 @@ SYSCALL_DEFINE5(keyctl, int, option, unsigned long, arg2, unsigned long, arg3, (char __user *) arg3, (size_t) arg4, (void __user *) arg5); + case KEYCTL_RESTRICT_KEYRING: + return keyctl_restrict_keyring((key_serial_t) arg2, + (const char __user *) arg3, + (const char __user *) arg4); + default: return -EOPNOTSUPP; } diff --git a/security/keys/keyring.c b/security/keys/keyring.c index 838334fec6ce..4d1678e4586f 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -947,6 +947,111 @@ key_ref_t keyring_search(key_ref_t keyring, } EXPORT_SYMBOL(keyring_search); +static struct key_restriction *keyring_restriction_alloc( + key_restrict_link_func_t check) +{ + struct key_restriction *keyres = + kzalloc(sizeof(struct key_restriction), GFP_KERNEL); + + if (!keyres) + return ERR_PTR(-ENOMEM); + + keyres->check = check; + + return keyres; +} + +/* + * Semaphore to serialise restriction setup to prevent reference count + * cycles through restriction key pointers. + */ +static DECLARE_RWSEM(keyring_serialise_restrict_sem); + +/* + * Check for restriction cycles that would prevent keyring garbage collection. + * keyring_serialise_restrict_sem must be held. 
+ */ +static bool keyring_detect_restriction_cycle(const struct key *dest_keyring, + struct key_restriction *keyres) +{ + while (keyres && keyres->key && + keyres->key->type == &key_type_keyring) { + if (keyres->key == dest_keyring) + return true; + + keyres = keyres->key->restrict_link; + } + + return false; +} + +/** + * keyring_restrict - Look up and apply a restriction to a keyring + * + * @keyring: The keyring to be restricted + * @restriction: The restriction options to apply to the keyring + */ +int keyring_restrict(key_ref_t keyring_ref, const char *type, + const char *restriction) +{ + struct key *keyring; + struct key_type *restrict_type = NULL; + struct key_restriction *restrict_link; + int ret = 0; + + keyring = key_ref_to_ptr(keyring_ref); + key_check(keyring); + + if (keyring->type != &key_type_keyring) + return -ENOTDIR; + + if (!type) { + restrict_link = keyring_restriction_alloc(restrict_link_reject); + } else { + restrict_type = key_type_lookup(type); + + if (IS_ERR(restrict_type)) + return PTR_ERR(restrict_type); + + if (!restrict_type->lookup_restriction) { + ret = -ENOENT; + goto error; + } + + restrict_link = restrict_type->lookup_restriction(restriction); + } + + if (IS_ERR(restrict_link)) { + ret = PTR_ERR(restrict_link); + goto error; + } + + down_write(&keyring->sem); + down_write(&keyring_serialise_restrict_sem); + + if (keyring->restrict_link) + ret = -EEXIST; + else if (keyring_detect_restriction_cycle(keyring, restrict_link)) + ret = -EDEADLK; + else + keyring->restrict_link = restrict_link; + + up_write(&keyring_serialise_restrict_sem); + up_write(&keyring->sem); + + if (ret < 0) { + key_put(restrict_link->key); + kfree(restrict_link); + } + +error: + if (restrict_type) + key_type_put(restrict_type); + + return ret; +} +EXPORT_SYMBOL(keyring_restrict); + /* * Search the given keyring for a key that might be updated. 
* -- cgit v1.2.3-71-gd317 From f1c316a3ab9d24df6022682422fe897492f2c0c8 Mon Sep 17 00:00:00 2001 From: Stephan Mueller Date: Fri, 19 Aug 2016 20:39:09 +0200 Subject: KEYS: add SP800-56A KDF support for DH SP800-56A defines the use of DH with key derivation function based on a counter. The input to the KDF is defined as (DH shared secret || other information). The value for the "other information" is to be provided by the caller. The KDF is implemented using the hash support from the kernel crypto API. The implementation uses the symmetric hash support as the input to the hash operation is usually very small. The caller is allowed to specify the hash name that he wants to use to derive the key material allowing the use of all supported hashes provided with the kernel crypto API. As the KDF implements the proper truncation of the DH shared secret to the requested size, this patch fills the caller buffer up to its size. The patch is tested with a new test added to the keyutils user space code which uses a CAVS test vector testing the compliance with SP800-56A. 
Signed-off-by: Stephan Mueller Signed-off-by: David Howells --- Documentation/security/keys.txt | 34 +++++-- include/linux/compat.h | 7 ++ include/uapi/linux/keyctl.h | 7 ++ security/keys/Kconfig | 1 + security/keys/Makefile | 3 +- security/keys/compat.c | 5 +- security/keys/compat_dh.c | 38 +++++++ security/keys/dh.c | 220 +++++++++++++++++++++++++++++++++++++--- security/keys/internal.h | 24 ++++- security/keys/keyctl.c | 2 +- 10 files changed, 315 insertions(+), 26 deletions(-) create mode 100644 security/keys/compat_dh.c (limited to 'include/uapi') diff --git a/Documentation/security/keys.txt b/Documentation/security/keys.txt index 5f554aab8751..cd5019934d7f 100644 --- a/Documentation/security/keys.txt +++ b/Documentation/security/keys.txt @@ -827,7 +827,7 @@ The keyctl syscall functions are: long keyctl(KEYCTL_DH_COMPUTE, struct keyctl_dh_params *params, char *buffer, size_t buflen, - void *reserved); + struct keyctl_kdf_params *kdf); The params struct contains serial numbers for three keys: @@ -844,18 +844,36 @@ The keyctl syscall functions are: public key. If the base is the remote public key, the result is the shared secret. - The reserved argument must be set to NULL. + If the parameter kdf is NULL, the following applies: - The buffer length must be at least the length of the prime, or zero. + - The buffer length must be at least the length of the prime, or zero. - If the buffer length is nonzero, the length of the result is - returned when it is successfully calculated and copied in to the - buffer. When the buffer length is zero, the minimum required - buffer length is returned. + - If the buffer length is nonzero, the length of the result is + returned when it is successfully calculated and copied in to the + buffer. When the buffer length is zero, the minimum required + buffer length is returned. 
+ + The kdf parameter allows the caller to apply a key derivation function + (KDF) on the Diffie-Hellman computation where only the result + of the KDF is returned to the caller. The KDF is characterized with + struct keyctl_kdf_params as follows: + + - char *hashname specifies the NUL terminated string identifying + the hash used from the kernel crypto API and applied for the KDF + operation. The KDF implemenation complies with SP800-56A as well + as with SP800-108 (the counter KDF). + + - char *otherinfo specifies the OtherInfo data as documented in + SP800-56A section 5.8.1.2. The length of the buffer is given with + otherinfolen. The format of OtherInfo is defined by the caller. + The otherinfo pointer may be NULL if no OtherInfo shall be used. This function will return error EOPNOTSUPP if the key type is not supported, error ENOKEY if the key could not be found, or error - EACCES if the key is not readable by the caller. + EACCES if the key is not readable by the caller. In addition, the + function will return EMSGSIZE when the parameter kdf is non-NULL + and either the buffer length or the OtherInfo length exceeds the + allowed length. 
(*) Restrict keyring linkage diff --git a/include/linux/compat.h b/include/linux/compat.h index aef47be2a5c1..993c87182e02 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -295,6 +295,13 @@ struct compat_old_sigaction { }; #endif +struct compat_keyctl_kdf_params { + compat_uptr_t hashname; + compat_uptr_t otherinfo; + __u32 otherinfolen; + __u32 __spare[8]; +}; + struct compat_statfs; struct compat_statfs64; struct compat_old_linux_dirent; diff --git a/include/uapi/linux/keyctl.h b/include/uapi/linux/keyctl.h index ff79c44e49a3..201c6644b237 100644 --- a/include/uapi/linux/keyctl.h +++ b/include/uapi/linux/keyctl.h @@ -69,4 +69,11 @@ struct keyctl_dh_params { __s32 base; }; +struct keyctl_kdf_params { + char *hashname; + char *otherinfo; + __u32 otherinfolen; + __u32 __spare[8]; +}; + #endif /* _LINUX_KEYCTL_H */ diff --git a/security/keys/Kconfig b/security/keys/Kconfig index d942c7c2bc0a..4ac1b83a23f8 100644 --- a/security/keys/Kconfig +++ b/security/keys/Kconfig @@ -90,6 +90,7 @@ config KEY_DH_OPERATIONS bool "Diffie-Hellman operations on retained keys" depends on KEYS select MPILIB + select CRYPTO_HASH help This option provides support for calculating Diffie-Hellman public keys and shared secrets using values stored as keys diff --git a/security/keys/Makefile b/security/keys/Makefile index 1fd4a16e6daf..57dff0c15809 100644 --- a/security/keys/Makefile +++ b/security/keys/Makefile @@ -15,7 +15,8 @@ obj-y := \ request_key.o \ request_key_auth.o \ user_defined.o -obj-$(CONFIG_KEYS_COMPAT) += compat.o +compat-obj-$(CONFIG_KEY_DH_OPERATIONS) += compat_dh.o +obj-$(CONFIG_KEYS_COMPAT) += compat.o $(compat-obj-y) obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_SYSCTL) += sysctl.o obj-$(CONFIG_PERSISTENT_KEYRINGS) += persistent.o diff --git a/security/keys/compat.c b/security/keys/compat.c index bb98f2b8dd7d..e87c89c0177c 100644 --- a/security/keys/compat.c +++ b/security/keys/compat.c @@ -133,8 +133,9 @@ COMPAT_SYSCALL_DEFINE5(keyctl, u32, option, 
return keyctl_get_persistent(arg2, arg3); case KEYCTL_DH_COMPUTE: - return keyctl_dh_compute(compat_ptr(arg2), compat_ptr(arg3), - arg4, compat_ptr(arg5)); + return compat_keyctl_dh_compute(compat_ptr(arg2), + compat_ptr(arg3), + arg4, compat_ptr(arg5)); case KEYCTL_RESTRICT_KEYRING: return keyctl_restrict_keyring(arg2, compat_ptr(arg3), diff --git a/security/keys/compat_dh.c b/security/keys/compat_dh.c new file mode 100644 index 000000000000..a6a659b6bcb6 --- /dev/null +++ b/security/keys/compat_dh.c @@ -0,0 +1,38 @@ +/* 32-bit compatibility syscall for 64-bit systems for DH operations + * + * Copyright (C) 2016 Stephan Mueller + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include + +#include "internal.h" + +/* + * Perform the DH computation or DH based key derivation. + * + * If successful, 0 will be returned. 
+ */ +long compat_keyctl_dh_compute(struct keyctl_dh_params __user *params, + char __user *buffer, size_t buflen, + struct compat_keyctl_kdf_params __user *kdf) +{ + struct keyctl_kdf_params kdfcopy; + struct compat_keyctl_kdf_params compat_kdfcopy; + + if (!kdf) + return __keyctl_dh_compute(params, buffer, buflen, NULL); + + if (copy_from_user(&compat_kdfcopy, kdf, sizeof(compat_kdfcopy)) != 0) + return -EFAULT; + + kdfcopy.hashname = compat_ptr(compat_kdfcopy.hashname); + kdfcopy.otherinfo = compat_ptr(compat_kdfcopy.otherinfo); + kdfcopy.otherinfolen = compat_kdfcopy.otherinfolen; + + return __keyctl_dh_compute(params, buffer, buflen, &kdfcopy); +} diff --git a/security/keys/dh.c b/security/keys/dh.c index 893af4c45038..e603bd912e4c 100644 --- a/security/keys/dh.c +++ b/security/keys/dh.c @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include #include "internal.h" @@ -77,9 +79,146 @@ error: return ret; } -long keyctl_dh_compute(struct keyctl_dh_params __user *params, - char __user *buffer, size_t buflen, - void __user *reserved) +struct kdf_sdesc { + struct shash_desc shash; + char ctx[]; +}; + +static int kdf_alloc(struct kdf_sdesc **sdesc_ret, char *hashname) +{ + struct crypto_shash *tfm; + struct kdf_sdesc *sdesc; + int size; + + /* allocate synchronous hash */ + tfm = crypto_alloc_shash(hashname, 0, 0); + if (IS_ERR(tfm)) { + pr_info("could not allocate digest TFM handle %s\n", hashname); + return PTR_ERR(tfm); + } + + size = sizeof(struct shash_desc) + crypto_shash_descsize(tfm); + sdesc = kmalloc(size, GFP_KERNEL); + if (!sdesc) + return -ENOMEM; + sdesc->shash.tfm = tfm; + sdesc->shash.flags = 0x0; + + *sdesc_ret = sdesc; + + return 0; +} + +static void kdf_dealloc(struct kdf_sdesc *sdesc) +{ + if (!sdesc) + return; + + if (sdesc->shash.tfm) + crypto_free_shash(sdesc->shash.tfm); + + kzfree(sdesc); +} + +/* convert 32 bit integer into its string representation */ +static inline void crypto_kw_cpu_to_be32(u32 val, u8 *buf) +{ + __be32 
*a = (__be32 *)buf; + + *a = cpu_to_be32(val); +} + +/* + * Implementation of the KDF in counter mode according to SP800-108 section 5.1 + * as well as SP800-56A section 5.8.1 (Single-step KDF). + * + * SP800-56A: + * The src pointer is defined as Z || other info where Z is the shared secret + * from DH and other info is an arbitrary string (see SP800-56A section + * 5.8.1.2). + */ +static int kdf_ctr(struct kdf_sdesc *sdesc, const u8 *src, unsigned int slen, + u8 *dst, unsigned int dlen) +{ + struct shash_desc *desc = &sdesc->shash; + unsigned int h = crypto_shash_digestsize(desc->tfm); + int err = 0; + u8 *dst_orig = dst; + u32 i = 1; + u8 iteration[sizeof(u32)]; + + while (dlen) { + err = crypto_shash_init(desc); + if (err) + goto err; + + crypto_kw_cpu_to_be32(i, iteration); + err = crypto_shash_update(desc, iteration, sizeof(u32)); + if (err) + goto err; + + if (src && slen) { + err = crypto_shash_update(desc, src, slen); + if (err) + goto err; + } + + if (dlen < h) { + u8 tmpbuffer[h]; + + err = crypto_shash_final(desc, tmpbuffer); + if (err) + goto err; + memcpy(dst, tmpbuffer, dlen); + memzero_explicit(tmpbuffer, h); + return 0; + } else { + err = crypto_shash_final(desc, dst); + if (err) + goto err; + + dlen -= h; + dst += h; + i++; + } + } + + return 0; + +err: + memzero_explicit(dst_orig, dlen); + return err; +} + +static int keyctl_dh_compute_kdf(struct kdf_sdesc *sdesc, + char __user *buffer, size_t buflen, + uint8_t *kbuf, size_t kbuflen) +{ + uint8_t *outbuf = NULL; + int ret; + + outbuf = kmalloc(buflen, GFP_KERNEL); + if (!outbuf) { + ret = -ENOMEM; + goto err; + } + + ret = kdf_ctr(sdesc, kbuf, kbuflen, outbuf, buflen); + if (ret) + goto err; + + ret = buflen; + if (copy_to_user(buffer, outbuf, buflen) != 0) + ret = -EFAULT; + +err: + kzfree(outbuf); + return ret; +} + +long __keyctl_dh_compute(struct keyctl_dh_params __user *params, + char __user *buffer, size_t buflen, + struct keyctl_kdf_params *kdfcopy) { long ret; MPI base, private, prime, 
result; @@ -88,6 +227,7 @@ long keyctl_dh_compute(struct keyctl_dh_params __user *params, uint8_t *kbuf; ssize_t keylen; size_t resultlen; + struct kdf_sdesc *sdesc = NULL; if (!params || (!buffer && buflen)) { ret = -EINVAL; @@ -98,12 +238,34 @@ long keyctl_dh_compute(struct keyctl_dh_params __user *params, goto out; } - if (reserved) { - ret = -EINVAL; - goto out; + if (kdfcopy) { + char *hashname; + + if (buflen > KEYCTL_KDF_MAX_OUTPUT_LEN || + kdfcopy->otherinfolen > KEYCTL_KDF_MAX_OI_LEN) { + ret = -EMSGSIZE; + goto out; + } + + /* get KDF name string */ + hashname = strndup_user(kdfcopy->hashname, CRYPTO_MAX_ALG_NAME); + if (IS_ERR(hashname)) { + ret = PTR_ERR(hashname); + goto out; + } + + /* allocate KDF from the kernel crypto API */ + ret = kdf_alloc(&sdesc, hashname); + kfree(hashname); + if (ret) + goto out; } - keylen = mpi_from_key(pcopy.prime, buflen, &prime); + /* + * If the caller requests postprocessing with a KDF, allow an + * arbitrary output buffer size since the KDF ensures proper truncation. + */ + keylen = mpi_from_key(pcopy.prime, kdfcopy ? SIZE_MAX : buflen, &prime); if (keylen < 0 || !prime) { /* buflen == 0 may be used to query the required buffer size, * which is the prime key length. @@ -133,12 +295,25 @@ long keyctl_dh_compute(struct keyctl_dh_params __user *params, goto error3; } - kbuf = kmalloc(resultlen, GFP_KERNEL); + /* allocate space for DH shared secret and SP800-56A otherinfo */ + kbuf = kmalloc(kdfcopy ? 
(resultlen + kdfcopy->otherinfolen) : resultlen, + GFP_KERNEL); if (!kbuf) { ret = -ENOMEM; goto error4; } + /* + * Concatenate SP800-56A otherinfo past DH shared secret -- the + * input to the KDF is (DH shared secret || otherinfo) + */ + if (kdfcopy && kdfcopy->otherinfo && + copy_from_user(kbuf + resultlen, kdfcopy->otherinfo, + kdfcopy->otherinfolen) != 0) { + ret = -EFAULT; + goto error5; + } + ret = do_dh(result, base, private, prime); if (ret) goto error5; @@ -147,12 +322,17 @@ long keyctl_dh_compute(struct keyctl_dh_params __user *params, if (ret != 0) goto error5; - ret = nbytes; - if (copy_to_user(buffer, kbuf, nbytes) != 0) - ret = -EFAULT; + if (kdfcopy) { + ret = keyctl_dh_compute_kdf(sdesc, buffer, buflen, kbuf, + resultlen + kdfcopy->otherinfolen); + } else { + ret = nbytes; + if (copy_to_user(buffer, kbuf, nbytes) != 0) + ret = -EFAULT; + } error5: - kfree(kbuf); + kzfree(kbuf); error4: mpi_free(result); error3: @@ -162,5 +342,21 @@ error2: error1: mpi_free(prime); out: + kdf_dealloc(sdesc); return ret; } + +long keyctl_dh_compute(struct keyctl_dh_params __user *params, + char __user *buffer, size_t buflen, + struct keyctl_kdf_params __user *kdf) +{ + struct keyctl_kdf_params kdfcopy; + + if (!kdf) + return __keyctl_dh_compute(params, buffer, buflen, NULL); + + if (copy_from_user(&kdfcopy, kdf, sizeof(kdfcopy)) != 0) + return -EFAULT; + + return __keyctl_dh_compute(params, buffer, buflen, &kdfcopy); +} diff --git a/security/keys/internal.h b/security/keys/internal.h index 6ce016314897..c0f8682eba69 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -18,6 +18,7 @@ #include #include #include +#include struct iovec; @@ -267,15 +268,34 @@ static inline long keyctl_get_persistent(uid_t uid, key_serial_t destring) #ifdef CONFIG_KEY_DH_OPERATIONS extern long keyctl_dh_compute(struct keyctl_dh_params __user *, char __user *, - size_t, void __user *); + size_t, struct keyctl_kdf_params __user *); +extern long __keyctl_dh_compute(struct 
keyctl_dh_params __user *, char __user *, + size_t, struct keyctl_kdf_params *); +#ifdef CONFIG_KEYS_COMPAT +extern long compat_keyctl_dh_compute(struct keyctl_dh_params __user *params, + char __user *buffer, size_t buflen, + struct compat_keyctl_kdf_params __user *kdf); +#endif +#define KEYCTL_KDF_MAX_OUTPUT_LEN 1024 /* max length of KDF output */ +#define KEYCTL_KDF_MAX_OI_LEN 64 /* max length of otherinfo */ #else static inline long keyctl_dh_compute(struct keyctl_dh_params __user *params, char __user *buffer, size_t buflen, - void __user *reserved) + struct keyctl_kdf_params __user *kdf) +{ + return -EOPNOTSUPP; +} + +#ifdef CONFIG_KEYS_COMPAT +static inline long compat_keyctl_dh_compute( + struct keyctl_dh_params __user *params, + char __user *buffer, size_t buflen, + struct keyctl_kdf_params __user *kdf) { return -EOPNOTSUPP; } #endif +#endif /* * Debugging key validation diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 6ee2826a2d06..10fcea154c0f 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -1744,7 +1744,7 @@ SYSCALL_DEFINE5(keyctl, int, option, unsigned long, arg2, unsigned long, arg3, case KEYCTL_DH_COMPUTE: return keyctl_dh_compute((struct keyctl_dh_params __user *) arg2, (char __user *) arg3, (size_t) arg4, - (void __user *) arg5); + (struct keyctl_kdf_params __user *) arg5); case KEYCTL_RESTRICT_KEYRING: return keyctl_restrict_keyring((key_serial_t) arg2, -- cgit v1.2.3-71-gd317 From 1f37b177fd36790be4f281d538a8c9de67013606 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sun, 2 Apr 2017 14:30:06 -0700 Subject: phy/ethtool: Add missing SPEED_ strings Add all the currently available SPEED_ strings. Signed-off-by: Joe Perches Signed-off-by: David S. 
Miller --- drivers/net/phy/phy.c | 14 ++++++++++++++ include/uapi/linux/ethtool.h | 1 + 2 files changed, 15 insertions(+) (limited to 'include/uapi') diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 867c42154087..6811d1ef4ef2 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -50,8 +50,22 @@ static const char *phy_speed_to_str(int speed) return "1Gbps"; case SPEED_2500: return "2.5Gbps"; + case SPEED_5000: + return "5Gbps"; case SPEED_10000: return "10Gbps"; + case SPEED_20000: + return "20Gbps"; + case SPEED_25000: + return "25Gbps"; + case SPEED_40000: + return "40Gbps"; + case SPEED_50000: + return "50Gbps"; + case SPEED_56000: + return "56Gbps"; + case SPEED_100000: + return "100Gbps"; case SPEED_UNKNOWN: return "Unknown"; default: diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 3dc91a46e8b8..5f4ea28eabe4 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1487,6 +1487,7 @@ enum ethtool_link_mode_bit_indices { */ /* The forced speed, in units of 1Mb. All values 0 to INT_MAX are legal. */ +/* Update drivers/net/phy/phy.c:phy_speed_to_str() when adding new values */ #define SPEED_10 10 #define SPEED_100 100 #define SPEED_1000 1000 -- cgit v1.2.3-71-gd317 From 457c79e54487b076cafa0e1ec5f177e751c54087 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Mon, 3 Apr 2017 18:13:32 -0700 Subject: netlink/diag: report flags for netlink sockets cb_running is reported in /proc/self/net/netlink and it is reported by the ss tool, when it gets information from the proc files. sock_diag is a new interface which is used instead of proc files, so it looks reasonable that this interface has to report no less information about sockets than proc files. We use these flags to dump and restore netlink sockets. Signed-off-by: Andrei Vagin Signed-off-by: David S. 
Miller --- include/uapi/linux/netlink_diag.h | 10 ++++++++++ net/netlink/af_netlink.c | 8 -------- net/netlink/af_netlink.h | 8 ++++++++ net/netlink/diag.c | 25 +++++++++++++++++++++++++ 4 files changed, 43 insertions(+), 8 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/netlink_diag.h b/include/uapi/linux/netlink_diag.h index 76b4d87c83a8..6dcd4de3397b 100644 --- a/include/uapi/linux/netlink_diag.h +++ b/include/uapi/linux/netlink_diag.h @@ -38,6 +38,7 @@ enum { NETLINK_DIAG_GROUPS, NETLINK_DIAG_RX_RING, NETLINK_DIAG_TX_RING, + NETLINK_DIAG_FLAGS, __NETLINK_DIAG_MAX, }; @@ -52,5 +53,14 @@ enum { /* deprecated since 4.6 */ #define NDIAG_SHOW_RING_CFG 0x00000004 /* show ring configuration */ #endif +#define NDIAG_SHOW_FLAGS 0x00000008 /* show flags of a netlink socket */ + +/* flags */ +#define NDIAG_FLAG_CB_RUNNING 0x00000001 +#define NDIAG_FLAG_PKTINFO 0x00000002 +#define NDIAG_FLAG_BROADCAST_ERROR 0x00000004 +#define NDIAG_FLAG_NO_ENOBUFS 0x00000008 +#define NDIAG_FLAG_LISTEN_ALL_NSID 0x00000010 +#define NDIAG_FLAG_CAP_ACK 0x00000020 #endif diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 596eaff66649..fc232441cf23 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -78,14 +78,6 @@ struct listeners { /* state bits */ #define NETLINK_S_CONGESTED 0x0 -/* flags */ -#define NETLINK_F_KERNEL_SOCKET 0x1 -#define NETLINK_F_RECV_PKTINFO 0x2 -#define NETLINK_F_BROADCAST_SEND_ERROR 0x4 -#define NETLINK_F_RECV_NO_ENOBUFS 0x8 -#define NETLINK_F_LISTEN_ALL_NSID 0x10 -#define NETLINK_F_CAP_ACK 0x20 - static inline int netlink_is_kernel(struct sock *sk) { return nlk_sk(sk)->flags & NETLINK_F_KERNEL_SOCKET; diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index 4fdb38318977..f792f8d7f982 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -6,6 +6,14 @@ #include #include +/* flags */ +#define NETLINK_F_KERNEL_SOCKET 0x1 +#define NETLINK_F_RECV_PKTINFO 0x2 +#define 
NETLINK_F_BROADCAST_SEND_ERROR 0x4 +#define NETLINK_F_RECV_NO_ENOBUFS 0x8 +#define NETLINK_F_LISTEN_ALL_NSID 0x10 +#define NETLINK_F_CAP_ACK 0x20 + #define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8) #define NLGRPLONGS(x) (NLGRPSZ(x)/sizeof(unsigned long)) diff --git a/net/netlink/diag.c b/net/netlink/diag.c index a5546249fb10..8faa20b4d457 100644 --- a/net/netlink/diag.c +++ b/net/netlink/diag.c @@ -19,6 +19,27 @@ static int sk_diag_dump_groups(struct sock *sk, struct sk_buff *nlskb) nlk->groups); } +static int sk_diag_put_flags(struct sock *sk, struct sk_buff *skb) +{ + struct netlink_sock *nlk = nlk_sk(sk); + u32 flags = 0; + + if (nlk->cb_running) + flags |= NDIAG_FLAG_CB_RUNNING; + if (nlk->flags & NETLINK_F_RECV_PKTINFO) + flags |= NDIAG_FLAG_PKTINFO; + if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR) + flags |= NDIAG_FLAG_BROADCAST_ERROR; + if (nlk->flags & NETLINK_F_RECV_NO_ENOBUFS) + flags |= NDIAG_FLAG_NO_ENOBUFS; + if (nlk->flags & NETLINK_F_LISTEN_ALL_NSID) + flags |= NDIAG_FLAG_LISTEN_ALL_NSID; + if (nlk->flags & NETLINK_F_CAP_ACK) + flags |= NDIAG_FLAG_CAP_ACK; + + return nla_put_u32(skb, NETLINK_DIAG_FLAGS, flags); +} + static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct netlink_diag_req *req, u32 portid, u32 seq, u32 flags, int sk_ino) @@ -52,6 +73,10 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, sock_diag_put_meminfo(sk, skb, NETLINK_DIAG_MEMINFO)) goto out_nlmsg_trim; + if ((req->ndiag_show & NDIAG_SHOW_FLAGS) && + sk_diag_put_flags(sk, skb)) + goto out_nlmsg_trim; + nlmsg_end(skb, nlh); return 0; -- cgit v1.2.3-71-gd317 From def12888c161e6fec0702e5ec9c3962846e3a21d Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 4 Apr 2017 09:23:42 -0400 Subject: rtnl: Add support for netdev event to link messages When netdev events happen, a rtnetlink_event() handler will send messages for every event in it's white list. 
These messages contain current information about a particular device, but they do not include the information about which event just happened. The consumer of the message has to try to infer this information. In some cases (ex: NETDEV_NOTIFY_PEERS), that is not possible. This patch adds a new extension to RTM_NEWLINK message called IFLA_EVENT that would have an encoding of which event triggered this message. This would allow the message consumer to easily determine if it is interested in a particular event or not. Signed-off-by: Vladislav Yasevich Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 3 +- include/uapi/linux/if_link.h | 21 ++++++++++ net/core/dev.c | 2 +- net/core/rtnetlink.c | 92 +++++++++++++++++++++++++++++++++++++++----- 4 files changed, 107 insertions(+), 11 deletions(-) (limited to 'include/uapi') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 57e54847b0b9..0459018173cf 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -18,7 +18,8 @@ extern int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change, gfp_t flags); struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, - unsigned change, gfp_t flags); + unsigned change, unsigned long event, + gfp_t flags); void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, gfp_t flags); diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 8b405afb2376..97f6d302f627 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -157,6 +157,7 @@ enum { IFLA_GSO_MAX_SIZE, IFLA_PAD, IFLA_XDP, + IFLA_EVENT, __IFLA_MAX }; @@ -899,4 +900,24 @@ enum { #define IFLA_XDP_MAX (__IFLA_XDP_MAX - 1) +enum { + IFLA_EVENT_UNSPEC, + IFLA_EVENT_REBOOT, + IFLA_EVENT_CHANGE_MTU, + IFLA_EVENT_CHANGE_ADDR, + IFLA_EVENT_CHANGE_NAME, + IFLA_EVENT_FEAT_CHANGE, + IFLA_EVENT_BONDING_FAILOVER, + IFLA_EVENT_POST_TYPE_CHANGE,
+ IFLA_EVENT_NOTIFY_PEERS, + IFLA_EVENT_CHANGE_UPPER, + IFLA_EVENT_RESEND_IGMP, + IFLA_EVENT_PRE_CHANGE_MTU, + IFLA_EVENT_CHANGE_INFO_DATA, + IFLA_EVENT_PRE_CHANGE_UPPER, + IFLA_EVENT_CHANGE_LOWER_STATE, + IFLA_EVENT_UDP_TUNNEL_PUSH_INFO, + IFLA_EVENT_CHANGE_TX_QUEUE_LEN, +}; + #endif /* _UAPI_LINUX_IF_LINK_H */ diff --git a/net/core/dev.c b/net/core/dev.c index ef9fe60ee294..7efb4178ffef 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6840,7 +6840,7 @@ static void rollback_registered_many(struct list_head *head) if (!dev->rtnl_link_ops || dev->rtnl_link_state == RTNL_LINK_INITIALIZED) - skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, + skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0, GFP_KERNEL); /* diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 58419da7961b..b2bd4c9ee860 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -944,6 +944,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */ + nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */ + rtnl_xdp_size(dev) /* IFLA_XDP */ + + nla_total_size(4) /* IFLA_EVENT */ + nla_total_size(1); /* IFLA_PROTO_DOWN */ } @@ -1276,9 +1277,70 @@ err_cancel: return err; } +static int rtnl_fill_link_event(struct sk_buff *skb, unsigned long event) +{ + u32 rtnl_event; + + switch (event) { + case NETDEV_REBOOT: + rtnl_event = IFLA_EVENT_REBOOT; + break; + case NETDEV_CHANGEMTU: + rtnl_event = IFLA_EVENT_CHANGE_MTU; + break; + case NETDEV_CHANGEADDR: + rtnl_event = IFLA_EVENT_CHANGE_ADDR; + break; + case NETDEV_CHANGENAME: + rtnl_event = IFLA_EVENT_CHANGE_NAME; + break; + case NETDEV_FEAT_CHANGE: + rtnl_event = IFLA_EVENT_FEAT_CHANGE; + break; + case NETDEV_BONDING_FAILOVER: + rtnl_event = IFLA_EVENT_BONDING_FAILOVER; + break; + case NETDEV_POST_TYPE_CHANGE: + rtnl_event = IFLA_EVENT_POST_TYPE_CHANGE; + break; + case NETDEV_NOTIFY_PEERS: + rtnl_event = IFLA_EVENT_NOTIFY_PEERS; + break; + case 
NETDEV_CHANGEUPPER: + rtnl_event = IFLA_EVENT_CHANGE_UPPER; + break; + case NETDEV_RESEND_IGMP: + rtnl_event = IFLA_EVENT_RESEND_IGMP; + break; + case NETDEV_PRECHANGEMTU: + rtnl_event = IFLA_EVENT_PRE_CHANGE_MTU; + break; + case NETDEV_CHANGEINFODATA: + rtnl_event = IFLA_EVENT_CHANGE_INFO_DATA; + break; + case NETDEV_PRECHANGEUPPER: + rtnl_event = IFLA_EVENT_PRE_CHANGE_UPPER; + break; + case NETDEV_CHANGELOWERSTATE: + rtnl_event = IFLA_EVENT_CHANGE_LOWER_STATE; + break; + case NETDEV_UDP_TUNNEL_PUSH_INFO: + rtnl_event = IFLA_EVENT_UDP_TUNNEL_PUSH_INFO; + break; + case NETDEV_CHANGE_TX_QUEUE_LEN: + rtnl_event = IFLA_EVENT_CHANGE_TX_QUEUE_LEN; + break; + default: + return 0; + } + + return nla_put_u32(skb, IFLA_EVENT, rtnl_event); +} + static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, int type, u32 pid, u32 seq, u32 change, - unsigned int flags, u32 ext_filter_mask) + unsigned int flags, u32 ext_filter_mask, + unsigned long event) { struct ifinfomsg *ifm; struct nlmsghdr *nlh; @@ -1327,6 +1389,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down)) goto nla_put_failure; + if (rtnl_fill_link_event(skb, event)) + goto nla_put_failure; + if (rtnl_fill_link_ifmap(skb, dev)) goto nla_put_failure; @@ -1461,6 +1526,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_LINK_NETNSID] = { .type = NLA_S32 }, [IFLA_PROTO_DOWN] = { .type = NLA_U8 }, [IFLA_XDP] = { .type = NLA_NESTED }, + [IFLA_EVENT] = { .type = NLA_U32 }, }; static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { @@ -1619,7 +1685,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 0, flags, - ext_filter_mask); + ext_filter_mask, 0); /* If we ran out of room on the first message, * we're in trouble */ @@ -2710,7 +2776,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh) return -ENOBUFS; err = 
rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).portid, - nlh->nlmsg_seq, 0, 0, ext_filter_mask); + nlh->nlmsg_seq, 0, 0, ext_filter_mask, 0); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_size */ WARN_ON(err == -EMSGSIZE); @@ -2782,7 +2848,8 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) } struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, - unsigned int change, gfp_t flags) + unsigned int change, + unsigned long event, gfp_t flags) { struct net *net = dev_net(dev); struct sk_buff *skb; @@ -2793,7 +2860,7 @@ struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, if (skb == NULL) goto errout; - err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0, 0); + err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0, 0, event); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); @@ -2814,18 +2881,25 @@ void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, gfp_t flags) rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, flags); } -void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change, - gfp_t flags) +static void rtmsg_ifinfo_event(int type, struct net_device *dev, + unsigned int change, unsigned long event, + gfp_t flags) { struct sk_buff *skb; if (dev->reg_state != NETREG_REGISTERED) return; - skb = rtmsg_ifinfo_build_skb(type, dev, change, flags); + skb = rtmsg_ifinfo_build_skb(type, dev, change, event, flags); if (skb) rtmsg_ifinfo_send(skb, dev, flags); } + +void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change, + gfp_t flags) +{ + rtmsg_ifinfo_event(type, dev, change, 0, flags); +} EXPORT_SYMBOL(rtmsg_ifinfo); static int nlmsg_populate_fdb_fill(struct sk_buff *skb, @@ -4132,7 +4206,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi case NETDEV_CHANGELOWERSTATE: case NETDEV_UDP_TUNNEL_PUSH_INFO: case NETDEV_CHANGE_TX_QUEUE_LEN: - rtmsg_ifinfo(RTM_NEWLINK, dev, 0, 
GFP_KERNEL); + rtmsg_ifinfo_event(RTM_NEWLINK, dev, 0, event, GFP_KERNEL); break; default: break; -- cgit v1.2.3-71-gd317 From 5e91144dd702d068b22a75911c06104e56cb4858 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Tue, 8 Nov 2016 16:50:42 +0900 Subject: drm/tegra: Add tiling FB modifiers Add FB modifiers to allow user-space to specify that a surface is in one of the two tiling formats supported by Tegra chips, and add support in the tegradrm driver to handle them properly. This is necessary for the display controller to directly display buffers generated by the GPU. This feature is intended to replace the dedicated IOCTL enabled by TEGRA_STAGING and to provide a non-staging alternative to that solution. Signed-off-by: Alexandre Courbot Acked-by: Daniel Vetter Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/drm.c | 2 ++ drivers/gpu/drm/tegra/fb.c | 23 +++++++++++++++++++--- include/uapi/drm/drm_fourcc.h | 45 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 67 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index 948b529d4097..3f8bd7bd6532 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -164,6 +164,8 @@ static int tegra_drm_load(struct drm_device *drm, unsigned long flags) drm->mode_config.max_width = 4096; drm->mode_config.max_height = 4096; + drm->mode_config.allow_fb_modifiers = true; + drm->mode_config.funcs = &tegra_drm_mode_funcs; err = tegra_drm_fb_prepare(drm); diff --git a/drivers/gpu/drm/tegra/fb.c b/drivers/gpu/drm/tegra/fb.c index f142f6a4db25..d53f49f60b6f 100644 --- a/drivers/gpu/drm/tegra/fb.c +++ b/drivers/gpu/drm/tegra/fb.c @@ -52,9 +52,26 @@ int tegra_fb_get_tiling(struct drm_framebuffer *framebuffer, struct tegra_bo_tiling *tiling) { struct tegra_fb *fb = to_tegra_fb(framebuffer); - - /* TODO: handle YUV formats? 
*/ - *tiling = fb->planes[0]->tiling; + uint64_t modifier = fb->base.modifier; + + switch (fourcc_mod_tegra_mod(modifier)) { + case NV_FORMAT_MOD_TEGRA_TILED: + tiling->mode = TEGRA_BO_TILING_MODE_TILED; + tiling->value = 0; + break; + + case NV_FORMAT_MOD_TEGRA_16BX2_BLOCK(0): + tiling->mode = TEGRA_BO_TILING_MODE_BLOCK; + tiling->value = fourcc_mod_tegra_param(modifier); + if (tiling->value > 5) + return -EINVAL; + break; + + default: + /* TODO: handle YUV formats? */ + *tiling = fb->planes[0]->tiling; + break; + } return 0; } diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index ef20abb8119b..983a4f3ba5e1 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -292,6 +292,51 @@ extern "C" { */ #define DRM_FORMAT_MOD_VIVANTE_SPLIT_SUPER_TILED fourcc_mod_code(VIVANTE, 4) + +/* NVIDIA Tegra frame buffer modifiers */ + +/* + * Some modifiers take parameters, for example the number of vertical GOBs in + * a block. Reserve the lower 32 bits for parameters + */ +#define __fourcc_mod_tegra_mode_shift 32 +#define fourcc_mod_tegra_code(val, params) \ + fourcc_mod_code(NV, ((((__u64)val) << __fourcc_mod_tegra_mode_shift) | params)) +#define fourcc_mod_tegra_mod(m) \ + (m & ~((1ULL << __fourcc_mod_tegra_mode_shift) - 1)) +#define fourcc_mod_tegra_param(m) \ + (m & ((1ULL << __fourcc_mod_tegra_mode_shift) - 1)) + +/* + * Tegra Tiled Layout, used by Tegra 2, 3 and 4. + * + * Pixels are arranged in simple tiles of 16 x 16 bytes. + */ +#define NV_FORMAT_MOD_TEGRA_TILED fourcc_mod_tegra_code(1, 0) + +/* + * Tegra 16Bx2 Block Linear layout, used by TK1/TX1 + * + * Pixels are arranged in 64x8 Groups Of Bytes (GOBs). GOBs are then stacked + * vertically by a power of 2 (1 to 32 GOBs) to form a block. + * + * Within a GOB, data is ordered as 16B x 2 lines sectors laid in Z-shape. + * + * Parameter 'v' is the log2 encoding of the number of GOBs stacked vertically. 
+ * Valid values are: + * + * 0 == ONE_GOB + * 1 == TWO_GOBS + * 2 == FOUR_GOBS + * 3 == EIGHT_GOBS + * 4 == SIXTEEN_GOBS + * 5 == THIRTYTWO_GOBS + * + * Chapter 20 "Pixel Memory Formats" of the Tegra X1 TRM describes this format + * in full detail. + */ +#define NV_FORMAT_MOD_TEGRA_16BX2_BLOCK(v) fourcc_mod_tegra_code(2, v) + #if defined(__cplusplus) } #endif -- cgit v1.2.3-71-gd317 From f656edd5fb33d889561978b81ec2897087c2f4ca Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Fri, 31 Mar 2017 22:06:11 +0900 Subject: ALSA: fireface: add hwdep interface This commit adds hwdep interface so as the other drivers for audio and music units on IEEE 1394 have. This interface is designed for mixer/control applications. By using this interface, an application can get information about firewire node, can lock/unlock kernel streaming and can get notification at starting/stopping kernel streaming. Signed-off-by: Takashi Sakamoto Signed-off-by: Takashi Iwai --- include/uapi/sound/asound.h | 3 +- include/uapi/sound/firewire.h | 2 +- sound/firewire/Kconfig | 1 + sound/firewire/fireface/Makefile | 2 +- sound/firewire/fireface/ff-hwdep.c | 191 ++++++++++++++++++++++++++++++++++++ sound/firewire/fireface/ff-pcm.c | 20 +++- sound/firewire/fireface/ff-stream.c | 39 ++++++++ sound/firewire/fireface/ff.c | 5 + sound/firewire/fireface/ff.h | 13 +++ 9 files changed, 270 insertions(+), 6 deletions(-) create mode 100644 sound/firewire/fireface/ff-hwdep.c (limited to 'include/uapi') diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h index fd7b561af768..fd41697cb4d3 100644 --- a/include/uapi/sound/asound.h +++ b/include/uapi/sound/asound.h @@ -108,9 +108,10 @@ enum { SNDRV_HWDEP_IFACE_FW_TASCAM, /* TASCAM FireWire series */ SNDRV_HWDEP_IFACE_LINE6, /* Line6 USB processors */ SNDRV_HWDEP_IFACE_FW_MOTU, /* MOTU FireWire series */ + SNDRV_HWDEP_IFACE_FW_FIREFACE, /* RME Fireface series */ /* Don't forget to change the following: */ - SNDRV_HWDEP_IFACE_LAST = 
SNDRV_HWDEP_IFACE_FW_MOTU + SNDRV_HWDEP_IFACE_LAST = SNDRV_HWDEP_IFACE_FW_FIREFACE }; struct snd_hwdep_info { diff --git a/include/uapi/sound/firewire.h b/include/uapi/sound/firewire.h index 29afc5eab42d..622900488bdc 100644 --- a/include/uapi/sound/firewire.h +++ b/include/uapi/sound/firewire.h @@ -73,7 +73,7 @@ union snd_firewire_event { #define SNDRV_FIREWIRE_TYPE_DIGI00X 5 #define SNDRV_FIREWIRE_TYPE_TASCAM 6 #define SNDRV_FIREWIRE_TYPE_MOTU 7 -/* RME... */ +#define SNDRV_FIREWIRE_TYPE_FIREFACE 8 struct snd_firewire_get_info { unsigned int type; /* SNDRV_FIREWIRE_TYPE_xxx */ diff --git a/sound/firewire/Kconfig b/sound/firewire/Kconfig index b75a82288f74..70f02eea4a3e 100644 --- a/sound/firewire/Kconfig +++ b/sound/firewire/Kconfig @@ -155,6 +155,7 @@ config SND_FIREWIRE_MOTU config SND_FIREFACE tristate "RME Fireface series support" select SND_FIREWIRE_LIB + select SND_HWDEP help Say Y here to include support for RME fireface series. diff --git a/sound/firewire/fireface/Makefile b/sound/firewire/fireface/Makefile index e62693811519..8d6c612a15a0 100644 --- a/sound/firewire/fireface/Makefile +++ b/sound/firewire/fireface/Makefile @@ -1,3 +1,3 @@ snd-fireface-objs := ff.o ff-transaction.o ff-midi.o ff-proc.o amdtp-ff.o \ - ff-stream.o ff-pcm.o + ff-stream.o ff-pcm.o ff-hwdep.o obj-$(CONFIG_SND_FIREFACE) += snd-fireface.o diff --git a/sound/firewire/fireface/ff-hwdep.c b/sound/firewire/fireface/ff-hwdep.c new file mode 100644 index 000000000000..3ee04b054585 --- /dev/null +++ b/sound/firewire/fireface/ff-hwdep.c @@ -0,0 +1,191 @@ +/* + * ff-hwdep.c - a part of driver for RME Fireface series + * + * Copyright (c) 2015-2017 Takashi Sakamoto + * + * Licensed under the terms of the GNU General Public License, version 2. + */ + +/* + * This codes give three functionality. 
+ * + * 1.get firewire node information + * 2.get notification about starting/stopping stream + * 3.lock/unlock stream + */ + +#include "ff.h" + +static long hwdep_read(struct snd_hwdep *hwdep, char __user *buf, long count, + loff_t *offset) +{ + struct snd_ff *ff = hwdep->private_data; + DEFINE_WAIT(wait); + union snd_firewire_event event; + + spin_lock_irq(&ff->lock); + + while (!ff->dev_lock_changed) { + prepare_to_wait(&ff->hwdep_wait, &wait, TASK_INTERRUPTIBLE); + spin_unlock_irq(&ff->lock); + schedule(); + finish_wait(&ff->hwdep_wait, &wait); + if (signal_pending(current)) + return -ERESTARTSYS; + spin_lock_irq(&ff->lock); + } + + memset(&event, 0, sizeof(event)); + if (ff->dev_lock_changed) { + event.lock_status.type = SNDRV_FIREWIRE_EVENT_LOCK_STATUS; + event.lock_status.status = (ff->dev_lock_count > 0); + ff->dev_lock_changed = false; + + count = min_t(long, count, sizeof(event.lock_status)); + } + + spin_unlock_irq(&ff->lock); + + if (copy_to_user(buf, &event, count)) + return -EFAULT; + + return count; +} + +static unsigned int hwdep_poll(struct snd_hwdep *hwdep, struct file *file, + poll_table *wait) +{ + struct snd_ff *ff = hwdep->private_data; + unsigned int events; + + poll_wait(file, &ff->hwdep_wait, wait); + + spin_lock_irq(&ff->lock); + if (ff->dev_lock_changed) + events = POLLIN | POLLRDNORM; + else + events = 0; + spin_unlock_irq(&ff->lock); + + return events; +} + +static int hwdep_get_info(struct snd_ff *ff, void __user *arg) +{ + struct fw_device *dev = fw_parent_device(ff->unit); + struct snd_firewire_get_info info; + + memset(&info, 0, sizeof(info)); + info.type = SNDRV_FIREWIRE_TYPE_FIREFACE; + info.card = dev->card->index; + *(__be32 *)&info.guid[0] = cpu_to_be32(dev->config_rom[3]); + *(__be32 *)&info.guid[4] = cpu_to_be32(dev->config_rom[4]); + strlcpy(info.device_name, dev_name(&dev->device), + sizeof(info.device_name)); + + if (copy_to_user(arg, &info, sizeof(info))) + return -EFAULT; + + return 0; +} + +static int hwdep_lock(struct 
snd_ff *ff) +{ + int err; + + spin_lock_irq(&ff->lock); + + if (ff->dev_lock_count == 0) { + ff->dev_lock_count = -1; + err = 0; + } else { + err = -EBUSY; + } + + spin_unlock_irq(&ff->lock); + + return err; +} + +static int hwdep_unlock(struct snd_ff *ff) +{ + int err; + + spin_lock_irq(&ff->lock); + + if (ff->dev_lock_count == -1) { + ff->dev_lock_count = 0; + err = 0; + } else { + err = -EBADFD; + } + + spin_unlock_irq(&ff->lock); + + return err; +} + +static int hwdep_release(struct snd_hwdep *hwdep, struct file *file) +{ + struct snd_ff *ff = hwdep->private_data; + + spin_lock_irq(&ff->lock); + if (ff->dev_lock_count == -1) + ff->dev_lock_count = 0; + spin_unlock_irq(&ff->lock); + + return 0; +} + +static int hwdep_ioctl(struct snd_hwdep *hwdep, struct file *file, + unsigned int cmd, unsigned long arg) +{ + struct snd_ff *ff = hwdep->private_data; + + switch (cmd) { + case SNDRV_FIREWIRE_IOCTL_GET_INFO: + return hwdep_get_info(ff, (void __user *)arg); + case SNDRV_FIREWIRE_IOCTL_LOCK: + return hwdep_lock(ff); + case SNDRV_FIREWIRE_IOCTL_UNLOCK: + return hwdep_unlock(ff); + default: + return -ENOIOCTLCMD; + } +} + +#ifdef CONFIG_COMPAT +static int hwdep_compat_ioctl(struct snd_hwdep *hwdep, struct file *file, + unsigned int cmd, unsigned long arg) +{ + return hwdep_ioctl(hwdep, file, cmd, + (unsigned long)compat_ptr(arg)); +} +#else +#define hwdep_compat_ioctl NULL +#endif + +int snd_ff_create_hwdep_devices(struct snd_ff *ff) +{ + static const struct snd_hwdep_ops hwdep_ops = { + .read = hwdep_read, + .release = hwdep_release, + .poll = hwdep_poll, + .ioctl = hwdep_ioctl, + .ioctl_compat = hwdep_compat_ioctl, + }; + struct snd_hwdep *hwdep; + int err; + + err = snd_hwdep_new(ff->card, ff->card->driver, 0, &hwdep); + if (err < 0) + return err; + + strcpy(hwdep->name, ff->card->driver); + hwdep->iface = SNDRV_HWDEP_IFACE_FW_FIREFACE; + hwdep->ops = hwdep_ops; + hwdep->private_data = ff; + hwdep->exclusive = true; + + return 0; +} diff --git 
a/sound/firewire/fireface/ff-pcm.c b/sound/firewire/fireface/ff-pcm.c index d282467d39a6..93cee1978e8e 100644 --- a/sound/firewire/fireface/ff-pcm.c +++ b/sound/firewire/fireface/ff-pcm.c @@ -154,13 +154,21 @@ static int pcm_open(struct snd_pcm_substream *substream) enum snd_ff_clock_src src; int i, err; - err = pcm_init_hw_params(ff, substream); + err = snd_ff_stream_lock_try(ff); if (err < 0) return err; + err = pcm_init_hw_params(ff, substream); + if (err < 0) { + snd_ff_stream_lock_release(ff); + return err; + } + err = ff->spec->protocol->get_clock(ff, &rate, &src); - if (err < 0) + if (err < 0) { + snd_ff_stream_lock_release(ff); return err; + } if (src != SND_FF_CLOCK_SRC_INTERNAL) { for (i = 0; i < CIP_SFC_COUNT; ++i) { @@ -171,8 +179,10 @@ static int pcm_open(struct snd_pcm_substream *substream) * The unit is configured at sampling frequency which packet * streaming engine can't support. */ - if (i >= CIP_SFC_COUNT) + if (i >= CIP_SFC_COUNT) { + snd_ff_stream_lock_release(ff); return -EIO; + } substream->runtime->hw.rate_min = rate; substream->runtime->hw.rate_max = rate; @@ -192,6 +202,10 @@ static int pcm_open(struct snd_pcm_substream *substream) static int pcm_close(struct snd_pcm_substream *substream) { + struct snd_ff *ff = substream->private_data; + + snd_ff_stream_lock_release(ff); + return 0; } diff --git a/sound/firewire/fireface/ff-stream.c b/sound/firewire/fireface/ff-stream.c index 0ef6177aff20..78880922120e 100644 --- a/sound/firewire/fireface/ff-stream.c +++ b/sound/firewire/fireface/ff-stream.c @@ -241,3 +241,42 @@ void snd_ff_stream_update_duplex(struct snd_ff *ff) fw_iso_resources_update(&ff->tx_resources); fw_iso_resources_update(&ff->rx_resources); } + +void snd_ff_stream_lock_changed(struct snd_ff *ff) +{ + ff->dev_lock_changed = true; + wake_up(&ff->hwdep_wait); +} + +int snd_ff_stream_lock_try(struct snd_ff *ff) +{ + int err; + + spin_lock_irq(&ff->lock); + + /* user land lock this */ + if (ff->dev_lock_count < 0) { + err = -EBUSY; + 
goto end; + } + + /* this is the first time */ + if (ff->dev_lock_count++ == 0) + snd_ff_stream_lock_changed(ff); + err = 0; +end: + spin_unlock_irq(&ff->lock); + return err; +} + +void snd_ff_stream_lock_release(struct snd_ff *ff) +{ + spin_lock_irq(&ff->lock); + + if (WARN_ON(ff->dev_lock_count <= 0)) + goto end; + if (--ff->dev_lock_count == 0) + snd_ff_stream_lock_changed(ff); +end: + spin_unlock_irq(&ff->lock); +} diff --git a/sound/firewire/fireface/ff.c b/sound/firewire/fireface/ff.c index ff62d16fec0f..f57b434144dc 100644 --- a/sound/firewire/fireface/ff.c +++ b/sound/firewire/fireface/ff.c @@ -76,6 +76,10 @@ static void do_registration(struct work_struct *work) if (err < 0) goto error; + err = snd_ff_create_hwdep_devices(ff); + if (err < 0) + goto error; + err = snd_card_register(ff->card); if (err < 0) goto error; @@ -108,6 +112,7 @@ static int snd_ff_probe(struct fw_unit *unit, mutex_init(&ff->mutex); spin_lock_init(&ff->lock); + init_waitqueue_head(&ff->hwdep_wait); ff->spec = (const struct snd_ff_spec *)entry->driver_data; diff --git a/sound/firewire/fireface/ff.h b/sound/firewire/fireface/ff.h index 0d5228c905ea..a143b5ab8b71 100644 --- a/sound/firewire/fireface/ff.h +++ b/sound/firewire/fireface/ff.h @@ -17,12 +17,15 @@ #include #include #include +#include #include #include #include #include #include +#include +#include #include "../lib.h" #include "../amdtp-stream.h" @@ -77,6 +80,10 @@ struct snd_ff { struct amdtp_stream rx_stream; struct fw_iso_resources tx_resources; struct fw_iso_resources rx_resources; + + int dev_lock_count; + bool dev_lock_changed; + wait_queue_head_t hwdep_wait; }; enum snd_ff_clock_src { @@ -122,10 +129,16 @@ int snd_ff_stream_start_duplex(struct snd_ff *ff, unsigned int rate); void snd_ff_stream_stop_duplex(struct snd_ff *ff); void snd_ff_stream_update_duplex(struct snd_ff *ff); +void snd_ff_stream_lock_changed(struct snd_ff *ff); +int snd_ff_stream_lock_try(struct snd_ff *ff); +void snd_ff_stream_lock_release(struct snd_ff 
*ff); + void snd_ff_proc_init(struct snd_ff *ff); int snd_ff_create_midi_devices(struct snd_ff *ff); int snd_ff_create_pcm_devices(struct snd_ff *ff); +int snd_ff_create_hwdep_devices(struct snd_ff *ff); + #endif -- cgit v1.2.3-71-gd317 From 47a4693e1d3eb09e523c223753fb5a97721f49b8 Mon Sep 17 00:00:00 2001 From: Yi Min Zhao Date: Fri, 10 Mar 2017 09:29:38 +0100 Subject: KVM: s390: introduce AIS capability Introduce a cap to enable AIS facility bit, and add documentation for this capability. Signed-off-by: Yi Min Zhao Signed-off-by: Fei Li Reviewed-by: Cornelia Huck Signed-off-by: Christian Borntraeger --- Documentation/virtual/kvm/api.txt | 8 ++++++++ arch/s390/kvm/kvm-s390.c | 15 +++++++++++++++ include/uapi/linux/kvm.h | 1 + 3 files changed, 24 insertions(+) (limited to 'include/uapi') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 725250858479..598278cd0dc5 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -4110,6 +4110,14 @@ Returns: 0 on success; -EINVAL if the machine does not support Allows use of guarded storage for the KVM guest. +7.10 KVM_CAP_S390_AIS + +Architectures: s390 +Parameters: none + +Allow use of adapter-interruption suppression. +Returns: 0 on success; -EBUSY if a VCPU has already been created. + 8. Other capabilities. ---------------------- diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 977cc1660a83..11b7d6638991 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -380,6 +380,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_SKEYS: case KVM_CAP_S390_IRQ_STATE: case KVM_CAP_S390_USER_INSTR0: + case KVM_CAP_S390_AIS: r = 1; break; case KVM_CAP_S390_MEM_OP: @@ -544,6 +545,20 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s", r ? 
"(not available)" : "(success)"); break; + case KVM_CAP_S390_AIS: + mutex_lock(&kvm->lock); + if (kvm->created_vcpus) { + r = -EBUSY; + } else { + set_kvm_facility(kvm->arch.model.fac_mask, 72); + set_kvm_facility(kvm->arch.model.fac_list, 72); + kvm->arch.float_int.ais_enabled = 1; + r = 0; + } + mutex_unlock(&kvm->lock); + VM_EVENT(kvm, 3, "ENABLE: AIS %s", + r ? "(not available)" : "(success)"); + break; case KVM_CAP_S390_GS: r = -EINVAL; mutex_lock(&kvm->lock); diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index c9d522765f8f..33dd2a4e36dc 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -884,6 +884,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_PPC_MMU_HASH_V3 135 #define KVM_CAP_IMMEDIATE_EXIT 136 #define KVM_CAP_S390_GS 137 +#define KVM_CAP_S390_AIS 138 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3-71-gd317 From e3689e470fa0d9ebaa9d13d069e8d73c8d82a11d Mon Sep 17 00:00:00 2001 From: Jordan Crouse Date: Tue, 7 Mar 2017 10:02:53 -0700 Subject: drm/msm: Add MSM_PARAM_GMEM_BASE User space needs to know where the GMEM whole starts so that they can set up the addressing correctly. 
Signed-off-by: Jordan Crouse Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 3 +++ include/uapi/drm/msm_drm.h | 1 + 2 files changed, 4 insertions(+) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 9a92bcf982b8..4cac22633ce4 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -35,6 +35,9 @@ int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value) case MSM_PARAM_GMEM_SIZE: *value = adreno_gpu->gmem; return 0; + case MSM_PARAM_GMEM_BASE: + *value = 0x100000; + return 0; case MSM_PARAM_CHIP_ID: *value = adreno_gpu->rev.patchid | (adreno_gpu->rev.minor << 8) | diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index 4d5d6a2bc59e..a4a189a240d7 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -72,6 +72,7 @@ struct drm_msm_timespec { #define MSM_PARAM_CHIP_ID 0x03 #define MSM_PARAM_MAX_FREQ 0x04 #define MSM_PARAM_TIMESTAMP 0x05 +#define MSM_PARAM_GMEM_BASE 0x06 struct drm_msm_param { __u32 pipe; /* in, MSM_PIPE_x */ -- cgit v1.2.3-71-gd317 From 5daab9db7b65df87da26fd8cfa695fb9546a1ddb Mon Sep 17 00:00:00 2001 From: Chenbo Feng Date: Wed, 5 Apr 2017 19:00:55 -0700 Subject: New getsockopt option to get socket cookie Introduce a new getsockopt operation to retrieve the socket cookie for a specific socket based on the socket fd. It returns a unique non-decreasing cookie for each socket. Tested: https://android-review.googlesource.com/#/c/358163/ Acked-by: Willem de Bruijn Signed-off-by: Chenbo Feng Signed-off-by: David S. 
Miller --- arch/alpha/include/uapi/asm/socket.h | 2 ++ arch/avr32/include/uapi/asm/socket.h | 2 ++ arch/frv/include/uapi/asm/socket.h | 2 ++ arch/ia64/include/uapi/asm/socket.h | 2 ++ arch/m32r/include/uapi/asm/socket.h | 2 ++ arch/mips/include/uapi/asm/socket.h | 2 ++ arch/mn10300/include/uapi/asm/socket.h | 2 ++ arch/parisc/include/uapi/asm/socket.h | 2 ++ arch/powerpc/include/uapi/asm/socket.h | 2 ++ arch/s390/include/uapi/asm/socket.h | 2 ++ arch/sparc/include/uapi/asm/socket.h | 2 ++ arch/xtensa/include/uapi/asm/socket.h | 2 ++ include/uapi/asm-generic/socket.h | 2 ++ net/core/sock.c | 8 ++++++++ 14 files changed, 34 insertions(+) (limited to 'include/uapi') diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index 1bb8cac61a28..148d7a32754e 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -103,4 +103,6 @@ #define SO_INCOMING_NAPI_ID 56 +#define SO_COOKIE 57 + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/avr32/include/uapi/asm/socket.h b/arch/avr32/include/uapi/asm/socket.h index f824eeb0f2e4..2434d08ad8d6 100644 --- a/arch/avr32/include/uapi/asm/socket.h +++ b/arch/avr32/include/uapi/asm/socket.h @@ -96,4 +96,6 @@ #define SO_INCOMING_NAPI_ID 56 +#define SO_COOKIE 57 + #endif /* _UAPI__ASM_AVR32_SOCKET_H */ diff --git a/arch/frv/include/uapi/asm/socket.h b/arch/frv/include/uapi/asm/socket.h index a8ad9bebfc47..1ccf45657472 100644 --- a/arch/frv/include/uapi/asm/socket.h +++ b/arch/frv/include/uapi/asm/socket.h @@ -96,5 +96,7 @@ #define SO_INCOMING_NAPI_ID 56 +#define SO_COOKIE 57 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h index 6af3253e4209..2c3f4b48042a 100644 --- a/arch/ia64/include/uapi/asm/socket.h +++ b/arch/ia64/include/uapi/asm/socket.h @@ -105,4 +105,6 @@ #define SO_INCOMING_NAPI_ID 56 +#define SO_COOKIE 57 + #endif /* _ASM_IA64_SOCKET_H */ diff --git a/arch/m32r/include/uapi/asm/socket.h 
b/arch/m32r/include/uapi/asm/socket.h index e98b6bb897c0..ae6548d29a18 100644 --- a/arch/m32r/include/uapi/asm/socket.h +++ b/arch/m32r/include/uapi/asm/socket.h @@ -96,4 +96,6 @@ #define SO_INCOMING_NAPI_ID 56 +#define SO_COOKIE 57 + #endif /* _ASM_M32R_SOCKET_H */ diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index ae2b62e39d4d..3418ec9c1c50 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -114,4 +114,6 @@ #define SO_INCOMING_NAPI_ID 56 +#define SO_COOKIE 57 + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/mn10300/include/uapi/asm/socket.h b/arch/mn10300/include/uapi/asm/socket.h index e4ac1843ee01..4526e92301a6 100644 --- a/arch/mn10300/include/uapi/asm/socket.h +++ b/arch/mn10300/include/uapi/asm/socket.h @@ -96,4 +96,6 @@ #define SO_INCOMING_NAPI_ID 56 +#define SO_COOKIE 57 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index f754c793e82a..514701840bd9 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -95,4 +95,6 @@ #define SO_INCOMING_NAPI_ID 0x4031 +#define SO_COOKIE 0x4032 + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h index 5f84af7dcb2e..58e2ec0310fc 100644 --- a/arch/powerpc/include/uapi/asm/socket.h +++ b/arch/powerpc/include/uapi/asm/socket.h @@ -103,4 +103,6 @@ #define SO_INCOMING_NAPI_ID 56 +#define SO_COOKIE 57 + #endif /* _ASM_POWERPC_SOCKET_H */ diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h index 25ac4960e707..e8e5ecf673fd 100644 --- a/arch/s390/include/uapi/asm/socket.h +++ b/arch/s390/include/uapi/asm/socket.h @@ -102,4 +102,6 @@ #define SO_INCOMING_NAPI_ID 56 +#define SO_COOKIE 57 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index 
b05513acd589..3f4ad19d9ec7 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -92,6 +92,8 @@ #define SO_INCOMING_NAPI_ID 0x003a +#define SO_COOKIE 0x003b + /* Security levels - as per NRL IPv6 - don't actually do anything */ #define SO_SECURITY_AUTHENTICATION 0x5001 #define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002 diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h index 786606c81edd..1eb6d2fe70d3 100644 --- a/arch/xtensa/include/uapi/asm/socket.h +++ b/arch/xtensa/include/uapi/asm/socket.h @@ -107,4 +107,6 @@ #define SO_INCOMING_NAPI_ID 56 +#define SO_COOKIE 57 + #endif /* _XTENSA_SOCKET_H */ diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index c98a52fb572a..2b488565599d 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -98,4 +98,6 @@ #define SO_INCOMING_NAPI_ID 56 +#define SO_COOKIE 57 + #endif /* __ASM_GENERIC_SOCKET_H */ diff --git a/net/core/sock.c b/net/core/sock.c index 392f9b6f96e2..a06bb7a2a689 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1083,6 +1083,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, union { int val; + u64 val64; struct linger ling; struct timeval tm; } v; @@ -1340,6 +1341,13 @@ int sock_getsockopt(struct socket *sock, int level, int optname, break; #endif + case SO_COOKIE: + lv = sizeof(u64); + if (len < lv) + return -EINVAL; + v.val64 = sock_gen_cookie(sk); + break; + default: /* We implement the SO_SNDLOWAT etc to not be settable * (1003.1g 7). -- cgit v1.2.3-71-gd317 From 261a0a54d1ebcd04fb4a1c3f971b2c3b7ae06925 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 6 Apr 2017 16:10:42 +0200 Subject: netlink: uapi: use hex numbers for NLM_F_* flags It's rather confusing that the netlink message flags are numbered 1, 2, 4, 8, 16, 32, , 0x100. Make that more understandable by numbering the lower ones with hex constants as well. 
Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/uapi/linux/netlink.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index f3946a27bd07..b2c9c26ea30f 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -50,12 +50,12 @@ struct nlmsghdr { /* Flags values */ -#define NLM_F_REQUEST 1 /* It is request message. */ -#define NLM_F_MULTI 2 /* Multipart message, terminated by NLMSG_DONE */ -#define NLM_F_ACK 4 /* Reply with ack, with zero or error code */ -#define NLM_F_ECHO 8 /* Echo this request */ -#define NLM_F_DUMP_INTR 16 /* Dump was inconsistent due to sequence change */ -#define NLM_F_DUMP_FILTERED 32 /* Dump was filtered as requested */ +#define NLM_F_REQUEST 0x01 /* It is request message. */ +#define NLM_F_MULTI 0x02 /* Multipart message, terminated by NLMSG_DONE */ +#define NLM_F_ACK 0x04 /* Reply with ack, with zero or error code */ +#define NLM_F_ECHO 0x08 /* Echo this request */ +#define NLM_F_DUMP_INTR 0x10 /* Dump was inconsistent due to sequence change */ +#define NLM_F_DUMP_FILTERED 0x20 /* Dump was filtered as requested */ /* Modifiers to GET request */ #define NLM_F_ROOT 0x100 /* specify tree root */ -- cgit v1.2.3-71-gd317 From f580ff0e404e5aad83f02093cd22f2475cad6e71 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 28 Mar 2017 16:10:32 +0200 Subject: aspeed-lpc-ctrl: include linux/types.h for uapi header The newly added header file triggers a sanity check: usr/include/linux/aspeed-lpc-ctrl.h:44: found __[us]{8,16,32,64} type without #include <linux/types.h> We should include linux/types.h explicitly to ensure the header can be included from user space.
Fixes: 6c4e97678501 ("drivers/misc: Add Aspeed LPC control driver") Signed-off-by: Arnd Bergmann Acked-by: Joel Stanley Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/aspeed-lpc-ctrl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/aspeed-lpc-ctrl.h b/include/uapi/linux/aspeed-lpc-ctrl.h index f96fa995a3f0..c328c976c684 100644 --- a/include/uapi/linux/aspeed-lpc-ctrl.h +++ b/include/uapi/linux/aspeed-lpc-ctrl.h @@ -11,6 +11,7 @@ #define _UAPI_LINUX_ASPEED_LPC_CTRL_H #include <linux/ioctl.h> +#include <linux/types.h> /* Window types */ #define ASPEED_LPC_CTRL_WINDOW_FLASH 1 -- cgit v1.2.3-71-gd317 From 3fe17e6826162021d5e9274949571b19fc94826b Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 27 Sep 2016 21:08:05 +0200 Subject: KVM: arm/arm64: Add ARM user space interrupt signaling ABI We have 2 modes for dealing with interrupts in the ARM world. We can either handle them all using hardware acceleration through the vgic or we can emulate a gic in user space and only drive CPU IRQ pins from there. Unfortunately, when driving IRQs from user space, we never tell user space about events from devices emulated inside the kernel, which may result in interrupt line state changes, so we lose out on for example timer and PMU events if we run with user space gic emulation. Define an ABI to publish such device output levels to userspace.
Reviewed-by: Alexander Graf Reviewed-by: Marc Zyngier Signed-off-by: Alexander Graf Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- Documentation/virtual/kvm/api.txt | 42 +++++++++++++++++++++++++++++++++++++++ arch/arm/include/uapi/asm/kvm.h | 2 ++ arch/arm64/include/uapi/asm/kvm.h | 2 ++ include/uapi/linux/kvm.h | 8 ++++++++ 4 files changed, 54 insertions(+) (limited to 'include/uapi') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 3c248f772ae6..3b4e76e5201e 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -4147,3 +4147,45 @@ This capability, if KVM_CHECK_EXTENSION indicates that it is available, means that that the kernel can support guests using the hashed page table MMU defined in Power ISA V3.00 (as implemented in the POWER9 processor), including in-memory segment tables. + + +8.5 KVM_CAP_ARM_USER_IRQ + +Architectures: arm, arm64 +This capability, if KVM_CHECK_EXTENSION indicates that it is available, means +that if userspace creates a VM without an in-kernel interrupt controller, it +will be notified of changes to the output level of in-kernel emulated devices, +which can generate virtual interrupts, presented to the VM. +For such VMs, on every return to userspace, the kernel +updates the vcpu's run->s.regs.device_irq_level field to represent the actual +output level of the device. + +Whenever kvm detects a change in the device output level, kvm guarantees at +least one return to userspace before running the VM. This exit could either +be a KVM_EXIT_INTR or any other exit event, like KVM_EXIT_MMIO. This way, +userspace can always sample the device output level and re-compute the state of +the userspace interrupt controller. Userspace should always check the state +of run->s.regs.device_irq_level on every kvm exit. +The value in run->s.regs.device_irq_level can represent both level and edge +triggered interrupt signals, depending on the device. 
Edge triggered interrupt +signals will exit to userspace with the bit in run->s.regs.device_irq_level +set exactly once per edge signal. + +The field run->s.regs.device_irq_level is available independent of +run->kvm_valid_regs or run->kvm_dirty_regs bits. + +If KVM_CAP_ARM_USER_IRQ is supported, the KVM_CHECK_EXTENSION ioctl returns a +number larger than 0 indicating the version of this capability is implemented +and thereby which bits in in run->s.regs.device_irq_level can signal values. + +Currently the following bits are defined for the device_irq_level bitmap: + + KVM_CAP_ARM_USER_IRQ >= 1: + + KVM_ARM_DEV_EL1_VTIMER - EL1 virtual timer + KVM_ARM_DEV_EL1_PTIMER - EL1 physical timer + KVM_ARM_DEV_PMU - ARM PMU overflow interrupt signal + +Future versions of kvm may implement additional events. These will get +indicated by returning a higher number from KVM_CHECK_EXTENSION and will be +listed above. diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index 6ebd3e6a1fd1..a5838d605e7b 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -114,6 +114,8 @@ struct kvm_debug_exit_arch { }; struct kvm_sync_regs { + /* Used with KVM_CAP_ARM_USER_IRQ */ + __u64 device_irq_level; }; struct kvm_arch_memory_slot { diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index c2860358ae3e..cd6bea495e63 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -143,6 +143,8 @@ struct kvm_debug_exit_arch { #define KVM_GUESTDBG_USE_HW (1 << 17) struct kvm_sync_regs { + /* Used with KVM_CAP_ARM_USER_IRQ */ + __u64 device_irq_level; }; struct kvm_arch_memory_slot { diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index f51d5082a377..6d6b9b237f0b 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -883,6 +883,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_PPC_MMU_RADIX 134 #define KVM_CAP_PPC_MMU_HASH_V3 135 #define 
KVM_CAP_IMMEDIATE_EXIT 136 +#define KVM_CAP_ARM_USER_IRQ 137 #ifdef KVM_CAP_IRQ_ROUTING @@ -1354,4 +1355,11 @@ struct kvm_assigned_msix_entry { #define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) #define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) +/* Available with KVM_CAP_ARM_USER_IRQ */ + +/* Bits for run->s.regs.device_irq_level */ +#define KVM_ARM_DEV_EL1_VTIMER (1 << 0) +#define KVM_ARM_DEV_EL1_PTIMER (1 << 1) +#define KVM_ARM_DEV_PMU (1 << 2) + #endif /* __LINUX_KVM_H */ -- cgit v1.2.3-71-gd317 From bf74b20d00b13919db7ae5d1015636e76f56f6ae Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 9 Apr 2017 14:45:21 -0700 Subject: Revert "rtnl: Add support for netdev event to link messages" This reverts commit def12888c161e6fec0702e5ec9c3962846e3a21d. As per discussion between Roopa Prabhu and David Ahern, it is advisable that we instead have the code collect the setlink triggered events into a bitmask emitted in the IFLA_EVENT netlink attribute. Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 3 +- include/uapi/linux/if_link.h | 21 ---------- net/core/dev.c | 2 +- net/core/rtnetlink.c | 92 +++++--------------------------------------- 4 files changed, 11 insertions(+), 107 deletions(-) (limited to 'include/uapi') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 0459018173cf..57e54847b0b9 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -18,8 +18,7 @@ extern int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change, gfp_t flags); struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, - unsigned change, unsigned long event, - gfp_t flags); + unsigned change, gfp_t flags); void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, gfp_t flags); diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 97f6d302f627..8b405afb2376 100644 --- a/include/uapi/linux/if_link.h 
+++ b/include/uapi/linux/if_link.h @@ -157,7 +157,6 @@ enum { IFLA_GSO_MAX_SIZE, IFLA_PAD, IFLA_XDP, - IFLA_EVENT, __IFLA_MAX }; @@ -900,24 +899,4 @@ enum { #define IFLA_XDP_MAX (__IFLA_XDP_MAX - 1) -enum { - IFLA_EVENT_UNSPEC, - IFLA_EVENT_REBOOT, - IFLA_EVENT_CHANGE_MTU, - IFLA_EVENT_CHANGE_ADDR, - IFLA_EVENT_CHANGE_NAME, - IFLA_EVENT_FEAT_CHANGE, - IFLA_EVENT_BONDING_FAILOVER, - IFLA_EVENT_POST_TYPE_CHANGE, - IFLA_EVENT_NOTIFY_PEERS, - IFLA_EVENT_CHANGE_UPPER, - IFLA_EVENT_RESEND_IGMP, - IFLA_EVENT_PRE_CHANGE_MTU, - IFLA_EVENT_CHANGE_INFO_DATA, - IFLA_EVENT_PRE_CHANGE_UPPER, - IFLA_EVENT_CHANGE_LOWER_STATE, - IFLA_EVENT_UDP_TUNNEL_PUSH_INFO, - IFLA_EVENT_CHANGE_TX_QUEUE_LEN, -}; - #endif /* _UAPI_LINUX_IF_LINK_H */ diff --git a/net/core/dev.c b/net/core/dev.c index 7efb4178ffef..ef9fe60ee294 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6840,7 +6840,7 @@ static void rollback_registered_many(struct list_head *head) if (!dev->rtnl_link_ops || dev->rtnl_link_state == RTNL_LINK_INITIALIZED) - skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0, + skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, GFP_KERNEL); /* diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index b2bd4c9ee860..58419da7961b 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -944,7 +944,6 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */ + nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */ + rtnl_xdp_size(dev) /* IFLA_XDP */ - + nla_total_size(4) /* IFLA_EVENT */ + nla_total_size(1); /* IFLA_PROTO_DOWN */ } @@ -1277,70 +1276,9 @@ err_cancel: return err; } -static int rtnl_fill_link_event(struct sk_buff *skb, unsigned long event) -{ - u32 rtnl_event; - - switch (event) { - case NETDEV_REBOOT: - rtnl_event = IFLA_EVENT_REBOOT; - break; - case NETDEV_CHANGEMTU: - rtnl_event = IFLA_EVENT_CHANGE_MTU; - break; - case NETDEV_CHANGEADDR: - rtnl_event = IFLA_EVENT_CHANGE_ADDR; - break; - case 
NETDEV_CHANGENAME: - rtnl_event = IFLA_EVENT_CHANGE_NAME; - break; - case NETDEV_FEAT_CHANGE: - rtnl_event = IFLA_EVENT_FEAT_CHANGE; - break; - case NETDEV_BONDING_FAILOVER: - rtnl_event = IFLA_EVENT_BONDING_FAILOVER; - break; - case NETDEV_POST_TYPE_CHANGE: - rtnl_event = IFLA_EVENT_POST_TYPE_CHANGE; - break; - case NETDEV_NOTIFY_PEERS: - rtnl_event = IFLA_EVENT_NOTIFY_PEERS; - break; - case NETDEV_CHANGEUPPER: - rtnl_event = IFLA_EVENT_CHANGE_UPPER; - break; - case NETDEV_RESEND_IGMP: - rtnl_event = IFLA_EVENT_RESEND_IGMP; - break; - case NETDEV_PRECHANGEMTU: - rtnl_event = IFLA_EVENT_PRE_CHANGE_MTU; - break; - case NETDEV_CHANGEINFODATA: - rtnl_event = IFLA_EVENT_CHANGE_INFO_DATA; - break; - case NETDEV_PRECHANGEUPPER: - rtnl_event = IFLA_EVENT_PRE_CHANGE_UPPER; - break; - case NETDEV_CHANGELOWERSTATE: - rtnl_event = IFLA_EVENT_CHANGE_LOWER_STATE; - break; - case NETDEV_UDP_TUNNEL_PUSH_INFO: - rtnl_event = IFLA_EVENT_UDP_TUNNEL_PUSH_INFO; - break; - case NETDEV_CHANGE_TX_QUEUE_LEN: - rtnl_event = IFLA_EVENT_CHANGE_TX_QUEUE_LEN; - break; - default: - return 0; - } - - return nla_put_u32(skb, IFLA_EVENT, rtnl_event); -} - static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, int type, u32 pid, u32 seq, u32 change, - unsigned int flags, u32 ext_filter_mask, - unsigned long event) + unsigned int flags, u32 ext_filter_mask) { struct ifinfomsg *ifm; struct nlmsghdr *nlh; @@ -1389,9 +1327,6 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down)) goto nla_put_failure; - if (rtnl_fill_link_event(skb, event)) - goto nla_put_failure; - if (rtnl_fill_link_ifmap(skb, dev)) goto nla_put_failure; @@ -1526,7 +1461,6 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_LINK_NETNSID] = { .type = NLA_S32 }, [IFLA_PROTO_DOWN] = { .type = NLA_U8 }, [IFLA_XDP] = { .type = NLA_NESTED }, - [IFLA_EVENT] = { .type = NLA_U32 }, }; static const struct nla_policy 
ifla_info_policy[IFLA_INFO_MAX+1] = { @@ -1685,7 +1619,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 0, flags, - ext_filter_mask, 0); + ext_filter_mask); /* If we ran out of room on the first message, * we're in trouble */ @@ -2776,7 +2710,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh) return -ENOBUFS; err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).portid, - nlh->nlmsg_seq, 0, 0, ext_filter_mask, 0); + nlh->nlmsg_seq, 0, 0, ext_filter_mask); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_size */ WARN_ON(err == -EMSGSIZE); @@ -2848,8 +2782,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) } struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, - unsigned int change, - unsigned long event, gfp_t flags) + unsigned int change, gfp_t flags) { struct net *net = dev_net(dev); struct sk_buff *skb; @@ -2860,7 +2793,7 @@ struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, if (skb == NULL) goto errout; - err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0, 0, event); + err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0, 0); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); @@ -2881,25 +2814,18 @@ void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, gfp_t flags) rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, flags); } -static void rtmsg_ifinfo_event(int type, struct net_device *dev, - unsigned int change, unsigned long event, - gfp_t flags) +void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change, + gfp_t flags) { struct sk_buff *skb; if (dev->reg_state != NETREG_REGISTERED) return; - skb = rtmsg_ifinfo_build_skb(type, dev, change, event, flags); + skb = rtmsg_ifinfo_build_skb(type, dev, change, flags); if (skb) rtmsg_ifinfo_send(skb, dev, flags); } - -void rtmsg_ifinfo(int type, struct net_device *dev, 
unsigned int change, - gfp_t flags) -{ - rtmsg_ifinfo_event(type, dev, change, 0, flags); -} EXPORT_SYMBOL(rtmsg_ifinfo); static int nlmsg_populate_fdb_fill(struct sk_buff *skb, @@ -4206,7 +4132,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi case NETDEV_CHANGELOWERSTATE: case NETDEV_UDP_TUNNEL_PUSH_INFO: case NETDEV_CHANGE_TX_QUEUE_LEN: - rtmsg_ifinfo_event(RTM_NEWLINK, dev, 0, event, GFP_KERNEL); + rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL); break; default: break; -- cgit v1.2.3-71-gd317 From 3c60a531b9e175693a2d61f6bfd7ffacce4146cd Mon Sep 17 00:00:00 2001 From: Alexander Alemayhu Date: Sat, 8 Apr 2017 22:08:10 +0200 Subject: bpf: fix comment typo o s/bpf_bpf_get_socket_cookie/bpf_get_socket_cookie Signed-off-by: Alexander Alemayhu Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 2 +- tools/include/uapi/linux/bpf.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a1d95386f562..1e062bb54eec 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -472,7 +472,7 @@ union bpf_attr { * > 0 length of the string including the trailing NUL on success * < 0 error * - * u64 bpf_bpf_get_socket_cookie(skb) + * u64 bpf_get_socket_cookie(skb) * Get the cookie for the socket stored inside sk_buff. * @skb: pointer to skb * Return: 8 Bytes non-decreasing number on success or 0 if the socket diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index a1d95386f562..1e062bb54eec 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -472,7 +472,7 @@ union bpf_attr { * > 0 length of the string including the trailing NUL on success * < 0 error * - * u64 bpf_bpf_get_socket_cookie(skb) + * u64 bpf_get_socket_cookie(skb) * Get the cookie for the socket stored inside sk_buff. 
* @skb: pointer to skb * Return: 8 Bytes non-decreasing number on success or 0 if the socket -- cgit v1.2.3-71-gd317 From 5df082e2312c73e6f775c046286455acea9371ea Mon Sep 17 00:00:00 2001 From: Evgeni Raikhel Date: Thu, 2 Mar 2017 20:43:19 -0300 Subject: [media] Documentation: Intel SR300 Depth camera INZI format Provide the frame structure and data layout of V4L2-PIX-FMT-INZI format utilized by Intel SR300 Depth camera. Signed-off-by: Evgeni Raikhel Acked-by: Hans Verkuil Signed-off-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab --- Documentation/media/uapi/v4l/depth-formats.rst | 1 + Documentation/media/uapi/v4l/pixfmt-inzi.rst | 81 ++++++++++++++++++++++++++ drivers/media/v4l2-core/v4l2-ioctl.c | 1 + include/uapi/linux/videodev2.h | 1 + 4 files changed, 84 insertions(+) create mode 100644 Documentation/media/uapi/v4l/pixfmt-inzi.rst (limited to 'include/uapi') diff --git a/Documentation/media/uapi/v4l/depth-formats.rst b/Documentation/media/uapi/v4l/depth-formats.rst index 82f183870aae..d1641e9687a6 100644 --- a/Documentation/media/uapi/v4l/depth-formats.rst +++ b/Documentation/media/uapi/v4l/depth-formats.rst @@ -12,4 +12,5 @@ Depth data provides distance to points, mapped onto the image plane .. toctree:: :maxdepth: 1 + pixfmt-inzi pixfmt-z16 diff --git a/Documentation/media/uapi/v4l/pixfmt-inzi.rst b/Documentation/media/uapi/v4l/pixfmt-inzi.rst new file mode 100644 index 000000000000..9849e799f205 --- /dev/null +++ b/Documentation/media/uapi/v4l/pixfmt-inzi.rst @@ -0,0 +1,81 @@ +.. -*- coding: utf-8; mode: rst -*- + +.. _V4L2-PIX-FMT-INZI: + +************************** +V4L2_PIX_FMT_INZI ('INZI') +************************** + +Infrared 10-bit linked with Depth 16-bit images + + +Description +=========== + +Proprietary multi-planar format used by Intel SR300 Depth cameras, comprise of +Infrared image followed by Depth data. 
The pixel definition is 32-bpp, +with the Depth and Infrared Data split into separate continuous planes of +identical dimensions. + + + +The first plane - Infrared data - is stored according to +:ref:`V4L2_PIX_FMT_Y10 ` greyscale format. +Each pixel is 16-bit cell, with actual data stored in the 10 LSBs +with values in range 0 to 1023. +The six remaining MSBs are padded with zeros. + + +The second plane provides 16-bit per-pixel Depth data arranged in +:ref:`V4L2-PIX-FMT-Z16 ` format. + + +**Frame Structure.** +Each cell is a 16-bit word with more significant data stored at higher +memory address (byte order is little-endian). + +.. raw:: latex + + \newline\newline\begin{adjustbox}{width=\columnwidth} + +.. tabularcolumns:: |p{4.0cm}|p{4.0cm}|p{4.0cm}|p{4.0cm}|p{4.0cm}|p{4.0cm}| + +.. flat-table:: + :header-rows: 0 + :stub-columns: 1 + :widths: 1 1 1 1 1 1 + + * - Ir\ :sub:`0,0` + - Ir\ :sub:`0,1` + - Ir\ :sub:`0,2` + - ... + - ... + - ... + * - :cspan:`5` ... + * - :cspan:`5` Infrared Data + * - :cspan:`5` ... + * - ... + - ... + - ... + - Ir\ :sub:`n-1,n-3` + - Ir\ :sub:`n-1,n-2` + - Ir\ :sub:`n-1,n-1` + * - Depth\ :sub:`0,0` + - Depth\ :sub:`0,1` + - Depth\ :sub:`0,2` + - ... + - ... + - ... + * - :cspan:`5` ... + * - :cspan:`5` Depth Data + * - :cspan:`5` ... + * - ... + - ... + - ... + - Depth\ :sub:`n-1,n-3` + - Depth\ :sub:`n-1,n-2` + - Depth\ :sub:`n-1,n-1` + +.. 
raw:: latex + + \end{adjustbox}\newline\newline diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c index 0c3f238a2e76..93e8f42b0d63 100644 --- a/drivers/media/v4l2-core/v4l2-ioctl.c +++ b/drivers/media/v4l2-core/v4l2-ioctl.c @@ -1131,6 +1131,7 @@ static void v4l_fill_fmtdesc(struct v4l2_fmtdesc *fmt) case V4L2_PIX_FMT_Y8I: descr = "Interleaved 8-bit Greyscale"; break; case V4L2_PIX_FMT_Y12I: descr = "Interleaved 12-bit Greyscale"; break; case V4L2_PIX_FMT_Z16: descr = "16-bit Depth"; break; + case V4L2_PIX_FMT_INZI: descr = "Planar 10:16 Greyscale Depth"; break; case V4L2_PIX_FMT_PAL8: descr = "8-bit Palette"; break; case V4L2_PIX_FMT_UV8: descr = "8-bit Chrominance UV 4-4"; break; case V4L2_PIX_FMT_YVU410: descr = "Planar YVU 4:1:0"; break; diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 316be62f3a45..16edbd9eeca6 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -660,6 +660,7 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_Y12I v4l2_fourcc('Y', '1', '2', 'I') /* Greyscale 12-bit L/R interleaved */ #define V4L2_PIX_FMT_Z16 v4l2_fourcc('Z', '1', '6', ' ') /* Depth data 16-bit */ #define V4L2_PIX_FMT_MT21C v4l2_fourcc('M', 'T', '2', '1') /* Mediatek compressed block mode */ +#define V4L2_PIX_FMT_INZI v4l2_fourcc('I', 'N', 'Z', 'I') /* Intel Planar Greyscale 10-bit and Depth 16-bit */ /* SDR formats - used only for Software Defined Radio devices */ #define V4L2_SDR_FMT_CU8 v4l2_fourcc('C', 'U', '0', '8') /* IQ u8 */ -- cgit v1.2.3-71-gd317 From 4473710df1f8779c59b33737eeaa151596907761 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 6 Apr 2017 16:16:08 +0800 Subject: crypto: user - Prepare for CRYPTO_MAX_ALG_NAME expansion This patch hard-codes CRYPTO_MAX_NAME in the user-space API to 64, which is the current value of CRYPTO_MAX_ALG_NAME. This patch also replaces all remaining occurences of CRYPTO_MAX_ALG_NAME in the user-space API with CRYPTO_MAX_NAME. 
This way the user-space API will not be modified when we raise the value of CRYPTO_MAX_ALG_NAME. Furthermore, the code has been updated to handle names longer than the user-space API. They will be truncated. Signed-off-by: Herbert Xu Acked-by: Alexander Sverdlin Tested-by: Alexander Sverdlin --- crypto/crypto_user.c | 18 +++++++++--------- include/uapi/linux/cryptouser.h | 10 +++++----- 2 files changed, 14 insertions(+), 14 deletions(-) (limited to 'include/uapi') diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c index a90404a0c5ff..89acaab1d909 100644 --- a/crypto/crypto_user.c +++ b/crypto/crypto_user.c @@ -83,7 +83,7 @@ static int crypto_report_cipher(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_cipher rcipher; - strncpy(rcipher.type, "cipher", sizeof(rcipher.type)); + strlcpy(rcipher.type, "cipher", sizeof(rcipher.type)); rcipher.blocksize = alg->cra_blocksize; rcipher.min_keysize = alg->cra_cipher.cia_min_keysize; @@ -102,7 +102,7 @@ static int crypto_report_comp(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_comp rcomp; - strncpy(rcomp.type, "compression", sizeof(rcomp.type)); + strlcpy(rcomp.type, "compression", sizeof(rcomp.type)); if (nla_put(skb, CRYPTOCFGA_REPORT_COMPRESS, sizeof(struct crypto_report_comp), &rcomp)) goto nla_put_failure; @@ -116,7 +116,7 @@ static int crypto_report_acomp(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_acomp racomp; - strncpy(racomp.type, "acomp", sizeof(racomp.type)); + strlcpy(racomp.type, "acomp", sizeof(racomp.type)); if (nla_put(skb, CRYPTOCFGA_REPORT_ACOMP, sizeof(struct crypto_report_acomp), &racomp)) @@ -131,7 +131,7 @@ static int crypto_report_akcipher(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_akcipher rakcipher; - strncpy(rakcipher.type, "akcipher", sizeof(rakcipher.type)); + strlcpy(rakcipher.type, "akcipher", sizeof(rakcipher.type)); if (nla_put(skb, CRYPTOCFGA_REPORT_AKCIPHER, sizeof(struct 
crypto_report_akcipher), &rakcipher)) @@ -146,7 +146,7 @@ static int crypto_report_kpp(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_kpp rkpp; - strncpy(rkpp.type, "kpp", sizeof(rkpp.type)); + strlcpy(rkpp.type, "kpp", sizeof(rkpp.type)); if (nla_put(skb, CRYPTOCFGA_REPORT_KPP, sizeof(struct crypto_report_kpp), &rkpp)) @@ -160,10 +160,10 @@ nla_put_failure: static int crypto_report_one(struct crypto_alg *alg, struct crypto_user_alg *ualg, struct sk_buff *skb) { - strncpy(ualg->cru_name, alg->cra_name, sizeof(ualg->cru_name)); - strncpy(ualg->cru_driver_name, alg->cra_driver_name, + strlcpy(ualg->cru_name, alg->cra_name, sizeof(ualg->cru_name)); + strlcpy(ualg->cru_driver_name, alg->cra_driver_name, sizeof(ualg->cru_driver_name)); - strncpy(ualg->cru_module_name, module_name(alg->cra_module), + strlcpy(ualg->cru_module_name, module_name(alg->cra_module), sizeof(ualg->cru_module_name)); ualg->cru_type = 0; @@ -176,7 +176,7 @@ static int crypto_report_one(struct crypto_alg *alg, if (alg->cra_flags & CRYPTO_ALG_LARVAL) { struct crypto_report_larval rl; - strncpy(rl.type, "larval", sizeof(rl.type)); + strlcpy(rl.type, "larval", sizeof(rl.type)); if (nla_put(skb, CRYPTOCFGA_REPORT_LARVAL, sizeof(struct crypto_report_larval), &rl)) goto nla_put_failure; diff --git a/include/uapi/linux/cryptouser.h b/include/uapi/linux/cryptouser.h index 11d21fce14d6..b4def5c630e7 100644 --- a/include/uapi/linux/cryptouser.h +++ b/include/uapi/linux/cryptouser.h @@ -31,7 +31,7 @@ enum { #define CRYPTO_MSG_MAX (__CRYPTO_MSG_MAX - 1) #define CRYPTO_NR_MSGTYPES (CRYPTO_MSG_MAX + 1 - CRYPTO_MSG_BASE) -#define CRYPTO_MAX_NAME CRYPTO_MAX_ALG_NAME +#define CRYPTO_MAX_NAME 64 /* Netlink message attributes. 
*/ enum crypto_attr_type_t { @@ -53,9 +53,9 @@ enum crypto_attr_type_t { }; struct crypto_user_alg { - char cru_name[CRYPTO_MAX_ALG_NAME]; - char cru_driver_name[CRYPTO_MAX_ALG_NAME]; - char cru_module_name[CRYPTO_MAX_ALG_NAME]; + char cru_name[CRYPTO_MAX_NAME]; + char cru_driver_name[CRYPTO_MAX_NAME]; + char cru_module_name[CRYPTO_MAX_NAME]; __u32 cru_type; __u32 cru_mask; __u32 cru_refcnt; @@ -73,7 +73,7 @@ struct crypto_report_hash { }; struct crypto_report_cipher { - char type[CRYPTO_MAX_ALG_NAME]; + char type[CRYPTO_MAX_NAME]; unsigned int blocksize; unsigned int min_keysize; unsigned int max_keysize; -- cgit v1.2.3-71-gd317 From 04dffe11054f29716cf8058c6bf9d142d44aaaa5 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Sat, 5 Nov 2016 08:39:58 -0200 Subject: [media] serio.h: add SERIO_RAINSHADOW_CEC ID Add a new serio ID for the RainShadow Tech USB HDMI CEC adapter. Signed-off-by: Hans Verkuil Acked-by: Dmitry Torokhov Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/serio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/serio.h b/include/uapi/linux/serio.h index ccd0ccd00f47..ac217c6f0151 100644 --- a/include/uapi/linux/serio.h +++ b/include/uapi/linux/serio.h @@ -80,5 +80,6 @@ #define SERIO_WACOM_IV 0x3e #define SERIO_EGALAX 0x3f #define SERIO_PULSE8_CEC 0x40 +#define SERIO_RAINSHADOW_CEC 0x41 #endif /* _UAPI_SERIO_H */ -- cgit v1.2.3-71-gd317 From 86c7eb411b2450dafee3b0ec5c4c191f83e6511f Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Mon, 10 Apr 2017 04:51:44 -0300 Subject: [media] videodev2.h: fix outdated comment The XV601/709 Y'CbCr encoding was changed to limited range, but the comment still indicates full range. 
Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 16edbd9eeca6..8d351f5df2aa 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -362,8 +362,7 @@ enum v4l2_quantization { /* * The default for R'G'B' quantization is always full range, except * for the BT2020 colorspace. For Y'CbCr the quantization is always - * limited range, except for COLORSPACE_JPEG, XV601 or XV709: those - * are full range. + * limited range, except for COLORSPACE_JPEG: this is full range. */ V4L2_QUANTIZATION_DEFAULT = 0, V4L2_QUANTIZATION_FULL_RANGE = 1, -- cgit v1.2.3-71-gd317 From 2008c1544c73d5190f81ef1790fa5bd2fade5bd0 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 4 Apr 2017 21:09:20 +0300 Subject: Revert "virtio_pci: don't duplicate the msix_enable flag in struct pci_dev" This reverts commit 53a020c661741f3b87ad3ac6fa545088aaebac9b. The cleanup seems to be one of the changes that broke hibernation for some users. We are still not sure why, but the revert helps. Tested-by: Mike Galbraith Signed-off-by: Michael S.
Tsirkin --- drivers/virtio/virtio_pci_common.c | 5 +++-- drivers/virtio/virtio_pci_common.h | 2 ++ drivers/virtio/virtio_pci_legacy.c | 2 +- drivers/virtio/virtio_pci_modern.c | 2 +- include/uapi/linux/virtio_pci.h | 2 +- 5 files changed, 8 insertions(+), 5 deletions(-) (limited to 'include/uapi') diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 4608fd9aaa6c..3921b0a2439e 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -125,7 +125,7 @@ void vp_del_vqs(struct virtio_device *vdev) vp_remove_vqs(vdev); - if (vp_dev->pci_dev->msix_enabled) { + if (vp_dev->msix_enabled) { for (i = 0; i < vp_dev->msix_vectors; i++) free_cpumask_var(vp_dev->msix_affinity_masks[i]); @@ -249,6 +249,7 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, allocated_vectors++; } + vp_dev->msix_enabled = 1; return 0; out_remove_vqs: @@ -343,7 +344,7 @@ int vp_set_vq_affinity(struct virtqueue *vq, int cpu) if (!vq->callback) return -EINVAL; - if (vp_dev->pci_dev->msix_enabled) { + if (vp_dev->msix_enabled) { int vec = vp_dev->msix_vector_map[vq->index]; struct cpumask *mask = vp_dev->msix_affinity_masks[vec]; unsigned int irq = pci_irq_vector(vp_dev->pci_dev, vec); diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h index ac8c9d788964..c8074997fd28 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -64,6 +64,8 @@ struct virtio_pci_device { /* the IO mapping for the PCI config space */ void __iomem *ioaddr; + /* MSI-X support */ + int msix_enabled; cpumask_var_t *msix_affinity_masks; /* Name strings for interrupts. This size should be enough, * and I'm too lazy to allocate each name separately. 
*/ diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c index f7362c5fe18a..5dd01f09608b 100644 --- a/drivers/virtio/virtio_pci_legacy.c +++ b/drivers/virtio/virtio_pci_legacy.c @@ -165,7 +165,7 @@ static void del_vq(struct virtqueue *vq) iowrite16(vq->index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); - if (vp_dev->pci_dev->msix_enabled) { + if (vp_dev->msix_enabled) { iowrite16(VIRTIO_MSI_NO_VECTOR, vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); /* Flush the write out to device */ diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index 7bc3004b840e..7ce36daccc31 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -411,7 +411,7 @@ static void del_vq(struct virtqueue *vq) vp_iowrite16(vq->index, &vp_dev->common->queue_select); - if (vp_dev->pci_dev->msix_enabled) { + if (vp_dev->msix_enabled) { vp_iowrite16(VIRTIO_MSI_NO_VECTOR, &vp_dev->common->queue_msix_vector); /* Flush the write out to device */ diff --git a/include/uapi/linux/virtio_pci.h b/include/uapi/linux/virtio_pci.h index 15b4385a2be1..90007a1abcab 100644 --- a/include/uapi/linux/virtio_pci.h +++ b/include/uapi/linux/virtio_pci.h @@ -79,7 +79,7 @@ * configuration space */ #define VIRTIO_PCI_CONFIG_OFF(msix_enabled) ((msix_enabled) ? 
24 : 20) /* Deprecated: please use VIRTIO_PCI_CONFIG_OFF instead */ -#define VIRTIO_PCI_CONFIG(dev) VIRTIO_PCI_CONFIG_OFF((dev)->pci_dev->msix_enabled) +#define VIRTIO_PCI_CONFIG(dev) VIRTIO_PCI_CONFIG_OFF((dev)->msix_enabled) /* Virtio ABI version, this must match exactly */ #define VIRTIO_PCI_ABI_VERSION 0 -- cgit v1.2.3-71-gd317 From 1f7da4f87a594764c3a9d7f0d67ae5f996cbc3c9 Mon Sep 17 00:00:00 2001 From: Vincent Pelletier Date: Fri, 17 Feb 2017 11:20:38 +0000 Subject: usb: gadget: f_fs: Fix ExtCompat documentation in uapi header The code was fixed in commit 53642399aa71 ("usb: gadget: f_fs: Fix wrong check on reserved1 wof OS_DESC_EXT_COMPAT") but the in-header documentation kept referencing 0 as the expected value. Reference 1 instead as per original commit message. Signed-off-by: Vincent Pelletier Signed-off-by: Felipe Balbi --- include/uapi/linux/usb/functionfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/usb/functionfs.h b/include/uapi/linux/usb/functionfs.h index b2a31a55a612..062606f02309 100644 --- a/include/uapi/linux/usb/functionfs.h +++ b/include/uapi/linux/usb/functionfs.h @@ -158,7 +158,7 @@ struct usb_ext_prop_desc { * |-----+-----------------------+------+-------------------------------------| * | 0 | bFirstInterfaceNumber | U8 | index of the interface or of the 1st| * | | | | interface in an IAD group | - * | 1 | Reserved | U8 | 0 | + * | 1 | Reserved | U8 | 1 | * | 2 | CompatibleID | U8[8]| compatible ID string | * | 10 | SubCompatibleID | U8[8]| subcompatible ID string | * | 18 | Reserved | U8[6]| 0 | -- cgit v1.2.3-71-gd317 From 52eabba5bcdb2853dec6ef007ba427b092034738 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Thu, 2 Mar 2017 16:24:34 -0700 Subject: switchtec: Add IOCTLs to the Switchtec driver Add a couple of special IOCTLs to: * Inform userspace of firmware partition locations * Pass event counts and allow userspace to wait on events * Translate PFF numbers used 
by the switch to port numbers [Dan Carpenter : fix off-by-one in ioctl_event_ctl()] Tested-by: Krishna Dhulipala Signed-off-by: Logan Gunthorpe Signed-off-by: Stephen Bates Signed-off-by: Bjorn Helgaas Reviewed-by: Wei Zhang Reviewed-by: Jens Axboe --- Documentation/ioctl/ioctl-number.txt | 1 + Documentation/switchtec.txt | 27 ++ MAINTAINERS | 1 + drivers/pci/switch/switchtec.c | 481 +++++++++++++++++++++++++++++++++++ include/uapi/linux/switchtec_ioctl.h | 132 ++++++++++ 5 files changed, 642 insertions(+) create mode 100644 include/uapi/linux/switchtec_ioctl.h (limited to 'include/uapi') diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt index 08244bea5048..0682bd3eaa8a 100644 --- a/Documentation/ioctl/ioctl-number.txt +++ b/Documentation/ioctl/ioctl-number.txt @@ -191,6 +191,7 @@ Code Seq#(hex) Include File Comments 'W' 00-1F linux/watchdog.h conflict! 'W' 00-1F linux/wanrouter.h conflict! (pre 3.9) 'W' 00-3F sound/asound.h conflict! +'W' 40-5F drivers/pci/switch/switchtec.c 'X' all fs/xfs/xfs_fs.h conflict! and fs/xfs/linux-2.6/xfs_ioctl32.h and include/linux/falloc.h diff --git a/Documentation/switchtec.txt b/Documentation/switchtec.txt index 4bced4c78446..a0a9c7b3d4d5 100644 --- a/Documentation/switchtec.txt +++ b/Documentation/switchtec.txt @@ -51,3 +51,30 @@ The char device has the following semantics: * The poll call will also be supported for userspace applications that need to do other things while waiting for the command to complete. + +The following IOCTLs are also supported by the device: + +* SWITCHTEC_IOCTL_FLASH_INFO - Retrieve firmware length and number + of partitions in the device. + +* SWITCHTEC_IOCTL_FLASH_PART_INFO - Retrieve address and length for + any specified partition in flash. + +* SWITCHTEC_IOCTL_EVENT_SUMMARY - Read a structure of bitmaps + indicating all uncleared events. + +* SWITCHTEC_IOCTL_EVENT_CTL - Get the current count, clear and set flags + for any event.
This ioctl takes in a switchtec_ioctl_event_ctl struct + with the event_id, index and flags set (index being the partition or PFF + number for non-global events). It returns whether the event has + occurred, the number of times and any event specific data. The flags + can be used to clear the count or enable and disable actions to + happen when the event occurs. + By using the SWITCHTEC_IOCTL_EVENT_FLAG_EN_POLL flag, + you can set an event to trigger a poll command to return with + POLLPRI. In this way, userspace can wait for events to occur. + +* SWITCHTEC_IOCTL_PFF_TO_PORT and SWITCHTEC_IOCTL_PORT_TO_PFF convert + between PCI Function Framework number (used by the event system) + and Switchtec Logic Port ID and Partition number (which is more + user friendly). diff --git a/MAINTAINERS b/MAINTAINERS index 76ccc5a805dd..3744019a8853 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9664,6 +9664,7 @@ S: Maintained F: Documentation/switchtec.txt F: Documentation/ABI/testing/sysfs-class-switchtec F: drivers/pci/switch/switchtec* +F: include/uapi/linux/switchtec_ioctl.h PCI DRIVER FOR NVIDIA TEGRA M: Thierry Reding diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c index 15ff61c7718b..cc6e085008fb 100644 --- a/drivers/pci/switch/switchtec.c +++ b/drivers/pci/switch/switchtec.c @@ -13,6 +13,8 @@ * */ +#include + #include #include #include @@ -778,6 +780,431 @@ static unsigned int switchtec_dev_poll(struct file *filp, poll_table *wait) return ret; } +static int ioctl_flash_info(struct switchtec_dev *stdev, + struct switchtec_ioctl_flash_info __user *uinfo) +{ + struct switchtec_ioctl_flash_info info = {0}; + struct flash_info_regs __iomem *fi = stdev->mmio_flash_info; + + info.flash_length = ioread32(&fi->flash_length); + info.num_partitions = SWITCHTEC_IOCTL_NUM_PARTITIONS; + + if (copy_to_user(uinfo, &info, sizeof(info))) + return -EFAULT; + + return 0; +} + +static void set_fw_info_part(struct switchtec_ioctl_flash_part_info *info, + struct 
partition_info __iomem *pi) +{ + info->address = ioread32(&pi->address); + info->length = ioread32(&pi->length); +} + +static int ioctl_flash_part_info(struct switchtec_dev *stdev, + struct switchtec_ioctl_flash_part_info __user *uinfo) +{ + struct switchtec_ioctl_flash_part_info info = {0}; + struct flash_info_regs __iomem *fi = stdev->mmio_flash_info; + u32 active_addr = -1; + + if (copy_from_user(&info, uinfo, sizeof(info))) + return -EFAULT; + + switch (info.flash_partition) { + case SWITCHTEC_IOCTL_PART_CFG0: + active_addr = ioread32(&fi->active_cfg); + set_fw_info_part(&info, &fi->cfg0); + break; + case SWITCHTEC_IOCTL_PART_CFG1: + active_addr = ioread32(&fi->active_cfg); + set_fw_info_part(&info, &fi->cfg1); + break; + case SWITCHTEC_IOCTL_PART_IMG0: + active_addr = ioread32(&fi->active_img); + set_fw_info_part(&info, &fi->img0); + break; + case SWITCHTEC_IOCTL_PART_IMG1: + active_addr = ioread32(&fi->active_img); + set_fw_info_part(&info, &fi->img1); + break; + case SWITCHTEC_IOCTL_PART_NVLOG: + set_fw_info_part(&info, &fi->nvlog); + break; + case SWITCHTEC_IOCTL_PART_VENDOR0: + set_fw_info_part(&info, &fi->vendor[0]); + break; + case SWITCHTEC_IOCTL_PART_VENDOR1: + set_fw_info_part(&info, &fi->vendor[1]); + break; + case SWITCHTEC_IOCTL_PART_VENDOR2: + set_fw_info_part(&info, &fi->vendor[2]); + break; + case SWITCHTEC_IOCTL_PART_VENDOR3: + set_fw_info_part(&info, &fi->vendor[3]); + break; + case SWITCHTEC_IOCTL_PART_VENDOR4: + set_fw_info_part(&info, &fi->vendor[4]); + break; + case SWITCHTEC_IOCTL_PART_VENDOR5: + set_fw_info_part(&info, &fi->vendor[5]); + break; + case SWITCHTEC_IOCTL_PART_VENDOR6: + set_fw_info_part(&info, &fi->vendor[6]); + break; + case SWITCHTEC_IOCTL_PART_VENDOR7: + set_fw_info_part(&info, &fi->vendor[7]); + break; + default: + return -EINVAL; + } + + if (info.address == active_addr) + info.active = 1; + + if (copy_to_user(uinfo, &info, sizeof(info))) + return -EFAULT; + + return 0; +} + +static int ioctl_event_summary(struct 
switchtec_dev *stdev, + struct switchtec_user *stuser, + struct switchtec_ioctl_event_summary __user *usum) +{ + struct switchtec_ioctl_event_summary s = {0}; + int i; + u32 reg; + + s.global = ioread32(&stdev->mmio_sw_event->global_summary); + s.part_bitmap = ioread32(&stdev->mmio_sw_event->part_event_bitmap); + s.local_part = ioread32(&stdev->mmio_part_cfg->part_event_summary); + + for (i = 0; i < stdev->partition_count; i++) { + reg = ioread32(&stdev->mmio_part_cfg_all[i].part_event_summary); + s.part[i] = reg; + } + + for (i = 0; i < SWITCHTEC_MAX_PFF_CSR; i++) { + reg = ioread16(&stdev->mmio_pff_csr[i].vendor_id); + if (reg != MICROSEMI_VENDOR_ID) + break; + + reg = ioread32(&stdev->mmio_pff_csr[i].pff_event_summary); + s.pff[i] = reg; + } + + if (copy_to_user(usum, &s, sizeof(s))) + return -EFAULT; + + stuser->event_cnt = atomic_read(&stdev->event_cnt); + + return 0; +} + +static u32 __iomem *global_ev_reg(struct switchtec_dev *stdev, + size_t offset, int index) +{ + return (void __iomem *)stdev->mmio_sw_event + offset; +} + +static u32 __iomem *part_ev_reg(struct switchtec_dev *stdev, + size_t offset, int index) +{ + return (void __iomem *)&stdev->mmio_part_cfg_all[index] + offset; +} + +static u32 __iomem *pff_ev_reg(struct switchtec_dev *stdev, + size_t offset, int index) +{ + return (void __iomem *)&stdev->mmio_pff_csr[index] + offset; +} + +#define EV_GLB(i, r)[i] = {offsetof(struct sw_event_regs, r), global_ev_reg} +#define EV_PAR(i, r)[i] = {offsetof(struct part_cfg_regs, r), part_ev_reg} +#define EV_PFF(i, r)[i] = {offsetof(struct pff_csr_regs, r), pff_ev_reg} + +const struct event_reg { + size_t offset; + u32 __iomem *(*map_reg)(struct switchtec_dev *stdev, + size_t offset, int index); +} event_regs[] = { + EV_GLB(SWITCHTEC_IOCTL_EVENT_STACK_ERROR, stack_error_event_hdr), + EV_GLB(SWITCHTEC_IOCTL_EVENT_PPU_ERROR, ppu_error_event_hdr), + EV_GLB(SWITCHTEC_IOCTL_EVENT_ISP_ERROR, isp_error_event_hdr), + EV_GLB(SWITCHTEC_IOCTL_EVENT_SYS_RESET, 
sys_reset_event_hdr), + EV_GLB(SWITCHTEC_IOCTL_EVENT_FW_EXC, fw_exception_hdr), + EV_GLB(SWITCHTEC_IOCTL_EVENT_FW_NMI, fw_nmi_hdr), + EV_GLB(SWITCHTEC_IOCTL_EVENT_FW_NON_FATAL, fw_non_fatal_hdr), + EV_GLB(SWITCHTEC_IOCTL_EVENT_FW_FATAL, fw_fatal_hdr), + EV_GLB(SWITCHTEC_IOCTL_EVENT_TWI_MRPC_COMP, twi_mrpc_comp_hdr), + EV_GLB(SWITCHTEC_IOCTL_EVENT_TWI_MRPC_COMP_ASYNC, + twi_mrpc_comp_async_hdr), + EV_GLB(SWITCHTEC_IOCTL_EVENT_CLI_MRPC_COMP, cli_mrpc_comp_hdr), + EV_GLB(SWITCHTEC_IOCTL_EVENT_CLI_MRPC_COMP_ASYNC, + cli_mrpc_comp_async_hdr), + EV_GLB(SWITCHTEC_IOCTL_EVENT_GPIO_INT, gpio_interrupt_hdr), + EV_PAR(SWITCHTEC_IOCTL_EVENT_PART_RESET, part_reset_hdr), + EV_PAR(SWITCHTEC_IOCTL_EVENT_MRPC_COMP, mrpc_comp_hdr), + EV_PAR(SWITCHTEC_IOCTL_EVENT_MRPC_COMP_ASYNC, mrpc_comp_async_hdr), + EV_PAR(SWITCHTEC_IOCTL_EVENT_DYN_PART_BIND_COMP, dyn_binding_hdr), + EV_PFF(SWITCHTEC_IOCTL_EVENT_AER_IN_P2P, aer_in_p2p_hdr), + EV_PFF(SWITCHTEC_IOCTL_EVENT_AER_IN_VEP, aer_in_vep_hdr), + EV_PFF(SWITCHTEC_IOCTL_EVENT_DPC, dpc_hdr), + EV_PFF(SWITCHTEC_IOCTL_EVENT_CTS, cts_hdr), + EV_PFF(SWITCHTEC_IOCTL_EVENT_HOTPLUG, hotplug_hdr), + EV_PFF(SWITCHTEC_IOCTL_EVENT_IER, ier_hdr), + EV_PFF(SWITCHTEC_IOCTL_EVENT_THRESH, threshold_hdr), + EV_PFF(SWITCHTEC_IOCTL_EVENT_POWER_MGMT, power_mgmt_hdr), + EV_PFF(SWITCHTEC_IOCTL_EVENT_TLP_THROTTLING, tlp_throttling_hdr), + EV_PFF(SWITCHTEC_IOCTL_EVENT_FORCE_SPEED, force_speed_hdr), + EV_PFF(SWITCHTEC_IOCTL_EVENT_CREDIT_TIMEOUT, credit_timeout_hdr), + EV_PFF(SWITCHTEC_IOCTL_EVENT_LINK_STATE, link_state_hdr), +}; + +static u32 __iomem *event_hdr_addr(struct switchtec_dev *stdev, + int event_id, int index) +{ + size_t off; + + if (event_id < 0 || event_id >= SWITCHTEC_IOCTL_MAX_EVENTS) + return ERR_PTR(-EINVAL); + + off = event_regs[event_id].offset; + + if (event_regs[event_id].map_reg == part_ev_reg) { + if (index == SWITCHTEC_IOCTL_EVENT_LOCAL_PART_IDX) + index = stdev->partition; + else if (index < 0 || index >= stdev->partition_count) + return 
ERR_PTR(-EINVAL); + } else if (event_regs[event_id].map_reg == pff_ev_reg) { + if (index < 0 || index >= stdev->pff_csr_count) + return ERR_PTR(-EINVAL); + } + + return event_regs[event_id].map_reg(stdev, off, index); +} + +static int event_ctl(struct switchtec_dev *stdev, + struct switchtec_ioctl_event_ctl *ctl) +{ + int i; + u32 __iomem *reg; + u32 hdr; + + reg = event_hdr_addr(stdev, ctl->event_id, ctl->index); + if (IS_ERR(reg)) + return PTR_ERR(reg); + + hdr = ioread32(reg); + for (i = 0; i < ARRAY_SIZE(ctl->data); i++) + ctl->data[i] = ioread32(&reg[i + 1]); + + ctl->occurred = hdr & SWITCHTEC_EVENT_OCCURRED; + ctl->count = (hdr >> 5) & 0xFF; + + if (!(ctl->flags & SWITCHTEC_IOCTL_EVENT_FLAG_CLEAR)) + hdr &= ~SWITCHTEC_EVENT_CLEAR; + if (ctl->flags & SWITCHTEC_IOCTL_EVENT_FLAG_EN_POLL) + hdr |= SWITCHTEC_EVENT_EN_IRQ; + if (ctl->flags & SWITCHTEC_IOCTL_EVENT_FLAG_DIS_POLL) + hdr &= ~SWITCHTEC_EVENT_EN_IRQ; + if (ctl->flags & SWITCHTEC_IOCTL_EVENT_FLAG_EN_LOG) + hdr |= SWITCHTEC_EVENT_EN_LOG; + if (ctl->flags & SWITCHTEC_IOCTL_EVENT_FLAG_DIS_LOG) + hdr &= ~SWITCHTEC_EVENT_EN_LOG; + if (ctl->flags & SWITCHTEC_IOCTL_EVENT_FLAG_EN_CLI) + hdr |= SWITCHTEC_EVENT_EN_CLI; + if (ctl->flags & SWITCHTEC_IOCTL_EVENT_FLAG_DIS_CLI) + hdr &= ~SWITCHTEC_EVENT_EN_CLI; + if (ctl->flags & SWITCHTEC_IOCTL_EVENT_FLAG_EN_FATAL) + hdr |= SWITCHTEC_EVENT_FATAL; + if (ctl->flags & SWITCHTEC_IOCTL_EVENT_FLAG_DIS_FATAL) + hdr &= ~SWITCHTEC_EVENT_FATAL; + + if (ctl->flags) + iowrite32(hdr, reg); + + ctl->flags = 0; + if (hdr & SWITCHTEC_EVENT_EN_IRQ) + ctl->flags |= SWITCHTEC_IOCTL_EVENT_FLAG_EN_POLL; + if (hdr & SWITCHTEC_EVENT_EN_LOG) + ctl->flags |= SWITCHTEC_IOCTL_EVENT_FLAG_EN_LOG; + if (hdr & SWITCHTEC_EVENT_EN_CLI) + ctl->flags |= SWITCHTEC_IOCTL_EVENT_FLAG_EN_CLI; + if (hdr & SWITCHTEC_EVENT_FATAL) + ctl->flags |= SWITCHTEC_IOCTL_EVENT_FLAG_EN_FATAL; + + return 0; +} + +static int ioctl_event_ctl(struct switchtec_dev *stdev, + struct switchtec_ioctl_event_ctl __user *uctl) +{ + int
ret; + int nr_idxs; + struct switchtec_ioctl_event_ctl ctl; + + if (copy_from_user(&ctl, uctl, sizeof(ctl))) + return -EFAULT; + + if (ctl.event_id >= SWITCHTEC_IOCTL_MAX_EVENTS) + return -EINVAL; + + if (ctl.flags & SWITCHTEC_IOCTL_EVENT_FLAG_UNUSED) + return -EINVAL; + + if (ctl.index == SWITCHTEC_IOCTL_EVENT_IDX_ALL) { + if (event_regs[ctl.event_id].map_reg == global_ev_reg) + nr_idxs = 1; + else if (event_regs[ctl.event_id].map_reg == part_ev_reg) + nr_idxs = stdev->partition_count; + else if (event_regs[ctl.event_id].map_reg == pff_ev_reg) + nr_idxs = stdev->pff_csr_count; + else + return -EINVAL; + + for (ctl.index = 0; ctl.index < nr_idxs; ctl.index++) { + ret = event_ctl(stdev, &ctl); + if (ret < 0) + return ret; + } + } else { + ret = event_ctl(stdev, &ctl); + if (ret < 0) + return ret; + } + + if (copy_to_user(uctl, &ctl, sizeof(ctl))) + return -EFAULT; + + return 0; +} + +static int ioctl_pff_to_port(struct switchtec_dev *stdev, + struct switchtec_ioctl_pff_port *up) +{ + int i, part; + u32 reg; + struct part_cfg_regs *pcfg; + struct switchtec_ioctl_pff_port p; + + if (copy_from_user(&p, up, sizeof(p))) + return -EFAULT; + + p.port = -1; + for (part = 0; part < stdev->partition_count; part++) { + pcfg = &stdev->mmio_part_cfg_all[part]; + p.partition = part; + + reg = ioread32(&pcfg->usp_pff_inst_id); + if (reg == p.pff) { + p.port = 0; + break; + } + + reg = ioread32(&pcfg->vep_pff_inst_id); + if (reg == p.pff) { + p.port = SWITCHTEC_IOCTL_PFF_VEP; + break; + } + + for (i = 0; i < ARRAY_SIZE(pcfg->dsp_pff_inst_id); i++) { + reg = ioread32(&pcfg->dsp_pff_inst_id[i]); + if (reg != p.pff) + continue; + + p.port = i + 1; + break; + } + + if (p.port != -1) + break; + } + + if (copy_to_user(up, &p, sizeof(p))) + return -EFAULT; + + return 0; +} + +static int ioctl_port_to_pff(struct switchtec_dev *stdev, + struct switchtec_ioctl_pff_port *up) +{ + struct switchtec_ioctl_pff_port p; + struct part_cfg_regs *pcfg; + + if (copy_from_user(&p, up, sizeof(p))) + 
return -EFAULT; + + if (p.partition == SWITCHTEC_IOCTL_EVENT_LOCAL_PART_IDX) + pcfg = stdev->mmio_part_cfg; + else if (p.partition < stdev->partition_count) + pcfg = &stdev->mmio_part_cfg_all[p.partition]; + else + return -EINVAL; + + switch (p.port) { + case 0: + p.pff = ioread32(&pcfg->usp_pff_inst_id); + break; + case SWITCHTEC_IOCTL_PFF_VEP: + p.pff = ioread32(&pcfg->vep_pff_inst_id); + break; + default: + if (p.port > ARRAY_SIZE(pcfg->dsp_pff_inst_id)) + return -EINVAL; + p.pff = ioread32(&pcfg->dsp_pff_inst_id[p.port - 1]); + break; + } + + if (copy_to_user(up, &p, sizeof(p))) + return -EFAULT; + + return 0; +} + +static long switchtec_dev_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + struct switchtec_user *stuser = filp->private_data; + struct switchtec_dev *stdev = stuser->stdev; + int rc; + void __user *argp = (void __user *)arg; + + rc = lock_mutex_and_test_alive(stdev); + if (rc) + return rc; + + switch (cmd) { + case SWITCHTEC_IOCTL_FLASH_INFO: + rc = ioctl_flash_info(stdev, argp); + break; + case SWITCHTEC_IOCTL_FLASH_PART_INFO: + rc = ioctl_flash_part_info(stdev, argp); + break; + case SWITCHTEC_IOCTL_EVENT_SUMMARY: + rc = ioctl_event_summary(stdev, stuser, argp); + break; + case SWITCHTEC_IOCTL_EVENT_CTL: + rc = ioctl_event_ctl(stdev, argp); + break; + case SWITCHTEC_IOCTL_PFF_TO_PORT: + rc = ioctl_pff_to_port(stdev, argp); + break; + case SWITCHTEC_IOCTL_PORT_TO_PFF: + rc = ioctl_port_to_pff(stdev, argp); + break; + default: + rc = -ENOTTY; + break; + } + + mutex_unlock(&stdev->mrpc_mutex); + return rc; +} + static const struct file_operations switchtec_fops = { .owner = THIS_MODULE, .open = switchtec_dev_open, @@ -785,6 +1212,8 @@ static const struct file_operations switchtec_fops = { .write = switchtec_dev_write, .read = switchtec_dev_read, .poll = switchtec_dev_poll, + .unlocked_ioctl = switchtec_dev_ioctl, + .compat_ioctl = switchtec_dev_ioctl, }; static void stdev_release(struct device *dev) @@ -871,11 +1300,52 @@ 
err_put: return ERR_PTR(rc); } +static int mask_event(struct switchtec_dev *stdev, int eid, int idx) +{ + size_t off = event_regs[eid].offset; + u32 __iomem *hdr_reg; + u32 hdr; + + hdr_reg = event_regs[eid].map_reg(stdev, off, idx); + hdr = ioread32(hdr_reg); + + if (!(hdr & SWITCHTEC_EVENT_OCCURRED && hdr & SWITCHTEC_EVENT_EN_IRQ)) + return 0; + + dev_dbg(&stdev->dev, "%s: %d %d %x\n", __func__, eid, idx, hdr); + hdr &= ~(SWITCHTEC_EVENT_EN_IRQ | SWITCHTEC_EVENT_OCCURRED); + iowrite32(hdr, hdr_reg); + + return 1; +} + +static int mask_all_events(struct switchtec_dev *stdev, int eid) +{ + int idx; + int count = 0; + + if (event_regs[eid].map_reg == part_ev_reg) { + for (idx = 0; idx < stdev->partition_count; idx++) + count += mask_event(stdev, eid, idx); + } else if (event_regs[eid].map_reg == pff_ev_reg) { + for (idx = 0; idx < stdev->pff_csr_count; idx++) { + if (!stdev->pff_local[idx]) + continue; + count += mask_event(stdev, eid, idx); + } + } else { + count += mask_event(stdev, eid, 0); + } + + return count; +} + static irqreturn_t switchtec_event_isr(int irq, void *dev) { struct switchtec_dev *stdev = dev; u32 reg; irqreturn_t ret = IRQ_NONE; + int eid, event_count = 0; reg = ioread32(&stdev->mmio_part_cfg->mrpc_comp_hdr); if (reg & SWITCHTEC_EVENT_OCCURRED) { @@ -885,6 +1355,17 @@ static irqreturn_t switchtec_event_isr(int irq, void *dev) iowrite32(reg, &stdev->mmio_part_cfg->mrpc_comp_hdr); } + for (eid = 0; eid < SWITCHTEC_IOCTL_MAX_EVENTS; eid++) + event_count += mask_all_events(stdev, eid); + + if (event_count) { + atomic_inc(&stdev->event_cnt); + wake_up_interruptible(&stdev->event_wq); + dev_dbg(&stdev->dev, "%s: %d events\n", __func__, + event_count); + return IRQ_HANDLED; + } + return ret; } diff --git a/include/uapi/linux/switchtec_ioctl.h b/include/uapi/linux/switchtec_ioctl.h new file mode 100644 index 000000000000..3e824e1a6495 --- /dev/null +++ b/include/uapi/linux/switchtec_ioctl.h @@ -0,0 +1,132 @@ +/* + * Microsemi Switchtec PCIe Driver + * 
Copyright (c) 2017, Microsemi Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#ifndef _UAPI_LINUX_SWITCHTEC_IOCTL_H +#define _UAPI_LINUX_SWITCHTEC_IOCTL_H + +#include + +#define SWITCHTEC_IOCTL_PART_CFG0 0 +#define SWITCHTEC_IOCTL_PART_CFG1 1 +#define SWITCHTEC_IOCTL_PART_IMG0 2 +#define SWITCHTEC_IOCTL_PART_IMG1 3 +#define SWITCHTEC_IOCTL_PART_NVLOG 4 +#define SWITCHTEC_IOCTL_PART_VENDOR0 5 +#define SWITCHTEC_IOCTL_PART_VENDOR1 6 +#define SWITCHTEC_IOCTL_PART_VENDOR2 7 +#define SWITCHTEC_IOCTL_PART_VENDOR3 8 +#define SWITCHTEC_IOCTL_PART_VENDOR4 9 +#define SWITCHTEC_IOCTL_PART_VENDOR5 10 +#define SWITCHTEC_IOCTL_PART_VENDOR6 11 +#define SWITCHTEC_IOCTL_PART_VENDOR7 12 +#define SWITCHTEC_IOCTL_NUM_PARTITIONS 13 + +struct switchtec_ioctl_flash_info { + __u64 flash_length; + __u32 num_partitions; + __u32 padding; +}; + +struct switchtec_ioctl_flash_part_info { + __u32 flash_partition; + __u32 address; + __u32 length; + __u32 active; +}; + +struct switchtec_ioctl_event_summary { + __u64 global; + __u64 part_bitmap; + __u32 local_part; + __u32 padding; + __u32 part[48]; + __u32 pff[48]; +}; + +#define SWITCHTEC_IOCTL_EVENT_STACK_ERROR 0 +#define SWITCHTEC_IOCTL_EVENT_PPU_ERROR 1 +#define SWITCHTEC_IOCTL_EVENT_ISP_ERROR 2 +#define SWITCHTEC_IOCTL_EVENT_SYS_RESET 3 +#define SWITCHTEC_IOCTL_EVENT_FW_EXC 4 +#define SWITCHTEC_IOCTL_EVENT_FW_NMI 5 +#define SWITCHTEC_IOCTL_EVENT_FW_NON_FATAL 6 +#define SWITCHTEC_IOCTL_EVENT_FW_FATAL 7 +#define SWITCHTEC_IOCTL_EVENT_TWI_MRPC_COMP 8 +#define SWITCHTEC_IOCTL_EVENT_TWI_MRPC_COMP_ASYNC 9 +#define 
SWITCHTEC_IOCTL_EVENT_CLI_MRPC_COMP 10 +#define SWITCHTEC_IOCTL_EVENT_CLI_MRPC_COMP_ASYNC 11 +#define SWITCHTEC_IOCTL_EVENT_GPIO_INT 12 +#define SWITCHTEC_IOCTL_EVENT_PART_RESET 13 +#define SWITCHTEC_IOCTL_EVENT_MRPC_COMP 14 +#define SWITCHTEC_IOCTL_EVENT_MRPC_COMP_ASYNC 15 +#define SWITCHTEC_IOCTL_EVENT_DYN_PART_BIND_COMP 16 +#define SWITCHTEC_IOCTL_EVENT_AER_IN_P2P 17 +#define SWITCHTEC_IOCTL_EVENT_AER_IN_VEP 18 +#define SWITCHTEC_IOCTL_EVENT_DPC 19 +#define SWITCHTEC_IOCTL_EVENT_CTS 20 +#define SWITCHTEC_IOCTL_EVENT_HOTPLUG 21 +#define SWITCHTEC_IOCTL_EVENT_IER 22 +#define SWITCHTEC_IOCTL_EVENT_THRESH 23 +#define SWITCHTEC_IOCTL_EVENT_POWER_MGMT 24 +#define SWITCHTEC_IOCTL_EVENT_TLP_THROTTLING 25 +#define SWITCHTEC_IOCTL_EVENT_FORCE_SPEED 26 +#define SWITCHTEC_IOCTL_EVENT_CREDIT_TIMEOUT 27 +#define SWITCHTEC_IOCTL_EVENT_LINK_STATE 28 +#define SWITCHTEC_IOCTL_MAX_EVENTS 29 + +#define SWITCHTEC_IOCTL_EVENT_LOCAL_PART_IDX -1 +#define SWITCHTEC_IOCTL_EVENT_IDX_ALL -2 + +#define SWITCHTEC_IOCTL_EVENT_FLAG_CLEAR (1 << 0) +#define SWITCHTEC_IOCTL_EVENT_FLAG_EN_POLL (1 << 1) +#define SWITCHTEC_IOCTL_EVENT_FLAG_EN_LOG (1 << 2) +#define SWITCHTEC_IOCTL_EVENT_FLAG_EN_CLI (1 << 3) +#define SWITCHTEC_IOCTL_EVENT_FLAG_EN_FATAL (1 << 4) +#define SWITCHTEC_IOCTL_EVENT_FLAG_DIS_POLL (1 << 5) +#define SWITCHTEC_IOCTL_EVENT_FLAG_DIS_LOG (1 << 6) +#define SWITCHTEC_IOCTL_EVENT_FLAG_DIS_CLI (1 << 7) +#define SWITCHTEC_IOCTL_EVENT_FLAG_DIS_FATAL (1 << 8) +#define SWITCHTEC_IOCTL_EVENT_FLAG_UNUSED (~0x1ff) + +struct switchtec_ioctl_event_ctl { + __u32 event_id; + __s32 index; + __u32 flags; + __u32 occurred; + __u32 count; + __u32 data[5]; +}; + +#define SWITCHTEC_IOCTL_PFF_VEP 100 +struct switchtec_ioctl_pff_port { + __u32 pff; + __u32 partition; + __u32 port; +}; + +#define SWITCHTEC_IOCTL_FLASH_INFO \ + _IOR('W', 0x40, struct switchtec_ioctl_flash_info) +#define SWITCHTEC_IOCTL_FLASH_PART_INFO \ + _IOWR('W', 0x41, struct switchtec_ioctl_flash_part_info) +#define 
SWITCHTEC_IOCTL_EVENT_SUMMARY \ + _IOR('W', 0x42, struct switchtec_ioctl_event_summary) +#define SWITCHTEC_IOCTL_EVENT_CTL \ + _IOWR('W', 0x43, struct switchtec_ioctl_event_ctl) +#define SWITCHTEC_IOCTL_PFF_TO_PORT \ + _IOWR('W', 0x44, struct switchtec_ioctl_pff_port) +#define SWITCHTEC_IOCTL_PORT_TO_PFF \ + _IOWR('W', 0x45, struct switchtec_ioctl_pff_port) + +#endif -- cgit v1.2.3-71-gd317 From 2d4bc93368f5a0ddb57c8c885cdad9c9b7a10ed5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 12 Apr 2017 14:34:04 +0200 Subject: netlink: extended ACK reporting Add the base infrastructure and UAPI for netlink extended ACK reporting. All "manual" calls to netlink_ack() pass NULL for now and thus don't get extended ACK reporting. Big thanks goes to Pablo Neira Ayuso for not only bringing up the whole topic at netconf (again) but also coming up with the nlattr passing trick and various other ideas. Signed-off-by: Johannes Berg Reviewed-by: David Ahern Signed-off-by: David S. Miller --- crypto/crypto_user.c | 3 +- drivers/infiniband/core/netlink.c | 5 +-- drivers/scsi/scsi_netlink.c | 2 +- include/linux/netlink.h | 26 +++++++++++++- include/net/netlink.h | 3 +- include/uapi/linux/netlink.h | 32 ++++++++++++++++++ kernel/audit.c | 2 +- net/core/rtnetlink.c | 3 +- net/core/sock_diag.c | 3 +- net/decnet/netfilter/dn_rtmsg.c | 2 +- net/hsr/hsr_netlink.c | 4 +-- net/netfilter/ipset/ip_set_core.c | 2 +- net/netfilter/nfnetlink.c | 22 ++++++------ net/netlink/af_netlink.c | 71 ++++++++++++++++++++++++++++++++++----- net/netlink/af_netlink.h | 1 + net/netlink/genetlink.c | 3 +- net/xfrm/xfrm_user.c | 3 +- 17 files changed, 153 insertions(+), 34 deletions(-) (limited to 'include/uapi') diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c index a90404a0c5ff..4a44830741c1 100644 --- a/crypto/crypto_user.c +++ b/crypto/crypto_user.c @@ -483,7 +483,8 @@ static const struct crypto_link { [CRYPTO_MSG_DELRNG - CRYPTO_MSG_BASE] = { .doit = crypto_del_rng }, }; -static int 
crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +static int crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct nlattr *attrs[CRYPTOCFGA_MAX+1]; const struct crypto_link *link; diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c index 10469b0088b5..b784055423c8 100644 --- a/drivers/infiniband/core/netlink.c +++ b/drivers/infiniband/core/netlink.c @@ -146,7 +146,8 @@ nla_put_failure: } EXPORT_SYMBOL(ibnl_put_attr); -static int ibnl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +static int ibnl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct ibnl_client *client; int type = nlh->nlmsg_type; @@ -209,7 +210,7 @@ static void ibnl_rcv_reply_skb(struct sk_buff *skb) if (nlh->nlmsg_flags & NLM_F_REQUEST) return; - ibnl_rcv_msg(skb, nlh); + ibnl_rcv_msg(skb, nlh, NULL); msglen = NLMSG_ALIGN(nlh->nlmsg_len); if (msglen > skb->len) diff --git a/drivers/scsi/scsi_netlink.c b/drivers/scsi/scsi_netlink.c index 109802f776ed..50e624fb8307 100644 --- a/drivers/scsi/scsi_netlink.c +++ b/drivers/scsi/scsi_netlink.c @@ -111,7 +111,7 @@ scsi_nl_rcv_msg(struct sk_buff *skb) next_msg: if ((err) || (nlh->nlmsg_flags & NLM_F_ACK)) - netlink_ack(skb, nlh, err); + netlink_ack(skb, nlh, err, NULL); skb_pull(skb, rlen); } diff --git a/include/linux/netlink.h b/include/linux/netlink.h index da14ab61f363..60e7137f840d 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -62,11 +62,35 @@ netlink_kernel_create(struct net *net, int unit, struct netlink_kernel_cfg *cfg) return __netlink_kernel_create(net, unit, THIS_MODULE, cfg); } +/** + * struct netlink_ext_ack - netlink extended ACK report struct + * @_msg: message string to report - don't access directly, use + * %NL_SET_ERR_MSG + * @bad_attr: attribute with error + */ +struct netlink_ext_ack { + const char *_msg; + const struct nlattr *bad_attr; +}; + +/* Always use this macro, 
this allows later putting the + * message into a separate section or such for things + * like translation or listing all possible messages. + * Currently string formatting is not supported (due + * to the lack of an output buffer.) + */ +#define NL_SET_ERR_MSG(extack, msg) do { \ + static const char _msg[] = (msg); \ + \ + (extack)->_msg = _msg; \ +} while (0) + extern void netlink_kernel_release(struct sock *sk); extern int __netlink_change_ngroups(struct sock *sk, unsigned int groups); extern int netlink_change_ngroups(struct sock *sk, unsigned int groups); extern void __netlink_clear_multicast_users(struct sock *sk, unsigned int group); -extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err); +extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, + const struct netlink_ext_ack *extack); extern int netlink_has_listeners(struct sock *sk, unsigned int group); extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 portid, int nonblock); diff --git a/include/net/netlink.h b/include/net/netlink.h index b239fcd33d80..a064ec3e2ee1 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -233,7 +233,8 @@ struct nl_info { }; int netlink_rcv_skb(struct sk_buff *skb, - int (*cb)(struct sk_buff *, struct nlmsghdr *)); + int (*cb)(struct sk_buff *, struct nlmsghdr *, + struct netlink_ext_ack *)); int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid, unsigned int group, int report, gfp_t flags); diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index b2c9c26ea30f..7df88770e029 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -69,6 +69,10 @@ struct nlmsghdr { #define NLM_F_CREATE 0x400 /* Create, if it does not exist */ #define NLM_F_APPEND 0x800 /* Add to end of list */ +/* Flags for ACK message */ +#define NLM_F_CAPPED 0x100 /* request was capped */ +#define NLM_F_ACK_TLVS 0x200 /* extended ACK TLVs were included */ + /* 4.4BSD ADD
NLM_F_CREATE|NLM_F_EXCL 4.4BSD CHANGE NLM_F_REPLACE @@ -101,6 +105,33 @@ struct nlmsghdr { struct nlmsgerr { int error; struct nlmsghdr msg; + /* + * followed by the message contents unless NETLINK_CAP_ACK was set + * or the ACK indicates success (error == 0) + * message length is aligned with NLMSG_ALIGN() + */ + /* + * followed by TLVs defined in enum nlmsgerr_attrs + * if NETLINK_EXT_ACK was set + */ +}; + +/** + * enum nlmsgerr_attrs - nlmsgerr attributes + * @NLMSGERR_ATTR_UNUSED: unused + * @NLMSGERR_ATTR_MSG: error message string (string) + * @NLMSGERR_ATTR_OFFS: offset of the invalid attribute in the original + * message, counting from the beginning of the header (u32) + * @__NLMSGERR_ATTR_MAX: number of attributes + * @NLMSGERR_ATTR_MAX: highest attribute number + */ +enum nlmsgerr_attrs { + NLMSGERR_ATTR_UNUSED, + NLMSGERR_ATTR_MSG, + NLMSGERR_ATTR_OFFS, + + __NLMSGERR_ATTR_MAX, + NLMSGERR_ATTR_MAX = __NLMSGERR_ATTR_MAX - 1 }; #define NETLINK_ADD_MEMBERSHIP 1 @@ -115,6 +146,7 @@ struct nlmsgerr { #define NETLINK_LISTEN_ALL_NSID 8 #define NETLINK_LIST_MEMBERSHIPS 9 #define NETLINK_CAP_ACK 10 +#define NETLINK_EXT_ACK 11 struct nl_pktinfo { __u32 group; diff --git a/kernel/audit.c b/kernel/audit.c index 2f4964cfde0b..d54bf5932374 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1402,7 +1402,7 @@ static void audit_receive_skb(struct sk_buff *skb) err = audit_receive_msg(skb, nlh); /* if err or if this message says it wants a response */ if (err || (nlh->nlmsg_flags & NLM_F_ACK)) - netlink_ack(skb, nlh, err); + netlink_ack(skb, nlh, err, NULL); nlh = nlmsg_next(nlh, &len); } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index c138b6b75e59..3cc4a627a537 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -4046,7 +4046,8 @@ out: /* Process one rtnetlink message. 
*/ -static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); rtnl_doit_func doit; diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index fb9d0e2fd148..217f4e3b82f6 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -238,7 +238,8 @@ static int __sock_diag_cmd(struct sk_buff *skb, struct nlmsghdr *nlh) return err; } -static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { int ret; diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index 85f2fdc360c2..c8bf5136a72b 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -96,7 +96,7 @@ static unsigned int dnrmg_hook(void *priv, } -#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) +#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err), NULL); return; } while (0) static inline void dnrmg_receive_user_skb(struct sk_buff *skb) { diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index 1ab30e7d3f99..81dac16933fc 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -350,7 +350,7 @@ static int hsr_get_node_status(struct sk_buff *skb_in, struct genl_info *info) return 0; invalid: - netlink_ack(skb_in, nlmsg_hdr(skb_in), -EINVAL); + netlink_ack(skb_in, nlmsg_hdr(skb_in), -EINVAL, NULL); return 0; nla_put_failure: @@ -432,7 +432,7 @@ static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info) return 0; invalid: - netlink_ack(skb_in, nlmsg_hdr(skb_in), -EINVAL); + netlink_ack(skb_in, nlmsg_hdr(skb_in), -EINVAL, NULL); return 0; nla_put_failure: diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index c296f9b606d4..26356bf8cebf 100644 --- 
a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1305,7 +1305,7 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) * manually :-( */ if (nlh->nlmsg_flags & NLM_F_ACK) - netlink_ack(cb->skb, nlh, ret); + netlink_ack(cb->skb, nlh, ret, NULL); return ret; } } diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 68eda920160e..181d3bb800e6 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -148,7 +148,8 @@ int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid, EXPORT_SYMBOL_GPL(nfnetlink_unicast); /* Process one complete nfnetlink message. */ -static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); const struct nfnl_callback *nc; @@ -261,7 +262,7 @@ static void nfnl_err_deliver(struct list_head *err_list, struct sk_buff *skb) struct nfnl_err *nfnl_err, *next; list_for_each_entry_safe(nfnl_err, next, err_list, head) { - netlink_ack(skb, nfnl_err->nlh, nfnl_err->err); + netlink_ack(skb, nfnl_err->nlh, nfnl_err->err, NULL); nfnl_err_del(nfnl_err); } } @@ -284,13 +285,13 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, int err; if (subsys_id >= NFNL_SUBSYS_COUNT) - return netlink_ack(skb, nlh, -EINVAL); + return netlink_ack(skb, nlh, -EINVAL, NULL); replay: status = 0; skb = netlink_skb_clone(oskb, GFP_KERNEL); if (!skb) - return netlink_ack(oskb, nlh, -ENOMEM); + return netlink_ack(oskb, nlh, -ENOMEM, NULL); nfnl_lock(subsys_id); ss = nfnl_dereference_protected(subsys_id); @@ -304,20 +305,20 @@ replay: #endif { nfnl_unlock(subsys_id); - netlink_ack(oskb, nlh, -EOPNOTSUPP); + netlink_ack(oskb, nlh, -EOPNOTSUPP, NULL); return kfree_skb(skb); } } if (!ss->commit || !ss->abort) { nfnl_unlock(subsys_id); - netlink_ack(oskb, nlh, -EOPNOTSUPP); + netlink_ack(oskb, nlh, -EOPNOTSUPP, 
NULL); return kfree_skb(skb); } if (genid && ss->valid_genid && !ss->valid_genid(net, genid)) { nfnl_unlock(subsys_id); - netlink_ack(oskb, nlh, -ERESTART); + netlink_ack(oskb, nlh, -ERESTART, NULL); return kfree_skb(skb); } @@ -407,7 +408,8 @@ ack: * pointing to the batch header. */ nfnl_err_reset(&err_list); - netlink_ack(oskb, nlmsg_hdr(oskb), -ENOMEM); + netlink_ack(oskb, nlmsg_hdr(oskb), -ENOMEM, + NULL); status |= NFNL_BATCH_FAILURE; goto done; } @@ -467,7 +469,7 @@ static void nfnetlink_rcv_skb_batch(struct sk_buff *skb, struct nlmsghdr *nlh) err = nla_parse(cda, NFNL_BATCH_MAX, attr, attrlen, nfnl_batch_policy); if (err < 0) { - netlink_ack(skb, nlh, err); + netlink_ack(skb, nlh, err, NULL); return; } if (cda[NFNL_BATCH_GENID]) @@ -493,7 +495,7 @@ static void nfnetlink_rcv(struct sk_buff *skb) return; if (!netlink_net_capable(skb, CAP_NET_ADMIN)) { - netlink_ack(skb, nlh, -EPERM); + netlink_ack(skb, nlh, -EPERM, NULL); return; } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index fc232441cf23..c1564768000e 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1652,6 +1652,13 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, nlk->flags &= ~NETLINK_F_CAP_ACK; err = 0; break; + case NETLINK_EXT_ACK: + if (val) + nlk->flags |= NETLINK_F_EXT_ACK; + else + nlk->flags &= ~NETLINK_F_EXT_ACK; + err = 0; + break; default: err = -ENOPROTOOPT; } @@ -1736,6 +1743,15 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname, return -EFAULT; err = 0; break; + case NETLINK_EXT_ACK: + if (len < sizeof(int)) + return -EINVAL; + len = sizeof(int); + val = nlk->flags & NETLINK_F_EXT_ACK ? 
1 : 0; + if (put_user(len, optlen) || put_user(val, optval)) + return -EFAULT; + err = 0; + break; default: err = -ENOPROTOOPT; } @@ -2267,21 +2283,40 @@ error_free: } EXPORT_SYMBOL(__netlink_dump_start); -void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) +void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, + const struct netlink_ext_ack *extack) { struct sk_buff *skb; struct nlmsghdr *rep; struct nlmsgerr *errmsg; size_t payload = sizeof(*errmsg); + size_t tlvlen = 0; struct netlink_sock *nlk = nlk_sk(NETLINK_CB(in_skb).sk); + unsigned int flags = 0; /* Error messages get the original request appened, unless the user - * requests to cap the error message. + * requests to cap the error message, and get extra error data if + * requested. */ - if (!(nlk->flags & NETLINK_F_CAP_ACK) && err) - payload += nlmsg_len(nlh); + if (err) { + if (!(nlk->flags & NETLINK_F_CAP_ACK)) + payload += nlmsg_len(nlh); + else + flags |= NLM_F_CAPPED; + if (nlk->flags & NETLINK_F_EXT_ACK && extack) { + if (extack->_msg) + tlvlen += nla_total_size(strlen(extack->_msg) + 1); + if (extack->bad_attr) + tlvlen += nla_total_size(sizeof(u32)); + } + } else { + flags |= NLM_F_CAPPED; + } - skb = nlmsg_new(payload, GFP_KERNEL); + if (tlvlen) + flags |= NLM_F_ACK_TLVS; + + skb = nlmsg_new(payload + tlvlen, GFP_KERNEL); if (!skb) { struct sock *sk; @@ -2297,17 +2332,35 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) } rep = __nlmsg_put(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, - NLMSG_ERROR, payload, 0); + NLMSG_ERROR, payload, flags); errmsg = nlmsg_data(rep); errmsg->error = err; memcpy(&errmsg->msg, nlh, payload > sizeof(*errmsg) ? 
nlh->nlmsg_len : sizeof(*nlh)); + + if (err && nlk->flags & NETLINK_F_EXT_ACK && extack) { + if (extack->_msg) + WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG, + extack->_msg)); + if (extack->bad_attr && + !WARN_ON((u8 *)extack->bad_attr < in_skb->data || + (u8 *)extack->bad_attr >= in_skb->data + + in_skb->len)) + WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_OFFS, + (u8 *)extack->bad_attr - + in_skb->data)); + } + + nlmsg_end(skb, rep); + netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).portid, MSG_DONTWAIT); } EXPORT_SYMBOL(netlink_ack); int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, - struct nlmsghdr *)) + struct nlmsghdr *, + struct netlink_ext_ack *)) { + struct netlink_ext_ack extack = {}; struct nlmsghdr *nlh; int err; @@ -2328,13 +2381,13 @@ int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, if (nlh->nlmsg_type < NLMSG_MIN_TYPE) goto ack; - err = cb(skb, nlh); + err = cb(skb, nlh, &extack); if (err == -EINTR) goto skip; ack: if (nlh->nlmsg_flags & NLM_F_ACK || err) - netlink_ack(skb, nlh, err); + netlink_ack(skb, nlh, err, &extack); skip: msglen = NLMSG_ALIGN(nlh->nlmsg_len); diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index f792f8d7f982..3490f2430532 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -13,6 +13,7 @@ #define NETLINK_F_RECV_NO_ENOBUFS 0x8 #define NETLINK_F_LISTEN_ALL_NSID 0x10 #define NETLINK_F_CAP_ACK 0x20 +#define NETLINK_F_EXT_ACK 0x40 #define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8) #define NLGRPLONGS(x) (NLGRPSZ(x)/sizeof(unsigned long)) diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 92e0981f7404..57b2e3648bc0 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -605,7 +605,8 @@ out: return err; } -static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { const struct genl_family *family; int err; 
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 4f7e62ddc17e..e93d5c0471b2 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2448,7 +2448,8 @@ static const struct xfrm_link { [XFRM_MSG_GETSPDINFO - XFRM_MSG_BASE] = { .doit = xfrm_get_spdinfo }, }; -static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct nlattr *attrs[XFRMA_MAX+1]; -- cgit v1.2.3-71-gd317 From ba0dc5f6e0ba5a5d2f575bcdb35e5d1960cf7c04 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 12 Apr 2017 14:34:06 +0200 Subject: netlink: allow sending extended ACK with cookie on success Now that we have extended error reporting and a new message format for netlink ACK messages, also extend this to be able to return arbitrary cookie data on success. This will allow, for example, nl80211 to not send an extra message for cookies identifying newly created objects, but return those directly in the ACK message. The cookie data size is currently limited to 20 bytes (since Jamal talked about using SHA1 for identifiers.) Thanks to Jamal Hadi Salim for bringing up this idea during the discussions. Signed-off-by: Johannes Berg Reviewed-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/linux/netlink.h | 7 +++++++ include/uapi/linux/netlink.h | 4 ++++ net/netlink/af_netlink.c | 33 ++++++++++++++++++++++----------- 3 files changed, 33 insertions(+), 11 deletions(-) (limited to 'include/uapi') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 60e7137f840d..8d2a8924705c 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -62,15 +62,22 @@ netlink_kernel_create(struct net *net, int unit, struct netlink_kernel_cfg *cfg) return __netlink_kernel_create(net, unit, THIS_MODULE, cfg); } +/* this can be increased when necessary - don't expose to userland */ +#define NETLINK_MAX_COOKIE_LEN 20 + /** * struct netlink_ext_ack - netlink extended ACK report struct * @_msg: message string to report - don't access directly, use * %NL_SET_ERR_MSG * @bad_attr: attribute with error + * @cookie: cookie data to return to userspace (for success) + * @cookie_len: actual cookie data length */ struct netlink_ext_ack { const char *_msg; const struct nlattr *bad_attr; + u8 cookie[NETLINK_MAX_COOKIE_LEN]; + u8 cookie_len; }; /* Always use this macro, this allows later putting the diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index 7df88770e029..f86127a46cfc 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -122,6 +122,9 @@ struct nlmsgerr { * @NLMSGERR_ATTR_MSG: error message string (string) * @NLMSGERR_ATTR_OFFS: offset of the invalid attribute in the original * message, counting from the beginning of the header (u32) + * @NLMSGERR_ATTR_COOKIE: arbitrary subsystem specific cookie to + * be used - in the success case - to identify a created + * object or operation or similar (binary) * @__NLMSGERR_ATTR_MAX: number of attributes * @NLMSGERR_ATTR_MAX: highest attribute number */ @@ -129,6 +132,7 @@ enum nlmsgerr_attrs { NLMSGERR_ATTR_UNUSED, NLMSGERR_ATTR_MSG, NLMSGERR_ATTR_OFFS, + NLMSGERR_ATTR_COOKIE, __NLMSGERR_ATTR_MAX, NLMSGERR_ATTR_MAX = __NLMSGERR_ATTR_MAX - 1 
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index c1564768000e..ee841f00a6ec 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2311,6 +2311,10 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, } } else { flags |= NLM_F_CAPPED; + + if (nlk->flags & NETLINK_F_EXT_ACK && + extack && extack->cookie_len) + tlvlen += nla_total_size(extack->cookie_len); } if (tlvlen) @@ -2337,17 +2341,24 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, errmsg->error = err; memcpy(&errmsg->msg, nlh, payload > sizeof(*errmsg) ? nlh->nlmsg_len : sizeof(*nlh)); - if (err && nlk->flags & NETLINK_F_EXT_ACK && extack) { - if (extack->_msg) - WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG, - extack->_msg)); - if (extack->bad_attr && - !WARN_ON((u8 *)extack->bad_attr < in_skb->data || - (u8 *)extack->bad_attr >= in_skb->data + - in_skb->len)) - WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_OFFS, - (u8 *)extack->bad_attr - - in_skb->data)); + if (nlk->flags & NETLINK_F_EXT_ACK && extack) { + if (err) { + if (extack->_msg) + WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG, + extack->_msg)); + if (extack->bad_attr && + !WARN_ON((u8 *)extack->bad_attr < in_skb->data || + (u8 *)extack->bad_attr >= in_skb->data + + in_skb->len)) + WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_OFFS, + (u8 *)extack->bad_attr - + in_skb->data)); + } else { + if (extack->cookie_len) + WARN_ON(nla_put(skb, NLMSGERR_ATTR_COOKIE, + extack->cookie_len, + extack->cookie)); + } } nlmsg_end(skb, rep); -- cgit v1.2.3-71-gd317 From d77e38e612a017480157fe6d2c1422f42cb5b7e3 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Fri, 14 Apr 2017 10:06:10 +0200 Subject: xfrm: Add an IPsec hardware offloading API This patch adds all the bits that are needed to do IPsec hardware offload for IPsec states and ESP packets. We add xfrmdev_ops to the net_device. 
xfrmdev_ops has function pointers that are needed to manage the xfrm states in the hardware and to do a per packet offloading decision. Joint work with: Ilan Tayari Guy Shapiro Yossi Kuperman Signed-off-by: Guy Shapiro Signed-off-by: Ilan Tayari Signed-off-by: Yossi Kuperman Signed-off-by: Steffen Klassert --- include/linux/netdevice.h | 14 +++++ include/net/xfrm.h | 65 +++++++++++++++++++++- include/uapi/linux/xfrm.h | 8 +++ net/ipv4/esp4.c | 7 +-- net/ipv4/xfrm4_output.c | 3 +- net/ipv6/esp6.c | 4 +- net/ipv6/xfrm6_output.c | 9 ++- net/xfrm/Makefile | 3 +- net/xfrm/xfrm_device.c | 138 +++++++++++++++++++++++++++++++++++++++++++++- net/xfrm/xfrm_input.c | 41 +++++++++++++- net/xfrm/xfrm_output.c | 44 +++++++++++++-- net/xfrm/xfrm_policy.c | 10 ++-- net/xfrm/xfrm_state.c | 74 +++++++++++++++++++++++++ net/xfrm/xfrm_user.c | 28 ++++++++++ 14 files changed, 424 insertions(+), 24 deletions(-) (limited to 'include/uapi') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5bb03d181848..b3eb83db0223 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -824,6 +824,16 @@ struct netdev_xdp { }; }; +#ifdef CONFIG_XFRM_OFFLOAD +struct xfrmdev_ops { + int (*xdo_dev_state_add) (struct xfrm_state *x); + void (*xdo_dev_state_delete) (struct xfrm_state *x); + void (*xdo_dev_state_free) (struct xfrm_state *x); + bool (*xdo_dev_offload_ok) (struct sk_buff *skb, + struct xfrm_state *x); +}; +#endif + /* * This structure defines the management hooks for network devices. 
* The following hooks can be defined; unless noted otherwise, they are @@ -1697,6 +1707,10 @@ struct net_device { const struct ndisc_ops *ndisc_ops; #endif +#ifdef CONFIG_XFRM + const struct xfrmdev_ops *xfrmdev_ops; +#endif + const struct header_ops *header_ops; unsigned int flags; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 54515d989365..17603bf190c1 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -120,6 +120,13 @@ struct xfrm_state_walk { struct xfrm_address_filter *filter; }; +struct xfrm_state_offload { + struct net_device *dev; + unsigned long offload_handle; + unsigned int num_exthdrs; + u8 flags; +}; + /* Full description of state of transformer. */ struct xfrm_state { possible_net_t xs_net; @@ -207,6 +214,8 @@ struct xfrm_state { struct xfrm_lifetime_cur curlft; struct tasklet_hrtimer mtimer; + struct xfrm_state_offload xso; + /* used to fix curlft->add_time when changing date */ long saved_tmo; @@ -1453,7 +1462,6 @@ struct xfrm6_tunnel { void xfrm_init(void); void xfrm4_init(void); int xfrm_state_init(struct net *net); -void xfrm_dev_init(void); void xfrm_state_fini(struct net *net); void xfrm4_state_init(void); void xfrm4_protocol_init(void); @@ -1559,6 +1567,7 @@ struct xfrmk_spdinfo { struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq); int xfrm_state_delete(struct xfrm_state *x); int xfrm_state_flush(struct net *net, u8 proto, bool task_valid); +int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid); void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si); void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si); u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq); @@ -1641,6 +1650,11 @@ static inline int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) } #endif +struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif, + const xfrm_address_t *saddr, + const xfrm_address_t *daddr, + int family); + struct xfrm_policy 
*xfrm_policy_alloc(struct net *net, gfp_t gfp); void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type); @@ -1846,6 +1860,55 @@ static inline struct xfrm_offload *xfrm_offload(struct sk_buff *skb) } #endif +#ifdef CONFIG_XFRM_OFFLOAD +void __net_init xfrm_dev_init(void); +int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, + struct xfrm_user_offload *xuo); +bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x); + +static inline void xfrm_dev_state_delete(struct xfrm_state *x) +{ + struct xfrm_state_offload *xso = &x->xso; + + if (xso->dev) + xso->dev->xfrmdev_ops->xdo_dev_state_delete(x); +} + +static inline void xfrm_dev_state_free(struct xfrm_state *x) +{ + struct xfrm_state_offload *xso = &x->xso; + struct net_device *dev = xso->dev; + + if (dev && dev->xfrmdev_ops) { + dev->xfrmdev_ops->xdo_dev_state_free(x); + xso->dev = NULL; + dev_put(dev); + } +} +#else +static inline void __net_init xfrm_dev_init(void) +{ +} + +static inline int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, struct xfrm_user_offload *xuo) +{ + return 0; +} + +static inline void xfrm_dev_state_delete(struct xfrm_state *x) +{ +} + +static inline void xfrm_dev_state_free(struct xfrm_state *x) +{ +} + +static inline bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x) +{ + return false; +} +#endif + static inline int xfrm_mark_get(struct nlattr **attrs, struct xfrm_mark *m) { if (attrs[XFRMA_MARK]) diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index 1fc62b239f1b..2b384ff09fa0 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -303,6 +303,7 @@ enum xfrm_attr_type_t { XFRMA_PROTO, /* __u8 */ XFRMA_ADDRESS_FILTER, /* struct xfrm_address_filter */ XFRMA_PAD, + XFRMA_OFFLOAD_DEV, /* struct xfrm_user_offload */ __XFRMA_MAX #define XFRMA_MAX (__XFRMA_MAX - 1) @@ -494,6 +495,13 @@ struct xfrm_address_filter { __u8 dplen; }; +struct xfrm_user_offload { + int ifindex; + __u8 flags; +}; +#define
XFRM_OFFLOAD_IPV6 1 +#define XFRM_OFFLOAD_INBOUND 2 + #ifndef __KERNEL__ /* backwards compatibility for userspace */ #define XFRMGRP_ACQUIRE 1 diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index b1e24446e297..c6aba234b6e9 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -435,9 +435,6 @@ skip_cow2: aead_request_set_crypt(req, sg, dsg, ivlen + clen, iv); aead_request_set_ad(req, assoclen); - seqno = cpu_to_be64(XFRM_SKB_CB(skb)->seq.output.low + - ((u64)XFRM_SKB_CB(skb)->seq.output.hi << 32)); - memset(iv, 0, ivlen); memcpy(iv + ivlen - min(ivlen, 8), (u8 *)&seqno + 8 - min(ivlen, 8), min(ivlen, 8)); @@ -470,6 +467,7 @@ static int esp_input_done2(struct sk_buff *skb, int err) { const struct iphdr *iph; struct xfrm_state *x = xfrm_input_state(skb); + struct xfrm_offload *xo = xfrm_offload(skb); struct crypto_aead *aead = x->data; int alen = crypto_aead_authsize(aead); int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead); @@ -478,7 +476,8 @@ static int esp_input_done2(struct sk_buff *skb, int err) u8 nexthdr[2]; int padlen; - kfree(ESP_SKB_CB(skb)->tmp); + if (!xo || (xo && !(xo->flags & CRYPTO_DONE))) + kfree(ESP_SKB_CB(skb)->tmp); if (unlikely(err)) goto out; diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 7ee6518afa86..94b8702603bc 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -29,7 +29,8 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb) goto out; mtu = dst_mtu(skb_dst(skb)); - if (skb->len > mtu) { + if ((!skb_is_gso(skb) && skb->len > mtu) || + (skb_is_gso(skb) && skb_gso_network_seglen(skb) > ip_skb_dst_mtu(skb->sk, skb))) { skb->protocol = htons(ETH_P_IP); if (skb->sk) diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index ff54faa75631..3d3757d20d0a 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -450,6 +450,7 @@ error: static int esp_input_done2(struct sk_buff *skb, int err) { struct xfrm_state *x = xfrm_input_state(skb); + struct xfrm_offload *xo = xfrm_offload(skb); struct crypto_aead 
*aead = x->data; int alen = crypto_aead_authsize(aead); int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead); @@ -458,7 +459,8 @@ static int esp_input_done2(struct sk_buff *skb, int err) int padlen; u8 nexthdr[2]; - kfree(ESP_SKB_CB(skb)->tmp); + if (!xo || (xo && !(xo->flags & CRYPTO_DONE))) + kfree(ESP_SKB_CB(skb)->tmp); if (unlikely(err)) goto out; diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 4d09ce6fa90e..8ae87d4ec5ff 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -73,11 +73,16 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb) int mtu, ret = 0; struct dst_entry *dst = skb_dst(skb); + if (skb->ignore_df) + goto out; + mtu = dst_mtu(dst); if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; - if (!skb->ignore_df && skb->len > mtu) { + if ((!skb_is_gso(skb) && skb->len > mtu) || + (skb_is_gso(skb) && + skb_gso_network_seglen(skb) > ip6_skb_dst_mtu(skb))) { skb->dev = dst->dev; skb->protocol = htons(ETH_P_IPV6); @@ -89,7 +94,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb) icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); ret = -EMSGSIZE; } - +out: return ret; } diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile index 55b2ac300995..abf81b329dc1 100644 --- a/net/xfrm/Makefile +++ b/net/xfrm/Makefile @@ -4,7 +4,8 @@ obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \ xfrm_input.o xfrm_output.o \ - xfrm_sysctl.o xfrm_replay.o xfrm_device.o + xfrm_sysctl.o xfrm_replay.o +obj-$(CONFIG_XFRM_OFFLOAD) += xfrm_device.o obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o obj-$(CONFIG_XFRM_ALGO) += xfrm_algo.o obj-$(CONFIG_XFRM_USER) += xfrm_user.o diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 34a260a61be9..9bac2ba9052c 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -22,13 +22,149 @@ #include #include +int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, + struct xfrm_user_offload *xuo) +{ + int err; + struct dst_entry *dst; + struct net_device *dev; + 
struct xfrm_state_offload *xso = &x->xso; + xfrm_address_t *saddr; + xfrm_address_t *daddr; + + if (!x->type_offload) + return 0; + + /* We don't yet support UDP encapsulation, TFC padding and ESN. */ + if (x->encap || x->tfcpad || (x->props.flags & XFRM_STATE_ESN)) + return 0; + + dev = dev_get_by_index(net, xuo->ifindex); + if (!dev) { + if (!(xuo->flags & XFRM_OFFLOAD_INBOUND)) { + saddr = &x->props.saddr; + daddr = &x->id.daddr; + } else { + saddr = &x->id.daddr; + daddr = &x->props.saddr; + } + + dst = __xfrm_dst_lookup(net, 0, 0, saddr, daddr, x->props.family); + if (IS_ERR(dst)) + return 0; + + dev = dst->dev; + + dev_hold(dev); + dst_release(dst); + } + + if (!dev->xfrmdev_ops || !dev->xfrmdev_ops->xdo_dev_state_add) { + dev_put(dev); + return 0; + } + + xso->dev = dev; + xso->num_exthdrs = 1; + xso->flags = xuo->flags; + + err = dev->xfrmdev_ops->xdo_dev_state_add(x); + if (err) { + dev_put(dev); + return err; + } + + return 0; +} +EXPORT_SYMBOL_GPL(xfrm_dev_state_add); + +bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x) +{ + int mtu; + struct dst_entry *dst = skb_dst(skb); + struct xfrm_dst *xdst = (struct xfrm_dst *)dst; + struct net_device *dev = x->xso.dev; + + if (!x->type_offload || x->encap) + return false; + + if ((x->xso.offload_handle && (dev == dst->path->dev)) && + !dst->child->xfrm && x->type->get_mtu) { + mtu = x->type->get_mtu(x, xdst->child_mtu_cached); + + if (skb->len <= mtu) + goto ok; + + if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu)) + goto ok; + } + + return false; + +ok: + if (dev && dev->xfrmdev_ops && dev->xfrmdev_ops->xdo_dev_offload_ok) + return x->xso.dev->xfrmdev_ops->xdo_dev_offload_ok(skb, x); + + return true; +} +EXPORT_SYMBOL_GPL(xfrm_dev_offload_ok); + +int xfrm_dev_register(struct net_device *dev) +{ + if ((dev->features & NETIF_F_HW_ESP) && !dev->xfrmdev_ops) + return NOTIFY_BAD; + if ((dev->features & NETIF_F_HW_ESP_TX_CSUM) && + !(dev->features & NETIF_F_HW_ESP)) + return NOTIFY_BAD; + + 
return NOTIFY_DONE; +} + +static int xfrm_dev_unregister(struct net_device *dev) +{ + return NOTIFY_DONE; +} + +static int xfrm_dev_feat_change(struct net_device *dev) +{ + if ((dev->features & NETIF_F_HW_ESP) && !dev->xfrmdev_ops) + return NOTIFY_BAD; + else if (!(dev->features & NETIF_F_HW_ESP)) + dev->xfrmdev_ops = NULL; + + if ((dev->features & NETIF_F_HW_ESP_TX_CSUM) && + !(dev->features & NETIF_F_HW_ESP)) + return NOTIFY_BAD; + + return NOTIFY_DONE; +} + +static int xfrm_dev_down(struct net_device *dev) +{ + if (dev->hw_features & NETIF_F_HW_ESP) + xfrm_dev_state_flush(dev_net(dev), dev, true); + + xfrm_garbage_collect(dev_net(dev)); + + return NOTIFY_DONE; +} + static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); switch (event) { + case NETDEV_REGISTER: + return xfrm_dev_register(dev); + + case NETDEV_UNREGISTER: + return xfrm_dev_unregister(dev); + + case NETDEV_FEAT_CHANGE: + return xfrm_dev_feat_change(dev); + case NETDEV_DOWN: - xfrm_garbage_collect(dev_net(dev)); + return xfrm_dev_down(dev); } return NOTIFY_DONE; } diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 46bdb4fbed0b..362d655eac27 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -107,6 +107,8 @@ struct sec_path *secpath_dup(struct sec_path *src) sp->len = 0; sp->olen = 0; + memset(sp->ovec, 0, sizeof(sp->ovec[XFRM_MAX_OFFLOAD_DEPTH])); + if (src) { int i; @@ -207,8 +209,9 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) unsigned int family; int decaps = 0; int async = 0; - struct xfrm_offload *xo; bool xfrm_gro = false; + bool crypto_done = false; + struct xfrm_offload *xo = xfrm_offload(skb); if (encap_type < 0) { x = xfrm_input_state(skb); @@ -226,6 +229,37 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) goto lock; } + if (xo && (xo->flags & CRYPTO_DONE)) { + crypto_done = true; + x = 
xfrm_input_state(skb); + family = XFRM_SPI_SKB_CB(skb)->family; + + if (!(xo->status & CRYPTO_SUCCESS)) { + if (xo->status & + (CRYPTO_TRANSPORT_AH_AUTH_FAILED | + CRYPTO_TRANSPORT_ESP_AUTH_FAILED | + CRYPTO_TUNNEL_AH_AUTH_FAILED | + CRYPTO_TUNNEL_ESP_AUTH_FAILED)) { + + xfrm_audit_state_icvfail(x, skb, + x->type->proto); + x->stats.integrity_failed++; + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR); + goto drop; + } + + XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR); + goto drop; + } + + if ((err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); + goto drop; + } + + goto lock; + } + daddr = (xfrm_address_t *)(skb_network_header(skb) + XFRM_SPI_SKB_CB(skb)->daddroff); family = XFRM_SPI_SKB_CB(skb)->family; @@ -311,7 +345,10 @@ lock: skb_dst_force(skb); dev_hold(skb->dev); - nexthdr = x->type->input(x, skb); + if (crypto_done) + nexthdr = x->type_offload->input_tail(x, skb); + else + nexthdr = x->type->input(x, skb); if (nexthdr == -EINPROGRESS) return 0; diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 8ba29fe58352..a15088613a6c 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -99,12 +99,13 @@ static int xfrm_output_one(struct sk_buff *skb, int err) skb_dst_force(skb); - /* Inner headers are invalid now. 
*/ - skb->encapsulation = 0; - - err = x->type->output(x, skb); - if (err == -EINPROGRESS) - goto out; + if (xfrm_offload(skb)) { + x->type_offload->encap(x, skb); + } else { + err = x->type->output(x, skb); + if (err == -EINPROGRESS) + goto out; + } resume: if (err) { @@ -200,8 +201,38 @@ static int xfrm_output_gso(struct net *net, struct sock *sk, struct sk_buff *skb int xfrm_output(struct sock *sk, struct sk_buff *skb) { struct net *net = dev_net(skb_dst(skb)->dev); + struct xfrm_state *x = skb_dst(skb)->xfrm; int err; + secpath_reset(skb); + + if (xfrm_dev_offload_ok(skb, x)) { + struct sec_path *sp; + + sp = secpath_dup(skb->sp); + if (!sp) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR); + kfree_skb(skb); + return -ENOMEM; + } + if (skb->sp) + secpath_put(skb->sp); + skb->sp = sp; + + sp->olen++; + sp->xvec[skb->sp->len++] = x; + xfrm_state_hold(x); + + if (skb_is_gso(skb)) { + skb_shinfo(skb)->gso_type |= SKB_GSO_ESP; + + return xfrm_output2(net, sk, skb); + } + + if (x->xso.dev && x->xso.dev->features & NETIF_F_HW_ESP_TX_CSUM) + goto out; + } + if (skb_is_gso(skb)) return xfrm_output_gso(net, sk, skb); @@ -214,6 +245,7 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb) } } +out: return xfrm_output2(net, sk, skb); } EXPORT_SYMBOL_GPL(xfrm_output); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 7befca2a0773..dd44ddc1aea5 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -116,11 +116,10 @@ static const struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short fa return afinfo; } -static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, - int tos, int oif, - const xfrm_address_t *saddr, - const xfrm_address_t *daddr, - int family) +struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif, + const xfrm_address_t *saddr, + const xfrm_address_t *daddr, + int family) { const struct xfrm_policy_afinfo *afinfo; struct dst_entry *dst; @@ -135,6 +134,7 @@ static inline struct dst_entry 
*__xfrm_dst_lookup(struct net *net, return dst; } +EXPORT_SYMBOL(__xfrm_dst_lookup); static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos, int oif, diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 47fefe97d1e3..fc3c5aa38754 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -440,6 +440,7 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) x->type->destructor(x); xfrm_put_type(x->type); } + xfrm_dev_state_free(x); security_xfrm_state_free(x); kfree(x); } @@ -609,6 +610,8 @@ int __xfrm_state_delete(struct xfrm_state *x) net->xfrm.state_num--; spin_unlock(&net->xfrm.xfrm_state_lock); + xfrm_dev_state_delete(x); + /* All xfrm_state objects are created by xfrm_state_alloc. * The xfrm_state_alloc call gives a reference, and that * is what we are dropping here. @@ -653,12 +656,41 @@ xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid) return err; } + +static inline int +xfrm_dev_state_flush_secctx_check(struct net *net, struct net_device *dev, bool task_valid) +{ + int i, err = 0; + + for (i = 0; i <= net->xfrm.state_hmask; i++) { + struct xfrm_state *x; + struct xfrm_state_offload *xso; + + hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { + xso = &x->xso; + + if (xso->dev == dev && + (err = security_xfrm_state_delete(x)) != 0) { + xfrm_audit_state_delete(x, 0, task_valid); + return err; + } + } + } + + return err; +} #else static inline int xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid) { return 0; } + +static inline int +xfrm_dev_state_flush_secctx_check(struct net *net, struct net_device *dev, bool task_valid) +{ + return 0; +} #endif int xfrm_state_flush(struct net *net, u8 proto, bool task_valid) @@ -701,6 +733,48 @@ out: } EXPORT_SYMBOL(xfrm_state_flush); +int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid) +{ + int i, err = 0, cnt = 0; + + spin_lock_bh(&net->xfrm.xfrm_state_lock); + err = 
xfrm_dev_state_flush_secctx_check(net, dev, task_valid); + if (err) + goto out; + + err = -ESRCH; + for (i = 0; i <= net->xfrm.state_hmask; i++) { + struct xfrm_state *x; + struct xfrm_state_offload *xso; +restart: + hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { + xso = &x->xso; + + if (!xfrm_state_kern(x) && xso->dev == dev) { + xfrm_state_hold(x); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); + + err = xfrm_state_delete(x); + xfrm_audit_state_delete(x, err ? 0 : 1, + task_valid); + xfrm_state_put(x); + if (!err) + cnt++; + + spin_lock_bh(&net->xfrm.xfrm_state_lock); + goto restart; + } + } + } + if (cnt) + err = 0; + +out: + spin_unlock_bh(&net->xfrm.xfrm_state_lock); + return err; +} +EXPORT_SYMBOL(xfrm_dev_state_flush); + void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si) { spin_lock_bh(&net->xfrm.xfrm_state_lock); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 4f7e62ddc17e..de3332e3f9e2 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -595,6 +595,10 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, goto error; } + if (attrs[XFRMA_OFFLOAD_DEV] && + xfrm_dev_state_add(net, x, nla_data(attrs[XFRMA_OFFLOAD_DEV]))) + goto error; + if ((err = xfrm_alloc_replay_state_esn(&x->replay_esn, &x->preplay_esn, attrs[XFRMA_REPLAY_ESN_VAL]))) goto error; @@ -779,6 +783,23 @@ static int copy_sec_ctx(struct xfrm_sec_ctx *s, struct sk_buff *skb) return 0; } +static int copy_user_offload(struct xfrm_state_offload *xso, struct sk_buff *skb) +{ + struct xfrm_user_offload *xuo; + struct nlattr *attr; + + attr = nla_reserve(skb, XFRMA_OFFLOAD_DEV, sizeof(*xuo)); + if (attr == NULL) + return -EMSGSIZE; + + xuo = nla_data(attr); + + xuo->ifindex = xso->dev->ifindex; + xuo->flags = xso->flags; + + return 0; +} + static int copy_to_user_auth(struct xfrm_algo_auth *auth, struct sk_buff *skb) { struct xfrm_algo *algo; @@ -869,6 +890,10 @@ static int copy_to_user_state_extra(struct xfrm_state *x, &x->replay); if (ret) 
goto out; + if(x->xso.dev) + ret = copy_user_offload(&x->xso, skb); + if (ret) + goto out; if (x->security) ret = copy_sec_ctx(x->security, skb); out: @@ -2406,6 +2431,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { [XFRMA_SA_EXTRA_FLAGS] = { .type = NLA_U32 }, [XFRMA_PROTO] = { .type = NLA_U8 }, [XFRMA_ADDRESS_FILTER] = { .len = sizeof(struct xfrm_address_filter) }, + [XFRMA_OFFLOAD_DEV] = { .len = sizeof(struct xfrm_user_offload) }, }; static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = { @@ -2622,6 +2648,8 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x) l += nla_total_size(sizeof(*x->coaddr)); if (x->props.extra_flags) l += nla_total_size(sizeof(x->props.extra_flags)); + if (x->xso.dev) + l += nla_total_size(sizeof(x->xso)); /* Must count x->lastused as it may become non-zero behind our back. */ l += nla_total_size_64bit(sizeof(u64)); -- cgit v1.2.3-71-gd317 From fb9ffa6a7f7ef39cc0f14f417b66411be5492512 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Tue, 12 Apr 2016 19:40:46 -0300 Subject: [media] v4l: Add metadata buffer type and format The metadata buffer type is used to transfer metadata between userspace and kernelspace through a V4L2 buffers queue. It comes with a new metadata capture capability and format description. 
Signed-off-by: Laurent Pinchart Tested-by: Guennadi Liakhovetski Acked-by: Sakari Ailus Acked-by: Hans Verkuil [hans.verkuil@cisco.com: removed left-over 'experimental' note] [hans.verkuil@cisco.com: add newline after _v4l2-meta-format label] Signed-off-by: Mauro Carvalho Chehab --- Documentation/media/uapi/v4l/buffer.rst | 3 ++ Documentation/media/uapi/v4l/dev-meta.rst | 58 ++++++++++++++++++++++++ Documentation/media/uapi/v4l/devices.rst | 1 + Documentation/media/uapi/v4l/vidioc-querycap.rst | 3 ++ Documentation/media/videodev2.h.rst.exceptions | 2 + drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 19 ++++++++ drivers/media/v4l2-core/v4l2-dev.c | 16 ++++--- drivers/media/v4l2-core/v4l2-ioctl.c | 34 ++++++++++++++ drivers/media/v4l2-core/videobuf2-v4l2.c | 3 ++ include/media/v4l2-ioctl.h | 17 +++++++ include/trace/events/v4l2.h | 1 + include/uapi/linux/videodev2.h | 13 ++++++ 12 files changed, 164 insertions(+), 6 deletions(-) create mode 100644 Documentation/media/uapi/v4l/dev-meta.rst (limited to 'include/uapi') diff --git a/Documentation/media/uapi/v4l/buffer.rst b/Documentation/media/uapi/v4l/buffer.rst index d1e0d55dc219..64613d935edd 100644 --- a/Documentation/media/uapi/v4l/buffer.rst +++ b/Documentation/media/uapi/v4l/buffer.rst @@ -440,6 +440,9 @@ enum v4l2_buf_type - 12 - Buffer for Software Defined Radio (SDR) output stream, see :ref:`sdr`. + * - ``V4L2_BUF_TYPE_META_CAPTURE`` + - 13 + - Buffer for metadata capture, see :ref:`metadata`. diff --git a/Documentation/media/uapi/v4l/dev-meta.rst b/Documentation/media/uapi/v4l/dev-meta.rst new file mode 100644 index 000000000000..62518adfe37b --- /dev/null +++ b/Documentation/media/uapi/v4l/dev-meta.rst @@ -0,0 +1,58 @@ +.. -*- coding: utf-8; mode: rst -*- + +.. _metadata: + +****************** +Metadata Interface +****************** + +Metadata refers to any non-image data that supplements video frames with +additional information. 
This may include statistics computed over the image +or frame capture parameters supplied by the image source. This interface is +intended for transfer of metadata to userspace and control of that operation. + +The metadata interface is implemented on video capture device nodes. The device +can be dedicated to metadata or can implement both video and metadata capture +as specified in its reported capabilities. + +Querying Capabilities +===================== + +Device nodes supporting the metadata interface set the ``V4L2_CAP_META_CAPTURE`` +flag in the ``device_caps`` field of the +:c:type:`v4l2_capability` structure returned by the :c:func:`VIDIOC_QUERYCAP` +ioctl. That flag means the device can capture metadata to memory. + +At least one of the read/write or streaming I/O methods must be supported. + + +Data Format Negotiation +======================= + +The metadata device uses the :ref:`format` ioctls to select the capture format. +The metadata buffer content format is bound to that selected format. In addition +to the basic :ref:`format` ioctls, the :c:func:`VIDIOC_ENUM_FMT` ioctl must be +supported as well. + +To use the :ref:`format` ioctls applications set the ``type`` field of the +:c:type:`v4l2_format` structure to ``V4L2_BUF_TYPE_META_CAPTURE`` and use the +:c:type:`v4l2_meta_format` ``meta`` member of the ``fmt`` union as needed per +the desired operation. Both drivers and applications must set the remainder of +the :c:type:`v4l2_format` structure to 0. + +.. _v4l2-meta-format: + +.. flat-table:: struct v4l2_meta_format + :header-rows: 0 + :stub-columns: 0 + :widths: 1 1 2 + + * - __u32 + - ``dataformat`` + - The data format, set by the application. This is a little endian + :ref:`four character code <v4l2-fourcc>`. V4L2 defines metadata formats + in :ref:`meta-formats`. + * - __u32 + - ``buffersize`` + - Maximum buffer size in bytes required for data. The value is set by the + driver.
diff --git a/Documentation/media/uapi/v4l/devices.rst b/Documentation/media/uapi/v4l/devices.rst index 5c3d6c29e12c..fb7f8c26cf09 100644 --- a/Documentation/media/uapi/v4l/devices.rst +++ b/Documentation/media/uapi/v4l/devices.rst @@ -25,3 +25,4 @@ Interfaces dev-touch dev-event dev-subdev + dev-meta diff --git a/Documentation/media/uapi/v4l/vidioc-querycap.rst b/Documentation/media/uapi/v4l/vidioc-querycap.rst index 165d8314327e..12e0d9a63cd8 100644 --- a/Documentation/media/uapi/v4l/vidioc-querycap.rst +++ b/Documentation/media/uapi/v4l/vidioc-querycap.rst @@ -236,6 +236,9 @@ specification the ioctl returns an ``EINVAL`` error code. * - ``V4L2_CAP_SDR_OUTPUT`` - 0x00400000 - The device supports the :ref:`SDR Output ` interface. + * - ``V4L2_CAP_META_CAPTURE`` + - 0x00800000 + - The device supports the :ref:`metadata` capture interface. * - ``V4L2_CAP_READWRITE`` - 0x01000000 - The device supports the :ref:`read() ` and/or diff --git a/Documentation/media/videodev2.h.rst.exceptions b/Documentation/media/videodev2.h.rst.exceptions index e11a0d0a8931..c9c611b18ba1 100644 --- a/Documentation/media/videodev2.h.rst.exceptions +++ b/Documentation/media/videodev2.h.rst.exceptions @@ -27,6 +27,7 @@ replace symbol V4L2_FIELD_SEQ_TB :c:type:`v4l2_field` replace symbol V4L2_FIELD_TOP :c:type:`v4l2_field` # Documented enum v4l2_buf_type +replace symbol V4L2_BUF_TYPE_META_CAPTURE :c:type:`v4l2_buf_type` replace symbol V4L2_BUF_TYPE_SDR_CAPTURE :c:type:`v4l2_buf_type` replace symbol V4L2_BUF_TYPE_SDR_OUTPUT :c:type:`v4l2_buf_type` replace symbol V4L2_BUF_TYPE_SLICED_VBI_CAPTURE :c:type:`v4l2_buf_type` @@ -152,6 +153,7 @@ replace define V4L2_CAP_MODULATOR device-capabilities replace define V4L2_CAP_SDR_CAPTURE device-capabilities replace define V4L2_CAP_EXT_PIX_FORMAT device-capabilities replace define V4L2_CAP_SDR_OUTPUT device-capabilities +replace define V4L2_CAP_META_CAPTURE device-capabilities replace define V4L2_CAP_READWRITE device-capabilities replace define 
V4L2_CAP_ASYNCIO device-capabilities replace define V4L2_CAP_STREAMING device-capabilities diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index 77b8a2dcfcdf..6f52970f8b54 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -161,6 +161,20 @@ static inline int put_v4l2_sdr_format(struct v4l2_sdr_format *kp, struct v4l2_sd return 0; } +static inline int get_v4l2_meta_format(struct v4l2_meta_format *kp, struct v4l2_meta_format __user *up) +{ + if (copy_from_user(kp, up, sizeof(struct v4l2_meta_format))) + return -EFAULT; + return 0; +} + +static inline int put_v4l2_meta_format(struct v4l2_meta_format *kp, struct v4l2_meta_format __user *up) +{ + if (copy_to_user(up, kp, sizeof(struct v4l2_meta_format))) + return -EFAULT; + return 0; +} + struct v4l2_format32 { __u32 type; /* enum v4l2_buf_type */ union { @@ -170,6 +184,7 @@ struct v4l2_format32 { struct v4l2_vbi_format vbi; struct v4l2_sliced_vbi_format sliced; struct v4l2_sdr_format sdr; + struct v4l2_meta_format meta; __u8 raw_data[200]; /* user-defined */ } fmt; }; @@ -216,6 +231,8 @@ static int __get_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __us case V4L2_BUF_TYPE_SDR_CAPTURE: case V4L2_BUF_TYPE_SDR_OUTPUT: return get_v4l2_sdr_format(&kp->fmt.sdr, &up->fmt.sdr); + case V4L2_BUF_TYPE_META_CAPTURE: + return get_v4l2_meta_format(&kp->fmt.meta, &up->fmt.meta); default: pr_info("compat_ioctl32: unexpected VIDIOC_FMT type %d\n", kp->type); @@ -263,6 +280,8 @@ static int __put_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __us case V4L2_BUF_TYPE_SDR_CAPTURE: case V4L2_BUF_TYPE_SDR_OUTPUT: return put_v4l2_sdr_format(&kp->fmt.sdr, &up->fmt.sdr); + case V4L2_BUF_TYPE_META_CAPTURE: + return put_v4l2_meta_format(&kp->fmt.meta, &up->fmt.meta); default: pr_info("compat_ioctl32: unexpected VIDIOC_FMT type %d\n", kp->type); diff --git a/drivers/media/v4l2-core/v4l2-dev.c 
b/drivers/media/v4l2-core/v4l2-dev.c index fa2124cb31bd..c647ba648805 100644 --- a/drivers/media/v4l2-core/v4l2-dev.c +++ b/drivers/media/v4l2-core/v4l2-dev.c @@ -575,30 +575,34 @@ static void determine_valid_ioctls(struct video_device *vdev) set_bit(_IOC_NR(VIDIOC_ENUM_FREQ_BANDS), valid_ioctls); if (is_vid || is_tch) { - /* video specific ioctls */ + /* video and metadata specific ioctls */ if ((is_rx && (ops->vidioc_enum_fmt_vid_cap || ops->vidioc_enum_fmt_vid_cap_mplane || - ops->vidioc_enum_fmt_vid_overlay)) || + ops->vidioc_enum_fmt_vid_overlay || + ops->vidioc_enum_fmt_meta_cap)) || (is_tx && (ops->vidioc_enum_fmt_vid_out || ops->vidioc_enum_fmt_vid_out_mplane))) set_bit(_IOC_NR(VIDIOC_ENUM_FMT), valid_ioctls); if ((is_rx && (ops->vidioc_g_fmt_vid_cap || ops->vidioc_g_fmt_vid_cap_mplane || - ops->vidioc_g_fmt_vid_overlay)) || + ops->vidioc_g_fmt_vid_overlay || + ops->vidioc_g_fmt_meta_cap)) || (is_tx && (ops->vidioc_g_fmt_vid_out || ops->vidioc_g_fmt_vid_out_mplane || ops->vidioc_g_fmt_vid_out_overlay))) set_bit(_IOC_NR(VIDIOC_G_FMT), valid_ioctls); if ((is_rx && (ops->vidioc_s_fmt_vid_cap || ops->vidioc_s_fmt_vid_cap_mplane || - ops->vidioc_s_fmt_vid_overlay)) || + ops->vidioc_s_fmt_vid_overlay || + ops->vidioc_s_fmt_meta_cap)) || (is_tx && (ops->vidioc_s_fmt_vid_out || ops->vidioc_s_fmt_vid_out_mplane || ops->vidioc_s_fmt_vid_out_overlay))) set_bit(_IOC_NR(VIDIOC_S_FMT), valid_ioctls); if ((is_rx && (ops->vidioc_try_fmt_vid_cap || ops->vidioc_try_fmt_vid_cap_mplane || - ops->vidioc_try_fmt_vid_overlay)) || + ops->vidioc_try_fmt_vid_overlay || + ops->vidioc_try_fmt_meta_cap)) || (is_tx && (ops->vidioc_try_fmt_vid_out || ops->vidioc_try_fmt_vid_out_mplane || ops->vidioc_try_fmt_vid_out_overlay))) @@ -664,7 +668,7 @@ static void determine_valid_ioctls(struct video_device *vdev) } if (is_vid || is_vbi || is_sdr || is_tch) { - /* ioctls valid for video, vbi or sdr */ + /* ioctls valid for video, metadata, vbi or sdr */ SET_VALID_IOCTL(ops, VIDIOC_REQBUFS, 
vidioc_reqbufs); SET_VALID_IOCTL(ops, VIDIOC_QUERYBUF, vidioc_querybuf); SET_VALID_IOCTL(ops, VIDIOC_QBUF, vidioc_qbuf); diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c index 93e8f42b0d63..dec6b120a5a2 100644 --- a/drivers/media/v4l2-core/v4l2-ioctl.c +++ b/drivers/media/v4l2-core/v4l2-ioctl.c @@ -155,6 +155,7 @@ const char *v4l2_type_names[] = { [V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE] = "vid-out-mplane", [V4L2_BUF_TYPE_SDR_CAPTURE] = "sdr-cap", [V4L2_BUF_TYPE_SDR_OUTPUT] = "sdr-out", + [V4L2_BUF_TYPE_META_CAPTURE] = "meta-cap", }; EXPORT_SYMBOL(v4l2_type_names); @@ -246,6 +247,7 @@ static void v4l_print_format(const void *arg, bool write_only) const struct v4l2_sliced_vbi_format *sliced; const struct v4l2_window *win; const struct v4l2_sdr_format *sdr; + const struct v4l2_meta_format *meta; unsigned i; pr_cont("type=%s", prt_names(p->type, v4l2_type_names)); @@ -325,6 +327,15 @@ static void v4l_print_format(const void *arg, bool write_only) (sdr->pixelformat >> 16) & 0xff, (sdr->pixelformat >> 24) & 0xff); break; + case V4L2_BUF_TYPE_META_CAPTURE: + meta = &p->fmt.meta; + pr_cont(", dataformat=%c%c%c%c, buffersize=%u\n", + (meta->dataformat >> 0) & 0xff, + (meta->dataformat >> 8) & 0xff, + (meta->dataformat >> 16) & 0xff, + (meta->dataformat >> 24) & 0xff, + meta->buffersize); + break; } } @@ -943,6 +954,10 @@ static int check_fmt(struct file *file, enum v4l2_buf_type type) if (is_sdr && is_tx && ops->vidioc_g_fmt_sdr_out) return 0; break; + case V4L2_BUF_TYPE_META_CAPTURE: + if (is_vid && is_rx && ops->vidioc_g_fmt_meta_cap) + return 0; + break; default: break; } @@ -1327,6 +1342,11 @@ static int v4l_enum_fmt(const struct v4l2_ioctl_ops *ops, break; ret = ops->vidioc_enum_fmt_sdr_out(file, fh, arg); break; + case V4L2_BUF_TYPE_META_CAPTURE: + if (unlikely(!is_rx || !is_vid || !ops->vidioc_enum_fmt_meta_cap)) + break; + ret = ops->vidioc_enum_fmt_meta_cap(file, fh, arg); + break; } if (ret == 0) v4l_fill_fmtdesc(p); @@ -1426,6 
+1446,10 @@ static int v4l_g_fmt(const struct v4l2_ioctl_ops *ops, if (unlikely(!is_tx || !is_sdr || !ops->vidioc_g_fmt_sdr_out)) break; return ops->vidioc_g_fmt_sdr_out(file, fh, arg); + case V4L2_BUF_TYPE_META_CAPTURE: + if (unlikely(!is_rx || !is_vid || !ops->vidioc_g_fmt_meta_cap)) + break; + return ops->vidioc_g_fmt_meta_cap(file, fh, arg); } return -EINVAL; } @@ -1531,6 +1555,11 @@ static int v4l_s_fmt(const struct v4l2_ioctl_ops *ops, break; CLEAR_AFTER_FIELD(p, fmt.sdr); return ops->vidioc_s_fmt_sdr_out(file, fh, arg); + case V4L2_BUF_TYPE_META_CAPTURE: + if (unlikely(!is_rx || !is_vid || !ops->vidioc_s_fmt_meta_cap)) + break; + CLEAR_AFTER_FIELD(p, fmt.meta); + return ops->vidioc_s_fmt_meta_cap(file, fh, arg); } return -EINVAL; } @@ -1616,6 +1645,11 @@ static int v4l_try_fmt(const struct v4l2_ioctl_ops *ops, break; CLEAR_AFTER_FIELD(p, fmt.sdr); return ops->vidioc_try_fmt_sdr_out(file, fh, arg); + case V4L2_BUF_TYPE_META_CAPTURE: + if (unlikely(!is_rx || !is_vid || !ops->vidioc_try_fmt_meta_cap)) + break; + CLEAR_AFTER_FIELD(p, fmt.meta); + return ops->vidioc_try_fmt_meta_cap(file, fh, arg); } return -EINVAL; } diff --git a/drivers/media/v4l2-core/videobuf2-v4l2.c b/drivers/media/v4l2-core/videobuf2-v4l2.c index 3529849d2218..0c0669976bdc 100644 --- a/drivers/media/v4l2-core/videobuf2-v4l2.c +++ b/drivers/media/v4l2-core/videobuf2-v4l2.c @@ -544,6 +544,9 @@ int vb2_create_bufs(struct vb2_queue *q, struct v4l2_create_buffers *create) case V4L2_BUF_TYPE_SDR_OUTPUT: requested_sizes[0] = f->fmt.sdr.buffersize; break; + case V4L2_BUF_TYPE_META_CAPTURE: + requested_sizes[0] = f->fmt.meta.buffersize; + break; default: return -EINVAL; } diff --git a/include/media/v4l2-ioctl.h b/include/media/v4l2-ioctl.h index 6cd94e5ee113..bd5312118013 100644 --- a/include/media/v4l2-ioctl.h +++ b/include/media/v4l2-ioctl.h @@ -44,6 +44,9 @@ struct v4l2_fh; * @vidioc_enum_fmt_sdr_out: pointer to the function that implements * :ref:`VIDIOC_ENUM_FMT ` ioctl logic * for Software 
Defined Radio output + * @vidioc_enum_fmt_meta_cap: pointer to the function that implements + * :ref:`VIDIOC_ENUM_FMT ` ioctl logic + * for metadata capture * @vidioc_g_fmt_vid_cap: pointer to the function that implements * :ref:`VIDIOC_G_FMT ` ioctl logic for video capture * in single plane mode @@ -74,6 +77,8 @@ struct v4l2_fh; * @vidioc_g_fmt_sdr_out: pointer to the function that implements * :ref:`VIDIOC_G_FMT ` ioctl logic for Software Defined * Radio output + * @vidioc_g_fmt_meta_cap: pointer to the function that implements + * :ref:`VIDIOC_G_FMT ` ioctl logic for metadata capture * @vidioc_s_fmt_vid_cap: pointer to the function that implements * :ref:`VIDIOC_S_FMT ` ioctl logic for video capture * in single plane mode @@ -104,6 +109,8 @@ struct v4l2_fh; * @vidioc_s_fmt_sdr_out: pointer to the function that implements * :ref:`VIDIOC_S_FMT ` ioctl logic for Software Defined * Radio output + * @vidioc_s_fmt_meta_cap: pointer to the function that implements + * :ref:`VIDIOC_S_FMT ` ioctl logic for metadata capture * @vidioc_try_fmt_vid_cap: pointer to the function that implements * :ref:`VIDIOC_TRY_FMT ` ioctl logic for video capture * in single plane mode @@ -136,6 +143,8 @@ struct v4l2_fh; * @vidioc_try_fmt_sdr_out: pointer to the function that implements * :ref:`VIDIOC_TRY_FMT ` ioctl logic for Software Defined * Radio output + * @vidioc_try_fmt_meta_cap: pointer to the function that implements + * :ref:`VIDIOC_TRY_FMT ` ioctl logic for metadata capture * @vidioc_reqbufs: pointer to the function that implements * :ref:`VIDIOC_REQBUFS ` ioctl * @vidioc_querybuf: pointer to the function that implements @@ -306,6 +315,8 @@ struct v4l2_ioctl_ops { struct v4l2_fmtdesc *f); int (*vidioc_enum_fmt_sdr_out)(struct file *file, void *fh, struct v4l2_fmtdesc *f); + int (*vidioc_enum_fmt_meta_cap)(struct file *file, void *fh, + struct v4l2_fmtdesc *f); /* VIDIOC_G_FMT handlers */ int (*vidioc_g_fmt_vid_cap)(struct file *file, void *fh, @@ -332,6 +343,8 @@ struct 
v4l2_ioctl_ops { struct v4l2_format *f); int (*vidioc_g_fmt_sdr_out)(struct file *file, void *fh, struct v4l2_format *f); + int (*vidioc_g_fmt_meta_cap)(struct file *file, void *fh, + struct v4l2_format *f); /* VIDIOC_S_FMT handlers */ int (*vidioc_s_fmt_vid_cap)(struct file *file, void *fh, @@ -358,6 +371,8 @@ struct v4l2_ioctl_ops { struct v4l2_format *f); int (*vidioc_s_fmt_sdr_out)(struct file *file, void *fh, struct v4l2_format *f); + int (*vidioc_s_fmt_meta_cap)(struct file *file, void *fh, + struct v4l2_format *f); /* VIDIOC_TRY_FMT handlers */ int (*vidioc_try_fmt_vid_cap)(struct file *file, void *fh, @@ -384,6 +399,8 @@ struct v4l2_ioctl_ops { struct v4l2_format *f); int (*vidioc_try_fmt_sdr_out)(struct file *file, void *fh, struct v4l2_format *f); + int (*vidioc_try_fmt_meta_cap)(struct file *file, void *fh, + struct v4l2_format *f); /* Buffer handlers */ int (*vidioc_reqbufs)(struct file *file, void *fh, diff --git a/include/trace/events/v4l2.h b/include/trace/events/v4l2.h index ee7754c6e4a1..b3a85b3df53e 100644 --- a/include/trace/events/v4l2.h +++ b/include/trace/events/v4l2.h @@ -29,6 +29,7 @@ EM( V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE, "VIDEO_OUTPUT_MPLANE" ) \ EM( V4L2_BUF_TYPE_SDR_CAPTURE, "SDR_CAPTURE" ) \ EM( V4L2_BUF_TYPE_SDR_OUTPUT, "SDR_OUTPUT" ) \ + EM( V4L2_BUF_TYPE_META_CAPTURE, "META_CAPTURE" ) \ EMe(V4L2_BUF_TYPE_PRIVATE, "PRIVATE" ) SHOW_TYPE diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 8d351f5df2aa..7078deef2c64 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -143,6 +143,7 @@ enum v4l2_buf_type { V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE = 10, V4L2_BUF_TYPE_SDR_CAPTURE = 11, V4L2_BUF_TYPE_SDR_OUTPUT = 12, + V4L2_BUF_TYPE_META_CAPTURE = 13, /* Deprecated, do not use */ V4L2_BUF_TYPE_PRIVATE = 0x80, }; @@ -451,6 +452,7 @@ struct v4l2_capability { #define V4L2_CAP_SDR_CAPTURE 0x00100000 /* Is a SDR capture device */ #define V4L2_CAP_EXT_PIX_FORMAT 0x00200000 /* Supports the 
extended pixel format */ #define V4L2_CAP_SDR_OUTPUT 0x00400000 /* Is a SDR output device */ +#define V4L2_CAP_META_CAPTURE 0x00800000 /* Is a metadata capture device */ #define V4L2_CAP_READWRITE 0x01000000 /* read/write systemcalls */ #define V4L2_CAP_ASYNCIO 0x02000000 /* async I/O */ @@ -2085,6 +2087,16 @@ struct v4l2_sdr_format { __u8 reserved[24]; } __attribute__ ((packed)); +/** + * struct v4l2_meta_format - metadata format definition + * @dataformat: little endian four character code (fourcc) + * @buffersize: maximum size in bytes required for data + */ +struct v4l2_meta_format { + __u32 dataformat; + __u32 buffersize; +} __attribute__ ((packed)); + /** * struct v4l2_format - stream data format * @type: enum v4l2_buf_type; type of the data stream @@ -2104,6 +2116,7 @@ struct v4l2_format { struct v4l2_vbi_format vbi; /* V4L2_BUF_TYPE_VBI_CAPTURE */ struct v4l2_sliced_vbi_format sliced; /* V4L2_BUF_TYPE_SLICED_VBI_CAPTURE */ struct v4l2_sdr_format sdr; /* V4L2_BUF_TYPE_SDR_CAPTURE */ + struct v4l2_meta_format meta; /* V4L2_BUF_TYPE_META_CAPTURE */ __u8 raw_data[200]; /* user-defined */ } fmt; }; -- cgit v1.2.3-71-gd317 From 14d66538716574f8899b22bff24a68301e65f08d Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Sun, 10 Apr 2016 04:37:48 -0300 Subject: [media] v4l: Define a pixel format for the R-Car VSP1 1-D histogram engine The format is used on the R-Car VSP1 video queues that carry 1-D histogram statistics data. 
Signed-off-by: Laurent Pinchart Acked-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab --- Documentation/media/uapi/v4l/meta-formats.rst | 15 ++ .../media/uapi/v4l/pixfmt-meta-vsp1-hgo.rst | 168 +++++++++++++++++++++ Documentation/media/uapi/v4l/pixfmt.rst | 1 + drivers/media/v4l2-core/v4l2-ioctl.c | 1 + include/uapi/linux/videodev2.h | 3 + 5 files changed, 188 insertions(+) create mode 100644 Documentation/media/uapi/v4l/meta-formats.rst create mode 100644 Documentation/media/uapi/v4l/pixfmt-meta-vsp1-hgo.rst (limited to 'include/uapi') diff --git a/Documentation/media/uapi/v4l/meta-formats.rst b/Documentation/media/uapi/v4l/meta-formats.rst new file mode 100644 index 000000000000..05ab91e12f10 --- /dev/null +++ b/Documentation/media/uapi/v4l/meta-formats.rst @@ -0,0 +1,15 @@ +.. -*- coding: utf-8; mode: rst -*- + +.. _meta-formats: + +**************** +Metadata Formats +**************** + +These formats are used for the :ref:`metadata` interface only. + + +.. toctree:: + :maxdepth: 1 + + pixfmt-meta-vsp1-hgo diff --git a/Documentation/media/uapi/v4l/pixfmt-meta-vsp1-hgo.rst b/Documentation/media/uapi/v4l/pixfmt-meta-vsp1-hgo.rst new file mode 100644 index 000000000000..8d37bb313493 --- /dev/null +++ b/Documentation/media/uapi/v4l/pixfmt-meta-vsp1-hgo.rst @@ -0,0 +1,168 @@ +.. -*- coding: utf-8; mode: rst -*- + +.. _v4l2-meta-fmt-vsp1-hgo: + +******************************* +V4L2_META_FMT_VSP1_HGO ('VSPH') +******************************* + +Renesas R-Car VSP1 1-D Histogram Data + + +Description +=========== + +This format describes histogram data generated by the Renesas R-Car VSP1 1-D +Histogram (HGO) engine. + +The VSP1 HGO is a histogram computation engine that can operate on RGB, YCrCb +or HSV data. It operates on a possibly cropped and subsampled input image and +computes the minimum, maximum and sum of all pixels as well as per-channel +histograms. 
+ +The HGO can compute histograms independently per channel, on the maximum of the +three channels (RGB data only) or on the Y channel only (YCbCr only). It can +additionally output the histogram with 64 or 256 bins, resulting in four +possible modes of operation. + +- In *64 bins normal mode*, the HGO operates on the three channels independently + to compute three 64-bins histograms. RGB, YCbCr and HSV image formats are + supported. +- In *64 bins maximum mode*, the HGO operates on the maximum of the (R, G, B) + channels to compute a single 64-bins histogram. Only the RGB image format is + supported. +- In *256 bins normal mode*, the HGO operates on the Y channel to compute a + single 256-bins histogram. Only the YCbCr image format is supported. +- In *256 bins maximum mode*, the HGO operates on the maximum of the (R, G, B) + channels to compute a single 256-bins histogram. Only the RGB image format is + supported. + +**Byte Order.** +All data is stored in memory in little endian format. Each cell in the tables +contains one byte. + +.. flat-table:: VSP1 HGO Data - 64 Bins, Normal Mode (792 bytes) + :header-rows: 2 + :stub-columns: 0 + + * - Offset + - :cspan:`4` Memory + * - + - [31:24] + - [23:16] + - [15:8] + - [7:0] + * - 0 + - - + - R/Cr/H max [7:0] + - - + - R/Cr/H min [7:0] + * - 4 + - - + - G/Y/S max [7:0] + - - + - G/Y/S min [7:0] + * - 8 + - - + - B/Cb/V max [7:0] + - - + - B/Cb/V min [7:0] + * - 12 + - :cspan:`4` R/Cr/H sum [31:0] + * - 16 + - :cspan:`4` G/Y/S sum [31:0] + * - 20 + - :cspan:`4` B/Cb/V sum [31:0] + * - 24 + - :cspan:`4` R/Cr/H bin 0 [31:0] + * - + - :cspan:`4` ... + * - 276 + - :cspan:`4` R/Cr/H bin 63 [31:0] + * - 280 + - :cspan:`4` G/Y/S bin 0 [31:0] + * - + - :cspan:`4` ... + * - 532 + - :cspan:`4` G/Y/S bin 63 [31:0] + * - 536 + - :cspan:`4` B/Cb/V bin 0 [31:0] + * - + - :cspan:`4` ... + * - 788 + - :cspan:`4` B/Cb/V bin 63 [31:0] + +.. 
flat-table:: VSP1 HGO Data - 64 Bins, Max Mode (264 bytes) + :header-rows: 2 + :stub-columns: 0 + + * - Offset + - :cspan:`4` Memory + * - + - [31:24] + - [23:16] + - [15:8] + - [7:0] + * - 0 + - - + - max(R,G,B) max [7:0] + - - + - max(R,G,B) min [7:0] + * - 4 + - :cspan:`4` max(R,G,B) sum [31:0] + * - 8 + - :cspan:`4` max(R,G,B) bin 0 [31:0] + * - + - :cspan:`4` ... + * - 260 + - :cspan:`4` max(R,G,B) bin 63 [31:0] + +.. flat-table:: VSP1 HGO Data - 256 Bins, Normal Mode (1032 bytes) + :header-rows: 2 + :stub-columns: 0 + + * - Offset + - :cspan:`4` Memory + * - + - [31:24] + - [23:16] + - [15:8] + - [7:0] + * - 0 + - - + - Y max [7:0] + - - + - Y min [7:0] + * - 4 + - :cspan:`4` Y sum [31:0] + * - 8 + - :cspan:`4` Y bin 0 [31:0] + * - + - :cspan:`4` ... + * - 1028 + - :cspan:`4` Y bin 255 [31:0] + +.. flat-table:: VSP1 HGO Data - 256 Bins, Max Mode (1032 bytes) + :header-rows: 2 + :stub-columns: 0 + + * - Offset + - :cspan:`4` Memory + * - + - [31:24] + - [23:16] + - [15:8] + - [7:0] + * - 0 + - - + - max(R,G,B) max [7:0] + - - + - max(R,G,B) min [7:0] + * - 4 + - :cspan:`4` max(R,G,B) sum [31:0] + * - 8 + - :cspan:`4` max(R,G,B) bin 0 [31:0] + * - + - :cspan:`4` ... + * - 1028 + - :cspan:`4` max(R,G,B) bin 255 [31:0] diff --git a/Documentation/media/uapi/v4l/pixfmt.rst b/Documentation/media/uapi/v4l/pixfmt.rst index 4f184c7aedab..00737152497b 100644 --- a/Documentation/media/uapi/v4l/pixfmt.rst +++ b/Documentation/media/uapi/v4l/pixfmt.rst @@ -34,4 +34,5 @@ see also :ref:`VIDIOC_G_FBUF `.) 
pixfmt-013 sdr-formats tch-formats + meta-formats pixfmt-reserved diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c index dec6b120a5a2..a7c50241594e 100644 --- a/drivers/media/v4l2-core/v4l2-ioctl.c +++ b/drivers/media/v4l2-core/v4l2-ioctl.c @@ -1233,6 +1233,7 @@ static void v4l_fill_fmtdesc(struct v4l2_fmtdesc *fmt) case V4L2_TCH_FMT_DELTA_TD08: descr = "8-bit signed deltas"; break; case V4L2_TCH_FMT_TU16: descr = "16-bit unsigned touch data"; break; case V4L2_TCH_FMT_TU08: descr = "8-bit unsigned touch data"; break; + case V4L2_META_FMT_VSP1_HGO: descr = "R-Car VSP1 1-D Histogram"; break; default: /* Compressed formats */ diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 7078deef2c64..09cf3a32faf4 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -676,6 +676,9 @@ struct v4l2_pix_format { #define V4L2_TCH_FMT_TU16 v4l2_fourcc('T', 'U', '1', '6') /* 16-bit unsigned touch data */ #define V4L2_TCH_FMT_TU08 v4l2_fourcc('T', 'U', '0', '8') /* 8-bit unsigned touch data */ +/* Meta-data formats */ +#define V4L2_META_FMT_VSP1_HGO v4l2_fourcc('V', 'S', 'P', 'H') /* R-Car VSP1 Histogram */ + /* priv field value to indicates that subsequent fields are valid. */ #define V4L2_PIX_FMT_PRIV_MAGIC 0xfeedcafe -- cgit v1.2.3-71-gd317 From 5deb1c04c9f2cc3fe4b355a55a8fad244683a54a Mon Sep 17 00:00:00 2001 From: Niklas Söderlund Date: Tue, 6 Sep 2016 11:38:55 -0300 Subject: [media] v4l: Define a pixel format for the R-Car VSP1 2-D histogram engine MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The format is used on the R-Car VSP1 video queues that carry 2-D histogram statistics data. 
Signed-off-by: Niklas Söderlund Reviewed-by: Laurent Pinchart Signed-off-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab --- Documentation/media/uapi/v4l/meta-formats.rst | 1 + .../media/uapi/v4l/pixfmt-meta-vsp1-hgt.rst | 120 +++++++++++++++++++++ drivers/media/v4l2-core/v4l2-ioctl.c | 1 + include/uapi/linux/videodev2.h | 3 +- 4 files changed, 124 insertions(+), 1 deletion(-) create mode 100644 Documentation/media/uapi/v4l/pixfmt-meta-vsp1-hgt.rst (limited to 'include/uapi') diff --git a/Documentation/media/uapi/v4l/meta-formats.rst b/Documentation/media/uapi/v4l/meta-formats.rst index 05ab91e12f10..01e24e3df571 100644 --- a/Documentation/media/uapi/v4l/meta-formats.rst +++ b/Documentation/media/uapi/v4l/meta-formats.rst @@ -13,3 +13,4 @@ These formats are used for the :ref:`metadata` interface only. :maxdepth: 1 pixfmt-meta-vsp1-hgo + pixfmt-meta-vsp1-hgt diff --git a/Documentation/media/uapi/v4l/pixfmt-meta-vsp1-hgt.rst b/Documentation/media/uapi/v4l/pixfmt-meta-vsp1-hgt.rst new file mode 100644 index 000000000000..fb9f79466319 --- /dev/null +++ b/Documentation/media/uapi/v4l/pixfmt-meta-vsp1-hgt.rst @@ -0,0 +1,120 @@ +.. -*- coding: utf-8; mode: rst -*- + +.. _v4l2-meta-fmt-vsp1-hgt: + +******************************* +V4L2_META_FMT_VSP1_HGT ('VSPT') +******************************* + +Renesas R-Car VSP1 2-D Histogram Data + + +Description +=========== + +This format describes histogram data generated by the Renesas R-Car VSP1 +2-D Histogram (HGT) engine. + +The VSP1 HGT is a histogram computation engine that operates on HSV +data. It operates on a possibly cropped and subsampled input image and +computes the sum, maximum and minimum of the S component as well as a +weighted frequency histogram based on the H and S components. + +The histogram is a matrix of 6 Hue and 32 Saturation buckets, 192 in +total. Each HSV value is added to one or more buckets with a weight +between 1 and 16 depending on the Hue areas configuration. 
Finding the +corresponding buckets is done by inspecting the H and S value independently. + +The Saturation position **n** (0 - 31) of the bucket in the matrix is +found by the expression: + + n = S / 8 + +The Hue position **m** (0 - 5) of the bucket in the matrix depends on +how the HGT Hue areas are configured. There are 6 user configurable Hue +Areas which can be configured to cover overlapping Hue values: + +:: + + Area 0 Area 1 Area 2 Area 3 Area 4 Area 5 + ________ ________ ________ ________ ________ ________ + \ /| |\ /| |\ /| |\ /| |\ /| |\ /| |\ / + \ / | | \ / | | \ / | | \ / | | \ / | | \ / | | \ / + X | | X | | X | | X | | X | | X | | X + / \ | | / \ | | / \ | | / \ | | / \ | | / \ | | / \ + / \| |/ \| |/ \| |/ \| |/ \| |/ \| |/ \ + 5U 0L 0U 1L 1U 2L 2U 3L 3U 4L 4U 5L 5U 0L + <0..............................Hue Value............................255> + +When two consecutive areas don't overlap (n+1L is equal to nU) the boundary +value is considered as part of the lower area. + +Pixels with a hue value included in the centre of an area (between nL and nU +included) are attributed to that single area and given a weight of 16. Pixels +with a hue value included in the overlapping region between two areas (between +n+1L and nU excluded) are attributed to both areas and given a weight for each +of these areas proportional to their position along the diagonal lines +(rounded down). + +The Hue area setup must match one of the following constraints: + +:: + + 0L <= 0U <= 1L <= 1U <= 2L <= 2U <= 3L <= 3U <= 4L <= 4U <= 5L <= 5U + +:: + + 0U <= 1L <= 1U <= 2L <= 2U <= 3L <= 3U <= 4L <= 4U <= 5L <= 5U <= 0L + +**Byte Order.** +All data is stored in memory in little endian format. Each cell in the tables +contains one byte. + +..
flat-table:: VSP1 HGT Data - (776 bytes) + :header-rows: 2 + :stub-columns: 0 + + * - Offset + - :cspan:`4` Memory + * - + - [31:24] + - [23:16] + - [15:8] + - [7:0] + * - 0 + - - + - S max [7:0] + - - + - S min [7:0] + * - 4 + - :cspan:`4` S sum [31:0] + * - 8 + - :cspan:`4` Histogram bucket (m=0, n=0) [31:0] + * - 12 + - :cspan:`4` Histogram bucket (m=0, n=1) [31:0] + * - + - :cspan:`4` ... + * - 132 + - :cspan:`4` Histogram bucket (m=0, n=31) [31:0] + * - 136 + - :cspan:`4` Histogram bucket (m=1, n=0) [31:0] + * - + - :cspan:`4` ... + * - 264 + - :cspan:`4` Histogram bucket (m=2, n=0) [31:0] + * - + - :cspan:`4` ... + * - 392 + - :cspan:`4` Histogram bucket (m=3, n=0) [31:0] + * - + - :cspan:`4` ... + * - 520 + - :cspan:`4` Histogram bucket (m=4, n=0) [31:0] + * - + - :cspan:`4` ... + * - 648 + - :cspan:`4` Histogram bucket (m=5, n=0) [31:0] + * - + - :cspan:`4` ... + * - 772 + - :cspan:`4` Histogram bucket (m=5, n=31) [31:0] diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c index a7c50241594e..e5a2187381db 100644 --- a/drivers/media/v4l2-core/v4l2-ioctl.c +++ b/drivers/media/v4l2-core/v4l2-ioctl.c @@ -1234,6 +1234,7 @@ static void v4l_fill_fmtdesc(struct v4l2_fmtdesc *fmt) case V4L2_TCH_FMT_TU16: descr = "16-bit unsigned touch data"; break; case V4L2_TCH_FMT_TU08: descr = "8-bit unsigned touch data"; break; case V4L2_META_FMT_VSP1_HGO: descr = "R-Car VSP1 1-D Histogram"; break; + case V4L2_META_FMT_VSP1_HGT: descr = "R-Car VSP1 2-D Histogram"; break; default: /* Compressed formats */ diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 09cf3a32faf4..75f032448ae5 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -677,7 +677,8 @@ struct v4l2_pix_format { #define V4L2_TCH_FMT_TU08 v4l2_fourcc('T', 'U', '0', '8') /* 8-bit unsigned touch data */ /* Meta-data formats */ -#define V4L2_META_FMT_VSP1_HGO v4l2_fourcc('V', 'S', 'P', 'H') /* R-Car VSP1 Histogram */ 
+#define V4L2_META_FMT_VSP1_HGO v4l2_fourcc('V', 'S', 'P', 'H') /* R-Car VSP1 1-D Histogram */ +#define V4L2_META_FMT_VSP1_HGT v4l2_fourcc('V', 'S', 'P', 'T') /* R-Car VSP1 2-D Histogram */ /* priv field value to indicates that subsequent fields are valid. */ #define V4L2_PIX_FMT_PRIV_MAGIC 0xfeedcafe -- cgit v1.2.3-71-gd317 From 6df8be763115a0ce6b486ea304742e5646f74287 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Mon, 10 Apr 2017 16:15:26 -0300 Subject: [media] videodev.h: add V4L2_CTRL_FLAG_MODIFY_LAYOUT Add new flag to indicate that changing this control will change the buffer/mediabus layout as well. Signed-off-by: Hans Verkuil Acked-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 75f032448ae5..2b8feb86d09e 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -1659,6 +1659,7 @@ struct v4l2_querymenu { #define V4L2_CTRL_FLAG_VOLATILE 0x0080 #define V4L2_CTRL_FLAG_HAS_PAYLOAD 0x0100 #define V4L2_CTRL_FLAG_EXECUTE_ON_WRITE 0x0200 +#define V4L2_CTRL_FLAG_MODIFY_LAYOUT 0x0400 /* Query flags, to be ORed with the control ID */ #define V4L2_CTRL_FLAG_NEXT_CTRL 0x80000000 -- cgit v1.2.3-71-gd317 From cc41c84b7e7f2d7f6698bccc84890943fd021265 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 14 Apr 2017 20:31:08 +0200 Subject: netfilter: kill the fake untracked conntrack objects resurrect an old patch from Pablo Neira to remove the untracked objects. Currently, there are four possible states of an skb wrt. conntrack. 1. No conntrack attached, ct is NULL. 2. Normal (kmem cache allocated) ct attached. 3. a template (kmalloc'd), not in any hash tables at any point in time 4. the 'untracked' conntrack, a percpu nf_conn object, tagged via IPS_UNTRACKED_BIT in ct->status. Untracked is supposed to be identical to case 1. 
It exists only so users can check -m conntrack --ctstate UNTRACKED vs. -m conntrack --ctstate INVALID e.g. attempts to set connmark on INVALID or UNTRACKED conntracks is supposed to be a no-op. Thus currently we need to check ct == NULL || nf_ct_is_untracked(ct) in a lot of places in order to avoid altering untracked objects. The other consequence of the percpu untracked object is that all -j NOTRACK (and, later, kfree_skb of such skbs) result in an atomic op (inc/dec the untracked conntracks refcount). This adds a new kernel-private ctinfo state, IP_CT_UNTRACKED, to make the distinction instead. The (few) places that care about packet invalid (ct is NULL) vs. packet untracked now need to test ct == NULL vs. ctinfo == IP_CT_UNTRACKED, but all other places can omit the nf_ct_is_untracked() check. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/ip_vs.h | 6 +-- include/net/netfilter/nf_conntrack.h | 10 +---- include/uapi/linux/netfilter/nf_conntrack_common.h | 6 ++- net/ipv4/netfilter/nf_dup_ipv4.c | 3 +- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 3 +- net/ipv6/netfilter/nf_dup_ipv6.c | 3 +- net/netfilter/nf_conntrack_core.c | 48 +++------------------- net/netfilter/nf_nat_core.c | 3 -- net/netfilter/nft_ct.c | 14 +++---- net/netfilter/xt_CT.c | 16 ++++---- net/netfilter/xt_conntrack.c | 11 +++-- net/netfilter/xt_state.c | 13 +++--- 12 files changed, 39 insertions(+), 97 deletions(-) (limited to 'include/uapi') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 8a4a57b887fb..9a75d9933e63 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1556,12 +1556,8 @@ static inline void ip_vs_notrack(struct sk_buff *skb) struct nf_conn *ct = nf_ct_get(skb, &ctinfo); if (!ct || !nf_ct_is_untracked(ct)) { - struct nf_conn *untracked; - nf_conntrack_put(&ct->ct_general); - untracked = nf_ct_untracked_get(); - nf_conntrack_get(&untracked->ct_general); - nf_ct_set(skb, untracked, IP_CT_NEW); + nf_ct_set(skb, NULL, 
IP_CT_UNTRACKED); } #endif } diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 19605878da47..012b99f563e5 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -243,14 +243,6 @@ extern s32 (*nf_ct_nat_offset)(const struct nf_conn *ct, enum ip_conntrack_dir dir, u32 seq); -/* Fake conntrack entry for untracked connections */ -DECLARE_PER_CPU_ALIGNED(struct nf_conn, nf_conntrack_untracked); -static inline struct nf_conn *nf_ct_untracked_get(void) -{ - return raw_cpu_ptr(&nf_conntrack_untracked); -} -void nf_ct_untracked_status_or(unsigned long bits); - /* Iterate over all conntracks: if iter returns true, it's deleted. */ void nf_ct_iterate_cleanup(struct net *net, int (*iter)(struct nf_conn *i, void *data), @@ -283,7 +275,7 @@ static inline int nf_ct_is_dying(const struct nf_conn *ct) static inline int nf_ct_is_untracked(const struct nf_conn *ct) { - return test_bit(IPS_UNTRACKED_BIT, &ct->status); + return false; } /* Packet is received from loopback */ diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index 6a8e33dd4ecb..b4a0a1940118 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h @@ -28,12 +28,14 @@ enum ip_conntrack_info { /* only for userspace compatibility */ #ifndef __KERNEL__ IP_CT_NEW_REPLY = IP_CT_NUMBER, +#else + IP_CT_UNTRACKED = 7, #endif }; #define NF_CT_STATE_INVALID_BIT (1 << 0) #define NF_CT_STATE_BIT(ctinfo) (1 << ((ctinfo) % IP_CT_IS_REPLY + 1)) -#define NF_CT_STATE_UNTRACKED_BIT (1 << (IP_CT_NUMBER + 1)) +#define NF_CT_STATE_UNTRACKED_BIT (1 << (IP_CT_UNTRACKED + 1)) /* Bitset representing status of connection. 
*/ enum ip_conntrack_status { @@ -94,7 +96,7 @@ enum ip_conntrack_status { IPS_TEMPLATE_BIT = 11, IPS_TEMPLATE = (1 << IPS_TEMPLATE_BIT), - /* Conntrack is a fake untracked entry */ + /* Conntrack is a fake untracked entry. Obsolete and not used anymore */ IPS_UNTRACKED_BIT = 12, IPS_UNTRACKED = (1 << IPS_UNTRACKED_BIT), diff --git a/net/ipv4/netfilter/nf_dup_ipv4.c b/net/ipv4/netfilter/nf_dup_ipv4.c index f0dbff05fc28..39895b9ddeb9 100644 --- a/net/ipv4/netfilter/nf_dup_ipv4.c +++ b/net/ipv4/netfilter/nf_dup_ipv4.c @@ -69,8 +69,7 @@ void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum, #if IS_ENABLED(CONFIG_NF_CONNTRACK) /* Avoid counting cloned packets towards the original connection. */ nf_reset(skb); - nf_ct_set(skb, nf_ct_untracked_get(), IP_CT_NEW); - nf_conntrack_get(skb_nfct(skb)); + nf_ct_set(skb, NULL, IP_CT_UNTRACKED); #endif /* * If we are in PREROUTING/INPUT, decrease the TTL to mitigate potential diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index d2c2ccbfbe72..d5f028e33f65 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -221,8 +221,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl, type = icmp6h->icmp6_type - 130; if (type >= 0 && type < sizeof(noct_valid_new) && noct_valid_new[type]) { - nf_ct_set(skb, nf_ct_untracked_get(), IP_CT_NEW); - nf_conntrack_get(skb_nfct(skb)); + nf_ct_set(skb, NULL, IP_CT_UNTRACKED); return NF_ACCEPT; } diff --git a/net/ipv6/netfilter/nf_dup_ipv6.c b/net/ipv6/netfilter/nf_dup_ipv6.c index 888ecd106e5f..4a7ddeddbaab 100644 --- a/net/ipv6/netfilter/nf_dup_ipv6.c +++ b/net/ipv6/netfilter/nf_dup_ipv6.c @@ -58,8 +58,7 @@ void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum, #if IS_ENABLED(CONFIG_NF_CONNTRACK) nf_reset(skb); - nf_ct_set(skb, nf_ct_untracked_get(), IP_CT_NEW); - nf_conntrack_get(skb_nfct(skb)); + nf_ct_set(skb, NULL, 
IP_CT_UNTRACKED); #endif if (hooknum == NF_INET_PRE_ROUTING || hooknum == NF_INET_LOCAL_IN) { diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index bcf1d2a6539e..03150f60714d 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -180,14 +180,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_size); unsigned int nf_conntrack_max __read_mostly; seqcount_t nf_conntrack_generation __read_mostly; - -/* nf_conn must be 8 bytes aligned, as the 3 LSB bits are used - * for the nfctinfo. We cheat by (ab)using the PER CPU cache line - * alignment to enforce this. - */ -DEFINE_PER_CPU_ALIGNED(struct nf_conn, nf_conntrack_untracked); -EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked); - static unsigned int nf_conntrack_hash_rnd __read_mostly; static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, @@ -1314,9 +1306,10 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, int ret; tmpl = nf_ct_get(skb, &ctinfo); - if (tmpl) { + if (tmpl || ctinfo == IP_CT_UNTRACKED) { /* Previously seen (loopback or untracked)? Ignore. 
*/ - if (!nf_ct_is_template(tmpl)) { + if ((tmpl && !nf_ct_is_template(tmpl)) || + ctinfo == IP_CT_UNTRACKED) { NF_CT_STAT_INC_ATOMIC(net, ignore); return NF_ACCEPT; } @@ -1629,18 +1622,6 @@ void nf_ct_free_hashtable(void *hash, unsigned int size) } EXPORT_SYMBOL_GPL(nf_ct_free_hashtable); -static int untrack_refs(void) -{ - int cnt = 0, cpu; - - for_each_possible_cpu(cpu) { - struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu); - - cnt += atomic_read(&ct->ct_general.use) - 1; - } - return cnt; -} - void nf_conntrack_cleanup_start(void) { conntrack_gc_work.exiting = true; @@ -1650,8 +1631,6 @@ void nf_conntrack_cleanup_start(void) void nf_conntrack_cleanup_end(void) { RCU_INIT_POINTER(nf_ct_destroy, NULL); - while (untrack_refs() > 0) - schedule(); cancel_delayed_work_sync(&conntrack_gc_work.dwork); nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size); @@ -1825,20 +1804,11 @@ EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize); module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint, &nf_conntrack_htable_size, 0600); -void nf_ct_untracked_status_or(unsigned long bits) -{ - int cpu; - - for_each_possible_cpu(cpu) - per_cpu(nf_conntrack_untracked, cpu).status |= bits; -} -EXPORT_SYMBOL_GPL(nf_ct_untracked_status_or); - int nf_conntrack_init_start(void) { int max_factor = 8; int ret = -ENOMEM; - int i, cpu; + int i; seqcount_init(&nf_conntrack_generation); @@ -1921,15 +1891,6 @@ int nf_conntrack_init_start(void) if (ret < 0) goto err_proto; - /* Set up fake conntrack: to never be deleted, not in any hashes */ - for_each_possible_cpu(cpu) { - struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu); - write_pnet(&ct->ct_net, &init_net); - atomic_set(&ct->ct_general.use, 1); - } - /* - and look it like as a confirmed connection */ - nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED); - conntrack_gc_work_init(&conntrack_gc_work); queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, HZ); @@ -1977,6 +1938,7 @@ int 
nf_conntrack_init_net(struct net *net) int ret = -ENOMEM; int cpu; + BUILD_BUG_ON(IP_CT_UNTRACKED == IP_CT_NUMBER); atomic_set(&net->ct.count, 0); net->ct.pcpu_lists = alloc_percpu(struct ct_pcpu); diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 5e35643da650..9cbf49f9c1b7 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -861,9 +861,6 @@ static int __init nf_nat_init(void) nf_ct_helper_expectfn_register(&follow_master_nat); - /* Initialize fake conntrack so that NAT will skip it */ - nf_ct_untracked_status_or(IPS_NAT_DONE_MASK); - BUG_ON(nfnetlink_parse_nat_setup_hook != NULL); RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, nfnetlink_parse_nat_setup); diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 6e23dbbedd7f..6c6fd48b024c 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -72,12 +72,12 @@ static void nft_ct_get_eval(const struct nft_expr *expr, switch (priv->key) { case NFT_CT_STATE: - if (ct == NULL) - state = NF_CT_STATE_INVALID_BIT; - else if (nf_ct_is_untracked(ct)) + if (ct) + state = NF_CT_STATE_BIT(ctinfo); + else if (ctinfo == IP_CT_UNTRACKED) state = NF_CT_STATE_UNTRACKED_BIT; else - state = NF_CT_STATE_BIT(ctinfo); + state = NF_CT_STATE_INVALID_BIT; *dest = state; return; default: @@ -718,12 +718,10 @@ static void nft_notrack_eval(const struct nft_expr *expr, ct = nf_ct_get(pkt->skb, &ctinfo); /* Previously seen (loopback or untracked)? Ignore. 
*/ - if (ct) + if (ct || ctinfo == IP_CT_UNTRACKED) return; - ct = nf_ct_untracked_get(); - atomic_inc(&ct->ct_general.use); - nf_ct_set(skb, ct, IP_CT_NEW); + nf_ct_set(skb, ct, IP_CT_UNTRACKED); } static struct nft_expr_type nft_notrack_type; diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index b008db0184b8..3cbe1bcf6a74 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -26,11 +26,12 @@ static inline int xt_ct_target(struct sk_buff *skb, struct nf_conn *ct) if (skb->_nfct != 0) return XT_CONTINUE; - /* special case the untracked ct : we want the percpu object */ - if (!ct) - ct = nf_ct_untracked_get(); - atomic_inc(&ct->ct_general.use); - nf_ct_set(skb, ct, IP_CT_NEW); + if (ct) { + atomic_inc(&ct->ct_general.use); + nf_ct_set(skb, ct, IP_CT_NEW); + } else { + nf_ct_set(skb, ct, IP_CT_UNTRACKED); + } return XT_CONTINUE; } @@ -335,7 +336,7 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par, struct nf_conn *ct = info->ct; struct nf_conn_help *help; - if (ct && !nf_ct_is_untracked(ct)) { + if (ct) { help = nfct_help(ct); if (help) module_put(help->helper->me); @@ -412,8 +413,7 @@ notrack_tg(struct sk_buff *skb, const struct xt_action_param *par) if (skb->_nfct != 0) return XT_CONTINUE; - nf_ct_set(skb, nf_ct_untracked_get(), IP_CT_NEW); - nf_conntrack_get(skb_nfct(skb)); + nf_ct_set(skb, NULL, IP_CT_UNTRACKED); return XT_CONTINUE; } diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index c0fb217bc649..39cf1d019240 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -172,12 +172,11 @@ conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par, ct = nf_ct_get(skb, &ctinfo); - if (ct) { - if (nf_ct_is_untracked(ct)) - statebit = XT_CONNTRACK_STATE_UNTRACKED; - else - statebit = XT_CONNTRACK_STATE_BIT(ctinfo); - } else + if (ct) + statebit = XT_CONNTRACK_STATE_BIT(ctinfo); + else if (ctinfo == IP_CT_UNTRACKED) + statebit = XT_CONNTRACK_STATE_UNTRACKED; + else statebit 
= XT_CONNTRACK_STATE_INVALID; if (info->match_flags & XT_CONNTRACK_STATE) { diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c index 5746a33789a5..5fbd79194d21 100644 --- a/net/netfilter/xt_state.c +++ b/net/netfilter/xt_state.c @@ -28,14 +28,13 @@ state_mt(const struct sk_buff *skb, struct xt_action_param *par) unsigned int statebit; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - if (!ct) + if (ct) + statebit = XT_STATE_BIT(ctinfo); + else if (ctinfo == IP_CT_UNTRACKED) + statebit = XT_STATE_UNTRACKED; + else statebit = XT_STATE_INVALID; - else { - if (nf_ct_is_untracked(ct)) - statebit = XT_STATE_UNTRACKED; - else - statebit = XT_STATE_BIT(ctinfo); - } + return (sinfo->statemask & statebit); } -- cgit v1.2.3-71-gd317 From 4af3f75d7992dd0dc49da95fbc039fa3806fba4f Mon Sep 17 00:00:00 2001 From: Javier González Date: Sat, 15 Apr 2017 20:55:45 +0200 Subject: lightnvm: allow to init targets on factory mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Target initialization has two responsibilities: creating the target partition and instantiating the target. This patch enables to create a factory partition (e.g., do not trigger recovery on the given target). This is useful for target development and for being able to restore the device state at any moment in time without requiring a full-device erase. 
Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/core.c | 14 +++++++++++--- drivers/lightnvm/rrpc.c | 3 ++- include/linux/lightnvm.h | 3 ++- include/uapi/linux/lightnvm.h | 4 ++++ 4 files changed, 19 insertions(+), 5 deletions(-) (limited to 'include/uapi') diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index 5f84d2a418f6..a63b563b1a8a 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -280,7 +280,7 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create) tdisk->fops = &nvm_fops; tdisk->queue = tqueue; - targetdata = tt->init(tgt_dev, tdisk); + targetdata = tt->init(tgt_dev, tdisk, create->flags); if (IS_ERR(targetdata)) goto err_init; @@ -1244,8 +1244,16 @@ static long nvm_ioctl_dev_create(struct file *file, void __user *arg) create.tgtname[DISK_NAME_LEN - 1] = '\0'; if (create.flags != 0) { - pr_err("nvm: no flags supported\n"); - return -EINVAL; + __u32 flags = create.flags; + + /* Check for valid flags */ + if (flags & NVM_TARGET_FACTORY) + flags &= ~NVM_TARGET_FACTORY; + + if (flags) { + pr_err("nvm: flag not supported\n"); + return -EINVAL; + } } return __nvm_configure_create(&create); diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c index a8acf9e06401..5dba54470c2a 100644 --- a/drivers/lightnvm/rrpc.c +++ b/drivers/lightnvm/rrpc.c @@ -1506,7 +1506,8 @@ err: static struct nvm_tgt_type tt_rrpc; -static void *rrpc_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk) +static void *rrpc_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, + int flags) { struct request_queue *bqueue = dev->q; struct request_queue *tqueue = tdisk->queue; diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index eff7d1f312a8..7dfa56ebbc6d 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -436,7 +436,8 @@ static inline int ppa_cmp_blk(struct ppa_addr ppa1, struct ppa_addr ppa2) typedef blk_qc_t 
(nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *); typedef sector_t (nvm_tgt_capacity_fn)(void *); -typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *); +typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *, + int flags); typedef void (nvm_tgt_exit_fn)(void *); typedef int (nvm_tgt_sysfs_init_fn)(struct gendisk *); typedef void (nvm_tgt_sysfs_exit_fn)(struct gendisk *); diff --git a/include/uapi/linux/lightnvm.h b/include/uapi/linux/lightnvm.h index fd19f36b3129..c8aec4b9e73b 100644 --- a/include/uapi/linux/lightnvm.h +++ b/include/uapi/linux/lightnvm.h @@ -85,6 +85,10 @@ struct nvm_ioctl_create_conf { }; }; +enum { + NVM_TARGET_FACTORY = 1 << 0, /* Init target in factory mode */ +}; + struct nvm_ioctl_create { char dev[DISK_NAME_LEN]; /* open-channel SSD device */ char tgttype[NVM_TTYPE_NAME_MAX]; /* target type name */ -- cgit v1.2.3-71-gd317 From e46c7287b1c27683a8e30ca825fb98e2b97f1099 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 6 Apr 2017 17:02:00 -0400 Subject: nbd: add a basic netlink interface The existing ioctl interface for configuring NBD devices is a bit cumbersome and hard to extend. The other problem is we leave a userspace app sitting in its syscall until the device disconnects, which is less than ideal. This patch introduces a netlink interface for adding and disconnecting nbd devices. This has the benefits of being easily extendable without breaking older userspace applications, and allows us to configure a nbd device without leaving a userspace app sitting waiting for the device to disconnect. With this interface we also gain the ability to configure more devices than are preallocated at insmod time. We also have gained the ability to not specify a particular device and be provided one for us so that userspace doesn't need to find a free device to configure.
Signed-off-by: Josef Bacik Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 316 +++++++++++++++++++++++++++++++++++---- include/uapi/linux/nbd-netlink.h | 69 +++++++++ 2 files changed, 359 insertions(+), 26 deletions(-) create mode 100644 include/uapi/linux/nbd-netlink.h (limited to 'include/uapi') diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index d5828b9dbfef..efd2eba37c69 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -40,6 +40,8 @@ #include #include +#include +#include static DEFINE_IDR(nbd_index_idr); static DEFINE_MUTEX(nbd_index_mutex); @@ -63,6 +65,8 @@ struct recv_thread_args { #define NBD_DISCONNECT_REQUESTED 1 #define NBD_DISCONNECTED 2 #define NBD_HAS_PID_FILE 3 +#define NBD_HAS_CONFIG_REF 4 +#define NBD_BOUND 5 struct nbd_config { u32 flags; @@ -83,6 +87,7 @@ struct nbd_config { struct nbd_device { struct blk_mq_tag_set tag_set; + int index; refcount_t config_refs; struct nbd_config *config; struct mutex config_lock; @@ -114,6 +119,7 @@ static int part_shift; static int nbd_dev_dbg_init(struct nbd_device *nbd); static void nbd_dev_dbg_close(struct nbd_device *nbd); static void nbd_config_put(struct nbd_device *nbd); +static void nbd_connect_reply(struct genl_info *info, int index); static inline struct device *nbd_to_dev(struct nbd_device *nbd) { @@ -728,7 +734,8 @@ static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx, return ret; } -static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg) +static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg, + bool netlink) { struct nbd_config *config = nbd->config; struct socket *sock; @@ -740,13 +747,17 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg) if (!sock) return err; - if (!nbd->task_setup) + if (!netlink && !nbd->task_setup && + !test_bit(NBD_BOUND, &config->runtime_flags)) nbd->task_setup = current; - if (nbd->task_setup != current) { + + if (!netlink && + (nbd->task_setup != current || + test_bit(NBD_BOUND, 
&config->runtime_flags))) { dev_err(disk_to_dev(nbd->disk), "Device being setup by another task"); sockfd_put(sock); - return -EINVAL; + return -EBUSY; } socks = krealloc(config->socks, (config->num_connections + 1) * @@ -872,7 +883,7 @@ static void nbd_config_put(struct nbd_device *nbd) } } -static int nbd_start_device(struct nbd_device *nbd, struct block_device *bdev) +static int nbd_start_device(struct nbd_device *nbd) { struct nbd_config *config = nbd->config; int num_connections = config->num_connections; @@ -888,11 +899,8 @@ static int nbd_start_device(struct nbd_device *nbd, struct block_device *bdev) return -EINVAL; } - if (max_part) - bdev->bd_invalidated = 1; blk_mq_update_nr_hw_queues(&nbd->tag_set, config->num_connections); nbd->task_recv = current; - mutex_unlock(&nbd->config_lock); nbd_parse_flags(nbd); @@ -901,11 +909,7 @@ static int nbd_start_device(struct nbd_device *nbd, struct block_device *bdev) dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n"); return error; } - set_bit(NBD_HAS_PID_FILE, &config->runtime_flags); - if (max_part) - bdev->bd_invalidated = 1; - bd_set_size(bdev, config->bytesize); nbd_dev_dbg_init(nbd); for (i = 0; i < num_connections; i++) { @@ -924,18 +928,34 @@ static int nbd_start_device(struct nbd_device *nbd, struct block_device *bdev) args->index = i; queue_work(recv_workqueue, &args->work); } - error = wait_event_interruptible(config->recv_wq, + return error; +} + +static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *bdev) +{ + struct nbd_config *config = nbd->config; + int ret; + + ret = nbd_start_device(nbd); + if (ret) + return ret; + + bd_set_size(bdev, config->bytesize); + if (max_part) + bdev->bd_invalidated = 1; + mutex_unlock(&nbd->config_lock); + ret = wait_event_interruptible(config->recv_wq, atomic_read(&config->recv_threads) == 0); - if (error) + if (ret) sock_shutdown(nbd); mutex_lock(&nbd->config_lock); - + bd_set_size(bdev, 0); /* user requested, ignore socket errors */ 
if (test_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags)) - error = 0; + ret = 0; if (test_bit(NBD_TIMEDOUT, &config->runtime_flags)) - error = -ETIMEDOUT; - return error; + ret = -ETIMEDOUT; + return ret; } static void nbd_clear_sock_ioctl(struct nbd_device *nbd, @@ -944,6 +964,9 @@ static void nbd_clear_sock_ioctl(struct nbd_device *nbd, nbd_clear_sock(nbd); kill_bdev(bdev); nbd_bdev_reset(bdev); + if (test_and_clear_bit(NBD_HAS_CONFIG_REF, + &nbd->config->runtime_flags)) + nbd_config_put(nbd); } /* Must be called with config_lock held */ @@ -959,7 +982,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, nbd_clear_sock_ioctl(nbd, bdev); return 0; case NBD_SET_SOCK: - return nbd_add_socket(nbd, arg); + return nbd_add_socket(nbd, arg, false); case NBD_SET_BLKSIZE: nbd_size_set(nbd, arg, div_s64(config->bytesize, arg)); @@ -982,7 +1005,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, config->flags = arg; return 0; case NBD_DO_IT: - return nbd_start_device(nbd, bdev); + return nbd_start_device_ioctl(nbd, bdev); case NBD_CLEAR_QUE: /* * This is for compatibility only. The queue is always cleared @@ -1003,13 +1026,22 @@ static int nbd_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { struct nbd_device *nbd = bdev->bd_disk->private_data; - int error; + struct nbd_config *config = nbd->config; + int error = -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EPERM; mutex_lock(&nbd->config_lock); - error = __nbd_ioctl(bdev, nbd, cmd, arg); + + /* Don't allow ioctl operations on a nbd device that was created with + * netlink, unless it's DISCONNECT or CLEAR_SOCK, which are fine. 
+ */ + if (!test_bit(NBD_BOUND, &config->runtime_flags) || + (cmd == NBD_DISCONNECT || cmd == NBD_CLEAR_SOCK)) + error = __nbd_ioctl(bdev, nbd, cmd, arg); + else + dev_err(nbd_to_dev(nbd), "Cannot use ioctl interface on a netlink controlled device.\n"); mutex_unlock(&nbd->config_lock); return error; } @@ -1258,6 +1290,7 @@ static int nbd_dev_add(int index) if (err < 0) goto out_free_disk; + nbd->index = index; nbd->disk = disk; nbd->tag_set.ops = &nbd_mq_ops; nbd->tag_set.nr_hw_queues = 1; @@ -1312,10 +1345,235 @@ out: return err; } -/* - * And here should be modules and kernel interface - * (Just smiley confuses emacs :-) - */ +static int find_free_cb(int id, void *ptr, void *data) +{ + struct nbd_device *nbd = ptr; + struct nbd_device **found = data; + + if (!refcount_read(&nbd->config_refs)) { + *found = nbd; + return 1; + } + return 0; +} + +/* Netlink interface. */ +static struct nla_policy nbd_attr_policy[NBD_ATTR_MAX + 1] = { + [NBD_ATTR_INDEX] = { .type = NLA_U32 }, + [NBD_ATTR_SIZE_BYTES] = { .type = NLA_U64 }, + [NBD_ATTR_BLOCK_SIZE_BYTES] = { .type = NLA_U64 }, + [NBD_ATTR_TIMEOUT] = { .type = NLA_U64 }, + [NBD_ATTR_SERVER_FLAGS] = { .type = NLA_U64 }, + [NBD_ATTR_CLIENT_FLAGS] = { .type = NLA_U64 }, + [NBD_ATTR_SOCKETS] = { .type = NLA_NESTED}, +}; + +static struct nla_policy nbd_sock_policy[NBD_SOCK_MAX + 1] = { + [NBD_SOCK_FD] = { .type = NLA_U32 }, +}; + +static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info) +{ + struct nbd_device *nbd = NULL; + struct nbd_config *config; + int index = -1; + int ret; + + if (!netlink_capable(skb, CAP_SYS_ADMIN)) + return -EPERM; + + if (info->attrs[NBD_ATTR_INDEX]) + index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]); + if (!info->attrs[NBD_ATTR_SOCKETS]) { + printk(KERN_ERR "nbd: must specify at least one socket\n"); + return -EINVAL; + } + if (!info->attrs[NBD_ATTR_SIZE_BYTES]) { + printk(KERN_ERR "nbd: must specify a size in bytes for the device\n"); + return -EINVAL; + } +again: + 
mutex_lock(&nbd_index_mutex); + if (index == -1) { + ret = idr_for_each(&nbd_index_idr, &find_free_cb, &nbd); + if (ret == 0) { + int new_index; + new_index = nbd_dev_add(-1); + if (new_index < 0) { + mutex_unlock(&nbd_index_mutex); + printk(KERN_ERR "nbd: failed to add new device\n"); + return ret; + } + nbd = idr_find(&nbd_index_idr, new_index); + } + } else { + nbd = idr_find(&nbd_index_idr, index); + } + mutex_unlock(&nbd_index_mutex); + if (!nbd) { + printk(KERN_ERR "nbd: couldn't find device at index %d\n", + index); + return -EINVAL; + } + + mutex_lock(&nbd->config_lock); + if (refcount_read(&nbd->config_refs)) { + mutex_unlock(&nbd->config_lock); + if (index == -1) + goto again; + printk(KERN_ERR "nbd: nbd%d already in use\n", index); + return -EBUSY; + } + if (WARN_ON(nbd->config)) { + mutex_unlock(&nbd->config_lock); + return -EINVAL; + } + config = nbd->config = nbd_alloc_config(); + if (!nbd->config) { + mutex_unlock(&nbd->config_lock); + printk(KERN_ERR "nbd: couldn't allocate config\n"); + return -ENOMEM; + } + refcount_set(&nbd->config_refs, 1); + set_bit(NBD_BOUND, &config->runtime_flags); + + if (info->attrs[NBD_ATTR_SIZE_BYTES]) { + u64 bytes = nla_get_u64(info->attrs[NBD_ATTR_SIZE_BYTES]); + nbd_size_set(nbd, config->blksize, + div64_u64(bytes, config->blksize)); + } + if (info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]) { + u64 bsize = + nla_get_u64(info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]); + nbd_size_set(nbd, bsize, div64_u64(config->bytesize, bsize)); + } + if (info->attrs[NBD_ATTR_TIMEOUT]) { + u64 timeout = nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]); + nbd->tag_set.timeout = timeout * HZ; + blk_queue_rq_timeout(nbd->disk->queue, timeout * HZ); + } + if (info->attrs[NBD_ATTR_SERVER_FLAGS]) + config->flags = + nla_get_u64(info->attrs[NBD_ATTR_SERVER_FLAGS]); + if (info->attrs[NBD_ATTR_SOCKETS]) { + struct nlattr *attr; + int rem, fd; + + nla_for_each_nested(attr, info->attrs[NBD_ATTR_SOCKETS], + rem) { + struct nlattr *socks[NBD_SOCK_MAX+1]; + + if 
(nla_type(attr) != NBD_SOCK_ITEM) { + printk(KERN_ERR "nbd: socks must be embedded in a SOCK_ITEM attr\n"); + ret = -EINVAL; + goto out; + } + ret = nla_parse_nested(socks, NBD_SOCK_MAX, attr, + nbd_sock_policy); + if (ret != 0) { + printk(KERN_ERR "nbd: error processing sock list\n"); + ret = -EINVAL; + goto out; + } + if (!socks[NBD_SOCK_FD]) + continue; + fd = (int)nla_get_u32(socks[NBD_SOCK_FD]); + ret = nbd_add_socket(nbd, fd, true); + if (ret) + goto out; + } + } + ret = nbd_start_device(nbd); +out: + mutex_unlock(&nbd->config_lock); + if (!ret) { + set_bit(NBD_HAS_CONFIG_REF, &config->runtime_flags); + refcount_inc(&nbd->config_refs); + nbd_connect_reply(info, nbd->index); + } + nbd_config_put(nbd); + return ret; +} + +static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info) +{ + struct nbd_device *nbd; + int index; + + if (!netlink_capable(skb, CAP_SYS_ADMIN)) + return -EPERM; + + if (!info->attrs[NBD_ATTR_INDEX]) { + printk(KERN_ERR "nbd: must specify an index to disconnect\n"); + return -EINVAL; + } + index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]); + mutex_lock(&nbd_index_mutex); + nbd = idr_find(&nbd_index_idr, index); + mutex_unlock(&nbd_index_mutex); + if (!nbd) { + printk(KERN_ERR "nbd: couldn't find device at index %d\n", + index); + return -EINVAL; + } + if (!refcount_inc_not_zero(&nbd->config_refs)) + return 0; + mutex_lock(&nbd->config_lock); + nbd_disconnect(nbd); + mutex_unlock(&nbd->config_lock); + if (test_and_clear_bit(NBD_HAS_CONFIG_REF, + &nbd->config->runtime_flags)) + nbd_config_put(nbd); + nbd_config_put(nbd); + return 0; +} + +static const struct genl_ops nbd_connect_genl_ops[] = { + { + .cmd = NBD_CMD_CONNECT, + .policy = nbd_attr_policy, + .doit = nbd_genl_connect, + }, + { + .cmd = NBD_CMD_DISCONNECT, + .policy = nbd_attr_policy, + .doit = nbd_genl_disconnect, + }, +}; + +static struct genl_family nbd_genl_family __ro_after_init = { + .hdrsize = 0, + .name = NBD_GENL_FAMILY_NAME, + .version = NBD_GENL_VERSION, + 
.module = THIS_MODULE, + .ops = nbd_connect_genl_ops, + .n_ops = ARRAY_SIZE(nbd_connect_genl_ops), + .maxattr = NBD_ATTR_MAX, +}; + +static void nbd_connect_reply(struct genl_info *info, int index) +{ + struct sk_buff *skb; + void *msg_head; + int ret; + + skb = genlmsg_new(nla_total_size(sizeof(u32)), GFP_KERNEL); + if (!skb) + return; + msg_head = genlmsg_put_reply(skb, info, &nbd_genl_family, 0, + NBD_CMD_CONNECT); + if (!msg_head) { + nlmsg_free(skb); + return; + } + ret = nla_put_u32(skb, NBD_ATTR_INDEX, index); + if (ret) { + nlmsg_free(skb); + return; + } + genlmsg_end(skb, msg_head); + genlmsg_reply(skb, info); +} static int __init nbd_init(void) { @@ -1358,6 +1616,11 @@ static int __init nbd_init(void) return -EIO; } + if (genl_register_family(&nbd_genl_family)) { + unregister_blkdev(NBD_MAJOR, "nbd"); + destroy_workqueue(recv_workqueue); + return -EINVAL; + } nbd_dbg_init(); mutex_lock(&nbd_index_mutex); @@ -1380,6 +1643,7 @@ static void __exit nbd_cleanup(void) idr_for_each(&nbd_index_idr, &nbd_exit_cb, NULL); idr_destroy(&nbd_index_idr); + genl_unregister_family(&nbd_genl_family); destroy_workqueue(recv_workqueue); unregister_blkdev(NBD_MAJOR, "nbd"); } diff --git a/include/uapi/linux/nbd-netlink.h b/include/uapi/linux/nbd-netlink.h new file mode 100644 index 000000000000..fd0f4e45f03e --- /dev/null +++ b/include/uapi/linux/nbd-netlink.h @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2017 Facebook. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ +#ifndef _UAPILINUX_NBD_NETLINK_H +#define _UAPILINUX_NBD_NETLINK_H + +#define NBD_GENL_FAMILY_NAME "nbd" +#define NBD_GENL_VERSION 0x1 + +/* Configuration policy attributes, used for CONNECT */ +enum { + NBD_ATTR_UNSPEC, + NBD_ATTR_INDEX, + NBD_ATTR_SIZE_BYTES, + NBD_ATTR_BLOCK_SIZE_BYTES, + NBD_ATTR_TIMEOUT, + NBD_ATTR_SERVER_FLAGS, + NBD_ATTR_CLIENT_FLAGS, + NBD_ATTR_SOCKETS, + __NBD_ATTR_MAX, +}; +#define NBD_ATTR_MAX (__NBD_ATTR_MAX - 1) + +/* + * This is the format for multiple sockets with NBD_ATTR_SOCKETS + * + * [NBD_ATTR_SOCKETS] + * [NBD_SOCK_ITEM] + * [NBD_SOCK_FD] + * [NBD_SOCK_ITEM] + * [NBD_SOCK_FD] + */ +enum { + NBD_SOCK_ITEM_UNSPEC, + NBD_SOCK_ITEM, + __NBD_SOCK_ITEM_MAX, +}; +#define NBD_SOCK_ITEM_MAX (__NBD_SOCK_ITEM_MAX - 1) + +enum { + NBD_SOCK_UNSPEC, + NBD_SOCK_FD, + __NBD_SOCK_MAX, +}; +#define NBD_SOCK_MAX (__NBD_SOCK_MAX - 1) + +enum { + NBD_CMD_UNSPEC, + NBD_CMD_CONNECT, + NBD_CMD_DISCONNECT, + __NBD_CMD_MAX, +}; +#define NBD_CMD_MAX (__NBD_CMD_MAX - 1) + +#endif /* _UAPILINUX_NBD_NETLINK_H */ -- cgit v1.2.3-71-gd317 From b7aa3d39385dc2d95899f9e379623fef446a2acd Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 6 Apr 2017 17:02:01 -0400 Subject: nbd: add a reconfigure netlink command We want to be able to reconnect dead connections to existing block devices, so add a reconfigure netlink command. We will also allow users to change their timeout on the fly, but everything else will require a disconnect and reconnect. You won't be able to add more connections either, simply replace dead connections with new more lively connections. 
Signed-off-by: Josef Bacik Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 141 +++++++++++++++++++++++++++++++++++++++ include/uapi/linux/nbd-netlink.h | 1 + 2 files changed, 142 insertions(+) (limited to 'include/uapi') diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index efd2eba37c69..394ea891d909 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -785,6 +785,59 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg, return 0; } +static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg) +{ + struct nbd_config *config = nbd->config; + struct socket *sock, *old; + struct recv_thread_args *args; + int i; + int err; + + sock = sockfd_lookup(arg, &err); + if (!sock) + return err; + + args = kzalloc(sizeof(*args), GFP_KERNEL); + if (!args) { + sockfd_put(sock); + return -ENOMEM; + } + + for (i = 0; i < config->num_connections; i++) { + struct nbd_sock *nsock = config->socks[i]; + + if (!nsock->dead) + continue; + + mutex_lock(&nsock->tx_lock); + if (!nsock->dead) { + mutex_unlock(&nsock->tx_lock); + continue; + } + sk_set_memalloc(sock->sk); + atomic_inc(&config->recv_threads); + refcount_inc(&nbd->config_refs); + old = nsock->sock; + nsock->fallback_index = -1; + nsock->sock = sock; + nsock->dead = false; + INIT_WORK(&args->work, recv_work); + args->index = i; + args->nbd = nbd; + mutex_unlock(&nsock->tx_lock); + sockfd_put(old); + + /* We take the tx_mutex in an error path in the recv_work, so we + * need to queue_work outside of the tx_mutex. 
+ */ + queue_work(recv_workqueue, &args->work); + return 0; + } + sockfd_put(sock); + kfree(args); + return -ENOSPC; +} + /* Reset all properties of an NBD device */ static void nbd_reset(struct nbd_device *nbd) { @@ -1528,6 +1581,89 @@ static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info) return 0; } +static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info) +{ + struct nbd_device *nbd = NULL; + struct nbd_config *config; + int index; + int ret = -EINVAL; + + if (!netlink_capable(skb, CAP_SYS_ADMIN)) + return -EPERM; + + if (!info->attrs[NBD_ATTR_INDEX]) { + printk(KERN_ERR "nbd: must specify a device to reconfigure\n"); + return -EINVAL; + } + index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]); + mutex_lock(&nbd_index_mutex); + nbd = idr_find(&nbd_index_idr, index); + mutex_unlock(&nbd_index_mutex); + if (!nbd) { + printk(KERN_ERR "nbd: couldn't find a device at index %d\n", + index); + return -EINVAL; + } + + if (!refcount_inc_not_zero(&nbd->config_refs)) { + dev_err(nbd_to_dev(nbd), + "not configured, cannot reconfigure\n"); + return -EINVAL; + } + + mutex_lock(&nbd->config_lock); + config = nbd->config; + if (!test_bit(NBD_BOUND, &config->runtime_flags) || + !nbd->task_recv) { + dev_err(nbd_to_dev(nbd), + "not configured, cannot reconfigure\n"); + goto out; + } + + if (info->attrs[NBD_ATTR_TIMEOUT]) { + u64 timeout = nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]); + nbd->tag_set.timeout = timeout * HZ; + blk_queue_rq_timeout(nbd->disk->queue, timeout * HZ); + } + + if (info->attrs[NBD_ATTR_SOCKETS]) { + struct nlattr *attr; + int rem, fd; + + nla_for_each_nested(attr, info->attrs[NBD_ATTR_SOCKETS], + rem) { + struct nlattr *socks[NBD_SOCK_MAX+1]; + + if (nla_type(attr) != NBD_SOCK_ITEM) { + printk(KERN_ERR "nbd: socks must be embedded in a SOCK_ITEM attr\n"); + ret = -EINVAL; + goto out; + } + ret = nla_parse_nested(socks, NBD_SOCK_MAX, attr, + nbd_sock_policy); + if (ret != 0) { + printk(KERN_ERR "nbd: error processing sock 
list\n"); + ret = -EINVAL; + goto out; + } + if (!socks[NBD_SOCK_FD]) + continue; + fd = (int)nla_get_u32(socks[NBD_SOCK_FD]); + ret = nbd_reconnect_socket(nbd, fd); + if (ret) { + if (ret == -ENOSPC) + ret = 0; + goto out; + } + dev_info(nbd_to_dev(nbd), "reconnected socket\n"); + } + } +out: + mutex_unlock(&nbd->config_lock); + nbd_config_put(nbd); + return ret; +} + static const struct genl_ops nbd_connect_genl_ops[] = { { .cmd = NBD_CMD_CONNECT, @@ -1539,6 +1675,11 @@ static const struct genl_ops nbd_connect_genl_ops[] = { .policy = nbd_attr_policy, .doit = nbd_genl_disconnect, }, + { + .cmd = NBD_CMD_RECONFIGURE, + .policy = nbd_attr_policy, + .doit = nbd_genl_reconfigure, + }, }; static struct genl_family nbd_genl_family __ro_after_init = { diff --git a/include/uapi/linux/nbd-netlink.h b/include/uapi/linux/nbd-netlink.h index fd0f4e45f03e..f932f96a7c2f 100644 --- a/include/uapi/linux/nbd-netlink.h +++ b/include/uapi/linux/nbd-netlink.h @@ -62,6 +62,7 @@ enum { NBD_CMD_UNSPEC, NBD_CMD_CONNECT, NBD_CMD_DISCONNECT, + NBD_CMD_RECONFIGURE, __NBD_CMD_MAX, }; #define NBD_CMD_MAX (__NBD_CMD_MAX - 1) -- cgit v1.2.3-71-gd317 From 799f9a38bc9f5551819fd118a82826df0a8525cf Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 6 Apr 2017 17:02:02 -0400 Subject: nbd: multicast dead link notifications Provide a mechanism to notify userspace that there's been a link problem on a NBD device. This will allow userspace to re-establish a connection and provide the new socket to the device without disrupting the device. 
Signed-off-by: Josef Bacik Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 89 ++++++++++++++++++++++++++++++++++------ include/uapi/linux/nbd-netlink.h | 6 ++- 2 files changed, 81 insertions(+), 14 deletions(-) (limited to 'include/uapi') diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 394ea891d909..b55cc057f569 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -53,6 +53,7 @@ struct nbd_sock { int sent; bool dead; int fallback_index; + int cookie; }; struct recv_thread_args { @@ -61,6 +62,11 @@ struct recv_thread_args { int index; }; +struct link_dead_args { + struct work_struct work; + int index; +}; + #define NBD_TIMEDOUT 0 #define NBD_DISCONNECT_REQUESTED 1 #define NBD_DISCONNECTED 2 @@ -100,6 +106,7 @@ struct nbd_device { struct nbd_cmd { struct nbd_device *nbd; int index; + int cookie; struct completion send_complete; }; @@ -120,6 +127,7 @@ static int nbd_dev_dbg_init(struct nbd_device *nbd); static void nbd_dev_dbg_close(struct nbd_device *nbd); static void nbd_config_put(struct nbd_device *nbd); static void nbd_connect_reply(struct genl_info *info, int index); +static void nbd_dead_link_work(struct work_struct *work); static inline struct device *nbd_to_dev(struct nbd_device *nbd) { @@ -152,8 +160,24 @@ static struct device_attribute pid_attr = { .show = pid_show, }; -static void nbd_mark_nsock_dead(struct nbd_sock *nsock) +static int nbd_disconnected(struct nbd_config *config) +{ + return test_bit(NBD_DISCONNECTED, &config->runtime_flags) || + test_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags); +} + +static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock, + int notify) { + if (!nsock->dead && notify && !nbd_disconnected(nbd->config)) { + struct link_dead_args *args; + args = kmalloc(sizeof(struct link_dead_args), GFP_NOIO); + if (args) { + INIT_WORK(&args->work, nbd_dead_link_work); + args->index = nbd->index; + queue_work(system_wq, &args->work); + } + } if (!nsock->dead) 
kernel_sock_shutdown(nsock->sock, SHUT_RDWR); nsock->dead = true; @@ -215,8 +239,7 @@ static void sock_shutdown(struct nbd_device *nbd) for (i = 0; i < config->num_connections; i++) { struct nbd_sock *nsock = config->socks[i]; mutex_lock(&nsock->tx_lock); - kernel_sock_shutdown(nsock->sock, SHUT_RDWR); - nbd_mark_nsock_dead(nsock); + nbd_mark_nsock_dead(nbd, nsock, 0); mutex_unlock(&nsock->tx_lock); } dev_warn(disk_to_dev(nbd->disk), "shutting down sockets\n"); @@ -248,7 +271,14 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, struct nbd_sock *nsock = config->socks[cmd->index]; mutex_lock(&nsock->tx_lock); - nbd_mark_nsock_dead(nsock); + /* We can have multiple outstanding requests, so + * we don't want to mark the nsock dead if we've + * already reconnected with a new socket, so + * only mark it dead if its the same socket we + * were sent out on. + */ + if (cmd->cookie == nsock->cookie) + nbd_mark_nsock_dead(nbd, nsock, 1); mutex_unlock(&nsock->tx_lock); } blk_mq_requeue_request(req, true); @@ -370,6 +400,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) iov_iter_advance(&from, sent); } cmd->index = index; + cmd->cookie = nsock->cookie; request.type = htonl(type); if (type != NBD_CMD_FLUSH) { request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9); @@ -458,12 +489,6 @@ out: return 0; } -static int nbd_disconnected(struct nbd_config *config) -{ - return test_bit(NBD_DISCONNECTED, &config->runtime_flags) || - test_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags); -} - /* NULL returned = something went wrong, inform userspace */ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) { @@ -564,7 +589,7 @@ static void recv_work(struct work_struct *work) struct nbd_sock *nsock = config->socks[args->index]; mutex_lock(&nsock->tx_lock); - nbd_mark_nsock_dead(nsock); + nbd_mark_nsock_dead(nbd, nsock, 1); mutex_unlock(&nsock->tx_lock); ret = PTR_ERR(cmd); break; @@ -691,7 +716,7 @@ again: if 
(ret == -EAGAIN) { dev_err_ratelimited(disk_to_dev(nbd->disk), "Request send failed trying another connection\n"); - nbd_mark_nsock_dead(nsock); + nbd_mark_nsock_dead(nbd, nsock, 1); mutex_unlock(&nsock->tx_lock); goto again; } @@ -780,6 +805,7 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg, nsock->sock = sock; nsock->pending = NULL; nsock->sent = 0; + nsock->cookie = 0; socks[config->num_connections++] = nsock; return 0; @@ -824,6 +850,7 @@ static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg) INIT_WORK(&args->work, recv_work); args->index = i; args->nbd = nbd; + nsock->cookie++; mutex_unlock(&nsock->tx_lock); sockfd_put(old); @@ -1682,6 +1709,10 @@ static const struct genl_ops nbd_connect_genl_ops[] = { }, }; +static const struct genl_multicast_group nbd_mcast_grps[] = { + { .name = NBD_GENL_MCAST_GROUP_NAME, }, +}; + static struct genl_family nbd_genl_family __ro_after_init = { .hdrsize = 0, .name = NBD_GENL_FAMILY_NAME, @@ -1690,6 +1721,8 @@ static struct genl_family nbd_genl_family __ro_after_init = { .ops = nbd_connect_genl_ops, .n_ops = ARRAY_SIZE(nbd_connect_genl_ops), .maxattr = NBD_ATTR_MAX, + .mcgrps = nbd_mcast_grps, + .n_mcgrps = ARRAY_SIZE(nbd_mcast_grps), }; static void nbd_connect_reply(struct genl_info *info, int index) @@ -1716,6 +1749,38 @@ static void nbd_connect_reply(struct genl_info *info, int index) genlmsg_reply(skb, info); } +static void nbd_mcast_index(int index) +{ + struct sk_buff *skb; + void *msg_head; + int ret; + + skb = genlmsg_new(nla_total_size(sizeof(u32)), GFP_KERNEL); + if (!skb) + return; + msg_head = genlmsg_put(skb, 0, 0, &nbd_genl_family, 0, + NBD_CMD_LINK_DEAD); + if (!msg_head) { + nlmsg_free(skb); + return; + } + ret = nla_put_u32(skb, NBD_ATTR_INDEX, index); + if (ret) { + nlmsg_free(skb); + return; + } + genlmsg_end(skb, msg_head); + genlmsg_multicast(&nbd_genl_family, skb, 0, 0, GFP_KERNEL); +} + +static void nbd_dead_link_work(struct work_struct *work) +{ + struct 
link_dead_args *args = container_of(work, struct link_dead_args, + work); + nbd_mcast_index(args->index); + kfree(args); +} + static int __init nbd_init(void) { int i; diff --git a/include/uapi/linux/nbd-netlink.h b/include/uapi/linux/nbd-netlink.h index f932f96a7c2f..b69105cc8eea 100644 --- a/include/uapi/linux/nbd-netlink.h +++ b/include/uapi/linux/nbd-netlink.h @@ -18,8 +18,9 @@ #ifndef _UAPILINUX_NBD_NETLINK_H #define _UAPILINUX_NBD_NETLINK_H -#define NBD_GENL_FAMILY_NAME "nbd" -#define NBD_GENL_VERSION 0x1 +#define NBD_GENL_FAMILY_NAME "nbd" +#define NBD_GENL_VERSION 0x1 +#define NBD_GENL_MCAST_GROUP_NAME "nbd_mc_group" /* Configuration policy attributes, used for CONNECT */ enum { @@ -63,6 +64,7 @@ enum { NBD_CMD_CONNECT, NBD_CMD_DISCONNECT, NBD_CMD_RECONFIGURE, + NBD_CMD_LINK_DEAD, __NBD_CMD_MAX, }; #define NBD_CMD_MAX (__NBD_CMD_MAX - 1) -- cgit v1.2.3-71-gd317 From 560bc4b39952ed77cdb0000992e9415b0ee89edb Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 6 Apr 2017 17:02:04 -0400 Subject: nbd: handle dead connections Sometimes we like to upgrade our server without making all of our clients freak out and reconnect. This patch provides a way to specify a dead connection timeout to allow us to pause all requests and wait for new connections to be opened. With this in place I can take down the nbd server for less than the dead connection timeout time and bring it back up and everything resumes gracefully. 
Signed-off-by: Josef Bacik Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 63 +++++++++++++++++++++++++++++++++++++--- include/uapi/linux/nbd-netlink.h | 1 + 2 files changed, 60 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 71e98cb78c95..c5f866bcfea6 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -77,9 +77,12 @@ struct link_dead_args { struct nbd_config { u32 flags; unsigned long runtime_flags; + u64 dead_conn_timeout; struct nbd_sock **socks; int num_connections; + atomic_t live_connections; + wait_queue_head_t conn_wait; atomic_t recv_threads; wait_queue_head_t recv_wq; @@ -178,8 +181,10 @@ static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock, queue_work(system_wq, &args->work); } } - if (!nsock->dead) + if (!nsock->dead) { kernel_sock_shutdown(nsock->sock, SHUT_RDWR); + atomic_dec(&nbd->config->live_connections); + } nsock->dead = true; nsock->pending = NULL; nsock->sent = 0; @@ -257,6 +262,14 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, return BLK_EH_HANDLED; } + /* If we are waiting on our dead timer then we could get timeout + * callbacks for our request. For this we just want to reset the timer + * and let the queue side take care of everything. 
+ */ + if (!completion_done(&cmd->send_complete)) { + nbd_config_put(nbd); + return BLK_EH_RESET_TIMER; + } config = nbd->config; if (config->num_connections > 1) { @@ -665,6 +678,19 @@ static int find_fallback(struct nbd_device *nbd, int index) return new_index; } +static int wait_for_reconnect(struct nbd_device *nbd) +{ + struct nbd_config *config = nbd->config; + if (!config->dead_conn_timeout) + return 0; + if (test_bit(NBD_DISCONNECTED, &config->runtime_flags)) + return 0; + wait_event_interruptible_timeout(config->conn_wait, + atomic_read(&config->live_connections), + config->dead_conn_timeout); + return atomic_read(&config->live_connections); +} + static int nbd_handle_cmd(struct nbd_cmd *cmd, int index) { struct request *req = blk_mq_rq_from_pdu(cmd); @@ -691,12 +717,24 @@ again: nsock = config->socks[index]; mutex_lock(&nsock->tx_lock); if (nsock->dead) { + int old_index = index; index = find_fallback(nbd, index); + mutex_unlock(&nsock->tx_lock); if (index < 0) { - ret = -EIO; - goto out; + if (wait_for_reconnect(nbd)) { + index = old_index; + goto again; + } + /* All the sockets should already be down at this point, + * we just want to make sure that DISCONNECTED is set so + * any requests that come in that were queue'ed waiting + * for the reconnect timer don't trigger the timer again + * and instead just error out. + */ + sock_shutdown(nbd); + nbd_config_put(nbd); + return -EIO; } - mutex_unlock(&nsock->tx_lock); goto again; } @@ -809,6 +847,7 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg, nsock->sent = 0; nsock->cookie = 0; socks[config->num_connections++] = nsock; + atomic_inc(&config->live_connections); return 0; } @@ -860,6 +899,9 @@ static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg) * need to queue_work outside of the tx_mutex. 
*/ queue_work(recv_workqueue, &args->work); + + atomic_inc(&config->live_connections); + wake_up(&config->conn_wait); return 0; } sockfd_put(sock); @@ -1137,7 +1179,9 @@ static struct nbd_config *nbd_alloc_config(void) return NULL; atomic_set(&config->recv_threads, 0); init_waitqueue_head(&config->recv_wq); + init_waitqueue_head(&config->conn_wait); config->blksize = 1024; + atomic_set(&config->live_connections, 0); try_module_get(THIS_MODULE); return config; } @@ -1448,6 +1492,7 @@ static struct nla_policy nbd_attr_policy[NBD_ATTR_MAX + 1] = { [NBD_ATTR_SERVER_FLAGS] = { .type = NLA_U64 }, [NBD_ATTR_CLIENT_FLAGS] = { .type = NLA_U64 }, [NBD_ATTR_SOCKETS] = { .type = NLA_NESTED}, + [NBD_ATTR_DEAD_CONN_TIMEOUT] = { .type = NLA_U64 }, }; static struct nla_policy nbd_sock_policy[NBD_SOCK_MAX + 1] = { @@ -1534,6 +1579,11 @@ again: nbd->tag_set.timeout = timeout * HZ; blk_queue_rq_timeout(nbd->disk->queue, timeout * HZ); } + if (info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]) { + config->dead_conn_timeout = + nla_get_u64(info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]); + config->dead_conn_timeout *= HZ; + } if (info->attrs[NBD_ATTR_SERVER_FLAGS]) config->flags = nla_get_u64(info->attrs[NBD_ATTR_SERVER_FLAGS]); @@ -1654,6 +1704,11 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info) nbd->tag_set.timeout = timeout * HZ; blk_queue_rq_timeout(nbd->disk->queue, timeout * HZ); } + if (info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]) { + config->dead_conn_timeout = + nla_get_u64(info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]); + config->dead_conn_timeout *= HZ; + } if (info->attrs[NBD_ATTR_SOCKETS]) { struct nlattr *attr; diff --git a/include/uapi/linux/nbd-netlink.h b/include/uapi/linux/nbd-netlink.h index b69105cc8eea..c2209c75626c 100644 --- a/include/uapi/linux/nbd-netlink.h +++ b/include/uapi/linux/nbd-netlink.h @@ -32,6 +32,7 @@ enum { NBD_ATTR_SERVER_FLAGS, NBD_ATTR_CLIENT_FLAGS, NBD_ATTR_SOCKETS, + NBD_ATTR_DEAD_CONN_TIMEOUT, __NBD_ATTR_MAX, }; #define NBD_ATTR_MAX 
(__NBD_ATTR_MAX - 1) -- cgit v1.2.3-71-gd317 From 47d902b90a32a42a3d33aef3a02170fc6f70aa23 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 6 Apr 2017 17:02:05 -0400 Subject: nbd: add a status netlink command Allow users to query the status of existing nbd devices. Right now this only returns whether or not the device is connected, but could be extended in the future to include more information. Signed-off-by: Josef Bacik Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 108 +++++++++++++++++++++++++++++++++++++++ include/uapi/linux/nbd-netlink.h | 25 +++++++++ 2 files changed, 133 insertions(+) (limited to 'include/uapi') diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index c5f866bcfea6..cb45d799bc5c 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -45,6 +45,7 @@ static DEFINE_IDR(nbd_index_idr); static DEFINE_MUTEX(nbd_index_mutex); +static int nbd_total_devices = 0; struct nbd_sock { struct socket *sock; @@ -130,6 +131,7 @@ static int nbd_dev_dbg_init(struct nbd_device *nbd); static void nbd_dev_dbg_close(struct nbd_device *nbd); static void nbd_config_put(struct nbd_device *nbd); static void nbd_connect_reply(struct genl_info *info, int index); +static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info); static void nbd_dead_link_work(struct work_struct *work); static inline struct device *nbd_to_dev(struct nbd_device *nbd) @@ -1457,6 +1459,7 @@ static int nbd_dev_add(int index) sprintf(disk->disk_name, "nbd%d", index); nbd_reset(nbd); add_disk(disk); + nbd_total_devices++; return index; out_free_tags: @@ -1493,12 +1496,22 @@ static struct nla_policy nbd_attr_policy[NBD_ATTR_MAX + 1] = { [NBD_ATTR_CLIENT_FLAGS] = { .type = NLA_U64 }, [NBD_ATTR_SOCKETS] = { .type = NLA_NESTED}, [NBD_ATTR_DEAD_CONN_TIMEOUT] = { .type = NLA_U64 }, + [NBD_ATTR_DEVICE_LIST] = { .type = NLA_NESTED}, }; static struct nla_policy nbd_sock_policy[NBD_SOCK_MAX + 1] = { [NBD_SOCK_FD] = { .type = NLA_U32 }, }; +/* We don't use this right now since we 
don't parse the incoming list, but we + * still want it here so userspace knows what to expect. + */ +static struct nla_policy __attribute__((unused)) +nbd_device_policy[NBD_DEVICE_ATTR_MAX + 1] = { + [NBD_DEVICE_INDEX] = { .type = NLA_U32 }, + [NBD_DEVICE_CONNECTED] = { .type = NLA_U8 }, +}; + static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info) { struct nbd_device *nbd = NULL; @@ -1764,6 +1777,11 @@ static const struct genl_ops nbd_connect_genl_ops[] = { .policy = nbd_attr_policy, .doit = nbd_genl_reconfigure, }, + { + .cmd = NBD_CMD_STATUS, + .policy = nbd_attr_policy, + .doit = nbd_genl_status, + }, }; static const struct genl_multicast_group nbd_mcast_grps[] = { @@ -1782,6 +1800,96 @@ static struct genl_family nbd_genl_family __ro_after_init = { .n_mcgrps = ARRAY_SIZE(nbd_mcast_grps), }; +static int populate_nbd_status(struct nbd_device *nbd, struct sk_buff *reply) +{ + struct nlattr *dev_opt; + u8 connected = 0; + int ret; + + /* This is a little racey, but for status it's ok. The + * reason we don't take a ref here is because we can't + * take a ref in the index == -1 case as we would need + * to put under the nbd_index_mutex, which could + * deadlock if we are configured to remove ourselves + * once we're disconnected. 
+ */ + if (refcount_read(&nbd->config_refs)) + connected = 1; + dev_opt = nla_nest_start(reply, NBD_DEVICE_ITEM); + if (!dev_opt) + return -EMSGSIZE; + ret = nla_put_u32(reply, NBD_DEVICE_INDEX, nbd->index); + if (ret) + return -EMSGSIZE; + ret = nla_put_u8(reply, NBD_DEVICE_CONNECTED, + connected); + if (ret) + return -EMSGSIZE; + nla_nest_end(reply, dev_opt); + return 0; +} + +static int status_cb(int id, void *ptr, void *data) +{ + struct nbd_device *nbd = ptr; + return populate_nbd_status(nbd, (struct sk_buff *)data); +} + +static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *dev_list; + struct sk_buff *reply; + void *reply_head; + size_t msg_size; + int index = -1; + int ret = -ENOMEM; + + if (info->attrs[NBD_ATTR_INDEX]) + index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]); + + mutex_lock(&nbd_index_mutex); + + msg_size = nla_total_size(nla_attr_size(sizeof(u32)) + + nla_attr_size(sizeof(u8))); + msg_size *= (index == -1) ? nbd_total_devices : 1; + + reply = genlmsg_new(msg_size, GFP_KERNEL); + if (!reply) + goto out; + reply_head = genlmsg_put_reply(reply, info, &nbd_genl_family, 0, + NBD_CMD_STATUS); + if (!reply_head) { + nlmsg_free(reply); + goto out; + } + + dev_list = nla_nest_start(reply, NBD_ATTR_DEVICE_LIST); + if (index == -1) { + ret = idr_for_each(&nbd_index_idr, &status_cb, reply); + if (ret) { + nlmsg_free(reply); + goto out; + } + } else { + struct nbd_device *nbd; + nbd = idr_find(&nbd_index_idr, index); + if (nbd) { + ret = populate_nbd_status(nbd, reply); + if (ret) { + nlmsg_free(reply); + goto out; + } + } + } + nla_nest_end(reply, dev_list); + genlmsg_end(reply, reply_head); + genlmsg_reply(reply, info); + ret = 0; +out: + mutex_unlock(&nbd_index_mutex); + return ret; +} + static void nbd_connect_reply(struct genl_info *info, int index) { struct sk_buff *skb; diff --git a/include/uapi/linux/nbd-netlink.h b/include/uapi/linux/nbd-netlink.h index c2209c75626c..6f7ca3d63a65 100644 --- 
a/include/uapi/linux/nbd-netlink.h +++ b/include/uapi/linux/nbd-netlink.h @@ -33,10 +33,34 @@ enum { NBD_ATTR_CLIENT_FLAGS, NBD_ATTR_SOCKETS, NBD_ATTR_DEAD_CONN_TIMEOUT, + NBD_ATTR_DEVICE_LIST, __NBD_ATTR_MAX, }; #define NBD_ATTR_MAX (__NBD_ATTR_MAX - 1) +/* + * This is the format for multiple devices with NBD_ATTR_DEVICE_LIST + * + * [NBD_ATTR_DEVICE_LIST] + * [NBD_DEVICE_ITEM] + * [NBD_DEVICE_INDEX] + * [NBD_DEVICE_CONNECTED] + */ +enum { + NBD_DEVICE_ITEM_UNSPEC, + NBD_DEVICE_ITEM, + __NBD_DEVICE_ITEM_MAX, +}; +#define NBD_DEVICE_ITEM_MAX (__NBD_DEVICE_ITEM_MAX - 1) + +enum { + NBD_DEVICE_UNSPEC, + NBD_DEVICE_INDEX, + NBD_DEVICE_CONNECTED, + __NBD_DEVICE_MAX, +}; +#define NBD_DEVICE_ATTR_MAX (__NBD_DEVICE_MAX - 1) + /* * This is the format for multiple sockets with NBD_ATTR_SOCKETS * @@ -66,6 +90,7 @@ enum { NBD_CMD_DISCONNECT, NBD_CMD_RECONFIGURE, NBD_CMD_LINK_DEAD, + NBD_CMD_STATUS, __NBD_CMD_MAX, }; #define NBD_CMD_MAX (__NBD_CMD_MAX - 1) -- cgit v1.2.3-71-gd317 From a2c97909f97ef32b76e856572fba4f77e1885fe6 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 6 Apr 2017 17:02:07 -0400 Subject: nbd: add a flag to destroy an nbd device on disconnect For ease of management it would be nice for users to specify that the device node for a nbd device is destroyed once it is disconnected and there are no more users. Add a client flag and enable this operation to happen. 
Signed-off-by: Josef Bacik Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 30 ++++++++++++++++++++++++++++++ include/uapi/linux/nbd.h | 6 +++++- 2 files changed, 35 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 4237e7286e99..b78f23ce2395 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -74,6 +74,7 @@ struct link_dead_args { #define NBD_HAS_PID_FILE 3 #define NBD_HAS_CONFIG_REF 4 #define NBD_BOUND 5 +#define NBD_DESTROY_ON_DISCONNECT 6 struct nbd_config { u32 flags; @@ -174,6 +175,7 @@ static void nbd_dev_remove(struct nbd_device *nbd) del_gendisk(disk); blk_cleanup_queue(disk->queue); blk_mq_free_tag_set(&nbd->tag_set); + disk->private_data = NULL; put_disk(disk); } kfree(nbd); @@ -1028,6 +1030,7 @@ static void nbd_config_put(struct nbd_device *nbd) kfree(config->socks); } nbd_reset(nbd); + mutex_unlock(&nbd->config_lock); nbd_put(nbd); module_put(THIS_MODULE); @@ -1539,6 +1542,7 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info) struct nbd_config *config; int index = -1; int ret; + bool put_dev = false; if (!netlink_capable(skb, CAP_SYS_ADMIN)) return -EPERM; @@ -1633,6 +1637,15 @@ again: if (info->attrs[NBD_ATTR_SERVER_FLAGS]) config->flags = nla_get_u64(info->attrs[NBD_ATTR_SERVER_FLAGS]); + if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) { + u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]); + if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) { + set_bit(NBD_DESTROY_ON_DISCONNECT, + &config->runtime_flags); + put_dev = true; + } + } + if (info->attrs[NBD_ATTR_SOCKETS]) { struct nlattr *attr; int rem, fd; @@ -1670,6 +1683,8 @@ out: nbd_connect_reply(info, nbd->index); } nbd_config_put(nbd); + if (put_dev) + nbd_put(nbd); return ret; } @@ -1722,6 +1737,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info) struct nbd_config *config; int index; int ret = -EINVAL; + bool put_dev = false; if (!netlink_capable(skb, CAP_SYS_ADMIN)) 
return -EPERM; @@ -1773,6 +1789,18 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info) nla_get_u64(info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]); config->dead_conn_timeout *= HZ; } + if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) { + u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]); + if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) { + if (!test_and_set_bit(NBD_DESTROY_ON_DISCONNECT, + &config->runtime_flags)) + put_dev = true; + } else { + if (test_and_clear_bit(NBD_DESTROY_ON_DISCONNECT, + &config->runtime_flags)) + refcount_inc(&nbd->refs); + } + } if (info->attrs[NBD_ATTR_SOCKETS]) { struct nlattr *attr; @@ -1810,6 +1838,8 @@ out: mutex_unlock(&nbd->config_lock); nbd_config_put(nbd); nbd_put(nbd); + if (put_dev) + nbd_put(nbd); return ret; } diff --git a/include/uapi/linux/nbd.h b/include/uapi/linux/nbd.h index c91c642ea900..155e33f81913 100644 --- a/include/uapi/linux/nbd.h +++ b/include/uapi/linux/nbd.h @@ -37,7 +37,7 @@ enum { NBD_CMD_TRIM = 4 }; -/* values for flags field */ +/* values for flags field, these are server interaction specific. */ #define NBD_FLAG_HAS_FLAGS (1 << 0) /* nbd-server supports flags */ #define NBD_FLAG_READ_ONLY (1 << 1) /* device is read-only */ #define NBD_FLAG_SEND_FLUSH (1 << 2) /* can flush writeback cache */ @@ -45,6 +45,10 @@ enum { #define NBD_FLAG_SEND_TRIM (1 << 5) /* send trim/discard */ #define NBD_FLAG_CAN_MULTI_CONN (1 << 8) /* Server supports multiple connections per export. */ +/* These are client behavior specific flags. */ +#define NBD_CFLAG_DESTROY_ON_DISCONNECT (1 << 0) /* delete the nbd device on + disconnect. 
*/ + /* userspace doesn't need the nbd_device structure */ /* These are sent over the network in the request/reply magic fields */ -- cgit v1.2.3-71-gd317 From 2611dc1939569718c65ffd59c8fb9ba7474d026c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 8 Apr 2017 14:34:51 -0400 Subject: Remove compat_sys_getdents64() Unlike normal compat syscall variants, it is needed only for biarch architectures that have different alignment requirements for u64 in 32bit and 64bit ABI *and* have __put_user() that won't handle a store of 64bit value at 32bit-aligned address. We used to have one such (ia64), but its biarch support has been gone since 2010 (after being broken in 2008, which went unnoticed since nobody had been using it). It had escaped removal at the same time only because back in 2004 a patch that switched several syscalls on amd64 from private wrappers to generic compat ones had switched to use of compat_sys_getdents64(), which hadn't needed (or used) a compat wrapper on amd64. Let's bury it - it's at least 7 years overdue. Signed-off-by: Al Viro --- arch/arm64/include/asm/unistd.h | 1 - arch/arm64/include/asm/unistd32.h | 2 +- arch/x86/entry/syscalls/syscall_32.tbl | 2 +- arch/x86/include/asm/unistd.h | 1 - fs/compat.c | 91 ---------------------------------- include/linux/compat.h | 5 -- include/uapi/asm-generic/unistd.h | 3 +- 7 files changed, 3 insertions(+), 102 deletions(-) (limited to 'include/uapi') diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index bdbeb06dc11e..a0baa9af5487 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -14,7 +14,6 @@ * along with this program. If not, see .
*/ #ifdef CONFIG_COMPAT -#define __ARCH_WANT_COMPAT_SYS_GETDENTS64 #define __ARCH_WANT_COMPAT_STAT64 #define __ARCH_WANT_SYS_GETHOSTNAME #define __ARCH_WANT_SYS_PAUSE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index c66b51aab195..ef292160748c 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -456,7 +456,7 @@ __SYSCALL(__NR_setfsuid32, sys_setfsuid) #define __NR_setfsgid32 216 __SYSCALL(__NR_setfsgid32, sys_setfsgid) #define __NR_getdents64 217 -__SYSCALL(__NR_getdents64, compat_sys_getdents64) +__SYSCALL(__NR_getdents64, sys_getdents64) #define __NR_pivot_root 218 __SYSCALL(__NR_pivot_root, sys_pivot_root) #define __NR_mincore 219 diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 9ba050fe47f3..b1a63f6f53c0 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -226,7 +226,7 @@ 217 i386 pivot_root sys_pivot_root 218 i386 mincore sys_mincore 219 i386 madvise sys_madvise -220 i386 getdents64 sys_getdents64 compat_sys_getdents64 +220 i386 getdents64 sys_getdents64 221 i386 fcntl64 sys_fcntl64 compat_sys_fcntl64 # 222 is unused # 223 is unused diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h index 32712a925f26..1ba1536f627e 100644 --- a/arch/x86/include/asm/unistd.h +++ b/arch/x86/include/asm/unistd.h @@ -23,7 +23,6 @@ # include # include # define __ARCH_WANT_COMPAT_SYS_TIME -# define __ARCH_WANT_COMPAT_SYS_GETDENTS64 # define __ARCH_WANT_COMPAT_SYS_PREADV64 # define __ARCH_WANT_COMPAT_SYS_PWRITEV64 # define __ARCH_WANT_COMPAT_SYS_PREADV64V2 diff --git a/fs/compat.c b/fs/compat.c index c61b506f5bc9..54e5855e291a 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -907,97 +907,6 @@ COMPAT_SYSCALL_DEFINE3(getdents, unsigned int, fd, return error; } -#ifdef __ARCH_WANT_COMPAT_SYS_GETDENTS64 - -struct compat_getdents_callback64 { - struct dir_context ctx; - struct linux_dirent64 
__user *current_dir; - struct linux_dirent64 __user *previous; - int count; - int error; -}; - -static int compat_filldir64(struct dir_context *ctx, const char *name, - int namlen, loff_t offset, u64 ino, - unsigned int d_type) -{ - struct linux_dirent64 __user *dirent; - struct compat_getdents_callback64 *buf = - container_of(ctx, struct compat_getdents_callback64, ctx); - int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1, - sizeof(u64)); - u64 off; - - buf->error = -EINVAL; /* only used if we fail.. */ - if (reclen > buf->count) - return -EINVAL; - dirent = buf->previous; - - if (dirent) { - if (signal_pending(current)) - return -EINTR; - if (__put_user_unaligned(offset, &dirent->d_off)) - goto efault; - } - dirent = buf->current_dir; - if (__put_user_unaligned(ino, &dirent->d_ino)) - goto efault; - off = 0; - if (__put_user_unaligned(off, &dirent->d_off)) - goto efault; - if (__put_user(reclen, &dirent->d_reclen)) - goto efault; - if (__put_user(d_type, &dirent->d_type)) - goto efault; - if (copy_to_user(dirent->d_name, name, namlen)) - goto efault; - if (__put_user(0, dirent->d_name + namlen)) - goto efault; - buf->previous = dirent; - dirent = (void __user *)dirent + reclen; - buf->current_dir = dirent; - buf->count -= reclen; - return 0; -efault: - buf->error = -EFAULT; - return -EFAULT; -} - -COMPAT_SYSCALL_DEFINE3(getdents64, unsigned int, fd, - struct linux_dirent64 __user *, dirent, unsigned int, count) -{ - struct fd f; - struct linux_dirent64 __user * lastdirent; - struct compat_getdents_callback64 buf = { - .ctx.actor = compat_filldir64, - .current_dir = dirent, - .count = count - }; - int error; - - if (!access_ok(VERIFY_WRITE, dirent, count)) - return -EFAULT; - - f = fdget_pos(fd); - if (!f.file) - return -EBADF; - - error = iterate_dir(f.file, &buf.ctx); - if (error >= 0) - error = buf.error; - lastdirent = buf.previous; - if (lastdirent) { - typeof(lastdirent->d_off) d_off = buf.ctx.pos; - if (__put_user_unaligned(d_off, 
&lastdirent->d_off)) - error = -EFAULT; - else - error = count - buf.count; - } - fdput_pos(f); - return error; -} -#endif /* __ARCH_WANT_COMPAT_SYS_GETDENTS64 */ - /* * Exactly like fs/open.c:sys_open(), except that it doesn't set the * O_LARGEFILE flag. diff --git a/include/linux/compat.h b/include/linux/compat.h index aef47be2a5c1..54d65eb3d1e7 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -528,11 +528,6 @@ asmlinkage long compat_sys_old_readdir(unsigned int fd, asmlinkage long compat_sys_getdents(unsigned int fd, struct compat_linux_dirent __user *dirent, unsigned int count); -#ifdef __ARCH_WANT_COMPAT_SYS_GETDENTS64 -asmlinkage long compat_sys_getdents64(unsigned int fd, - struct linux_dirent64 __user *dirent, - unsigned int count); -#endif asmlinkage long compat_sys_vmsplice(int fd, const struct compat_iovec __user *, unsigned int nr_segs, unsigned int flags); asmlinkage long compat_sys_open(const char __user *filename, int flags, diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index a076cf1a3a23..061185a5eb51 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -194,8 +194,7 @@ __SYSCALL(__NR_quotactl, sys_quotactl) /* fs/readdir.c */ #define __NR_getdents64 61 -#define __ARCH_WANT_COMPAT_SYS_GETDENTS64 -__SC_COMP(__NR_getdents64, sys_getdents64, compat_sys_getdents64) +__SYSCALL(__NR_getdents64, sys_getdents64) /* fs/read_write.c */ #define __NR3264_lseek 62 -- cgit v1.2.3-71-gd317 From 261cc2cca0a8c1d817be65434052feb1db1fd961 Mon Sep 17 00:00:00 2001 From: Hans van Kranenburg Date: Wed, 8 Mar 2017 18:58:43 +0100 Subject: Btrfs: consistent usage of types in balance_args The btrfs_balance_args are only used for the balance ioctl, so use __u instead of __le here for consistency. The __le usage was introduced in bc3094673f22d and dee32d0ac3719 and was probably a result of copy/pasting when the code was written. 
The usage of __le did not break anything, but it's unnecessary. Also, this change makes the code less confusing for the careful reader. Signed-off-by: Hans van Kranenburg Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/uapi/linux/btrfs.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index dcfc3a5a9cb1..a456e5309238 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -291,10 +291,10 @@ struct btrfs_ioctl_feature_flags { struct btrfs_balance_args { __u64 profiles; union { - __le64 usage; + __u64 usage; struct { - __le32 usage_min; - __le32 usage_max; + __u32 usage_min; + __u32 usage_max; }; }; __u64 devid; @@ -324,8 +324,8 @@ struct btrfs_balance_args { * Process chunks that cross stripes_min..stripes_max devices, * BTRFS_BALANCE_ARGS_STRIPES_RANGE */ - __le32 stripes_min; - __le32 stripes_max; + __u32 stripes_min; + __u32 stripes_max; __u64 unused[6]; } __attribute__ ((__packed__)); -- cgit v1.2.3-71-gd317 From 21470e32ca7f976bf131aa3c7b54019d07f7d821 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sun, 16 Apr 2017 21:51:07 -0300 Subject: usb: fix some references for /proc/bus/usb Since when we got rid of usbfs, the /proc/bus/usb is now elsewhere. Fix references for it. 
Signed-off-by: Mauro Carvalho Chehab Acked-by: Serge Hallyn Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/pwc/philips.txt | 2 +- drivers/usb/class/Kconfig | 2 +- drivers/usb/class/usblp.c | 2 +- drivers/usb/core/devices.c | 4 ++-- drivers/usb/storage/unusual_devs.h | 2 +- include/linux/usb.h | 2 +- include/uapi/linux/capability.h | 2 +- include/uapi/linux/usb/ch9.h | 3 ++- tools/usb/usbip/README | 2 +- 9 files changed, 11 insertions(+), 10 deletions(-) (limited to 'include/uapi') diff --git a/drivers/media/usb/pwc/philips.txt b/drivers/media/usb/pwc/philips.txt index d38dd791511e..be8c80eff374 100644 --- a/drivers/media/usb/pwc/philips.txt +++ b/drivers/media/usb/pwc/philips.txt @@ -140,7 +140,7 @@ dev_hint A camera is specified by its type (the number from the camera model, like PCA645, PCVC750VC, etc) and optionally the serial number (visible - in /proc/bus/usb/devices). A hint consists of a string with the following + in /sys/kernel/debug/usb/devices). A hint consists of a string with the following format: [type[.serialnumber]:]node diff --git a/drivers/usb/class/Kconfig b/drivers/usb/class/Kconfig index bb8b73682a70..971385fe9abc 100644 --- a/drivers/usb/class/Kconfig +++ b/drivers/usb/class/Kconfig @@ -12,7 +12,7 @@ config USB_ACM Please read for details. If your modem only reports "Cls=ff(vend.)" in the descriptors in - /proc/bus/usb/devices, then your modem will not work with this + /sys/kernel/debug/usb/devices, then your modem will not work with this driver. To compile this driver as a module, choose M here: the diff --git a/drivers/usb/class/usblp.c b/drivers/usb/class/usblp.c index 73bd9a2ac530..fb87c17ed6fa 100644 --- a/drivers/usb/class/usblp.c +++ b/drivers/usb/class/usblp.c @@ -294,7 +294,7 @@ static int usblp_ctrl_msg(struct usblp *usblp, int request, int type, int dir, i /* * See the description for usblp_select_alts() below for the usage - * explanation. Look into your /proc/bus/usb/devices and dmesg in + * explanation. 
Look into your /sys/kernel/debug/usb/devices and dmesg in * case of any trouble. */ static int proto_bias = -1; diff --git a/drivers/usb/core/devices.c b/drivers/usb/core/devices.c index f2987ddb1cde..55dea2e7828f 100644 --- a/drivers/usb/core/devices.c +++ b/drivers/usb/core/devices.c @@ -24,7 +24,7 @@ * /devices contains USB topology, device, config, class, * interface, & endpoint data. * - * I considered using /proc/bus/usb/devices/device# for each device + * I considered using /dev/bus/usb/device# for each device * as it is attached or detached, but I didn't like this for some * reason -- maybe it's just too deep of a directory structure. * I also don't like looking in multiple places to gather and view @@ -40,7 +40,7 @@ * Converted the whole proc stuff to real * read methods. Now not the whole device list needs to fit * into one page, only the device list for one bus. - * Added a poll method to /proc/bus/usb/devices, to wake + * Added a poll method to /sys/kernel/debug/usb/devices, to wake * up an eventual usbd * 2000-01-04: Thomas Sailer * Turned into its own filesystem diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index 9129f6cb8230..a2b748975457 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -42,7 +42,7 @@ * - a patch that adds the entry for your device, including your * email address right above the entry (plus maybe a brief * explanation of the reason for the entry), - * - a copy of /proc/bus/usb/devices with your device plugged in + * - a copy of /sys/kernel/debug/usb/devices with your device plugged in * running with this patch. 
* Send your submission to either Phil Dibowitz or * Alan Stern , and don't forget to CC: the diff --git a/include/linux/usb.h b/include/linux/usb.h index 226557362d36..cb9fbd54386e 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -318,7 +318,7 @@ void usb_put_intf(struct usb_interface *intf); * struct usb_interface (which persists only as long as its configuration * is installed). The altsetting arrays can be accessed through these * structures at any time, permitting comparison of configurations and - * providing support for the /proc/bus/usb/devices pseudo-file. + * providing support for the /sys/kernel/debug/usb/devices pseudo-file. */ struct usb_interface_cache { unsigned num_altsetting; /* number of alternate settings */ diff --git a/include/uapi/linux/capability.h b/include/uapi/linux/capability.h index 49bc06295398..6fe14d001f68 100644 --- a/include/uapi/linux/capability.h +++ b/include/uapi/linux/capability.h @@ -205,7 +205,7 @@ struct vfs_cap_data { #define CAP_SYS_MODULE 16 /* Allow ioperm/iopl access */ -/* Allow sending USB messages to any device via /proc/bus/usb */ +/* Allow sending USB messages to any device via /dev/bus/usb */ #define CAP_SYS_RAWIO 17 diff --git a/include/uapi/linux/usb/ch9.h b/include/uapi/linux/usb/ch9.h index 2c5d7c4a69e3..ce1169af39d7 100644 --- a/include/uapi/linux/usb/ch9.h +++ b/include/uapi/linux/usb/ch9.h @@ -224,7 +224,8 @@ struct usb_ctrlrequest { * through the Linux-USB APIs, they are not converted to cpu byte * order; it is the responsibility of the client code to do this. * The single exception is when device and configuration descriptors (but - * not other descriptors) are read from usbfs (i.e. /proc/bus/usb/BBB/DDD); + * not other descriptors) are read from character devices + * (i.e. /dev/bus/usb/BBB/DDD); * in this case the fields are converted to host endianness by the kernel. 
*/ diff --git a/tools/usb/usbip/README b/tools/usb/usbip/README index 5eb2b6c7722b..7844490fc603 100644 --- a/tools/usb/usbip/README +++ b/tools/usb/usbip/README @@ -244,7 +244,7 @@ Detach the imported device: - See 'Debug Tips' on the project wiki. - http://usbip.wiki.sourceforge.net/how-to-debug-usbip - usbip-host.ko must be bound to the target device. - - See /proc/bus/usb/devices and find "Driver=..." lines of the device. + - See /sys/kernel/debug/usb/devices and find "Driver=..." lines of the device. - Target USB gadget must be bound to vudc (using USB gadget susbsys, not usbip bind command) - Shutdown firewall. -- cgit v1.2.3-71-gd317 From 76dc52684d0f72971d9f6cc7d5ae198061b715bd Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Fri, 14 Apr 2017 13:38:02 -0700 Subject: PCI: Make PCI_ROM_ADDRESS_MASK a 32-bit constant A 64-bit value is not needed since a PCI ROM address consists of 32 bits. This fixes a clang warning about "implicit conversion from 'unsigned long' to 'u32'". Also remove now unnecessary casts to u32 from __pci_read_base() and pci_std_update_resource().
Signed-off-by: Matthias Kaehlcke Signed-off-by: Bjorn Helgaas --- drivers/pci/probe.c | 2 +- drivers/pci/setup-res.c | 2 +- include/uapi/linux/pci_regs.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index dfc9a2794141..7d5d4a56a186 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -231,7 +231,7 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, res->flags |= IORESOURCE_ROM_ENABLE; l64 = l & PCI_ROM_ADDRESS_MASK; sz64 = sz & PCI_ROM_ADDRESS_MASK; - mask64 = (u32)PCI_ROM_ADDRESS_MASK; + mask64 = PCI_ROM_ADDRESS_MASK; } if (res->flags & IORESOURCE_MEM_64) { diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index 4bc589ee78d0..85774b7a316a 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -63,7 +63,7 @@ static void pci_std_update_resource(struct pci_dev *dev, int resno) mask = (u32)PCI_BASE_ADDRESS_IO_MASK; new |= res->flags & ~PCI_BASE_ADDRESS_IO_MASK; } else if (resno == PCI_ROM_RESOURCE) { - mask = (u32)PCI_ROM_ADDRESS_MASK; + mask = PCI_ROM_ADDRESS_MASK; } else { mask = (u32)PCI_BASE_ADDRESS_MEM_MASK; new |= res->flags & ~PCI_BASE_ADDRESS_MEM_MASK; diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 634c9c44ed6c..fff521c9458c 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -114,7 +114,7 @@ #define PCI_SUBSYSTEM_ID 0x2e #define PCI_ROM_ADDRESS 0x30 /* Bits 31..11 are address, 10..1 reserved */ #define PCI_ROM_ADDRESS_ENABLE 0x01 -#define PCI_ROM_ADDRESS_MASK (~0x7ffUL) +#define PCI_ROM_ADDRESS_MASK (~0x7ffU) #define PCI_CAPABILITY_LIST 0x34 /* Offset of first capability list entry */ -- cgit v1.2.3-71-gd317 From 8c5073db0ee680c7e70e123918c9b260e49f757d Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Tue, 11 Apr 2017 07:21:05 +0530 Subject: powerpc/perf: Define big-endian version of perf_mem_data_src perf_mem_data_src is a union that is 
initialized in the kernel via the ->val field and accessed by userspace via the mem_xxx bitfields. For this to work correctly on big endian platforms, we need a big-endian definition for the bitfields. Currently on a big endian system, if a user requests PERF_SAMPLE_DATA_SRC (perf report -d), they will get the default value from perf_sample_data_init(), which is PERF_MEM_NA. The value for PERF_MEM_NA is constructed using shifts: /* TLB access */ #define PERF_MEM_TLB_NA 0x01 /* not available */ ... #define PERF_MEM_TLB_SHIFT 26 #define PERF_MEM_S(a, s) \ (((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT) #define PERF_MEM_NA (PERF_MEM_S(OP, NA) |\ PERF_MEM_S(LVL, NA) |\ PERF_MEM_S(SNOOP, NA) |\ PERF_MEM_S(LOCK, NA) |\ PERF_MEM_S(TLB, NA)) Which works out as: ((0x01 << 0) | (0x01 << 5) | (0x01 << 19) | (0x01 << 24) | (0x01 << 26)) Which means the PERF_MEM_NA value comes out of the kernel as 0x5080021 in CPU endian. But then in the perf tool, the code uses the bitfields to inspect the value, and currently the bitfields are defined using little endian ordering. So eg. in perf_mem__tlb_scnprintf() we see: data_src->val = 0x5080021 op = 0x0 lvl = 0x0 snoop = 0x0 lock = 0x0 dtlb = 0x0 rsvd = 0x5080021 Because of the way the perf tool code is written this is still displayed to the user as "N/A", so there is no bug visible at the UI level. Currently there are no big endian architectures which export a meaningful value (ie. other than PERF_MEM_NA), so the extent of the bug on big endian platforms is that the PERF_MEM_NA value is exported incorrectly as described above. Subsequent patches will add support on big endian powerpc for populating the data source value. This patch does a minimal fix of adding big endian definition of the bitfields to match the values that are already exported by the kernel on big endian. And it makes no change on little endian. 
Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Madhavan Srinivasan Acked-by: Peter Zijlstra (Intel) Signed-off-by: Michael Ellerman --- include/uapi/linux/perf_event.h | 16 ++++++++++++++++ tools/include/uapi/linux/perf_event.h | 16 ++++++++++++++++ 2 files changed, 32 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index c66a485a24ac..c4af1159a200 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -891,6 +891,7 @@ enum perf_callchain_context { #define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */ #define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */ +#if defined(__LITTLE_ENDIAN_BITFIELD) union perf_mem_data_src { __u64 val; struct { @@ -902,6 +903,21 @@ union perf_mem_data_src { mem_rsvd:31; }; }; +#elif defined(__BIG_ENDIAN_BITFIELD) +union perf_mem_data_src { + __u64 val; + struct { + __u64 mem_rsvd:31, + mem_dtlb:7, /* tlb access */ + mem_lock:2, /* lock instr */ + mem_snoop:5, /* snoop mode */ + mem_lvl:14, /* memory hierarchy level */ + mem_op:5; /* type of opcode */ + }; +}; +#else +#error "Unknown endianness" +#endif /* type of opcode (load/store/prefetch,code) */ #define PERF_MEM_OP_NA 0x01 /* not available */ diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index c66a485a24ac..c4af1159a200 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -891,6 +891,7 @@ enum perf_callchain_context { #define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */ #define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */ +#if defined(__LITTLE_ENDIAN_BITFIELD) union perf_mem_data_src { __u64 val; struct { @@ -902,6 +903,21 @@ union perf_mem_data_src { mem_rsvd:31; }; }; +#elif defined(__BIG_ENDIAN_BITFIELD) +union perf_mem_data_src { + __u64 val; + struct { + __u64 mem_rsvd:31, + mem_dtlb:7, /* tlb access */ + mem_lock:2, /* lock instr 
*/ + mem_snoop:5, /* snoop mode */ + mem_lvl:14, /* memory hierarchy level */ + mem_op:5; /* type of opcode */ + }; +}; +#else +#error "Unknown endianness" +#endif /* type of opcode (load/store/prefetch,code) */ #define PERF_MEM_OP_NA 0x01 /* not available */ -- cgit v1.2.3-71-gd317 From 694a0055f039bc1d73aba10606ea74e798d2d759 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 15 Apr 2017 19:26:10 +0200 Subject: netfilter: nft_ct: allow to set ctnetlink event types of a connection By default the kernel emits all ctnetlink events for a connection. This allows to select the types of events to generate. This can be used to e.g. only send DESTROY events but no NEW/UPDATE ones and will work even if sysctl net.netfilter.nf_conntrack_events is set to 0. This was already possible via iptables' CT target, but the nft version has the advantage that it can also be used with already-established conntracks. The added nf_ct_is_template() check isn't a bug fix as we only support mark and labels (and unlike ecache the conntrack core doesn't copy those). 
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ net/netfilter/nft_ct.c | 25 ++++++++++++++++++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 8f3842690d17..683f6f88fcac 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -901,6 +901,7 @@ enum nft_rt_attributes { * @NFT_CT_BYTES: conntrack bytes * @NFT_CT_AVGPKT: conntrack average bytes per packet * @NFT_CT_ZONE: conntrack zone + * @NFT_CT_EVENTMASK: ctnetlink events to be generated for this conntrack */ enum nft_ct_keys { NFT_CT_STATE, @@ -921,6 +922,7 @@ enum nft_ct_keys { NFT_CT_BYTES, NFT_CT_AVGPKT, NFT_CT_ZONE, + NFT_CT_EVENTMASK, }; /** diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 6c6fd48b024c..a34ceb38fc55 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -264,7 +264,7 @@ static void nft_ct_set_eval(const struct nft_expr *expr, struct nf_conn *ct; ct = nf_ct_get(skb, &ctinfo); - if (ct == NULL) + if (ct == NULL || nf_ct_is_template(ct)) return; switch (priv->key) { @@ -283,6 +283,22 @@ static void nft_ct_set_eval(const struct nft_expr *expr, &regs->data[priv->sreg], NF_CT_LABELS_MAX_SIZE / sizeof(u32)); break; +#endif +#ifdef CONFIG_NF_CONNTRACK_EVENTS + case NFT_CT_EVENTMASK: { + struct nf_conntrack_ecache *e = nf_ct_ecache_find(ct); + u32 ctmask = regs->data[priv->sreg]; + + if (e) { + if (e->ctmask != ctmask) + e->ctmask = ctmask; + break; + } + + if (ctmask && !nf_ct_is_confirmed(ct)) + nf_ct_ecache_ext_add(ct, ctmask, 0, GFP_ATOMIC); + break; + } #endif default: break; @@ -538,6 +554,13 @@ static int nft_ct_set_init(const struct nft_ctx *ctx, nft_ct_pcpu_template_refcnt++; len = sizeof(u16); break; +#endif +#ifdef CONFIG_NF_CONNTRACK_EVENTS + case NFT_CT_EVENTMASK: + if (tb[NFTA_CT_DIRECTION]) + return -EINVAL;
+ len = sizeof(u32); + break; #endif default: return -EOPNOTSUPP; -- cgit v1.2.3-71-gd317 From 01026edef9062b7d26ace74a5b4a5a33a2399501 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 18 Apr 2017 17:27:32 +0200 Subject: netfilter: ecache: reduce struct size from 32 to 24 bytes Only "cache" needs to use ulong (it's used with set_bit()), missed can use u16. Also add build-time assertion to ensure event bits fit. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_ecache.h | 4 ++-- include/uapi/linux/netfilter/nf_conntrack_common.h | 3 +++ net/netfilter/nf_conntrack_ecache.c | 3 +++ 3 files changed, 8 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index 12d967b58726..2a10c6570fcc 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -20,11 +20,11 @@ enum nf_ct_ecache_state { struct nf_conntrack_ecache { unsigned long cache; /* bitops want long */ - unsigned long missed; /* missed events */ + u16 missed; /* missed events */ u16 ctmask; /* bitmask of ct events to be delivered */ u16 expmask; /* bitmask of expect events to be delivered */ + enum nf_ct_ecache_state state:8;/* ecache state */ u32 portid; /* netlink portid of destroyer */ - enum nf_ct_ecache_state state; /* ecache state */ }; static inline struct nf_conntrack_ecache * diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index b4a0a1940118..a8072cc7fa0b 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h @@ -119,6 +119,9 @@ enum ip_conntrack_events { IPCT_NATSEQADJ = IPCT_SEQADJ, IPCT_SECMARK, /* new security mark has been set */ IPCT_LABEL, /* new connlabel has been set */ +#ifdef __KERNEL__ + __IPCT_MAX +#endif }; enum ip_conntrack_expect_events {
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index 6161e92d2980..515212948125 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -420,6 +420,9 @@ int nf_conntrack_ecache_init(void) int ret = nf_ct_extend_register(&event_extend); if (ret < 0) pr_err("nf_ct_event: Unable to register event extension.\n"); + + BUILD_BUG_ON(__IPCT_MAX >= 16); /* ctmask, missed use u16 */ + return ret; } -- cgit v1.2.3-71-gd317 From 4898d3f49b5b156c33f0ae0f49ede417ab86195e Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 22 Mar 2017 15:21:51 +1100 Subject: KVM: PPC: Reserve KVM_CAP_SPAPR_TCE_VFIO capability number This adds a capability number for in-kernel support for VFIO on SPAPR platform. The capability will tell the user space whether in-kernel handlers of H_PUT_TCE can handle VFIO-targeted requests or not. If not, the user space must not attempt allocating a TCE table in the host kernel via the KVM_CREATE_SPAPR_TCE KVM ioctl because in that case TCE requests will not be passed to the user space which is desired action in the situation like that. 
Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Signed-off-by: Paul Mackerras --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 6180ea50e9ef..7b488eae61b8 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -892,6 +892,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_MIPS_64BIT 139 #define KVM_CAP_S390_GS 140 #define KVM_CAP_S390_AIS 141 +#define KVM_CAP_SPAPR_TCE_VFIO 142 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3-71-gd317 From 121f80ba68f1a5779a36d7b3247206e60e0a7418 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 22 Mar 2017 15:21:56 +1100 Subject: KVM: PPC: VFIO: Add in-kernel acceleration for VFIO This allows the host kernel to handle H_PUT_TCE, H_PUT_TCE_INDIRECT and H_STUFF_TCE requests targeted at an IOMMU TCE table used for VFIO without passing them to user space which saves time on switching to user space and back. This adds H_PUT_TCE/H_PUT_TCE_INDIRECT/H_STUFF_TCE handlers to KVM. KVM tries to handle a TCE request in the real mode; if that fails, it passes the request to the virtual mode to complete the operation. If a virtual mode handler fails, the request is passed to the user space; this is not expected to happen though. To avoid dealing with page use counters (which is tricky in real mode), this only accelerates SPAPR TCE IOMMU v2 clients which are required to pre-register the userspace memory. The very first TCE request will be handled in the VFIO SPAPR TCE driver anyway as the userspace view of the TCE table (iommu_table::it_userspace) is not allocated till the very first mapping happens and we cannot call vmalloc in real mode.
If we fail to update a hardware IOMMU table for an unexpected reason, we just clear it and move on as there is nothing really we can do about it - for example, if we hot plug a VFIO device to a guest, existing TCE tables will be mirrored automatically to the hardware and there is no interface to report to the guest about possible failures. This adds a new attribute - KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE - to the VFIO KVM device. It takes a VFIO group fd and SPAPR TCE table fd and associates a physical IOMMU table with the SPAPR TCE table (which is a guest view of the hardware IOMMU table). The iommu_table object is cached and referenced so we do not have to look it up in real mode. This does not implement the UNSET counterpart as there is no use for it - once the acceleration is enabled, the existing userspace won't disable it unless a VFIO container is destroyed; this adds necessary cleanup to the KVM_DEV_VFIO_GROUP_DEL handler. This advertises the new KVM_CAP_SPAPR_TCE_VFIO capability to the user space. This adds a real mode version of WARN_ON_ONCE() as the generic version causes problems with rcu_sched. Since we are testing what vmalloc_to_phys() returns in the code, this also adds a check for the already existing vmalloc_to_phys() call in kvmppc_rm_h_put_tce_indirect(). This finally makes use of vfio_external_user_iommu_id() which was introduced quite some time ago and was considered for removal. Tests show that this patch increases transmission speed from 220MB/s to 750..1020MB/s on 10Gb network (Chelsio CXGB3 10Gb ethernet card).
Signed-off-by: Alexey Kardashevskiy Acked-by: Alex Williamson Reviewed-by: David Gibson Signed-off-by: Paul Mackerras --- Documentation/virtual/kvm/devices/vfio.txt | 18 +- arch/powerpc/include/asm/kvm_host.h | 8 + arch/powerpc/include/asm/kvm_ppc.h | 4 + arch/powerpc/kvm/book3s_64_vio.c | 306 ++++++++++++++++++++++++++++- arch/powerpc/kvm/book3s_64_vio_hv.c | 201 ++++++++++++++++++- arch/powerpc/kvm/powerpc.c | 2 + include/uapi/linux/kvm.h | 6 + virt/kvm/vfio.c | 105 ++++++++++ 8 files changed, 645 insertions(+), 5 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/virtual/kvm/devices/vfio.txt b/Documentation/virtual/kvm/devices/vfio.txt index ef51740c67ca..528c77c8022c 100644 --- a/Documentation/virtual/kvm/devices/vfio.txt +++ b/Documentation/virtual/kvm/devices/vfio.txt @@ -16,7 +16,21 @@ Groups: KVM_DEV_VFIO_GROUP attributes: KVM_DEV_VFIO_GROUP_ADD: Add a VFIO group to VFIO-KVM device tracking + kvm_device_attr.addr points to an int32_t file descriptor + for the VFIO group. KVM_DEV_VFIO_GROUP_DEL: Remove a VFIO group from VFIO-KVM device tracking + kvm_device_attr.addr points to an int32_t file descriptor + for the VFIO group. + KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE: attaches a guest visible TCE table + allocated by sPAPR KVM. + kvm_device_attr.addr points to a struct: -For each, kvm_device_attr.addr points to an int32_t file descriptor -for the VFIO group. + struct kvm_vfio_spapr_tce { + __s32 groupfd; + __s32 tablefd; + }; + + where + @groupfd is a file descriptor for a VFIO group; + @tablefd is a file descriptor for a TCE table allocated via + KVM_CREATE_SPAPR_TCE. 
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 0f3ac09cbfe0..77c60826d145 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -188,6 +188,13 @@ struct kvmppc_pginfo { atomic_t refcnt; }; +struct kvmppc_spapr_tce_iommu_table { + struct rcu_head rcu; + struct list_head next; + struct iommu_table *tbl; + struct kref kref; +}; + struct kvmppc_spapr_tce_table { struct list_head list; struct kvm *kvm; @@ -196,6 +203,7 @@ struct kvmppc_spapr_tce_table { u32 page_shift; u64 offset; /* in pages */ u64 size; /* window size in pages */ + struct list_head iommu_tables; struct page *pages[0]; }; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 4d079a29eae2..5885d327c025 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -173,6 +173,10 @@ extern long kvmppc_prepare_vrma(struct kvm *kvm, extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot, unsigned long porder); extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu); +extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, + struct iommu_group *grp); +extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm, + struct iommu_group *grp); extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, struct kvm_create_spapr_tce_64 *args); diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index d507d94e020c..a160c14304eb 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -28,6 +28,8 @@ #include #include #include +#include +#include #include #include @@ -40,6 +42,7 @@ #include #include #include +#include static unsigned long kvmppc_tce_pages(unsigned long iommu_pages) { @@ -91,6 +94,137 @@ static long kvmppc_account_memlimit(unsigned long stt_pages, bool inc) return ret; } +static void kvm_spapr_tce_iommu_table_free(struct rcu_head *head) +{ + 
struct kvmppc_spapr_tce_iommu_table *stit = container_of(head, + struct kvmppc_spapr_tce_iommu_table, rcu); + + iommu_tce_table_put(stit->tbl); + + kfree(stit); +} + +static void kvm_spapr_tce_liobn_put(struct kref *kref) +{ + struct kvmppc_spapr_tce_iommu_table *stit = container_of(kref, + struct kvmppc_spapr_tce_iommu_table, kref); + + list_del_rcu(&stit->next); + + call_rcu(&stit->rcu, kvm_spapr_tce_iommu_table_free); +} + +extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm, + struct iommu_group *grp) +{ + int i; + struct kvmppc_spapr_tce_table *stt; + struct kvmppc_spapr_tce_iommu_table *stit, *tmp; + struct iommu_table_group *table_group = NULL; + + list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) { + + table_group = iommu_group_get_iommudata(grp); + if (WARN_ON(!table_group)) + continue; + + list_for_each_entry_safe(stit, tmp, &stt->iommu_tables, next) { + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { + if (table_group->tables[i] != stit->tbl) + continue; + + kref_put(&stit->kref, kvm_spapr_tce_liobn_put); + return; + } + } + } +} + +extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, + struct iommu_group *grp) +{ + struct kvmppc_spapr_tce_table *stt = NULL; + bool found = false; + struct iommu_table *tbl = NULL; + struct iommu_table_group *table_group; + long i; + struct kvmppc_spapr_tce_iommu_table *stit; + struct fd f; + + f = fdget(tablefd); + if (!f.file) + return -EBADF; + + list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) { + if (stt == f.file->private_data) { + found = true; + break; + } + } + + fdput(f); + + if (!found) + return -EINVAL; + + table_group = iommu_group_get_iommudata(grp); + if (WARN_ON(!table_group)) + return -EFAULT; + + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { + struct iommu_table *tbltmp = table_group->tables[i]; + + if (!tbltmp) + continue; + /* + * Make sure hardware table parameters are exactly the same; + * this is used in the TCE handlers where 
boundary checks + * use only the first attached table. + */ + if ((tbltmp->it_page_shift == stt->page_shift) && + (tbltmp->it_offset == stt->offset) && + (tbltmp->it_size == stt->size)) { + /* + * Reference the table to avoid races with + * add/remove DMA windows. + */ + tbl = iommu_tce_table_get(tbltmp); + break; + } + } + if (!tbl) + return -EINVAL; + + list_for_each_entry_rcu(stit, &stt->iommu_tables, next) { + if (tbl != stit->tbl) + continue; + + if (!kref_get_unless_zero(&stit->kref)) { + /* stit is being destroyed */ + iommu_tce_table_put(tbl); + return -ENOTTY; + } + /* + * The table is already known to this KVM, we just increased + * its KVM reference counter and can return. + */ + return 0; + } + + stit = kzalloc(sizeof(*stit), GFP_KERNEL); + if (!stit) { + iommu_tce_table_put(tbl); + return -ENOMEM; + } + + stit->tbl = tbl; + kref_init(&stit->kref); + + list_add_rcu(&stit->next, &stt->iommu_tables); + + return 0; +} + static void release_spapr_tce_table(struct rcu_head *head) { struct kvmppc_spapr_tce_table *stt = container_of(head, @@ -130,9 +264,18 @@ static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma) static int kvm_spapr_tce_release(struct inode *inode, struct file *filp) { struct kvmppc_spapr_tce_table *stt = filp->private_data; + struct kvmppc_spapr_tce_iommu_table *stit, *tmp; list_del_rcu(&stt->list); + list_for_each_entry_safe(stit, tmp, &stt->iommu_tables, next) { + WARN_ON(!kref_read(&stit->kref)); + while (1) { + if (kref_put(&stit->kref, kvm_spapr_tce_liobn_put)) + break; + } + } + kvm_put_kvm(stt->kvm); kvmppc_account_memlimit( @@ -183,6 +326,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, stt->offset = args->offset; stt->size = size; stt->kvm = kvm; + INIT_LIST_HEAD_RCU(&stt->iommu_tables); for (i = 0; i < npages; i++) { stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO); @@ -211,11 +355,101 @@ fail: return ret; } +static void kvmppc_clear_tce(struct iommu_table *tbl, unsigned long entry) +{ + unsigned 
long hpa = 0; + enum dma_data_direction dir = DMA_NONE; + + iommu_tce_xchg(tbl, entry, &hpa, &dir); +} + +static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm, + struct iommu_table *tbl, unsigned long entry) +{ + struct mm_iommu_table_group_mem_t *mem = NULL; + const unsigned long pgsize = 1ULL << tbl->it_page_shift; + unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry); + + if (!pua) + /* it_userspace allocation might be delayed */ + return H_TOO_HARD; + + mem = mm_iommu_lookup(kvm->mm, *pua, pgsize); + if (!mem) + return H_TOO_HARD; + + mm_iommu_mapped_dec(mem); + + *pua = 0; + + return H_SUCCESS; +} + +static long kvmppc_tce_iommu_unmap(struct kvm *kvm, + struct iommu_table *tbl, unsigned long entry) +{ + enum dma_data_direction dir = DMA_NONE; + unsigned long hpa = 0; + long ret; + + if (WARN_ON_ONCE(iommu_tce_xchg(tbl, entry, &hpa, &dir))) + return H_HARDWARE; + + if (dir == DMA_NONE) + return H_SUCCESS; + + ret = kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry); + if (ret != H_SUCCESS) + iommu_tce_xchg(tbl, entry, &hpa, &dir); + + return ret; +} + +long kvmppc_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl, + unsigned long entry, unsigned long ua, + enum dma_data_direction dir) +{ + long ret; + unsigned long hpa, *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry); + struct mm_iommu_table_group_mem_t *mem; + + if (!pua) + /* it_userspace allocation might be delayed */ + return H_TOO_HARD; + + mem = mm_iommu_lookup(kvm->mm, ua, 1ULL << tbl->it_page_shift); + if (!mem) + /* This only handles v2 IOMMU type, v1 is handled via ioctl() */ + return H_TOO_HARD; + + if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, &hpa))) + return H_HARDWARE; + + if (mm_iommu_mapped_inc(mem)) + return H_CLOSED; + + ret = iommu_tce_xchg(tbl, entry, &hpa, &dir); + if (WARN_ON_ONCE(ret)) { + mm_iommu_mapped_dec(mem); + return H_HARDWARE; + } + + if (dir != DMA_NONE) + kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry); + + *pua = ua; + + return 0; +} + long kvmppc_h_put_tce(struct 
kvm_vcpu *vcpu, unsigned long liobn, unsigned long ioba, unsigned long tce) { struct kvmppc_spapr_tce_table *stt; - long ret; + long ret, idx; + struct kvmppc_spapr_tce_iommu_table *stit; + unsigned long entry, ua = 0; + enum dma_data_direction dir; /* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */ /* liobn, ioba, tce); */ @@ -232,7 +466,35 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, if (ret != H_SUCCESS) return ret; - kvmppc_tce_put(stt, ioba >> stt->page_shift, tce); + dir = iommu_tce_direction(tce); + if ((dir != DMA_NONE) && kvmppc_gpa_to_ua(vcpu->kvm, + tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), &ua, NULL)) + return H_PARAMETER; + + entry = ioba >> stt->page_shift; + + list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { + if (dir == DMA_NONE) { + ret = kvmppc_tce_iommu_unmap(vcpu->kvm, + stit->tbl, entry); + } else { + idx = srcu_read_lock(&vcpu->kvm->srcu); + ret = kvmppc_tce_iommu_map(vcpu->kvm, stit->tbl, + entry, ua, dir); + srcu_read_unlock(&vcpu->kvm->srcu, idx); + } + + if (ret == H_SUCCESS) + continue; + + if (ret == H_TOO_HARD) + return ret; + + WARN_ON_ONCE(1); + kvmppc_clear_tce(stit->tbl, entry); + } + + kvmppc_tce_put(stt, entry, tce); return H_SUCCESS; } @@ -247,6 +509,7 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, unsigned long entry, ua = 0; u64 __user *tces; u64 tce; + struct kvmppc_spapr_tce_iommu_table *stit; stt = kvmppc_find_table(vcpu->kvm, liobn); if (!stt) @@ -285,6 +548,26 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, if (ret != H_SUCCESS) goto unlock_exit; + if (kvmppc_gpa_to_ua(vcpu->kvm, + tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), + &ua, NULL)) + return H_PARAMETER; + + list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { + ret = kvmppc_tce_iommu_map(vcpu->kvm, + stit->tbl, entry + i, ua, + iommu_tce_direction(tce)); + + if (ret == H_SUCCESS) + continue; + + if (ret == H_TOO_HARD) + goto unlock_exit; + + WARN_ON_ONCE(1); + kvmppc_clear_tce(stit->tbl, 
entry); + } + kvmppc_tce_put(stt, entry + i, tce); } @@ -301,6 +584,7 @@ long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu, { struct kvmppc_spapr_tce_table *stt; long i, ret; + struct kvmppc_spapr_tce_iommu_table *stit; stt = kvmppc_find_table(vcpu->kvm, liobn); if (!stt) @@ -314,6 +598,24 @@ long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu, if (tce_value & (TCE_PCI_WRITE | TCE_PCI_READ)) return H_PARAMETER; + list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { + unsigned long entry = ioba >> stit->tbl->it_page_shift; + + for (i = 0; i < npages; ++i) { + ret = kvmppc_tce_iommu_unmap(vcpu->kvm, + stit->tbl, entry + i); + + if (ret == H_SUCCESS) + continue; + + if (ret == H_TOO_HARD) + return ret; + + WARN_ON_ONCE(1); + kvmppc_clear_tce(stit->tbl, entry); + } + } + for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift)) kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value); diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index 440d3ab5dc32..eda0a8f6fae8 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -40,6 +40,31 @@ #include #include +#ifdef CONFIG_BUG + +#define WARN_ON_ONCE_RM(condition) ({ \ + static bool __section(.data.unlikely) __warned; \ + int __ret_warn_once = !!(condition); \ + \ + if (unlikely(__ret_warn_once && !__warned)) { \ + __warned = true; \ + pr_err("WARN_ON_ONCE_RM: (%s) at %s:%u\n", \ + __stringify(condition), \ + __func__, __LINE__); \ + dump_stack(); \ + } \ + unlikely(__ret_warn_once); \ +}) + +#else + +#define WARN_ON_ONCE_RM(condition) ({ \ + int __ret_warn_on = !!(condition); \ + unlikely(__ret_warn_on); \ +}) + +#endif + #define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64)) /* @@ -161,11 +186,117 @@ long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa, EXPORT_SYMBOL_GPL(kvmppc_gpa_to_ua); #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +static void kvmppc_rm_clear_tce(struct iommu_table *tbl, unsigned long entry) +{ + unsigned long hpa = 0; + enum 
dma_data_direction dir = DMA_NONE; + + iommu_tce_xchg_rm(tbl, entry, &hpa, &dir); +} + +static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm, + struct iommu_table *tbl, unsigned long entry) +{ + struct mm_iommu_table_group_mem_t *mem = NULL; + const unsigned long pgsize = 1ULL << tbl->it_page_shift; + unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry); + + if (!pua) + /* it_userspace allocation might be delayed */ + return H_TOO_HARD; + + pua = (void *) vmalloc_to_phys(pua); + if (WARN_ON_ONCE_RM(!pua)) + return H_HARDWARE; + + mem = mm_iommu_lookup_rm(kvm->mm, *pua, pgsize); + if (!mem) + return H_TOO_HARD; + + mm_iommu_mapped_dec(mem); + + *pua = 0; + + return H_SUCCESS; +} + +static long kvmppc_rm_tce_iommu_unmap(struct kvm *kvm, + struct iommu_table *tbl, unsigned long entry) +{ + enum dma_data_direction dir = DMA_NONE; + unsigned long hpa = 0; + long ret; + + if (iommu_tce_xchg_rm(tbl, entry, &hpa, &dir)) + /* + * real mode xchg can fail if struct page crosses + * a page boundary + */ + return H_TOO_HARD; + + if (dir == DMA_NONE) + return H_SUCCESS; + + ret = kvmppc_rm_tce_iommu_mapped_dec(kvm, tbl, entry); + if (ret) + iommu_tce_xchg_rm(tbl, entry, &hpa, &dir); + + return ret; +} + +static long kvmppc_rm_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl, + unsigned long entry, unsigned long ua, + enum dma_data_direction dir) +{ + long ret; + unsigned long hpa = 0; + unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry); + struct mm_iommu_table_group_mem_t *mem; + + if (!pua) + /* it_userspace allocation might be delayed */ + return H_TOO_HARD; + + mem = mm_iommu_lookup_rm(kvm->mm, ua, 1ULL << tbl->it_page_shift); + if (!mem) + return H_TOO_HARD; + + if (WARN_ON_ONCE_RM(mm_iommu_ua_to_hpa_rm(mem, ua, &hpa))) + return H_HARDWARE; + + pua = (void *) vmalloc_to_phys(pua); + if (WARN_ON_ONCE_RM(!pua)) + return H_HARDWARE; + + if (WARN_ON_ONCE_RM(mm_iommu_mapped_inc(mem))) + return H_CLOSED; + + ret = iommu_tce_xchg_rm(tbl, entry, &hpa, 
&dir); + if (ret) { + mm_iommu_mapped_dec(mem); + /* + * real mode xchg can fail if struct page crosses + * a page boundary + */ + return H_TOO_HARD; + } + + if (dir != DMA_NONE) + kvmppc_rm_tce_iommu_mapped_dec(kvm, tbl, entry); + + *pua = ua; + + return 0; +} + long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, unsigned long ioba, unsigned long tce) { struct kvmppc_spapr_tce_table *stt; long ret; + struct kvmppc_spapr_tce_iommu_table *stit; + unsigned long entry, ua = 0; + enum dma_data_direction dir; /* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */ /* liobn, ioba, tce); */ @@ -182,7 +313,32 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, if (ret != H_SUCCESS) return ret; - kvmppc_tce_put(stt, ioba >> stt->page_shift, tce); + dir = iommu_tce_direction(tce); + if ((dir != DMA_NONE) && kvmppc_gpa_to_ua(vcpu->kvm, + tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), &ua, NULL)) + return H_PARAMETER; + + entry = ioba >> stt->page_shift; + + list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { + if (dir == DMA_NONE) + ret = kvmppc_rm_tce_iommu_unmap(vcpu->kvm, + stit->tbl, entry); + else + ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, + stit->tbl, entry, ua, dir); + + if (ret == H_SUCCESS) + continue; + + if (ret == H_TOO_HARD) + return ret; + + WARN_ON_ONCE_RM(1); + kvmppc_rm_clear_tce(stit->tbl, entry); + } + + kvmppc_tce_put(stt, entry, tce); return H_SUCCESS; } @@ -223,6 +379,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, unsigned long tces, entry, ua = 0; unsigned long *rmap = NULL; bool prereg = false; + struct kvmppc_spapr_tce_iommu_table *stit; stt = kvmppc_find_table(vcpu->kvm, liobn); if (!stt) @@ -270,6 +427,8 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, return H_TOO_HARD; rmap = (void *) vmalloc_to_phys(rmap); + if (WARN_ON_ONCE_RM(!rmap)) + return H_HARDWARE; /* * Synchronize with the MMU notifier callbacks in @@ -293,6 +452,27 @@ long kvmppc_rm_h_put_tce_indirect(struct 
kvm_vcpu *vcpu, if (ret != H_SUCCESS) goto unlock_exit; + ua = 0; + if (kvmppc_gpa_to_ua(vcpu->kvm, + tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), + &ua, NULL)) + return H_PARAMETER; + + list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { + ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, + stit->tbl, entry + i, ua, + iommu_tce_direction(tce)); + + if (ret == H_SUCCESS) + continue; + + if (ret == H_TOO_HARD) + goto unlock_exit; + + WARN_ON_ONCE_RM(1); + kvmppc_rm_clear_tce(stit->tbl, entry); + } + kvmppc_tce_put(stt, entry + i, tce); } @@ -309,6 +489,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu, { struct kvmppc_spapr_tce_table *stt; long i, ret; + struct kvmppc_spapr_tce_iommu_table *stit; stt = kvmppc_find_table(vcpu->kvm, liobn); if (!stt) @@ -322,6 +503,24 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu, if (tce_value & (TCE_PCI_WRITE | TCE_PCI_READ)) return H_PARAMETER; + list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { + unsigned long entry = ioba >> stit->tbl->it_page_shift; + + for (i = 0; i < npages; ++i) { + ret = kvmppc_rm_tce_iommu_unmap(vcpu->kvm, + stit->tbl, entry + i); + + if (ret == H_SUCCESS) + continue; + + if (ret == H_TOO_HARD) + return ret; + + WARN_ON_ONCE_RM(1); + kvmppc_rm_clear_tce(stit->tbl, entry); + } + } + for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift)) kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 6c7244879bfe..cf725c580fc5 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -534,6 +534,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) #ifdef CONFIG_PPC_BOOK3S_64 case KVM_CAP_SPAPR_TCE: case KVM_CAP_SPAPR_TCE_64: + /* fallthrough */ + case KVM_CAP_SPAPR_TCE_VFIO: case KVM_CAP_PPC_RTAS: case KVM_CAP_PPC_FIXUP_HCALL: case KVM_CAP_PPC_ENABLE_HCALL: diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 7b488eae61b8..3c168b6fd74b 100644 --- 
a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1097,6 +1097,7 @@ struct kvm_device_attr { #define KVM_DEV_VFIO_GROUP 1 #define KVM_DEV_VFIO_GROUP_ADD 1 #define KVM_DEV_VFIO_GROUP_DEL 2 +#define KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE 3 enum kvm_device_type { KVM_DEV_TYPE_FSL_MPIC_20 = 1, @@ -1118,6 +1119,11 @@ enum kvm_device_type { KVM_DEV_TYPE_MAX, }; +struct kvm_vfio_spapr_tce { + __s32 groupfd; + __s32 tablefd; +}; + /* * ioctls for VM fds */ diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c index d32f239eb471..37d9118fd84b 100644 --- a/virt/kvm/vfio.c +++ b/virt/kvm/vfio.c @@ -20,6 +20,10 @@ #include #include "vfio.h" +#ifdef CONFIG_SPAPR_TCE_IOMMU +#include +#endif + struct kvm_vfio_group { struct list_head node; struct vfio_group *vfio_group; @@ -89,6 +93,47 @@ static bool kvm_vfio_group_is_coherent(struct vfio_group *vfio_group) return ret > 0; } +#ifdef CONFIG_SPAPR_TCE_IOMMU +static int kvm_vfio_external_user_iommu_id(struct vfio_group *vfio_group) +{ + int (*fn)(struct vfio_group *); + int ret = -EINVAL; + + fn = symbol_get(vfio_external_user_iommu_id); + if (!fn) + return ret; + + ret = fn(vfio_group); + + symbol_put(vfio_external_user_iommu_id); + + return ret; +} + +static struct iommu_group *kvm_vfio_group_get_iommu_group( + struct vfio_group *group) +{ + int group_id = kvm_vfio_external_user_iommu_id(group); + + if (group_id < 0) + return NULL; + + return iommu_group_get_by_id(group_id); +} + +static void kvm_spapr_tce_release_vfio_group(struct kvm *kvm, + struct vfio_group *vfio_group) +{ + struct iommu_group *grp = kvm_vfio_group_get_iommu_group(vfio_group); + + if (WARN_ON_ONCE(!grp)) + return; + + kvm_spapr_tce_release_iommu_group(kvm, grp); + iommu_group_put(grp); +} +#endif + /* * Groups can use the same or different IOMMU domains. 
If the same then * adding a new group may change the coherency of groups we've previously @@ -211,6 +256,9 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) mutex_unlock(&kv->lock); +#ifdef CONFIG_SPAPR_TCE_IOMMU + kvm_spapr_tce_release_vfio_group(dev->kvm, vfio_group); +#endif kvm_vfio_group_set_kvm(vfio_group, NULL); kvm_vfio_group_put_external_user(vfio_group); @@ -218,6 +266,57 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) kvm_vfio_update_coherency(dev); return ret; + +#ifdef CONFIG_SPAPR_TCE_IOMMU + case KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE: { + struct kvm_vfio_spapr_tce param; + struct kvm_vfio *kv = dev->private; + struct vfio_group *vfio_group; + struct kvm_vfio_group *kvg; + struct fd f; + struct iommu_group *grp; + + if (copy_from_user(&param, (void __user *)arg, + sizeof(struct kvm_vfio_spapr_tce))) + return -EFAULT; + + f = fdget(param.groupfd); + if (!f.file) + return -EBADF; + + vfio_group = kvm_vfio_group_get_external_user(f.file); + fdput(f); + + if (IS_ERR(vfio_group)) + return PTR_ERR(vfio_group); + + grp = kvm_vfio_group_get_iommu_group(vfio_group); + if (WARN_ON_ONCE(!grp)) { + kvm_vfio_group_put_external_user(vfio_group); + return -EIO; + } + + ret = -ENOENT; + + mutex_lock(&kv->lock); + + list_for_each_entry(kvg, &kv->group_list, node) { + if (kvg->vfio_group != vfio_group) + continue; + + ret = kvm_spapr_tce_attach_iommu_group(dev->kvm, + param.tablefd, grp); + break; + } + + mutex_unlock(&kv->lock); + + iommu_group_put(grp); + kvm_vfio_group_put_external_user(vfio_group); + + return ret; + } +#endif /* CONFIG_SPAPR_TCE_IOMMU */ } return -ENXIO; @@ -242,6 +341,9 @@ static int kvm_vfio_has_attr(struct kvm_device *dev, switch (attr->attr) { case KVM_DEV_VFIO_GROUP_ADD: case KVM_DEV_VFIO_GROUP_DEL: +#ifdef CONFIG_SPAPR_TCE_IOMMU + case KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE: +#endif return 0; } @@ -257,6 +359,9 @@ static void kvm_vfio_destroy(struct kvm_device *dev) struct kvm_vfio_group *kvg, *tmp; 
list_for_each_entry_safe(kvg, tmp, &kv->group_list, node) { +#ifdef CONFIG_SPAPR_TCE_IOMMU + kvm_spapr_tce_release_vfio_group(dev->kvm, kvg->vfio_group); +#endif kvm_vfio_group_set_kvm(kvg->vfio_group, NULL); kvm_vfio_group_put_external_user(kvg->vfio_group); list_del(&kvg->node); -- cgit v1.2.3-71-gd317 From f6ab4d59a5fe205f12287e515a43fc5e6de779b1 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 8 Apr 2017 19:51:15 -0400 Subject: nubus: Add MVC and VSC video card definitions Also move the NUBUS_DRHW_APPLE_JET definition, for numerical order. Signed-off-by: Finn Thain Signed-off-by: Geert Uytterhoeven --- include/uapi/linux/nubus.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/nubus.h b/include/uapi/linux/nubus.h index 77513d2b5638..ac516064f0ee 100644 --- a/include/uapi/linux/nubus.h +++ b/include/uapi/linux/nubus.h @@ -113,13 +113,15 @@ enum nubus_drhw { NUBUS_DRHW_SIGMA_CLRMAX = 0x0007, /* Sigma Design ColorMax */ NUBUS_DRHW_APPLE_SE30 = 0x0009, /* Apple SE/30 video */ NUBUS_DRHW_APPLE_HRVC = 0x0013, /* Mac II High-Res Video Card */ + NUBUS_DRHW_APPLE_MVC = 0x0014, /* Mac II Monochrome Video Card */ NUBUS_DRHW_APPLE_PVC = 0x0017, /* Mac II Portrait Video Card */ NUBUS_DRHW_APPLE_RBV1 = 0x0018, /* IIci RBV video */ NUBUS_DRHW_APPLE_MDC = 0x0019, /* Macintosh Display Card */ + NUBUS_DRHW_APPLE_VSC = 0x0020, /* Duo MiniDock ViSC framebuffer */ NUBUS_DRHW_APPLE_SONORA = 0x0022, /* Sonora built-in video */ + NUBUS_DRHW_APPLE_JET = 0x0029, /* Jet framebuffer (DuoDock) */ NUBUS_DRHW_APPLE_24AC = 0x002b, /* Mac 24AC Video Card */ NUBUS_DRHW_APPLE_VALKYRIE = 0x002e, - NUBUS_DRHW_APPLE_JET = 0x0029, /* Jet framebuffer (DuoDock) */ NUBUS_DRHW_SMAC_GFX = 0x0105, /* SuperMac GFX */ NUBUS_DRHW_RASTER_CB264 = 0x013B, /* RasterOps ColorBoard 264 */ NUBUS_DRHW_MICRON_XCEED = 0x0146, /* Micron Exceed color */ -- cgit v1.2.3-71-gd317 From fc6d2a3ca59d5656d5b0ac3b25ecf493e4614abd Mon Sep 17 00:00:00 2001 
From: Artur Paszkiewicz Date: Wed, 19 Apr 2017 10:48:06 +0200 Subject: uapi: fix linux/raid/md_p.h userspace compilation error Use __le32 and __le64 instead of u32 and u64. This fixes klibc build error: In file included from /klibc/usr/klibc/../include/sys/md.h:30:0, from /klibc/usr/kinit/do_mounts_md.c:19: /linux-next/usr/include/linux/raid/md_p.h:414:51: error: 'u32' undeclared here (not in a function) (PPL_HEADER_SIZE - PPL_HDR_RESERVED - 4 * sizeof(u32) - sizeof(u64)) Reported-by: Greg Thelen Reported-by: Nigel Croxon Tested-by: Greg Thelen Signed-off-by: Artur Paszkiewicz Signed-off-by: Shaohua Li --- include/uapi/linux/raid/md_p.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h index d9a1ead867b9..d500bd224979 100644 --- a/include/uapi/linux/raid/md_p.h +++ b/include/uapi/linux/raid/md_p.h @@ -411,7 +411,7 @@ struct ppl_header_entry { #define PPL_HEADER_SIZE 4096 #define PPL_HDR_RESERVED 512 #define PPL_HDR_ENTRY_SPACE \ - (PPL_HEADER_SIZE - PPL_HDR_RESERVED - 4 * sizeof(u32) - sizeof(u64)) + (PPL_HEADER_SIZE - PPL_HDR_RESERVED - 4 * sizeof(__le32) - sizeof(__le64)) #define PPL_HDR_MAX_ENTRIES \ (PPL_HDR_ENTRY_SPACE / sizeof(struct ppl_header_entry)) -- cgit v1.2.3-71-gd317 From e525f8a6e696210d15f8b8277d4da12fc4add299 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 20 Apr 2017 13:54:11 +0200 Subject: s390/gs: add regset for the guarded storage broadcast control block The guarded storage interface allows to register a control block for each thread that is activated with the guarded storage broadcast event. To retrieve the complete state of a process from the kernel a register set for the stored broadcast control block is required. 
Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/ptrace.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/elf.h | 1 + 2 files changed, 47 insertions(+) (limited to 'include/uapi') diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index c933e255b5d5..488c5bb8dc77 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -1171,10 +1171,48 @@ static int s390_gs_cb_set(struct task_struct *target, { struct gs_cb *data = target->thread.gs_cb; + if (!MACHINE_HAS_GS) + return -ENODEV; + if (!data) { + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + target->thread.gs_cb = data; + } + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, + data, 0, sizeof(struct gs_cb)); +} + +static int s390_gs_bc_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + struct gs_cb *data = target->thread.gs_bc_cb; + if (!MACHINE_HAS_GS) return -ENODEV; if (!data) return -ENODATA; + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + data, 0, sizeof(struct gs_cb)); +} + +static int s390_gs_bc_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct gs_cb *data = target->thread.gs_bc_cb; + + if (!MACHINE_HAS_GS) + return -ENODEV; + if (!data) { + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + target->thread.gs_bc_cb = data; + } return user_regset_copyin(&pos, &count, &kbuf, &ubuf, data, 0, sizeof(struct gs_cb)); } @@ -1244,6 +1282,14 @@ static const struct user_regset s390_regsets[] = { .get = s390_gs_cb_get, .set = s390_gs_cb_set, }, + { + .core_note_type = NT_S390_GS_BC, + .n = sizeof(struct gs_cb) / sizeof(__u64), + .size = sizeof(__u64), + .align = sizeof(__u64), + .get = s390_gs_bc_get, + .set = s390_gs_bc_set, + }, }; static const struct user_regset_view user_s390_view = 
{ diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index 8c6d3bdb9a00..176b6cb1008d 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -410,6 +410,7 @@ typedef struct elf64_shdr { #define NT_S390_VXRS_LOW 0x309 /* s390 vector registers 0-15 upper half */ #define NT_S390_VXRS_HIGH 0x30a /* s390 vector registers 16-31 */ #define NT_S390_GS_CB 0x30b /* s390 guarded storage registers */ +#define NT_S390_GS_BC 0x30c /* s390 guarded storage broadcast control block */ #define NT_ARM_VFP 0x400 /* ARM VFP/NEON registers */ #define NT_ARM_TLS 0x401 /* ARM TLS register */ #define NT_ARM_HW_BREAK 0x402 /* ARM hardware breakpoint registers */ -- cgit v1.2.3-71-gd317 From 668fffa3f838edfcb1679f842f7ef1afa61c3e9a Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 21 Apr 2017 12:27:17 +0200 Subject: kvm: better MWAIT emulation for guests Guests that are heavy on futexes end up IPI'ing each other a lot. That can lead to significant slowdowns and latency increase for those guests when running within KVM. If only a single guest is needed on a host, we have a lot of spare host CPU time we can throw at the problem. Modern CPUs implement a feature called "MWAIT" which allows guests to wake up sleeping remote CPUs without an IPI - thus without an exit - at the expense of never going out of guest context. The decision whether this is something sensible to use should be up to the VM admin, so to user space. We can however allow MWAIT execution on systems that support it properly hardware wise. This patch adds a CAP to user space and a KVM cpuid leaf to indicate availability of native MWAIT execution. With that enabled, the worst a guest can do is waste as many cycles as a "jmp ." would do, so it's not a privilege problem. We consciously do *not* expose the feature in our CPUID bitmap, as most people will want to benefit from sleeping vCPUs to allow for over commit. Reported-by: "Gabriel L. Somlo" Signed-off-by: Michael S. 
Tsirkin [agraf: fix amd, change commit message] Signed-off-by: Alexander Graf Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/api.txt | 9 +++++++++ arch/x86/kvm/svm.c | 7 +++++-- arch/x86/kvm/vmx.c | 6 ++++-- arch/x86/kvm/x86.c | 3 +++ arch/x86/kvm/x86.h | 36 ++++++++++++++++++++++++++++++++++++ include/uapi/linux/kvm.h | 1 + 6 files changed, 58 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index e60be91d8036..dc674c2b8b31 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -4111,3 +4111,12 @@ reserved. 2: MIPS64 or microMIPS64 with access to all address segments. Both registers and addresses are 64-bits wide. It will be possible to run 64-bit or 32-bit guest code. + +8.8 KVM_CAP_X86_GUEST_MWAIT + +Architectures: x86 + +This capability indicates that guest using memory monitoring instructions +(MWAIT/MWAITX) to stop the virtual CPU will not cause a VM exit. As such time +spent while virtual CPU is halted in this way will then be accounted for as +guest running time on the host (as opposed to e.g. HLT). 
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 1b203abf76e1..c41f03e5090a 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1198,10 +1198,13 @@ static void init_vmcb(struct vcpu_svm *svm) set_intercept(svm, INTERCEPT_CLGI); set_intercept(svm, INTERCEPT_SKINIT); set_intercept(svm, INTERCEPT_WBINVD); - set_intercept(svm, INTERCEPT_MONITOR); - set_intercept(svm, INTERCEPT_MWAIT); set_intercept(svm, INTERCEPT_XSETBV); + if (!kvm_mwait_in_guest()) { + set_intercept(svm, INTERCEPT_MONITOR); + set_intercept(svm, INTERCEPT_MWAIT); + } + control->iopm_base_pa = iopm_base; control->msrpm_base_pa = __pa(svm->msrpm); control->int_ctl = V_INTR_MASKING_MASK; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c1a12b94e1fd..a4ef63718101 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3527,11 +3527,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MOV_DR_EXITING | CPU_BASED_USE_TSC_OFFSETING | - CPU_BASED_MWAIT_EXITING | - CPU_BASED_MONITOR_EXITING | CPU_BASED_INVLPG_EXITING | CPU_BASED_RDPMC_EXITING; + if (!kvm_mwait_in_guest()) + min |= CPU_BASED_MWAIT_EXITING | + CPU_BASED_MONITOR_EXITING; + opt = CPU_BASED_TPR_SHADOW | CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 49a69c0a0d50..2f9fe6bf7091 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2687,6 +2687,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_ADJUST_CLOCK: r = KVM_CLOCK_TSC_STABLE; break; + case KVM_CAP_X86_GUEST_MWAIT: + r = kvm_mwait_in_guest(); + break; case KVM_CAP_X86_SMM: /* SMBASE is usually relocated above 1M on modern chipsets, * and SMM handlers might indeed rely on 4G segment limits, diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index e8ff3e4ce38a..612067074905 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -1,6 +1,8 @@ #ifndef ARCH_X86_KVM_X86_H #define 
ARCH_X86_KVM_X86_H +#include +#include #include #include #include "kvm_cache_regs.h" @@ -212,4 +214,38 @@ static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec) __rem; \ }) +static inline bool kvm_mwait_in_guest(void) +{ + unsigned int eax, ebx, ecx, edx; + + if (!cpu_has(&boot_cpu_data, X86_FEATURE_MWAIT)) + return false; + + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_AMD: + /* All AMD CPUs have a working MWAIT implementation */ + return true; + case X86_VENDOR_INTEL: + /* Handle Intel below */ + break; + default: + return false; + } + + /* + * Intel CPUs without CPUID5_ECX_INTERRUPT_BREAK are problematic as + * they would allow guest to stop the CPU completely by disabling + * interrupts then invoking MWAIT. + */ + if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) + return false; + + cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx); + + if (!(ecx & CPUID5_ECX_INTERRUPT_BREAK)) + return false; + + return true; +} + #endif diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 3c168b6fd74b..e43906b95d9f 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -893,6 +893,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_S390_GS 140 #define KVM_CAP_S390_AIS 141 #define KVM_CAP_SPAPR_TCE_VFIO 142 +#define KVM_CAP_X86_GUEST_MWAIT 143 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3-71-gd317 From 483a3966b570529a910dc2a02deac0036e642309 Mon Sep 17 00:00:00 2001 From: Slava Shwartsman Date: Mon, 3 Apr 2017 13:13:51 +0300 Subject: IB/core: Introduce drop flow specification This flow steering specification identifies flow for drop by the HW. If user create a flow only with the drop specification, then all the packets that hit this flow will be dropped, otherwise the HW will drop only the packets that match the other L2/L3/L4 specifications. 
Signed-off-by: Slava Shwartsman Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs.h | 1 + drivers/infiniband/core/uverbs_cmd.c | 7 +++++++ include/rdma/ib_verbs.h | 7 +++++++ include/uapi/rdma/ib_user_verbs.h | 11 +++++++++++ 4 files changed, 26 insertions(+) (limited to 'include/uapi') diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index a3230b6ab766..64d494a64daf 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -233,6 +233,7 @@ struct ib_uverbs_flow_spec { struct ib_uverbs_flow_spec_tcp_udp tcp_udp; struct ib_uverbs_flow_spec_ipv6 ipv6; struct ib_uverbs_flow_spec_action_tag flow_tag; + struct ib_uverbs_flow_spec_action_drop drop; }; }; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index e2fee045f03b..562184ed23c2 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -2689,6 +2689,13 @@ static int kern_spec_to_ib_spec_action(struct ib_uverbs_flow_spec *kern_spec, ib_spec->flow_tag.size = sizeof(struct ib_flow_spec_action_tag); ib_spec->flow_tag.tag_id = kern_spec->flow_tag.tag_id; break; + case IB_FLOW_SPEC_ACTION_DROP: + if (kern_spec->drop.size != + sizeof(struct ib_uverbs_flow_spec_action_drop)) + return -EINVAL; + + ib_spec->drop.size = sizeof(struct ib_flow_spec_action_drop); + break; default: return -EINVAL; } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 16f15ea8606e..56fa31e1948a 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1679,6 +1679,7 @@ enum ib_flow_spec_type { IB_FLOW_SPEC_INNER = 0x100, /* Actions */ IB_FLOW_SPEC_ACTION_TAG = 0x1000, + IB_FLOW_SPEC_ACTION_DROP = 0x1001, }; #define IB_FLOW_SPEC_LAYER_MASK 0xF0 #define IB_FLOW_SPEC_SUPPORT_LAYERS 8 @@ -1807,6 +1808,11 @@ struct ib_flow_spec_action_tag { u32 tag_id; }; +struct ib_flow_spec_action_drop { + enum ib_flow_spec_type 
type; + u16 size; +}; + union ib_flow_spec { struct { u32 type; @@ -1819,6 +1825,7 @@ union ib_flow_spec { struct ib_flow_spec_ipv6 ipv6; struct ib_flow_spec_tunnel tunnel; struct ib_flow_spec_action_tag flow_tag; + struct ib_flow_spec_action_drop drop; }; struct ib_flow_attr { diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 997f904c7692..477d629f539d 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -947,6 +947,17 @@ struct ib_uverbs_flow_spec_action_tag { __u32 reserved1; }; +struct ib_uverbs_flow_spec_action_drop { + union { + struct ib_uverbs_flow_spec_hdr hdr; + struct { + __u32 type; + __u16 size; + __u16 reserved; + }; + }; +}; + struct ib_uverbs_flow_tunnel_filter { __be32 tunnel_id; }; -- cgit v1.2.3-71-gd317 From 0a473b82cb23e7a35c4be6e9765c8487a65e8f55 Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Wed, 19 Apr 2017 12:30:53 -0400 Subject: ip6_tunnel: Allow policy-based routing through tunnels This feature allows the administrator to set an fwmark for packets traversing a tunnel. This allows the use of independent routing tables for tunneled packets without the use of iptables. Signed-off-by: Craig Gallek Signed-off-by: David S. 
Miller --- include/net/ip6_tunnel.h | 2 ++ include/uapi/linux/if_tunnel.h | 3 +++ net/ipv6/ip6_gre.c | 14 +++++++++++++- net/ipv6/ip6_tunnel.c | 15 ++++++++++++++- net/ipv6/ip6_vti.c | 10 +++++++++- 5 files changed, 41 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 1b1cf33cbfb0..08fbc7f7d8d7 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -33,6 +33,8 @@ struct __ip6_tnl_parm { __be16 o_flags; __be32 i_key; __be32 o_key; + + __u32 fwmark; }; /* IPv6 tunnel */ diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index 92f3c8677523..6792d1967d31 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -75,6 +75,7 @@ enum { IFLA_IPTUN_ENCAP_SPORT, IFLA_IPTUN_ENCAP_DPORT, IFLA_IPTUN_COLLECT_METADATA, + IFLA_IPTUN_FWMARK, __IFLA_IPTUN_MAX, }; #define IFLA_IPTUN_MAX (__IFLA_IPTUN_MAX - 1) @@ -132,6 +133,7 @@ enum { IFLA_GRE_ENCAP_DPORT, IFLA_GRE_COLLECT_METADATA, IFLA_GRE_IGNORE_DF, + IFLA_GRE_FWMARK, __IFLA_GRE_MAX, }; @@ -147,6 +149,7 @@ enum { IFLA_VTI_OKEY, IFLA_VTI_LOCAL, IFLA_VTI_REMOTE, + IFLA_VTI_FWMARK, __IFLA_VTI_MAX, }; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 6fcb7cb49bb2..8d128ba79b66 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -544,6 +544,8 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev) & IPV6_TCLASS_MASK; if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; + else + fl6.flowi6_mark = t->parms.fwmark; fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); @@ -603,6 +605,8 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev) fl6.flowlabel |= ip6_flowlabel(ipv6h); if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; + else + fl6.flowi6_mark = t->parms.fwmark; fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); @@ -780,6 +784,7 @@ static int ip6gre_tnl_change(struct 
ip6_tnl *t, t->parms.o_key = p->o_key; t->parms.i_flags = p->i_flags; t->parms.o_flags = p->o_flags; + t->parms.fwmark = p->fwmark; dst_cache_reset(&t->dst_cache); ip6gre_tnl_link_config(t, set_mtu); return 0; @@ -1249,6 +1254,9 @@ static void ip6gre_netlink_parms(struct nlattr *data[], if (data[IFLA_GRE_FLAGS]) parms->flags = nla_get_u32(data[IFLA_GRE_FLAGS]); + + if (data[IFLA_GRE_FWMARK]) + parms->fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]); } static int ip6gre_tap_init(struct net_device *dev) @@ -1470,6 +1478,8 @@ static size_t ip6gre_get_size(const struct net_device *dev) nla_total_size(2) + /* IFLA_GRE_ENCAP_DPORT */ nla_total_size(2) + + /* IFLA_GRE_FWMARK */ + nla_total_size(4) + 0; } @@ -1490,7 +1500,8 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_u8(skb, IFLA_GRE_TTL, p->hop_limit) || nla_put_u8(skb, IFLA_GRE_ENCAP_LIMIT, p->encap_limit) || nla_put_be32(skb, IFLA_GRE_FLOWINFO, p->flowinfo) || - nla_put_u32(skb, IFLA_GRE_FLAGS, p->flags)) + nla_put_u32(skb, IFLA_GRE_FLAGS, p->flags) || + nla_put_u32(skb, IFLA_GRE_FWMARK, p->fwmark)) goto nla_put_failure; if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE, @@ -1525,6 +1536,7 @@ static const struct nla_policy ip6gre_policy[IFLA_GRE_MAX + 1] = { [IFLA_GRE_ENCAP_FLAGS] = { .type = NLA_U16 }, [IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 }, [IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 }, + [IFLA_GRE_FWMARK] = { .type = NLA_U32 }, }; static struct rtnl_link_ops ip6gre_link_ops __read_mostly = { diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 75fac933c209..ad15d38b41e8 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1256,6 +1256,8 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) & IPV6_TCLASS_MASK; if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; + else + fl6.flowi6_mark = t->parms.fwmark; } fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); @@ -1338,6 +1340,8 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct 
net_device *dev) fl6.flowlabel |= ip6_flowlabel(ipv6h); if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; + else + fl6.flowi6_mark = t->parms.fwmark; } fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); @@ -1467,6 +1471,7 @@ ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p) t->parms.flowinfo = p->flowinfo; t->parms.link = p->link; t->parms.proto = p->proto; + t->parms.fwmark = p->fwmark; dst_cache_reset(&t->dst_cache); ip6_tnl_link_config(t); return 0; @@ -1918,6 +1923,9 @@ static void ip6_tnl_netlink_parms(struct nlattr *data[], if (data[IFLA_IPTUN_COLLECT_METADATA]) parms->collect_md = true; + + if (data[IFLA_IPTUN_FWMARK]) + parms->fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]); } static bool ip6_tnl_netlink_encap_parms(struct nlattr *data[], @@ -2054,6 +2062,8 @@ static size_t ip6_tnl_get_size(const struct net_device *dev) nla_total_size(2) + /* IFLA_IPTUN_COLLECT_METADATA */ nla_total_size(0) + + /* IFLA_IPTUN_FWMARK */ + nla_total_size(4) + 0; } @@ -2069,7 +2079,8 @@ static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_u8(skb, IFLA_IPTUN_ENCAP_LIMIT, parm->encap_limit) || nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) || nla_put_u32(skb, IFLA_IPTUN_FLAGS, parm->flags) || - nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto)) + nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto) || + nla_put_u32(skb, IFLA_IPTUN_FWMARK, parm->fwmark)) goto nla_put_failure; if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE, tunnel->encap.type) || @@ -2081,6 +2092,7 @@ static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev) if (parm->collect_md) if (nla_put_flag(skb, IFLA_IPTUN_COLLECT_METADATA)) goto nla_put_failure; + return 0; nla_put_failure: @@ -2109,6 +2121,7 @@ static const struct nla_policy ip6_tnl_policy[IFLA_IPTUN_MAX + 1] = { [IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 }, [IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 }, [IFLA_IPTUN_COLLECT_METADATA] = { .type = NLA_FLAG }, + 
[IFLA_IPTUN_FWMARK] = { .type = NLA_U32 }, }; static struct rtnl_link_ops ip6_link_ops __read_mostly = { diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 3d8a3b63b4fd..d67ef56454b2 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -657,6 +657,7 @@ vti6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p) t->parms.i_key = p->i_key; t->parms.o_key = p->o_key; t->parms.proto = p->proto; + t->parms.fwmark = p->fwmark; dst_cache_reset(&t->dst_cache); vti6_link_config(t); return 0; @@ -933,6 +934,9 @@ static void vti6_netlink_parms(struct nlattr *data[], if (data[IFLA_VTI_OKEY]) parms->o_key = nla_get_be32(data[IFLA_VTI_OKEY]); + + if (data[IFLA_VTI_FWMARK]) + parms->fwmark = nla_get_u32(data[IFLA_VTI_FWMARK]); } static int vti6_newlink(struct net *src_net, struct net_device *dev, @@ -998,6 +1002,8 @@ static size_t vti6_get_size(const struct net_device *dev) nla_total_size(4) + /* IFLA_VTI_OKEY */ nla_total_size(4) + + /* IFLA_VTI_FWMARK */ + nla_total_size(4) + 0; } @@ -1010,7 +1016,8 @@ static int vti6_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_in6_addr(skb, IFLA_VTI_LOCAL, &parm->laddr) || nla_put_in6_addr(skb, IFLA_VTI_REMOTE, &parm->raddr) || nla_put_be32(skb, IFLA_VTI_IKEY, parm->i_key) || - nla_put_be32(skb, IFLA_VTI_OKEY, parm->o_key)) + nla_put_be32(skb, IFLA_VTI_OKEY, parm->o_key) || + nla_put_u32(skb, IFLA_VTI_FWMARK, parm->fwmark)) goto nla_put_failure; return 0; @@ -1024,6 +1031,7 @@ static const struct nla_policy vti6_policy[IFLA_VTI_MAX + 1] = { [IFLA_VTI_REMOTE] = { .len = sizeof(struct in6_addr) }, [IFLA_VTI_IKEY] = { .type = NLA_U32 }, [IFLA_VTI_OKEY] = { .type = NLA_U32 }, + [IFLA_VTI_FWMARK] = { .type = NLA_U32 }, }; static struct rtnl_link_ops vti6_link_ops __read_mostly = { -- cgit v1.2.3-71-gd317 From 7acf8a1e8a28b3d7407a8d8061a7d0766cfac2f4 Mon Sep 17 00:00:00 2001 From: Matthew Whitehead Date: Wed, 19 Apr 2017 12:37:10 -0400 Subject: Replace 2 jiffies with sysctl netdev_budget_usecs to enable 
softirq tuning Constants used for tuning are generally a bad idea, especially as hardware changes over time. Replace the constant 2 jiffies with sysctl variable netdev_budget_usecs to enable sysadmins to tune the softirq processing. Also document the variable. For example, a very fast machine might tune this to 1000 microseconds, while my regression testing 486DX-25 needs it to be 4000 microseconds on a nearly idle network to prevent time_squeeze from being incremented. Version 2: changed jiffies to microseconds for predictable units. Signed-off-by: Matthew Whitehead Signed-off-by: David S. Miller --- Documentation/sysctl/net.txt | 11 ++++++++++- include/linux/netdevice.h | 1 + include/uapi/linux/sysctl.h | 1 + kernel/sysctl_binary.c | 1 + net/core/dev.c | 4 +++- net/core/sysctl_net_core.c | 8 ++++++++ 6 files changed, 24 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt index 2ebabc93014a..14db18c970b1 100644 --- a/Documentation/sysctl/net.txt +++ b/Documentation/sysctl/net.txt @@ -188,7 +188,16 @@ netdev_budget Maximum number of packets taken from all interfaces in one polling cycle (NAPI poll). In one polling cycle interfaces which are registered to polling are -probed in a round-robin manner. +probed in a round-robin manner. Also, a polling cycle may not exceed +netdev_budget_usecs microseconds, even if netdev_budget has not been +exhausted. + +netdev_budget_usecs +--------------------- + +Maximum number of microseconds in one NAPI polling cycle. Polling +will exit when either netdev_budget_usecs have elapsed during the +poll cycle or the number of packets processed reaches netdev_budget. 
netdev_max_backlog ------------------ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0f3c38ce5417..c49cf21f2b31 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3296,6 +3296,7 @@ static __always_inline int ____dev_forward_skb(struct net_device *dev, void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev); extern int netdev_budget; +extern unsigned int netdev_budget_usecs; /* Called by rtnetlink.c:rtnl_unlock() */ void netdev_run_todo(void); diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h index e13d48058b8d..177f5f139b36 100644 --- a/include/uapi/linux/sysctl.h +++ b/include/uapi/linux/sysctl.h @@ -274,6 +274,7 @@ enum NET_CORE_AEVENT_ETIME=20, NET_CORE_AEVENT_RSEQTH=21, NET_CORE_WARNINGS=22, + NET_CORE_BUDGET_USECS=23, }; /* /proc/sys/net/ethernet */ diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index ece4b177052b..4ee3e49530d2 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c @@ -197,6 +197,7 @@ static const struct bin_table bin_net_core_table[] = { { CTL_INT, NET_CORE_AEVENT_ETIME, "xfrm_aevent_etime" }, { CTL_INT, NET_CORE_AEVENT_RSEQTH, "xfrm_aevent_rseqth" }, { CTL_INT, NET_CORE_WARNINGS, "warnings" }, + { CTL_INT, NET_CORE_BUDGET_USECS, "netdev_budget_usecs" }, {}, }; diff --git a/net/core/dev.c b/net/core/dev.c index 5d33e2baab2b..1c53c055b197 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3441,6 +3441,7 @@ EXPORT_SYMBOL(netdev_max_backlog); int netdev_tstamp_prequeue __read_mostly = 1; int netdev_budget __read_mostly = 300; +unsigned int __read_mostly netdev_budget_usecs = 2000; int weight_p __read_mostly = 64; /* old backlog weight */ int dev_weight_rx_bias __read_mostly = 1; /* bias for backlog weight */ int dev_weight_tx_bias __read_mostly = 1; /* bias for output_queue quota */ @@ -5307,7 +5308,8 @@ out_unlock: static __latent_entropy void net_rx_action(struct softirq_action *h) { struct softnet_data *sd = 
this_cpu_ptr(&softnet_data); - unsigned long time_limit = jiffies + 2; + unsigned long time_limit = jiffies + + usecs_to_jiffies(netdev_budget_usecs); int budget = netdev_budget; LIST_HEAD(list); LIST_HEAD(repoll); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 7f9cc400eca0..ea23254b2457 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -452,6 +452,14 @@ static struct ctl_table net_core_table[] = { .extra1 = &one, .extra2 = &max_skb_frags, }, + { + .procname = "netdev_budget_usecs", + .data = &netdev_budget_usecs, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + }, { } }; -- cgit v1.2.3-71-gd317 From b1d9fc41aab11f9520b2e0d57ae872e2ec5d6f32 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 19 Apr 2017 23:01:17 +0200 Subject: bpf: add napi_id read access to __sk_buff Add napi_id access to __sk_buff for socket filter program types, tc program types and other bpf_convert_ctx_access() users. Having access to skb->napi_id is useful for per RX queue listener siloing, f.e. in combination with SO_ATTACH_REUSEPORT_EBPF and when busy polling is used, meaning SO_REUSEPORT enabled listeners can then select the corresponding socket at SYN time already [1]. The skb is marked via skb_mark_napi_id() early in the receive path (e.g., napi_gro_receive()). Currently, sockets can only use SO_INCOMING_NAPI_ID from 6d4339028b35 ("net: Introduce SO_INCOMING_NAPI_ID") as a socket option to look up the NAPI ID associated with the queue for steering, which requires a prior sk_mark_napi_id() after the socket was looked up. Semantics for the __sk_buff napi_id access are similar, meaning if skb->napi_id is < MIN_NAPI_ID (e.g. outgoing packets using sender_cpu), then an invalid napi_id of 0 is returned to the program, otherwise a valid non-zero napi_id. 
[1] http://netdevconf.org/2.1/slides/apr6/dumazet-BUSY-POLLING-Netdev-2.1.pdf Suggested-by: Eric Dumazet Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 1 + net/core/filter.c | 14 ++++++++++++++ tools/include/uapi/linux/bpf.h | 1 + tools/testing/selftests/bpf/test_verifier.c | 3 +++ 4 files changed, 19 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 1e062bb54eec..e553529929f6 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -603,6 +603,7 @@ struct __sk_buff { __u32 tc_classid; __u32 data; __u32 data_end; + __u32 napi_id; }; struct bpf_tunnel_key { diff --git a/net/core/filter.c b/net/core/filter.c index 085925834727..9a37860a80fc 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -53,6 +53,7 @@ #include #include #include +#include /** * sk_filter_trim_cap - run a packet through a socket filter @@ -3201,6 +3202,19 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, *insn++ = BPF_MOV64_REG(si->dst_reg, si->dst_reg); else *insn++ = BPF_MOV64_IMM(si->dst_reg, 0); +#endif + break; + + case offsetof(struct __sk_buff, napi_id): +#if defined(CONFIG_NET_RX_BUSY_POLL) + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, napi_id) != 4); + + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, + offsetof(struct sk_buff, napi_id)); + *insn++ = BPF_JMP_IMM(BPF_JGE, si->dst_reg, MIN_NAPI_ID, 1); + *insn++ = BPF_MOV64_IMM(si->dst_reg, 0); +#else + *insn++ = BPF_MOV64_IMM(si->dst_reg, 0); #endif break; } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 1e062bb54eec..e553529929f6 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -603,6 +603,7 @@ struct __sk_buff { __u32 tc_classid; __u32 data; __u32 data_end; + __u32 napi_id; }; struct bpf_tunnel_key { diff --git a/tools/testing/selftests/bpf/test_verifier.c 
b/tools/testing/selftests/bpf/test_verifier.c index 6178b65fee59..95a8d5f3ab80 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -772,6 +772,9 @@ static struct bpf_test tests[] = { BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, vlan_tci)), BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, napi_id)), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0), BPF_EXIT_INSN(), }, .result = ACCEPT, -- cgit v1.2.3-71-gd317 From 557c44be917c322860665be3d28376afa84aa936 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 19 Apr 2017 14:19:43 -0700 Subject: net: ipv6: RTF_PCPU should not be settable from userspace Andrey reported a fault in the IPv6 route code: kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN Modules linked in: CPU: 1 PID: 4035 Comm: a.out Not tainted 4.11.0-rc7+ #250 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 task: ffff880069809600 task.stack: ffff880062dc8000 RIP: 0010:ip6_rt_cache_alloc+0xa6/0x560 net/ipv6/route.c:975 RSP: 0018:ffff880062dced30 EFLAGS: 00010206 RAX: dffffc0000000000 RBX: ffff8800670561c0 RCX: 0000000000000006 RDX: 0000000000000003 RSI: ffff880062dcfb28 RDI: 0000000000000018 RBP: ffff880062dced68 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: ffff880062dcfb28 R14: dffffc0000000000 R15: 0000000000000000 FS: 00007feebe37e7c0(0000) GS:ffff88006cb00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000205a0fe4 CR3: 000000006b5c9000 CR4: 00000000000006e0 Call Trace: ip6_pol_route+0x1512/0x1f20 net/ipv6/route.c:1128 ip6_pol_route_output+0x4c/0x60 net/ipv6/route.c:1212 ... Andrey's syzkaller program passes rtmsg.rtmsg_flags with the RTF_PCPU bit set. 
Flags passed to the kernel are blindly copied to the allocated rt6_info by ip6_route_info_create making a newly inserted route appear as though it is a per-cpu route. ip6_rt_cache_alloc sees the flag set and expects rt->dst.from to be set - which it is not since it is not really a per-cpu copy. The subsequent call to __ip6_dst_alloc then generates the fault. Fix by checking for the flag and failing with EINVAL. Fixes: d52d3997f843f ("ipv6: Create percpu rt6_info") Reported-by: Andrey Konovalov Signed-off-by: David Ahern Acked-by: Martin KaFai Lau Tested-by: Andrey Konovalov Signed-off-by: David S. Miller --- include/uapi/linux/ipv6_route.h | 2 +- net/ipv6/route.c | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/ipv6_route.h b/include/uapi/linux/ipv6_route.h index 85bbb1799df3..d496c02e14bc 100644 --- a/include/uapi/linux/ipv6_route.h +++ b/include/uapi/linux/ipv6_route.h @@ -35,7 +35,7 @@ #define RTF_PREF(pref) ((pref) << 27) #define RTF_PREF_MASK 0x18000000 -#define RTF_PCPU 0x40000000 +#define RTF_PCPU 0x40000000 /* read-only: can not be set by user */ #define RTF_LOCAL 0x80000000 diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 9db1418993f2..fb174b590fd3 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1854,6 +1854,10 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg) int addr_type; int err = -EINVAL; + /* RTF_PCPU is an internal flag; can not be set by userspace */ + if (cfg->fc_flags & RTF_PCPU) + goto out; + if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128) goto out; #ifndef CONFIG_IPV6_SUBTREES -- cgit v1.2.3-71-gd317 From 1f4407e2548827e3e6e7b943640a2da90c611306 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 21 Apr 2017 15:59:52 -0400 Subject: net: Remove NET_CORE_BUDGET_USECS from sysctl binary interface. We are not supposed to add new entries to this thing any more. Thanks to Eric Dumazet for noticing this. Signed-off-by: David S. 
Miller --- include/uapi/linux/sysctl.h | 1 - kernel/sysctl_binary.c | 1 - 2 files changed, 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h index 177f5f139b36..e13d48058b8d 100644 --- a/include/uapi/linux/sysctl.h +++ b/include/uapi/linux/sysctl.h @@ -274,7 +274,6 @@ enum NET_CORE_AEVENT_ETIME=20, NET_CORE_AEVENT_RSEQTH=21, NET_CORE_WARNINGS=22, - NET_CORE_BUDGET_USECS=23, }; /* /proc/sys/net/ethernet */ diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index 4ee3e49530d2..ece4b177052b 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c @@ -197,7 +197,6 @@ static const struct bin_table bin_net_core_table[] = { { CTL_INT, NET_CORE_AEVENT_ETIME, "xfrm_aevent_etime" }, { CTL_INT, NET_CORE_AEVENT_RSEQTH, "xfrm_aevent_rseqth" }, { CTL_INT, NET_CORE_WARNINGS, "warnings" }, - { CTL_INT, NET_CORE_BUDGET_USECS, "netdev_budget_usecs" }, {}, }; -- cgit v1.2.3-71-gd317 From f43e9b069aeaf0f3d51fa30ddc9c0003e86623b8 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Sun, 25 Sep 2016 13:52:44 +0300 Subject: net/devlink: Add E-Switch encapsulation control This is an e-switch global knob to enable HW support for applying encapsulation/decapsulation to VF traffic as part of SRIOV e-switch offloading. The actual encap/decap is carried out (along with the matching and other actions) per offloaded e-switch rules, e.g as done when offloading the TC tunnel key action. 
Signed-off-by: Roi Dayan Reviewed-by: Or Gerlitz Acked-by: Jiri Pirko Signed-off-by: Saeed Mahameed --- include/net/devlink.h | 2 ++ include/uapi/linux/devlink.h | 7 +++++++ net/core/devlink.c | 26 +++++++++++++++++++++++--- 3 files changed, 32 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/net/devlink.h b/include/net/devlink.h index 24de13f8c94f..ed7687bbf5d0 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -268,6 +268,8 @@ struct devlink_ops { int (*eswitch_mode_set)(struct devlink *devlink, u16 mode); int (*eswitch_inline_mode_get)(struct devlink *devlink, u8 *p_inline_mode); int (*eswitch_inline_mode_set)(struct devlink *devlink, u8 inline_mode); + int (*eswitch_encap_mode_get)(struct devlink *devlink, u8 *p_encap_mode); + int (*eswitch_encap_mode_set)(struct devlink *devlink, u8 encap_mode); }; static inline void *devlink_priv(struct devlink *devlink) diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index b47bee277347..b0e807ac53bb 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -119,6 +119,11 @@ enum devlink_eswitch_inline_mode { DEVLINK_ESWITCH_INLINE_MODE_TRANSPORT, }; +enum devlink_eswitch_encap_mode { + DEVLINK_ESWITCH_ENCAP_MODE_NONE, + DEVLINK_ESWITCH_ENCAP_MODE_BASIC, +}; + enum devlink_attr { /* don't change the order or add anything between, this is ABI! 
*/ DEVLINK_ATTR_UNSPEC, @@ -195,6 +200,8 @@ enum devlink_attr { DEVLINK_ATTR_PAD, + DEVLINK_ATTR_ESWITCH_ENCAP_MODE, /* u8 */ + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, diff --git a/net/core/devlink.c b/net/core/devlink.c index 0afac5800b57..b0b87a292e7c 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -1397,10 +1397,10 @@ static int devlink_nl_eswitch_fill(struct sk_buff *msg, struct devlink *devlink, u32 seq, int flags) { const struct devlink_ops *ops = devlink->ops; + u8 inline_mode, encap_mode; void *hdr; int err = 0; u16 mode; - u8 inline_mode; hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); if (!hdr) @@ -1429,6 +1429,15 @@ static int devlink_nl_eswitch_fill(struct sk_buff *msg, struct devlink *devlink, goto nla_put_failure; } + if (ops->eswitch_encap_mode_get) { + err = ops->eswitch_encap_mode_get(devlink, &encap_mode); + if (err) + goto nla_put_failure; + err = nla_put_u8(msg, DEVLINK_ATTR_ESWITCH_ENCAP_MODE, encap_mode); + if (err) + goto nla_put_failure; + } + genlmsg_end(msg, hdr); return 0; @@ -1468,9 +1477,9 @@ static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb, { struct devlink *devlink = info->user_ptr[0]; const struct devlink_ops *ops = devlink->ops; - u16 mode; - u8 inline_mode; + u8 inline_mode, encap_mode; int err = 0; + u16 mode; if (!ops) return -EOPNOTSUPP; @@ -1493,6 +1502,16 @@ static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb, if (err) return err; } + + if (info->attrs[DEVLINK_ATTR_ESWITCH_ENCAP_MODE]) { + if (!ops->eswitch_encap_mode_set) + return -EOPNOTSUPP; + encap_mode = nla_get_u8(info->attrs[DEVLINK_ATTR_ESWITCH_ENCAP_MODE]); + err = ops->eswitch_encap_mode_set(devlink, encap_mode); + if (err) + return err; + } + return 0; } @@ -2190,6 +2209,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_SB_TC_INDEX] = { .type = NLA_U16 }, [DEVLINK_ATTR_ESWITCH_MODE] = { .type = NLA_U16 }, 
[DEVLINK_ATTR_ESWITCH_INLINE_MODE] = { .type = NLA_U8 }, + [DEVLINK_ATTR_ESWITCH_ENCAP_MODE] = { .type = NLA_U8 }, [DEVLINK_ATTR_DPIPE_TABLE_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED] = { .type = NLA_U8 }, }; -- cgit v1.2.3-71-gd317 From 531b374834c891ae2abf800693074df35a7d1a36 Mon Sep 17 00:00:00 2001 From: Gerard Garcia Date: Fri, 21 Apr 2017 10:10:44 +0100 Subject: VSOCK: Add vsockmon tap functions Add tap functions that can be used by the vsock transports to deliver packets to vsockmon virtual network devices. Signed-off-by: Gerard Garcia Signed-off-by: Stefan Hajnoczi Reviewed-by: Jorgen Hansen Signed-off-by: David S. Miller --- MAINTAINERS | 1 + include/net/af_vsock.h | 13 +++++ include/uapi/linux/if_arp.h | 1 + net/vmw_vsock/Makefile | 2 +- net/vmw_vsock/af_vsock_tap.c | 114 +++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 130 insertions(+), 1 deletion(-) create mode 100644 net/vmw_vsock/af_vsock_tap.c (limited to 'include/uapi') diff --git a/MAINTAINERS b/MAINTAINERS index b283d5ef7b68..fdab4f9e8ac9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13317,6 +13317,7 @@ L: netdev@vger.kernel.org S: Maintained F: include/linux/virtio_vsock.h F: include/uapi/linux/virtio_vsock.h +F: net/vmw_vsock/af_vsock_tap.c F: net/vmw_vsock/virtio_transport_common.c F: net/vmw_vsock/virtio_transport.c F: drivers/vhost/vsock.c diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index f32ed9ac181a..f9fb566e75cf 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -188,4 +188,17 @@ struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, void vsock_remove_sock(struct vsock_sock *vsk); void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)); +/**** TAP ****/ + +struct vsock_tap { + struct net_device *dev; + struct module *module; + struct list_head list; +}; + +int vsock_init_tap(void); +int vsock_add_tap(struct vsock_tap *vt); +int vsock_remove_tap(struct vsock_tap *vt); +void 
vsock_deliver_tap(struct sk_buff *build_skb(void *opaque), void *opaque); + #endif /* __AF_VSOCK_H__ */ diff --git a/include/uapi/linux/if_arp.h b/include/uapi/linux/if_arp.h index 4d024d75d64b..cf73510b9238 100644 --- a/include/uapi/linux/if_arp.h +++ b/include/uapi/linux/if_arp.h @@ -95,6 +95,7 @@ #define ARPHRD_IP6GRE 823 /* GRE over IPv6 */ #define ARPHRD_NETLINK 824 /* Netlink header */ #define ARPHRD_6LOWPAN 825 /* IPv6 over LoWPAN */ +#define ARPHRD_VSOCKMON 826 /* Vsock monitor header */ #define ARPHRD_VOID 0xFFFF /* Void type, nothing is known */ #define ARPHRD_NONE 0xFFFE /* zero header length */ diff --git a/net/vmw_vsock/Makefile b/net/vmw_vsock/Makefile index bc27c70e0e59..09fc2eb29dc8 100644 --- a/net/vmw_vsock/Makefile +++ b/net/vmw_vsock/Makefile @@ -3,7 +3,7 @@ obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o obj-$(CONFIG_VIRTIO_VSOCKETS) += vmw_vsock_virtio_transport.o obj-$(CONFIG_VIRTIO_VSOCKETS_COMMON) += vmw_vsock_virtio_transport_common.o -vsock-y += af_vsock.o vsock_addr.o +vsock-y += af_vsock.o af_vsock_tap.o vsock_addr.o vmw_vsock_vmci_transport-y += vmci_transport.o vmci_transport_notify.o \ vmci_transport_notify_qstate.o diff --git a/net/vmw_vsock/af_vsock_tap.c b/net/vmw_vsock/af_vsock_tap.c new file mode 100644 index 000000000000..98f09b539366 --- /dev/null +++ b/net/vmw_vsock/af_vsock_tap.c @@ -0,0 +1,114 @@ +/* + * Tap functions for AF_VSOCK sockets. + * + * Code based on net/netlink/af_netlink.c tap functions. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include +#include +#include +#include + +static DEFINE_SPINLOCK(vsock_tap_lock); +static struct list_head vsock_tap_all __read_mostly = + LIST_HEAD_INIT(vsock_tap_all); + +int vsock_add_tap(struct vsock_tap *vt) +{ + if (unlikely(vt->dev->type != ARPHRD_VSOCKMON)) + return -EINVAL; + + __module_get(vt->module); + + spin_lock(&vsock_tap_lock); + list_add_rcu(&vt->list, &vsock_tap_all); + spin_unlock(&vsock_tap_lock); + + return 0; +} +EXPORT_SYMBOL_GPL(vsock_add_tap); + +int vsock_remove_tap(struct vsock_tap *vt) +{ + struct vsock_tap *tmp; + bool found = false; + + spin_lock(&vsock_tap_lock); + + list_for_each_entry(tmp, &vsock_tap_all, list) { + if (vt == tmp) { + list_del_rcu(&vt->list); + found = true; + goto out; + } + } + + pr_warn("vsock_remove_tap: %p not found\n", vt); +out: + spin_unlock(&vsock_tap_lock); + + synchronize_net(); + + if (found) + module_put(vt->module); + + return found ? 0 : -ENODEV; +} +EXPORT_SYMBOL_GPL(vsock_remove_tap); + +static int __vsock_deliver_tap_skb(struct sk_buff *skb, + struct net_device *dev) +{ + int ret = 0; + struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); + + if (nskb) { + dev_hold(dev); + + nskb->dev = dev; + ret = dev_queue_xmit(nskb); + if (unlikely(ret > 0)) + ret = net_xmit_errno(ret); + + dev_put(dev); + } + + return ret; +} + +static void __vsock_deliver_tap(struct sk_buff *skb) +{ + int ret; + struct vsock_tap *tmp; + + list_for_each_entry_rcu(tmp, &vsock_tap_all, list) { + ret = __vsock_deliver_tap_skb(skb, tmp->dev); + if (unlikely(ret)) + break; + } +} + +void vsock_deliver_tap(struct sk_buff *build_skb(void *opaque), void *opaque) +{ + struct sk_buff *skb; + + rcu_read_lock(); + + if (likely(list_empty(&vsock_tap_all))) + goto out; + + skb = build_skb(opaque); + if (skb) { + __vsock_deliver_tap(skb); + consume_skb(skb); + } + +out: + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(vsock_deliver_tap); -- cgit v1.2.3-71-gd317 From 0b2e66448ba20eb30ea62345d6beb9ee2a1ce06b Mon Sep 17 00:00:00 2001 From: 
Gerard Garcia Date: Fri, 21 Apr 2017 10:10:45 +0100 Subject: VSOCK: Add vsockmon device Add vsockmon virtual network device that receives packets from the vsock transports and exposes them to user space. Based on the nlmon device. Signed-off-by: Gerard Garcia Signed-off-by: Stefan Hajnoczi Signed-off-by: David S. Miller --- MAINTAINERS | 2 + drivers/net/Kconfig | 8 ++ drivers/net/Makefile | 1 + drivers/net/vsockmon.c | 170 ++++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/Kbuild | 1 + include/uapi/linux/vsockmon.h | 60 +++++++++++++++ 6 files changed, 242 insertions(+) create mode 100644 drivers/net/vsockmon.c create mode 100644 include/uapi/linux/vsockmon.h (limited to 'include/uapi') diff --git a/MAINTAINERS b/MAINTAINERS index fdab4f9e8ac9..28ea78b12d0c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13317,9 +13317,11 @@ L: netdev@vger.kernel.org S: Maintained F: include/linux/virtio_vsock.h F: include/uapi/linux/virtio_vsock.h +F: include/uapi/linux/vsockmon.h F: net/vmw_vsock/af_vsock_tap.c F: net/vmw_vsock/virtio_transport_common.c F: net/vmw_vsock/virtio_transport.c +F: drivers/net/vsockmon.c F: drivers/vhost/vsock.c F: drivers/vhost/vsock.h diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 100fbdc9b95c..83a1616903f8 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -355,6 +355,14 @@ config NET_VRF This option enables the support for mapping interfaces into VRF's. The support enables VRF devices. +config VSOCKMON + tristate "Virtual vsock monitoring device" + depends on VHOST_VSOCK + ---help--- + This option enables a monitoring net device for vsock sockets. It is + mostly intended for developers or support to debug vsock issues. If + unsure, say N. 
+ endif # NET_CORE config SUNGEM_PHY diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 57fc47ad5ab3..b2f6556d8848 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -30,6 +30,7 @@ obj-$(CONFIG_GENEVE) += geneve.o obj-$(CONFIG_GTP) += gtp.o obj-$(CONFIG_NLMON) += nlmon.o obj-$(CONFIG_NET_VRF) += vrf.o +obj-$(CONFIG_VSOCKMON) += vsockmon.o # # Networking Drivers diff --git a/drivers/net/vsockmon.c b/drivers/net/vsockmon.c new file mode 100644 index 000000000000..7f0136f2dd9d --- /dev/null +++ b/drivers/net/vsockmon.c @@ -0,0 +1,170 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +/* Virtio transport max packet size plus header */ +#define DEFAULT_MTU (VIRTIO_VSOCK_MAX_PKT_BUF_SIZE + \ + sizeof(struct af_vsockmon_hdr)) + +struct pcpu_lstats { + u64 rx_packets; + u64 rx_bytes; + struct u64_stats_sync syncp; +}; + +static int vsockmon_dev_init(struct net_device *dev) +{ + dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats); + if (!dev->lstats) + return -ENOMEM; + return 0; +} + +static void vsockmon_dev_uninit(struct net_device *dev) +{ + free_percpu(dev->lstats); +} + +struct vsockmon { + struct vsock_tap vt; +}; + +static int vsockmon_open(struct net_device *dev) +{ + struct vsockmon *vsockmon = netdev_priv(dev); + + vsockmon->vt.dev = dev; + vsockmon->vt.module = THIS_MODULE; + return vsock_add_tap(&vsockmon->vt); +} + +static int vsockmon_close(struct net_device *dev) +{ + struct vsockmon *vsockmon = netdev_priv(dev); + + return vsock_remove_tap(&vsockmon->vt); +} + +static netdev_tx_t vsockmon_xmit(struct sk_buff *skb, struct net_device *dev) +{ + int len = skb->len; + struct pcpu_lstats *stats = this_cpu_ptr(dev->lstats); + + u64_stats_update_begin(&stats->syncp); + stats->rx_bytes += len; + stats->rx_packets++; + u64_stats_update_end(&stats->syncp); + + dev_kfree_skb(skb); + + return NETDEV_TX_OK; +} + +static void +vsockmon_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) +{ 
+ int i; + u64 bytes = 0, packets = 0; + + for_each_possible_cpu(i) { + const struct pcpu_lstats *vstats; + u64 tbytes, tpackets; + unsigned int start; + + vstats = per_cpu_ptr(dev->lstats, i); + + do { + start = u64_stats_fetch_begin_irq(&vstats->syncp); + tbytes = vstats->rx_bytes; + tpackets = vstats->rx_packets; + } while (u64_stats_fetch_retry_irq(&vstats->syncp, start)); + + packets += tpackets; + bytes += tbytes; + } + + stats->rx_packets = packets; + stats->tx_packets = 0; + + stats->rx_bytes = bytes; + stats->tx_bytes = 0; +} + +static int vsockmon_is_valid_mtu(int new_mtu) +{ + return new_mtu >= (int)sizeof(struct af_vsockmon_hdr); +} + +static int vsockmon_change_mtu(struct net_device *dev, int new_mtu) +{ + if (!vsockmon_is_valid_mtu(new_mtu)) + return -EINVAL; + + dev->mtu = new_mtu; + return 0; +} + +static const struct net_device_ops vsockmon_ops = { + .ndo_init = vsockmon_dev_init, + .ndo_uninit = vsockmon_dev_uninit, + .ndo_open = vsockmon_open, + .ndo_stop = vsockmon_close, + .ndo_start_xmit = vsockmon_xmit, + .ndo_get_stats64 = vsockmon_get_stats64, + .ndo_change_mtu = vsockmon_change_mtu, +}; + +static u32 always_on(struct net_device *dev) +{ + return 1; +} + +static const struct ethtool_ops vsockmon_ethtool_ops = { + .get_link = always_on, +}; + +static void vsockmon_setup(struct net_device *dev) +{ + dev->type = ARPHRD_VSOCKMON; + dev->priv_flags |= IFF_NO_QUEUE; + + dev->netdev_ops = &vsockmon_ops; + dev->ethtool_ops = &vsockmon_ethtool_ops; + dev->destructor = free_netdev; + + dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | + NETIF_F_HIGHDMA | NETIF_F_LLTX; + + dev->flags = IFF_NOARP; + + dev->mtu = DEFAULT_MTU; +} + +static struct rtnl_link_ops vsockmon_link_ops __read_mostly = { + .kind = "vsockmon", + .priv_size = sizeof(struct vsockmon), + .setup = vsockmon_setup, +}; + +static __init int vsockmon_register(void) +{ + return rtnl_link_register(&vsockmon_link_ops); +} + +static __exit void vsockmon_unregister(void) +{ + 
rtnl_link_unregister(&vsockmon_link_ops); +} + +module_init(vsockmon_register); +module_exit(vsockmon_unregister); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Gerard Garcia "); +MODULE_DESCRIPTION("Vsock monitoring device. Based on nlmon device."); +MODULE_ALIAS_RTNL_LINK("vsockmon"); diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index f8d9fed17ba9..6b0e2758585f 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -477,6 +477,7 @@ header-y += virtio_types.h header-y += virtio_vsock.h header-y += virtio_crypto.h header-y += vm_sockets.h +header-y += vsockmon.h header-y += vt.h header-y += vtpm_proxy.h header-y += wait.h diff --git a/include/uapi/linux/vsockmon.h b/include/uapi/linux/vsockmon.h new file mode 100644 index 000000000000..a08b522ef597 --- /dev/null +++ b/include/uapi/linux/vsockmon.h @@ -0,0 +1,60 @@ +#ifndef _UAPI_VSOCKMON_H +#define _UAPI_VSOCKMON_H + +#include + +/* + * vsockmon is the AF_VSOCK packet capture device. Packets captured have the + * following layout: + * + * +-----------------------------------+ + * | vsockmon header | + * | (struct af_vsockmon_hdr) | + * +-----------------------------------+ + * | transport header | + * | (af_vsockmon_hdr->len bytes long) | + * +-----------------------------------+ + * | payload | + * | (until end of packet) | + * +-----------------------------------+ + * + * The vsockmon header is a transport-independent description of the packet. + * It duplicates some of the information from the transport header so that + * no transport-specific knowledge is necessary to process packets. + * + * The transport header is useful for low-level transport-specific packet + * analysis. Transport type is given in af_vsockmon_hdr->transport and + * transport header length is given in af_vsockmon_hdr->len. + * + * If af_vsockmon_hdr->op is AF_VSOCK_OP_PAYLOAD then the payload follows the + * transport header. Other ops do not have a payload. 
+ */ + +struct af_vsockmon_hdr { + __le64 src_cid; + __le64 dst_cid; + __le32 src_port; + __le32 dst_port; + __le16 op; /* enum af_vsockmon_op */ + __le16 transport; /* enum af_vsockmon_transport */ + __le16 len; /* Transport header length */ + __u8 reserved[2]; +}; + +enum af_vsockmon_op { + AF_VSOCK_OP_UNKNOWN = 0, + AF_VSOCK_OP_CONNECT = 1, + AF_VSOCK_OP_DISCONNECT = 2, + AF_VSOCK_OP_CONTROL = 3, + AF_VSOCK_OP_PAYLOAD = 4, +}; + +enum af_vsockmon_transport { + AF_VSOCK_TRANSPORT_UNKNOWN = 0, + AF_VSOCK_TRANSPORT_NO_INFO = 1, /* No transport information */ + + /* Transport header type: struct virtio_vsock_hdr */ + AF_VSOCK_TRANSPORT_VIRTIO = 2, +}; + +#endif -- cgit v1.2.3-71-gd317 From 4a69a864209e9ab436d4a58e8028ac96cc873d15 Mon Sep 17 00:00:00 2001 From: Mike Maloney Date: Fri, 21 Apr 2017 10:56:11 -0400 Subject: packet: add PACKET_FANOUT_FLAG_UNIQUEID to assign new fanout group id. Fanout uses a per net global namespace. A process that intends to create a new fanout group can accidentally join an existing group. It is not possible to detect this. Add socket option PACKET_FANOUT_FLAG_UNIQUEID. When specified the supplied fanout group id must be set to 0, and the kernel chooses an id that is not already in use. This is an ephemeral flag so that other sockets can be added to this group using setsockopt, but NOT specifying this flag. The current getsockopt(..., PACKET_FANOUT, ...) can be used to retrieve the new group id. We assume that there are not a lot of fanout groups and that this is not a high frequency call. The method assigns ids starting at zero and increases until it finds an unused id. It keeps track of the last assigned id, and uses it as a starting point to find new ids. Signed-off-by: Mike Maloney Acked-by: Willem de Bruijn Signed-off-by: David S. 
Miller --- include/uapi/linux/if_packet.h | 1 + net/packet/af_packet.c | 44 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h index 9e7edfd8141e..4df96a7dd4fa 100644 --- a/include/uapi/linux/if_packet.h +++ b/include/uapi/linux/if_packet.h @@ -66,6 +66,7 @@ struct sockaddr_ll { #define PACKET_FANOUT_CBPF 6 #define PACKET_FANOUT_EBPF 7 #define PACKET_FANOUT_FLAG_ROLLOVER 0x1000 +#define PACKET_FANOUT_FLAG_UNIQUEID 0x2000 #define PACKET_FANOUT_FLAG_DEFRAG 0x8000 struct tpacket_stats { diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 8489beff5c25..94052f42058b 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1496,6 +1496,7 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, DEFINE_MUTEX(fanout_mutex); EXPORT_SYMBOL_GPL(fanout_mutex); static LIST_HEAD(fanout_list); +static u16 fanout_next_id; static void __fanout_link(struct sock *sk, struct packet_sock *po) { @@ -1629,6 +1630,36 @@ static void fanout_release_data(struct packet_fanout *f) }; } +static bool __fanout_id_is_free(struct sock *sk, u16 candidate_id) +{ + struct packet_fanout *f; + + list_for_each_entry(f, &fanout_list, list) { + if (f->id == candidate_id && + read_pnet(&f->net) == sock_net(sk)) { + return false; + } + } + return true; +} + +static bool fanout_find_new_id(struct sock *sk, u16 *new_id) +{ + u16 id = fanout_next_id; + + do { + if (__fanout_id_is_free(sk, id)) { + *new_id = id; + fanout_next_id = id + 1; + return true; + } + + id++; + } while (id != fanout_next_id); + + return false; +} + static int fanout_add(struct sock *sk, u16 id, u16 type_flags) { struct packet_rollover *rollover = NULL; @@ -1676,6 +1707,19 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) po->rollover = rollover; } + if (type_flags & PACKET_FANOUT_FLAG_UNIQUEID) { + if (id != 0) { + err = -EINVAL; + goto out; + } + 
if (!fanout_find_new_id(sk, &id)) { + err = -ENOMEM; + goto out; + } + /* ephemeral flag for the first socket in the group: drop it */ + flags &= ~(PACKET_FANOUT_FLAG_UNIQUEID >> 8); + } + match = NULL; list_for_each_entry(f, &fanout_list, list) { if (f->id == id && -- cgit v1.2.3-71-gd317 From 120645513f55a4ac5543120d9e79925d30a0156f Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Fri, 21 Apr 2017 16:48:06 -0700 Subject: openvswitch: Add eventmask support to CT action. Add a new optional conntrack action attribute OVS_CT_ATTR_EVENTMASK, which can be used in conjunction with the commit flag (OVS_CT_ATTR_COMMIT) to set the mask of bits specifying which conntrack events (IPCT_*) should be delivered via the Netfilter netlink multicast groups. Default behavior depends on the system configuration, but typically a lot of events are delivered. This can be very chatty for the NFNLGRP_CONNTRACK_UPDATE group, even if only some types of events are of interest. Netfilter core init_conntrack() adds the event cache extension, so we only need to set the ctmask value. However, if the system is configured without support for events, the setting will be skipped due to extension not being found. Signed-off-by: Jarno Rajahalme Reviewed-by: Greg Rose Acked-by: Joe Stringer Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 12 ++++++++++++ net/openvswitch/conntrack.c | 27 +++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 66d1c3ccfd8e..61b7d36dfe34 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -693,6 +693,17 @@ struct ovs_action_hash { * nothing if the connection is already committed will check that the current * packet is in conntrack entry's original direction. If directionality does * not match, will delete the existing conntrack entry and commit a new one. 
+ * @OVS_CT_ATTR_EVENTMASK: Mask of bits indicating which conntrack event types + * (enum ip_conntrack_events IPCT_*) should be reported. For any bit set to + * zero, the corresponding event type is not generated. Default behavior + * depends on system configuration, but typically all event types are + * generated, hence listening on NFNLGRP_CONNTRACK_UPDATE events may get a lot + * of events. Explicitly passing this attribute allows limiting the updates + * received to the events of interest. The bit 1 << IPCT_NEW, 1 << + * IPCT_RELATED, and 1 << IPCT_DESTROY must be set to ones for those events to + * be received on NFNLGRP_CONNTRACK_NEW and NFNLGRP_CONNTRACK_DESTROY groups, + * respectively. Remaining bits control the changes for which an event is + * delivered on the NFNLGRP_CONNTRACK_UPDATE group. */ enum ovs_ct_attr { OVS_CT_ATTR_UNSPEC, @@ -704,6 +715,7 @@ enum ovs_ct_attr { related connections. */ OVS_CT_ATTR_NAT, /* Nested OVS_NAT_ATTR_* */ OVS_CT_ATTR_FORCE_COMMIT, /* No argument */ + OVS_CT_ATTR_EVENTMASK, /* u32 mask of IPCT_* events. */ __OVS_CT_ATTR_MAX }; diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 58de4c2da673..4f7c3b5c080b 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -66,7 +66,9 @@ struct ovs_conntrack_info { u8 commit : 1; u8 nat : 3; /* enum ovs_ct_nat */ u8 force : 1; + u8 have_eventmask : 1; u16 family; + u32 eventmask; /* Mask of 1 << IPCT_*. */ struct md_mark mark; struct md_labels labels; #ifdef CONFIG_NF_NAT_NEEDED @@ -1007,6 +1009,20 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key, if (!ct) return 0; + /* Set the conntrack event mask if given. NEW and DELETE events have + * their own groups, but the NFNLGRP_CONNTRACK_UPDATE group listener + * typically would receive many kinds of updates. Setting the event + * mask allows those events to be filtered. 
The set event mask will + * remain in effect for the lifetime of the connection unless changed + * by a further CT action with both the commit flag and the eventmask + * option. */ + if (info->have_eventmask) { + struct nf_conntrack_ecache *cache = nf_ct_ecache_find(ct); + + if (cache) + cache->ctmask = info->eventmask; + } + /* Apply changes before confirming the connection so that the initial * conntrack NEW netlink event carries the values given in the CT * action. @@ -1238,6 +1254,8 @@ static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = { /* NAT length is checked when parsing the nested attributes. */ [OVS_CT_ATTR_NAT] = { .minlen = 0, .maxlen = INT_MAX }, #endif + [OVS_CT_ATTR_EVENTMASK] = { .minlen = sizeof(u32), + .maxlen = sizeof(u32) }, }; static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, @@ -1316,6 +1334,11 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, break; } #endif + case OVS_CT_ATTR_EVENTMASK: + info->have_eventmask = true; + info->eventmask = nla_get_u32(a); + break; + default: OVS_NLERR(log, "Unknown conntrack attr (%d)", type); @@ -1515,6 +1538,10 @@ int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info, ct_info->helper->name)) return -EMSGSIZE; } + if (ct_info->have_eventmask && + nla_put_u32(skb, OVS_CT_ATTR_EVENTMASK, ct_info->eventmask)) + return -EMSGSIZE; + #ifdef CONFIG_NF_NAT_NEEDED if (ct_info->nat && !ovs_ct_nat_to_attr(ct_info, skb)) return -EMSGSIZE; -- cgit v1.2.3-71-gd317 From 53b56da83d7899de375a9de153fd7f5397de85e6 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Mon, 17 Apr 2017 21:18:57 +0800 Subject: netfilter: ctnetlink: make it safer when updating ct->status After converting to use rcu for conntrack hash, one CPU may update the ct->status via ctnetlink, while another CPU may process the packets and update the ct->status. 
So the non-atomic operation "ct->status |= status;" via ctnetlink becomes unsafe, and this may clear the IPS_DYING_BIT bit set by another CPU unexpectedly. For example: CPU0 CPU1 ctnetlink_change_status __nf_conntrack_find_get old = ct->status nf_ct_gc_expired - nf_ct_kill - test_and_set_bit(IPS_DYING_BIT new = old | status; - ct->status = new; <-- oops, _DYING_ is cleared! Now use a series of atomic bit operations to solve the above issue. Also note, user shouldn't set IPS_TEMPLATE, IPS_SEQ_ADJUST directly, so make these two bits unchangeable too. If we set the IPS_TEMPLATE_BIT, ct will be freed by nf_ct_tmpl_free, but actually it was allocated by nf_conntrack_alloc. If we set the IPS_SEQ_ADJUST_BIT, this may cause a NULL pointer dereference, as nfct_seqadj(ct) may be NULL. Last, add some comments to describe the logic change due to the commit a963d710f367 ("netfilter: ctnetlink: Fix regression in CTA_STATUS processing"), which I found a little confusing. Fixes: 76507f69c44e ("[NETFILTER]: nf_conntrack: use RCU for conntrack hash") Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_conntrack_common.h | 13 ++++++--- net/netfilter/nf_conntrack_netlink.c | 33 ++++++++++++++++------ 2 files changed, 33 insertions(+), 13 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index 6a8e33dd4ecb..38fc383139f0 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h @@ -82,10 +82,6 @@ enum ip_conntrack_status { IPS_DYING_BIT = 9, IPS_DYING = (1 << IPS_DYING_BIT), - /* Bits that cannot be altered from userland. */ - IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK | - IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING), - /* Connection has fixed timeout.
*/ IPS_FIXED_TIMEOUT_BIT = 10, IPS_FIXED_TIMEOUT = (1 << IPS_FIXED_TIMEOUT_BIT), @@ -101,6 +97,15 @@ enum ip_conntrack_status { /* Conntrack got a helper explicitly attached via CT target. */ IPS_HELPER_BIT = 13, IPS_HELPER = (1 << IPS_HELPER_BIT), + + /* Be careful here, modifying these bits can make things messy, + * so don't let users modify them directly. + */ + IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK | + IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING | + IPS_SEQ_ADJUST | IPS_TEMPLATE), + + __IPS_MAX_BIT = 14, }; /* Connection tracking event types */ diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index e5f97777b1f4..86deed6a8db4 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1419,6 +1419,24 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct, } #endif +static void +__ctnetlink_change_status(struct nf_conn *ct, unsigned long on, + unsigned long off) +{ + unsigned int bit; + + /* Ignore these unchangable bits */ + on &= ~IPS_UNCHANGEABLE_MASK; + off &= ~IPS_UNCHANGEABLE_MASK; + + for (bit = 0; bit < __IPS_MAX_BIT; bit++) { + if (on & (1 << bit)) + set_bit(bit, &ct->status); + else if (off & (1 << bit)) + clear_bit(bit, &ct->status); + } +} + static int ctnetlink_change_status(struct nf_conn *ct, const struct nlattr * const cda[]) { @@ -1438,10 +1456,7 @@ ctnetlink_change_status(struct nf_conn *ct, const struct nlattr * const cda[]) /* ASSURED bit can only be set */ return -EBUSY; - /* Be careful here, modifying NAT bits can screw up things, - * so don't let users modify them directly if they don't pass - * nf_nat_range. 
*/ - ct->status |= status & ~(IPS_NAT_DONE_MASK | IPS_NAT_MASK); + __ctnetlink_change_status(ct, status, 0); return 0; } @@ -1628,7 +1643,7 @@ ctnetlink_change_seq_adj(struct nf_conn *ct, if (ret < 0) return ret; - ct->status |= IPS_SEQ_ADJUST; + set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); } if (cda[CTA_SEQ_ADJ_REPLY]) { @@ -1637,7 +1652,7 @@ ctnetlink_change_seq_adj(struct nf_conn *ct, if (ret < 0) return ret; - ct->status |= IPS_SEQ_ADJUST; + set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); } return 0; @@ -2289,10 +2304,10 @@ ctnetlink_update_status(struct nf_conn *ct, const struct nlattr * const cda[]) /* This check is less strict than ctnetlink_change_status() * because callers often flip IPS_EXPECTED bits when sending * an NFQA_CT attribute to the kernel. So ignore the - * unchangeable bits but do not error out. + * unchangeable bits but do not error out. Also user programs + * are allowed to clear the bits that they are allowed to change. */ - ct->status = (status & ~IPS_UNCHANGEABLE_MASK) | - (ct->status & IPS_UNCHANGEABLE_MASK); + __ctnetlink_change_status(ct, status, ~status); return 0; } -- cgit v1.2.3-71-gd317 From 46c2fa39877ed70415ee2b1acfb9129e956f6de4 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Thu, 20 Apr 2017 14:45:47 -0700 Subject: net/tcp_fastopen: Add snmp counter for blackhole detection This counter records the number of times the firewall blackhole issue is detected and active TFO is disabled. Signed-off-by: Wei Wang Acked-by: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. 
Miller --- include/net/tcp.h | 2 +- include/uapi/linux/snmp.h | 1 + net/ipv4/proc.c | 1 + net/ipv4/tcp_fastopen.c | 5 +++-- net/ipv4/tcp_input.c | 4 ++-- 5 files changed, 8 insertions(+), 5 deletions(-) (limited to 'include/uapi') diff --git a/include/net/tcp.h b/include/net/tcp.h index c1abc2abbdcb..da28bef1d82b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1507,7 +1507,7 @@ struct tcp_fastopen_context { }; extern unsigned int sysctl_tcp_fastopen_blackhole_timeout; -void tcp_fastopen_active_disable(void); +void tcp_fastopen_active_disable(struct sock *sk); bool tcp_fastopen_active_should_disable(struct sock *sk); void tcp_fastopen_active_disable_ofo_check(struct sock *sk); void tcp_fastopen_active_timeout_reset(void); diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index cec0e171d20c..95cffcb21dfd 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -259,6 +259,7 @@ enum LINUX_MIB_TCPFASTOPENPASSIVEFAIL, /* TCPFastOpenPassiveFail */ LINUX_MIB_TCPFASTOPENLISTENOVERFLOW, /* TCPFastOpenListenOverflow */ LINUX_MIB_TCPFASTOPENCOOKIEREQD, /* TCPFastOpenCookieReqd */ + LINUX_MIB_TCPFASTOPENBLACKHOLE, /* TCPFastOpenBlackholeDetect */ LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES, /* TCPSpuriousRtxHostQueues */ LINUX_MIB_BUSYPOLLRXPACKETS, /* BusyPollRxPackets */ LINUX_MIB_TCPAUTOCORKING, /* TCPAutoCorking */ diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 4ccbf464d1ac..fa44e752a9a3 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -281,6 +281,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPFastOpenPassiveFail", LINUX_MIB_TCPFASTOPENPASSIVEFAIL), SNMP_MIB_ITEM("TCPFastOpenListenOverflow", LINUX_MIB_TCPFASTOPENLISTENOVERFLOW), SNMP_MIB_ITEM("TCPFastOpenCookieReqd", LINUX_MIB_TCPFASTOPENCOOKIEREQD), + SNMP_MIB_ITEM("TCPFastOpenBlackhole", LINUX_MIB_TCPFASTOPENBLACKHOLE), SNMP_MIB_ITEM("TCPSpuriousRtxHostQueues", LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES), SNMP_MIB_ITEM("BusyPollRxPackets", 
LINUX_MIB_BUSYPOLLRXPACKETS), SNMP_MIB_ITEM("TCPAutoCorking", LINUX_MIB_TCPAUTOCORKING), diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index ff2d30ffc6f3..4af82b914dd4 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -410,10 +410,11 @@ static unsigned long tfo_active_disable_stamp __read_mostly; /* Disable active TFO and record current jiffies and * tfo_active_disable_times */ -void tcp_fastopen_active_disable(void) +void tcp_fastopen_active_disable(struct sock *sk) { atomic_inc(&tfo_active_disable_times); tfo_active_disable_stamp = jiffies; + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENBLACKHOLE); } /* Reset tfo_active_disable_times to 0 */ @@ -469,7 +470,7 @@ void tcp_fastopen_active_disable_ofo_check(struct sock *sk) if (p && !rb_next(p)) { skb = rb_entry(p, struct sk_buff, rbnode); if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) { - tcp_fastopen_active_disable(); + tcp_fastopen_active_disable(sk); return; } } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9f342a67dc74..5af2f04f8859 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5307,7 +5307,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, */ if (tp->syn_fastopen && !tp->data_segs_in && sk->sk_state == TCP_ESTABLISHED) - tcp_fastopen_active_disable(); + tcp_fastopen_active_disable(sk); tcp_send_challenge_ack(sk, skb); } goto discard; @@ -6061,7 +6061,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) { /* Receive out of order FIN after close() */ if (tp->syn_fastopen && th->fin) - tcp_fastopen_active_disable(); + tcp_fastopen_active_disable(sk); tcp_done(sk); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA); return 1; -- cgit v1.2.3-71-gd317 From a577d8f793ff2fd514915686079e3c09bcf0df11 Mon Sep 17 00:00:00 2001 From: Benjamin LaHaise Date: Sat, 22 Apr 2017 16:52:47 -0400 Subject: cls_flower: add support for matching MPLS fields (v2) 
Add support to the tc flower classifier to match based on fields in MPLS labels (TTL, Bottom of Stack, TC field, Label). Signed-off-by: Benjamin LaHaise Signed-off-by: Benjamin LaHaise Reviewed-by: Jakub Kicinski Cc: "David S. Miller" Cc: Simon Horman Cc: Jamal Hadi Salim Cc: Cong Wang Cc: Jiri Pirko Cc: Eric Dumazet Cc: Hadar Hen Zion Cc: Gao Feng Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/pkt_cls.h | 5 +++ net/sched/cls_flower.c | 74 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 7a69f2a4ca0c..f1129e383b2a 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -432,6 +432,11 @@ enum { TCA_FLOWER_KEY_ARP_THA, /* ETH_ALEN */ TCA_FLOWER_KEY_ARP_THA_MASK, /* ETH_ALEN */ + TCA_FLOWER_KEY_MPLS_TTL, /* u8 - 8 bits */ + TCA_FLOWER_KEY_MPLS_BOS, /* u8 - 1 bit */ + TCA_FLOWER_KEY_MPLS_TC, /* u8 - 3 bits */ + TCA_FLOWER_KEY_MPLS_LABEL, /* be32 - 20 bits */ + __TCA_FLOWER_MAX, }; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 31ee3404aeb4..3ecf07666df3 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -47,6 +48,7 @@ struct fl_flow_key { struct flow_dissector_key_ipv6_addrs enc_ipv6; }; struct flow_dissector_key_ports enc_tp; + struct flow_dissector_key_mpls mpls; } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. 
*/ struct fl_flow_mask_range { @@ -418,6 +420,10 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { [TCA_FLOWER_KEY_ARP_SHA_MASK] = { .len = ETH_ALEN }, [TCA_FLOWER_KEY_ARP_THA] = { .len = ETH_ALEN }, [TCA_FLOWER_KEY_ARP_THA_MASK] = { .len = ETH_ALEN }, + [TCA_FLOWER_KEY_MPLS_TTL] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_MPLS_BOS] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_MPLS_TC] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_MPLS_LABEL] = { .type = NLA_U32 }, }; static void fl_set_key_val(struct nlattr **tb, @@ -433,6 +439,31 @@ static void fl_set_key_val(struct nlattr **tb, memcpy(mask, nla_data(tb[mask_type]), len); } +static void fl_set_key_mpls(struct nlattr **tb, + struct flow_dissector_key_mpls *key_val, + struct flow_dissector_key_mpls *key_mask) +{ + if (tb[TCA_FLOWER_KEY_MPLS_TTL]) { + key_val->mpls_ttl = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_TTL]); + key_mask->mpls_ttl = MPLS_TTL_MASK; + } + if (tb[TCA_FLOWER_KEY_MPLS_BOS]) { + key_val->mpls_bos = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_BOS]); + key_mask->mpls_bos = MPLS_BOS_MASK; + } + if (tb[TCA_FLOWER_KEY_MPLS_TC]) { + key_val->mpls_tc = + nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_TC]) & MPLS_TC_MASK; + key_mask->mpls_tc = MPLS_TC_MASK; + } + if (tb[TCA_FLOWER_KEY_MPLS_LABEL]) { + key_val->mpls_label = + nla_get_u32(tb[TCA_FLOWER_KEY_MPLS_LABEL]) & + MPLS_LABEL_MASK; + key_mask->mpls_label = MPLS_LABEL_MASK; + } +} + static void fl_set_key_vlan(struct nlattr **tb, struct flow_dissector_key_vlan *key_val, struct flow_dissector_key_vlan *key_mask) @@ -589,6 +620,9 @@ static int fl_set_key(struct net *net, struct nlattr **tb, &mask->icmp.code, TCA_FLOWER_KEY_ICMPV6_CODE_MASK, sizeof(key->icmp.code)); + } else if (key->basic.n_proto == htons(ETH_P_MPLS_UC) || + key->basic.n_proto == htons(ETH_P_MPLS_MC)) { + fl_set_key_mpls(tb, &key->mpls, &mask->mpls); } else if (key->basic.n_proto == htons(ETH_P_ARP) || key->basic.n_proto == htons(ETH_P_RARP)) { fl_set_key_val(tb, &key->arp.sip, TCA_FLOWER_KEY_ARP_SIP, @@ -724,6 
+758,8 @@ static void fl_init_dissector(struct cls_fl_head *head, FLOW_DISSECTOR_KEY_ICMP, icmp); FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, FLOW_DISSECTOR_KEY_ARP, arp); + FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FLOW_DISSECTOR_KEY_MPLS, mpls); FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, FLOW_DISSECTOR_KEY_VLAN, vlan); FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, @@ -991,6 +1027,41 @@ static int fl_dump_key_val(struct sk_buff *skb, return 0; } +static int fl_dump_key_mpls(struct sk_buff *skb, + struct flow_dissector_key_mpls *mpls_key, + struct flow_dissector_key_mpls *mpls_mask) +{ + int err; + + if (!memchr_inv(mpls_mask, 0, sizeof(*mpls_mask))) + return 0; + if (mpls_mask->mpls_ttl) { + err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_TTL, + mpls_key->mpls_ttl); + if (err) + return err; + } + if (mpls_mask->mpls_tc) { + err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_TC, + mpls_key->mpls_tc); + if (err) + return err; + } + if (mpls_mask->mpls_label) { + err = nla_put_u32(skb, TCA_FLOWER_KEY_MPLS_LABEL, + mpls_key->mpls_label); + if (err) + return err; + } + if (mpls_mask->mpls_bos) { + err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_BOS, + mpls_key->mpls_bos); + if (err) + return err; + } + return 0; +} + static int fl_dump_key_vlan(struct sk_buff *skb, struct flow_dissector_key_vlan *vlan_key, struct flow_dissector_key_vlan *vlan_mask) @@ -1096,6 +1167,9 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, sizeof(key->basic.n_proto))) goto nla_put_failure; + if (fl_dump_key_mpls(skb, &key->mpls, &mask->mpls)) + goto nla_put_failure; + if (fl_dump_key_vlan(skb, &key->vlan, &mask->vlan)) goto nla_put_failure; -- cgit v1.2.3-71-gd317 From a8f820a380a2a06fc4fe1a54159067958f800929 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 25 Apr 2017 08:19:44 +0200 Subject: can: add Virtual CAN Tunnel driver (vxcan) Similar to the virtual ethernet driver veth, vxcan implements a local CAN traffic tunnel between two virtual CAN network devices. 
See Kconfig entry for details. Signed-off-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- drivers/net/can/Kconfig | 18 +++ drivers/net/can/Makefile | 1 + drivers/net/can/vxcan.c | 316 +++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/can/vxcan.h | 12 ++ 4 files changed, 347 insertions(+) create mode 100644 drivers/net/can/vxcan.c create mode 100644 include/uapi/linux/can/vxcan.h (limited to 'include/uapi') diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig index aa20b69d2a1a..ac4ff394bc56 100644 --- a/drivers/net/can/Kconfig +++ b/drivers/net/can/Kconfig @@ -9,6 +9,24 @@ config CAN_VCAN This driver can also be built as a module. If so, the module will be called vcan. +config CAN_VXCAN + tristate "Virtual CAN Tunnel (vxcan)" + ---help--- + Similar to the virtual ethernet driver veth, vxcan implements a + local CAN traffic tunnel between two virtual CAN network devices. + When creating a vxcan, two vxcan devices are created as a pair. + When one end receives the packet it appears on its pair and vice + versa. The vxcan can be used for cross namespace communication. + + In contrast to vcan loopback devices the vxcan only forwards CAN + frames to its pair and does *not* provide a local echo of sent + CAN frames. To disable a potential echo in af_can.c the vxcan driver + announces IFF_ECHO in the interface flags. To have a clean start + in each namespace the CAN GW hop counter is set to zero. + + This driver can also be built as a module. If so, the module + will be called vxcan.
+ config CAN_SLCAN tristate "Serial / USB serial CAN Adaptors (slcan)" depends on TTY diff --git a/drivers/net/can/Makefile b/drivers/net/can/Makefile index 8581e2b3e87f..4aabbee133b8 100644 --- a/drivers/net/can/Makefile +++ b/drivers/net/can/Makefile @@ -3,6 +3,7 @@ # obj-$(CONFIG_CAN_VCAN) += vcan.o +obj-$(CONFIG_CAN_VXCAN) += vxcan.o obj-$(CONFIG_CAN_SLCAN) += slcan.o obj-$(CONFIG_CAN_DEV) += can-dev.o diff --git a/drivers/net/can/vxcan.c b/drivers/net/can/vxcan.c new file mode 100644 index 000000000000..7fbb24795681 --- /dev/null +++ b/drivers/net/can/vxcan.c @@ -0,0 +1,316 @@ +/* + * vxcan.c - Virtual CAN Tunnel for cross namespace communication + * + * This code is derived from drivers/net/can/vcan.c for the virtual CAN + * specific parts and from drivers/net/veth.c to implement the netlink API + * for network interface pairs in a common and established way. + * + * Copyright (c) 2017 Oliver Hartkopp + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the version 2 of the GNU General Public License + * as published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "vxcan" + +MODULE_DESCRIPTION("Virtual CAN Tunnel"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Oliver Hartkopp "); +MODULE_ALIAS_RTNL_LINK(DRV_NAME); + +struct vxcan_priv { + struct net_device __rcu *peer; +}; + +static netdev_tx_t vxcan_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct vxcan_priv *priv = netdev_priv(dev); + struct net_device *peer; + struct canfd_frame *cfd = (struct canfd_frame *)skb->data; + struct net_device_stats *peerstats, *srcstats = &dev->stats; + + if (can_dropped_invalid_skb(dev, skb)) + return NETDEV_TX_OK; + + rcu_read_lock(); + peer = rcu_dereference(priv->peer); + if (unlikely(!peer)) { + kfree_skb(skb); + dev->stats.tx_dropped++; + goto out_unlock; + } + + skb = can_create_echo_skb(skb); + if (!skb) + goto out_unlock; + + /* reset CAN GW hop counter */ + skb->csum_start = 0; + skb->pkt_type = PACKET_BROADCAST; + skb->dev = peer; + skb->ip_summed = CHECKSUM_UNNECESSARY; + + if (netif_rx_ni(skb) == NET_RX_SUCCESS) { + srcstats->tx_packets++; + srcstats->tx_bytes += cfd->len; + peerstats = &peer->stats; + peerstats->rx_packets++; + peerstats->rx_bytes += cfd->len; + } + +out_unlock: + rcu_read_unlock(); + return NETDEV_TX_OK; +} + + +static int vxcan_open(struct net_device *dev) +{ + struct vxcan_priv *priv = netdev_priv(dev); + struct net_device *peer = rtnl_dereference(priv->peer); + + if (!peer) + return -ENOTCONN; + + if (peer->flags & IFF_UP) { + netif_carrier_on(dev); + netif_carrier_on(peer); + } + return 0; +} + +static int vxcan_close(struct net_device *dev) +{ + struct vxcan_priv *priv = netdev_priv(dev); + struct net_device *peer = rtnl_dereference(priv->peer); + + netif_carrier_off(dev); + if (peer) + netif_carrier_off(peer); + + return 0; +} + +static int vxcan_get_iflink(const struct net_device *dev) +{ + struct vxcan_priv *priv = netdev_priv(dev); + struct net_device *peer; + 
int iflink; + + rcu_read_lock(); + peer = rcu_dereference(priv->peer); + iflink = peer ? peer->ifindex : 0; + rcu_read_unlock(); + + return iflink; +} + +static int vxcan_change_mtu(struct net_device *dev, int new_mtu) +{ + /* Do not allow changing the MTU while running */ + if (dev->flags & IFF_UP) + return -EBUSY; + + if (new_mtu != CAN_MTU && new_mtu != CANFD_MTU) + return -EINVAL; + + dev->mtu = new_mtu; + return 0; +} + +static const struct net_device_ops vxcan_netdev_ops = { + .ndo_open = vxcan_open, + .ndo_stop = vxcan_close, + .ndo_start_xmit = vxcan_xmit, + .ndo_get_iflink = vxcan_get_iflink, + .ndo_change_mtu = vxcan_change_mtu, +}; + +static void vxcan_setup(struct net_device *dev) +{ + dev->type = ARPHRD_CAN; + dev->mtu = CAN_MTU; + dev->hard_header_len = 0; + dev->addr_len = 0; + dev->tx_queue_len = 0; + dev->flags = (IFF_NOARP|IFF_ECHO); + dev->netdev_ops = &vxcan_netdev_ops; + dev->destructor = free_netdev; +} + +/* forward declaration for rtnl_create_link() */ +static struct rtnl_link_ops vxcan_link_ops; + +static int vxcan_newlink(struct net *net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct vxcan_priv *priv; + struct net_device *peer; + struct net *peer_net; + + struct nlattr *peer_tb[IFLA_MAX + 1], **tbp = tb; + char ifname[IFNAMSIZ]; + unsigned char name_assign_type; + struct ifinfomsg *ifmp = NULL; + int err; + + /* register peer device */ + if (data && data[VXCAN_INFO_PEER]) { + struct nlattr *nla_peer; + + nla_peer = data[VXCAN_INFO_PEER]; + ifmp = nla_data(nla_peer); + err = rtnl_nla_parse_ifla(peer_tb, + nla_data(nla_peer) + + sizeof(struct ifinfomsg), + nla_len(nla_peer) - + sizeof(struct ifinfomsg), + NULL); + if (err < 0) + return err; + + tbp = peer_tb; + } + + if (tbp[IFLA_IFNAME]) { + nla_strlcpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ); + name_assign_type = NET_NAME_USER; + } else { + snprintf(ifname, IFNAMSIZ, DRV_NAME "%%d"); + name_assign_type = NET_NAME_ENUM; + } + + peer_net = 
rtnl_link_get_net(net, tbp); + if (IS_ERR(peer_net)) + return PTR_ERR(peer_net); + + peer = rtnl_create_link(peer_net, ifname, name_assign_type, + &vxcan_link_ops, tbp); + if (IS_ERR(peer)) { + put_net(peer_net); + return PTR_ERR(peer); + } + + if (ifmp && dev->ifindex) + peer->ifindex = ifmp->ifi_index; + + err = register_netdevice(peer); + put_net(peer_net); + peer_net = NULL; + if (err < 0) { + free_netdev(peer); + return err; + } + + netif_carrier_off(peer); + + err = rtnl_configure_link(peer, ifmp); + if (err < 0) { + unregister_netdevice(peer); + return err; + } + + /* register first device */ + if (tb[IFLA_IFNAME]) + nla_strlcpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ); + else + snprintf(dev->name, IFNAMSIZ, DRV_NAME "%%d"); + + err = register_netdevice(dev); + if (err < 0) { + unregister_netdevice(peer); + return err; + } + + netif_carrier_off(dev); + + /* cross link the device pair */ + priv = netdev_priv(dev); + rcu_assign_pointer(priv->peer, peer); + + priv = netdev_priv(peer); + rcu_assign_pointer(priv->peer, dev); + + return 0; +} + +static void vxcan_dellink(struct net_device *dev, struct list_head *head) +{ + struct vxcan_priv *priv; + struct net_device *peer; + + priv = netdev_priv(dev); + peer = rtnl_dereference(priv->peer); + + /* Note : dellink() is called from default_device_exit_batch(), + * before a rcu_synchronize() point. The devices are guaranteed + * not being freed before one RCU grace period. + */ + RCU_INIT_POINTER(priv->peer, NULL); + unregister_netdevice_queue(dev, head); + + if (peer) { + priv = netdev_priv(peer); + RCU_INIT_POINTER(priv->peer, NULL); + unregister_netdevice_queue(peer, head); + } +} + +static const struct nla_policy vxcan_policy[VXCAN_INFO_MAX + 1] = { + [VXCAN_INFO_PEER] = { .len = sizeof(struct ifinfomsg) }, +}; + +static struct net *vxcan_get_link_net(const struct net_device *dev) +{ + struct vxcan_priv *priv = netdev_priv(dev); + struct net_device *peer = rtnl_dereference(priv->peer); + + return peer ? 
dev_net(peer) : dev_net(dev); +} + +static struct rtnl_link_ops vxcan_link_ops = { + .kind = DRV_NAME, + .priv_size = sizeof(struct vxcan_priv), + .setup = vxcan_setup, + .newlink = vxcan_newlink, + .dellink = vxcan_dellink, + .policy = vxcan_policy, + .maxtype = VXCAN_INFO_MAX, + .get_link_net = vxcan_get_link_net, +}; + +static __init int vxcan_init(void) +{ + pr_info("vxcan: Virtual CAN Tunnel driver\n"); + + return rtnl_link_register(&vxcan_link_ops); +} + +static __exit void vxcan_exit(void) +{ + rtnl_link_unregister(&vxcan_link_ops); +} + +module_init(vxcan_init); +module_exit(vxcan_exit); diff --git a/include/uapi/linux/can/vxcan.h b/include/uapi/linux/can/vxcan.h new file mode 100644 index 000000000000..ffb0b7156f7e --- /dev/null +++ b/include/uapi/linux/can/vxcan.h @@ -0,0 +1,12 @@ +#ifndef _UAPI_CAN_VXCAN_H +#define _UAPI_CAN_VXCAN_H + +enum { + VXCAN_INFO_UNSPEC, + VXCAN_INFO_PEER, + + __VXCAN_INFO_MAX +#define VXCAN_INFO_MAX (__VXCAN_INFO_MAX - 1) +}; + +#endif -- cgit v1.2.3-71-gd317 From b5cdae3291f7be7a34e75affe4c0ec1f7f328b64 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 18 Apr 2017 15:36:58 -0400 Subject: net: Generic XDP This provides a generic SKB based non-optimized XDP path which is used if either the driver lacks a specific XDP implementation, or the user requests it via a new IFLA_XDP_FLAGS value named XDP_FLAGS_SKB_MODE. It is arguable that perhaps I should have required something like this as part of the initial XDP feature merge. I believe this is critical for two reasons: 1) Accessibility. More people can play with XDP with less dependencies. Yes I know we have XDP support in virtio_net, but that just creates another depedency for learning how to use this facility. I wrote this to make life easier for the XDP newbies. 2) As a model for what the expected semantics are. If there is a pure generic core implementation, it serves as a semantic example for driver folks adding XDP support. 
One thing I have not tried to address here is the issue of XDP_PACKET_HEADROOM, thanks to Daniel for spotting that. It seems incredibly expensive to do a skb_cow(skb, XDP_PACKET_HEADROOM) or whatever even if the XDP program doesn't try to push headers at all. I think we really need the verifier to somehow propagate whether certain XDP helpers are used or not. v5: - Handle both negative and positive offset after running prog - Fix mac length in XDP_TX case (Alexei) - Use rcu_dereference_protected() in free_netdev (kbuild test robot) v4: - Fix MAC header adjustment before calling prog (David Ahern) - Disable LRO when generic XDP is installed (Michael Chan) - Bypass qdisc et al. on XDP_TX and record the event (Alexei) - Do not perform generic XDP on reinjected packets (DaveM) v3: - Make sure XDP program sees packet at MAC header, push back MAC header if we do XDP_TX. (Alexei) - Elide GRO when generic XDP is in use. (Alexei) - Add XDP_FLAGS_SKB_MODE flag which the user can use to request generic XDP even if the driver has an XDP implementation. (Alexei) - Report whether SKB mode is in use in rtnl_xdp_fill() via XDP_FLAGS attribute. (Daniel) v2: - Add some "fall through" comments in switch statements based upon feedback from Andrew Lunn - Use RCU for generic xdp_prog, thanks to Johannes Berg. Tested-by: Andy Gospodarek Tested-by: Jesper Dangaard Brouer Tested-by: David Ahern Signed-off-by: David S.
Miller --- include/linux/netdevice.h | 8 +++ include/uapi/linux/if_link.h | 4 +- net/core/dev.c | 155 +++++++++++++++++++++++++++++++++++++++++-- net/core/gro_cells.c | 2 +- net/core/rtnetlink.c | 40 ++++++----- 5 files changed, 187 insertions(+), 22 deletions(-) (limited to 'include/uapi') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5d5267febd56..46d220c2bf92 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1905,9 +1905,17 @@ struct net_device { struct lock_class_key *qdisc_tx_busylock; struct lock_class_key *qdisc_running_key; bool proto_down; + struct bpf_prog __rcu *xdp_prog; }; #define to_net_dev(d) container_of(d, struct net_device, dev) +static inline bool netif_elide_gro(const struct net_device *dev) +{ + if (!(dev->features & NETIF_F_GRO) || dev->xdp_prog) + return true; + return false; +} + #define NETDEV_ALIGN 32 static inline diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 8b405afb2376..633aa0276d32 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -887,7 +887,9 @@ enum { /* XDP section */ #define XDP_FLAGS_UPDATE_IF_NOEXIST (1U << 0) -#define XDP_FLAGS_MASK (XDP_FLAGS_UPDATE_IF_NOEXIST) +#define XDP_FLAGS_SKB_MODE (2U << 0) +#define XDP_FLAGS_MASK (XDP_FLAGS_UPDATE_IF_NOEXIST | \ + XDP_FLAGS_SKB_MODE) enum { IFLA_XDP_UNSPEC, diff --git a/net/core/dev.c b/net/core/dev.c index db6e31564d06..1b3317c026c6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -95,6 +95,7 @@ #include #include #include +#include #include #include #include @@ -4251,6 +4252,125 @@ static int __netif_receive_skb(struct sk_buff *skb) return ret; } +static struct static_key generic_xdp_needed __read_mostly; + +static int generic_xdp_install(struct net_device *dev, struct netdev_xdp *xdp) +{ + struct bpf_prog *new = xdp->prog; + int ret = 0; + + switch (xdp->command) { + case XDP_SETUP_PROG: { + struct bpf_prog *old = rtnl_dereference(dev->xdp_prog); + + 
rcu_assign_pointer(dev->xdp_prog, new); + if (old) + bpf_prog_put(old); + + if (old && !new) { + static_key_slow_dec(&generic_xdp_needed); + } else if (new && !old) { + static_key_slow_inc(&generic_xdp_needed); + dev_disable_lro(dev); + } + break; + } + + case XDP_QUERY_PROG: + xdp->prog_attached = !!rcu_access_pointer(dev->xdp_prog); + break; + + default: + ret = -EINVAL; + break; + } + + return ret; +} + +static u32 netif_receive_generic_xdp(struct sk_buff *skb, + struct bpf_prog *xdp_prog) +{ + struct xdp_buff xdp; + u32 act = XDP_DROP; + void *orig_data; + int hlen, off; + u32 mac_len; + + /* Reinjected packets coming from act_mirred or similar should + * not get XDP generic processing. + */ + if (skb_cloned(skb)) + return XDP_PASS; + + if (skb_linearize(skb)) + goto do_drop; + + /* The XDP program wants to see the packet starting at the MAC + * header. + */ + mac_len = skb->data - skb_mac_header(skb); + hlen = skb_headlen(skb) + mac_len; + xdp.data = skb->data - mac_len; + xdp.data_end = xdp.data + hlen; + xdp.data_hard_start = skb->data - skb_headroom(skb); + orig_data = xdp.data; + + act = bpf_prog_run_xdp(xdp_prog, &xdp); + + off = xdp.data - orig_data; + if (off > 0) + __skb_pull(skb, off); + else if (off < 0) + __skb_push(skb, -off); + + switch (act) { + case XDP_TX: + __skb_push(skb, mac_len); + /* fall through */ + case XDP_PASS: + break; + + default: + bpf_warn_invalid_xdp_action(act); + /* fall through */ + case XDP_ABORTED: + trace_xdp_exception(skb->dev, xdp_prog, act); + /* fall through */ + case XDP_DROP: + do_drop: + kfree_skb(skb); + break; + } + + return act; +} + +/* When doing generic XDP we have to bypass the qdisc layer and the + * network taps in order to match in-driver-XDP behavior. 
+ */ +static void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog) +{ + struct net_device *dev = skb->dev; + struct netdev_queue *txq; + bool free_skb = true; + int cpu, rc; + + txq = netdev_pick_tx(dev, skb, NULL); + cpu = smp_processor_id(); + HARD_TX_LOCK(dev, txq, cpu); + if (!netif_xmit_stopped(txq)) { + rc = netdev_start_xmit(skb, dev, txq, 0); + if (dev_xmit_complete(rc)) + free_skb = false; + } + HARD_TX_UNLOCK(dev, txq); + if (free_skb) { + trace_xdp_exception(dev, xdp_prog, XDP_TX); + kfree_skb(skb); + } +} + static int netif_receive_skb_internal(struct sk_buff *skb) { int ret; @@ -4262,6 +4382,21 @@ static int netif_receive_skb_internal(struct sk_buff *skb) rcu_read_lock(); + if (static_key_false(&generic_xdp_needed)) { + struct bpf_prog *xdp_prog = rcu_dereference(skb->dev->xdp_prog); + + if (xdp_prog) { + u32 act = netif_receive_generic_xdp(skb, xdp_prog); + + if (act != XDP_PASS) { + rcu_read_unlock(); + if (act == XDP_TX) + generic_xdp_tx(skb, xdp_prog); + return NET_RX_DROP; + } + } + } + #ifdef CONFIG_RPS if (static_key_false(&rps_needed)) { struct rps_dev_flow voidflow, *rflow = &voidflow; @@ -4494,7 +4629,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff enum gro_result ret; int grow; - if (!(skb->dev->features & NETIF_F_GRO)) + if (netif_elide_gro(skb->dev)) goto normal; if (skb->csum_bad) @@ -6723,6 +6858,7 @@ EXPORT_SYMBOL(dev_change_proto_down); */ int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags) { + int (*xdp_op)(struct net_device *dev, struct netdev_xdp *xdp); const struct net_device_ops *ops = dev->netdev_ops; struct bpf_prog *prog = NULL; struct netdev_xdp xdp; @@ -6730,14 +6866,16 @@ int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags) ASSERT_RTNL(); - if (!ops->ndo_xdp) - return -EOPNOTSUPP; + xdp_op = ops->ndo_xdp; + if (!xdp_op || (flags & XDP_FLAGS_SKB_MODE)) + xdp_op = generic_xdp_install; + if (fd >= 0) { if (flags & XDP_FLAGS_UPDATE_IF_NOEXIST) { 
memset(&xdp, 0, sizeof(xdp)); xdp.command = XDP_QUERY_PROG; - err = ops->ndo_xdp(dev, &xdp); + err = xdp_op(dev, &xdp); if (err < 0) return err; if (xdp.prog_attached) @@ -6753,7 +6891,7 @@ int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags) xdp.command = XDP_SETUP_PROG; xdp.prog = prog; - err = ops->ndo_xdp(dev, &xdp); + err = xdp_op(dev, &xdp); if (err < 0 && prog) bpf_prog_put(prog); @@ -7793,6 +7931,7 @@ EXPORT_SYMBOL(alloc_netdev_mqs); void free_netdev(struct net_device *dev) { struct napi_struct *p, *n; + struct bpf_prog *prog; might_sleep(); netif_free_tx_queues(dev); @@ -7811,6 +7950,12 @@ void free_netdev(struct net_device *dev) free_percpu(dev->pcpu_refcnt); dev->pcpu_refcnt = NULL; + prog = rcu_dereference_protected(dev->xdp_prog, 1); + if (prog) { + bpf_prog_put(prog); + static_key_slow_dec(&generic_xdp_needed); + } + /* Compatibility with error handling in drivers */ if (dev->reg_state == NETREG_UNINITIALIZED) { netdev_freemem(dev); diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c index c98bbfbd26b8..814e58a3ce8b 100644 --- a/net/core/gro_cells.c +++ b/net/core/gro_cells.c @@ -13,7 +13,7 @@ int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb) struct net_device *dev = skb->dev; struct gro_cell *cell; - if (!gcells->cells || skb_cloned(skb) || !(dev->features & NETIF_F_GRO)) + if (!gcells->cells || skb_cloned(skb) || netif_elide_gro(dev)) return netif_rx(skb); cell = this_cpu_ptr(gcells->cells); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 088f9c8b4196..9031a6c8bfa7 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -896,15 +896,13 @@ static size_t rtnl_port_size(const struct net_device *dev, return port_self_size; } -static size_t rtnl_xdp_size(const struct net_device *dev) +static size_t rtnl_xdp_size(void) { size_t xdp_size = nla_total_size(0) + /* nest IFLA_XDP */ - nla_total_size(1); /* XDP_ATTACHED */ + nla_total_size(1) + /* XDP_ATTACHED */ + nla_total_size(4); /* XDP_FLAGS */ - 
if (!dev->netdev_ops->ndo_xdp) - return 0; - else - return xdp_size; + return xdp_size; } static noinline size_t if_nlmsg_size(const struct net_device *dev, @@ -943,7 +941,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_PORT_ID */ + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */ + nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */ - + rtnl_xdp_size(dev) /* IFLA_XDP */ + + rtnl_xdp_size() /* IFLA_XDP */ + nla_total_size(1); /* IFLA_PROTO_DOWN */ } @@ -1251,23 +1249,35 @@ static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev) static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev) { - struct netdev_xdp xdp_op = {}; struct nlattr *xdp; + u32 xdp_flags = 0; + u8 val = 0; int err; - if (!dev->netdev_ops->ndo_xdp) - return 0; xdp = nla_nest_start(skb, IFLA_XDP); if (!xdp) return -EMSGSIZE; - xdp_op.command = XDP_QUERY_PROG; - err = dev->netdev_ops->ndo_xdp(dev, &xdp_op); - if (err) - goto err_cancel; - err = nla_put_u8(skb, IFLA_XDP_ATTACHED, xdp_op.prog_attached); + if (rcu_access_pointer(dev->xdp_prog)) { + xdp_flags = XDP_FLAGS_SKB_MODE; + val = 1; + } else if (dev->netdev_ops->ndo_xdp) { + struct netdev_xdp xdp_op = {}; + + xdp_op.command = XDP_QUERY_PROG; + err = dev->netdev_ops->ndo_xdp(dev, &xdp_op); + if (err) + goto err_cancel; + val = xdp_op.prog_attached; + } + err = nla_put_u8(skb, IFLA_XDP_ATTACHED, val); if (err) goto err_cancel; + if (xdp_flags) { + err = nla_put_u32(skb, IFLA_XDP_FLAGS, xdp_flags); + if (err) + goto err_cancel; + } nla_nest_end(skb, xdp); return 0; -- cgit v1.2.3-71-gd317 From cc47dd684ee04f9f49f081002a74ef1ba9d14cc8 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 6 Apr 2017 16:33:14 -0600 Subject: IB/vmw_pvrdma: Spare annotate imm_data imm_data is copied directly from the ib_send_wr and ib_wc which have it marked as __be32, copy that mark into the uapi structures as well. 
Signed-off-by: Jason Gunthorpe Tested-by: Adit Ranadive Acked-by: Adit Ranadive Signed-off-by: Doug Ledford --- include/uapi/rdma/vmw_pvrdma-abi.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/rdma/vmw_pvrdma-abi.h b/include/uapi/rdma/vmw_pvrdma-abi.h index 5016abc9ee97..c8c1d2d6df4d 100644 --- a/include/uapi/rdma/vmw_pvrdma-abi.h +++ b/include/uapi/rdma/vmw_pvrdma-abi.h @@ -222,7 +222,7 @@ struct pvrdma_sq_wqe_hdr { __u32 opcode; /* operation type */ __u32 send_flags; /* wr flags */ union { - __u32 imm_data; + __be32 imm_data; __u32 invalidate_rkey; } ex; __u32 reserved; @@ -273,7 +273,7 @@ struct pvrdma_cqe { __u32 opcode; __u32 status; __u32 byte_len; - __u32 imm_data; + __be32 imm_data; __u32 src_qp; __u32 wc_flags; __u32 vendor_err; -- cgit v1.2.3-71-gd317 From 16719199a43f0740113041fb34a0854b1d7f2111 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Wed, 1 Mar 2017 03:12:03 +0300 Subject: uapi: fix linux/nfsd/cld.h userspace compilation errors Include <linux/types.h> and consistently use types it provides to fix the following linux/nfsd/cld.h userspace compilation errors: /usr/include/linux/nfsd/cld.h:40:2: error: unknown type name 'uint16_t' uint16_t cn_len; /* length of cm_id */ /usr/include/linux/nfsd/cld.h:46:2: error: unknown type name 'uint8_t' uint8_t cm_vers; /* upcall version */ /usr/include/linux/nfsd/cld.h:47:2: error: unknown type name 'uint8_t' uint8_t cm_cmd; /* upcall command */ /usr/include/linux/nfsd/cld.h:48:2: error: unknown type name 'int16_t' int16_t cm_status; /* return code */ /usr/include/linux/nfsd/cld.h:49:2: error: unknown type name 'uint32_t' uint32_t cm_xid; /* transaction id */ /usr/include/linux/nfsd/cld.h:51:3: error: unknown type name 'int64_t' int64_t cm_gracetime; /* grace period start time */ Signed-off-by: Dmitry V. Levin Signed-off-by: J.
Bruce Fields --- include/uapi/linux/nfsd/cld.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/nfsd/cld.h b/include/uapi/linux/nfsd/cld.h index f14a9ab06f1f..ec260274be0c 100644 --- a/include/uapi/linux/nfsd/cld.h +++ b/include/uapi/linux/nfsd/cld.h @@ -22,6 +22,8 @@ #ifndef _NFSD_CLD_H #define _NFSD_CLD_H +#include <linux/types.h> + /* latest upcall version available */ #define CLD_UPCALL_VERSION 1 @@ -37,18 +39,18 @@ enum cld_command { /* representation of long-form NFSv4 client ID */ struct cld_name { - uint16_t cn_len; /* length of cm_id */ + __u16 cn_len; /* length of cm_id */ unsigned char cn_id[NFS4_OPAQUE_LIMIT]; /* client-provided */ } __attribute__((packed)); /* message struct for communication with userspace */ struct cld_msg { - uint8_t cm_vers; /* upcall version */ - uint8_t cm_cmd; /* upcall command */ - int16_t cm_status; /* return code */ - uint32_t cm_xid; /* transaction id */ + __u8 cm_vers; /* upcall version */ + __u8 cm_cmd; /* upcall command */ + __s16 cm_status; /* return code */ + __u32 cm_xid; /* transaction id */ union { - int64_t cm_gracetime; /* grace period start time */ + __s64 cm_gracetime; /* grace period start time */ struct cld_name cm_name; } __attribute__((packed)) cm_u; } __attribute__((packed)); -- cgit v1.2.3-71-gd317 From ca986ad9bcd3893c8b0b4cc2cafcc8cf1554409c Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Fri, 21 Apr 2017 13:05:00 +0100 Subject: nl80211: allow multiple active scheduled scan requests This patch implements the idea to have multiple scheduled scan requests running concurrently. It mainly illustrates how to deal with the incoming request from user-space in terms of backward compatibility. In order to use multiple scheduled scans user-space needs to provide a flag attribute NL80211_ATTR_SCHED_SCAN_MULTI to indicate support. If not the request is treated as a legacy scan.
Drivers currently supporting scheduled scan are now indicating they support a single scheduled scan request. This obsoletes WIPHY_FLAG_SUPPORTS_SCHED_SCAN. Reviewed-by: Hante Meuleman Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Franky Lin Signed-off-by: Arend van Spriel [clean up netlink destroy path to avoid allocations, code cleanups] Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath6kl/cfg80211.c | 2 +- .../broadcom/brcm80211/brcmfmac/cfg80211.c | 2 +- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 2 +- drivers/net/wireless/marvell/mwifiex/cfg80211.c | 2 +- drivers/net/wireless/ti/wlcore/main.c | 2 +- include/net/cfg80211.h | 9 +- include/uapi/linux/nl80211.h | 12 ++- net/wireless/core.c | 29 +++--- net/wireless/core.h | 11 +- net/wireless/nl80211.c | 63 ++++++++--- net/wireless/rdev-ops.h | 2 +- net/wireless/scan.c | 115 +++++++++++++++++---- net/wireless/trace.h | 18 ++-- 13 files changed, 205 insertions(+), 64 deletions(-) (limited to 'include/uapi') diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index 0c118b7c362c..1906412afa70 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -3973,7 +3973,7 @@ int ath6kl_cfg80211_init(struct ath6kl *ar) WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD; if (test_bit(ATH6KL_FW_CAPABILITY_SCHED_SCAN_V2, ar->fw_capabilities)) - ar->wiphy->flags |= WIPHY_FLAG_SUPPORTS_SCHED_SCAN; + ar->wiphy->max_sched_scan_reqs = 1; if (test_bit(ATH6KL_FW_CAPABILITY_INACTIVITY_TIMEOUT, ar->fw_capabilities)) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index 8c7f1ef288c6..c71173dc9965 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -6374,11 +6374,11 @@ err: static void brcmf_wiphy_pno_params(struct wiphy *wiphy) { /* scheduled scan settings */ + 
wiphy->max_sched_scan_reqs = 1; wiphy->max_sched_scan_ssids = BRCMF_PNO_MAX_PFN_COUNT; wiphy->max_match_sets = BRCMF_PNO_MAX_PFN_COUNT; wiphy->max_sched_scan_ie_len = BRCMF_SCAN_IE_LEN_MAX; wiphy->max_sched_scan_plan_interval = BRCMF_PNO_SCHED_SCAN_MAX_PERIOD; - wiphy->flags |= WIPHY_FLAG_SUPPORTS_SCHED_SCAN; } #ifdef CONFIG_PM diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 5cdd95775ba6..8c58d47100a0 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -620,7 +620,7 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) else hw->wiphy->flags &= ~WIPHY_FLAG_PS_ON_BY_DEFAULT; - hw->wiphy->flags |= WIPHY_FLAG_SUPPORTS_SCHED_SCAN; + hw->wiphy->max_sched_scan_reqs = 1; hw->wiphy->max_sched_scan_ssids = PROBE_OPTION_MAX; hw->wiphy->max_match_sets = IWL_SCAN_MAX_PROFILES; /* we create the 802.11 header and zero length SSID IE. */ diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index 49b4c805b7d5..9927bd5aac56 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -4297,7 +4297,6 @@ int mwifiex_register_cfg80211(struct mwifiex_adapter *adapter) wiphy->flags |= WIPHY_FLAG_HAVE_AP_SME | WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD | WIPHY_FLAG_AP_UAPSD | - WIPHY_FLAG_SUPPORTS_SCHED_SCAN | WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL | WIPHY_FLAG_HAS_CHANNEL_SWITCH | WIPHY_FLAG_PS_ON_BY_DEFAULT; @@ -4316,6 +4315,7 @@ int mwifiex_register_cfg80211(struct mwifiex_adapter *adapter) NL80211_PROBE_RESP_OFFLOAD_SUPPORT_WPS2 | NL80211_PROBE_RESP_OFFLOAD_SUPPORT_P2P; + wiphy->max_sched_scan_reqs = 1; wiphy->max_sched_scan_ssids = MWIFIEX_MAX_SSID_LIST_LENGTH; wiphy->max_sched_scan_ie_len = MWIFIEX_MAX_VSIE_LEN; wiphy->max_match_sets = MWIFIEX_MAX_SSID_LIST_LENGTH; diff --git a/drivers/net/wireless/ti/wlcore/main.c 
b/drivers/net/wireless/ti/wlcore/main.c index a21fda910529..382ec15ec1af 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -6128,6 +6128,7 @@ static int wl1271_init_ieee80211(struct wl1271 *wl) wl->hw->wiphy->max_scan_ie_len = WL1271_CMD_TEMPL_MAX_SIZE - sizeof(struct ieee80211_header); + wl->hw->wiphy->max_sched_scan_reqs = 1; wl->hw->wiphy->max_sched_scan_ie_len = WL1271_CMD_TEMPL_MAX_SIZE - sizeof(struct ieee80211_header); @@ -6135,7 +6136,6 @@ static int wl1271_init_ieee80211(struct wl1271 *wl) wl->hw->wiphy->flags |= WIPHY_FLAG_AP_UAPSD | WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL | - WIPHY_FLAG_SUPPORTS_SCHED_SCAN | WIPHY_FLAG_HAS_CHANNEL_SWITCH; wl->hw->wiphy->features |= NL80211_FEATURE_AP_SCAN; diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index af958938b3b1..43c0f389c273 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1678,6 +1678,8 @@ struct cfg80211_bss_select_adjust { * @rcu_head: RCU callback used to free the struct * @owner_nlportid: netlink portid of owner (if this should is a request * owned by a particular socket) + * @nl_owner_dead: netlink owner socket was closed - this request be freed + * @list: for keeping list of requests. * @delay: delay in seconds to use before starting the first scan * cycle. 
The driver may ignore this parameter and start * immediately (or at any other time), if this feature is not @@ -1722,6 +1724,8 @@ struct cfg80211_sched_scan_request { unsigned long scan_start; struct rcu_head rcu_head; u32 owner_nlportid; + bool nl_owner_dead; + struct list_head list; /* keep last */ struct ieee80211_channel *channels[0]; @@ -3213,7 +3217,7 @@ enum wiphy_flags { WIPHY_FLAG_CONTROL_PORT_PROTOCOL = BIT(7), WIPHY_FLAG_IBSS_RSN = BIT(8), WIPHY_FLAG_MESH_AUTH = BIT(10), - WIPHY_FLAG_SUPPORTS_SCHED_SCAN = BIT(11), + /* use hole at 11 */ /* use hole at 12 */ WIPHY_FLAG_SUPPORTS_FW_ROAM = BIT(13), WIPHY_FLAG_AP_UAPSD = BIT(14), @@ -3551,6 +3555,8 @@ struct wiphy_iftype_ext_capab { * this variable determines its size * @max_scan_ssids: maximum number of SSIDs the device can scan for in * any given scan + * @max_sched_scan_reqs: maximum number of scheduled scan requests that + * the device can run concurrently. * @max_sched_scan_ssids: maximum number of SSIDs the device can scan * for in any given scheduled scan * @max_match_sets: maximum number of match sets the device can handle @@ -3687,6 +3693,7 @@ struct wiphy { int bss_priv_size; u8 max_scan_ssids; + u8 max_sched_scan_reqs; u8 max_sched_scan_ssids; u8 max_match_sets; u16 max_scan_ie_len; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 6095a6c4c412..f34127d241e5 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -387,7 +387,9 @@ * are used. Extra IEs can also be passed from the userspace by * using the %NL80211_ATTR_IE attribute. The first cycle of the * scheduled scan can be delayed by %NL80211_ATTR_SCHED_SCAN_DELAY - * is supplied. + * is supplied. If the device supports multiple concurrent scheduled + * scans, it will allow such when the caller provides the flag attribute + * %NL80211_ATTR_SCHED_SCAN_MULTI to indicate user-space support for it. * @NL80211_CMD_STOP_SCHED_SCAN: stop a scheduled scan. 
Returns -ENOENT if * scheduled scan is not running. The caller may assume that as soon * as the call returns, it is safe to start a new scheduled scan again. @@ -2081,6 +2083,11 @@ enum nl80211_commands { * @NL80211_ATTR_PMK: PMK for the PMKSA identified by %NL80211_ATTR_PMKID. * This is used with @NL80211_CMD_SET_PMKSA. * + * @NL80211_ATTR_SCHED_SCAN_MULTI: flag attribute which user-space shall use to + * indicate that it supports multiple active scheduled scan requests. + * @NL80211_ATTR_SCHED_SCAN_MAX_REQS: indicates maximum number of scheduled + * scan request that may be active for the device (u32). + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2500,6 +2507,9 @@ enum nl80211_attrs { NL80211_ATTR_PMK, + NL80211_ATTR_SCHED_SCAN_MULTI, + NL80211_ATTR_SCHED_SCAN_MAX_REQS, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/net/wireless/core.c b/net/wireless/core.c index 4ea28de3a636..a3c0c48afb85 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -330,14 +330,16 @@ static void cfg80211_destroy_iface_wk(struct work_struct *work) static void cfg80211_sched_scan_stop_wk(struct work_struct *work) { struct cfg80211_registered_device *rdev; + struct cfg80211_sched_scan_request *req, *tmp; rdev = container_of(work, struct cfg80211_registered_device, sched_scan_stop_wk); rtnl_lock(); - - __cfg80211_stop_sched_scan(rdev, false); - + list_for_each_entry_safe(req, tmp, &rdev->sched_scan_req_list, list) { + if (req->nl_owner_dead) + cfg80211_stop_sched_scan_req(rdev, req, false); + } rtnl_unlock(); } @@ -452,6 +454,7 @@ use_default_name: spin_lock_init(&rdev->beacon_registrations_lock); spin_lock_init(&rdev->bss_lock); INIT_LIST_HEAD(&rdev->bss_list); + INIT_LIST_HEAD(&rdev->sched_scan_req_list); INIT_WORK(&rdev->scan_done_wk, __cfg80211_scan_done); INIT_WORK(&rdev->sched_scan_results_wk, 
__cfg80211_sched_scan_results); INIT_LIST_HEAD(&rdev->mlme_unreg); @@ -1028,7 +1031,7 @@ void __cfg80211_leave(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev) { struct net_device *dev = wdev->netdev; - struct cfg80211_sched_scan_request *sched_scan_req; + struct cfg80211_sched_scan_request *pos, *tmp; ASSERT_RTNL(); ASSERT_WDEV_LOCK(wdev); @@ -1039,9 +1042,11 @@ void __cfg80211_leave(struct cfg80211_registered_device *rdev, break; case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_STATION: - sched_scan_req = rtnl_dereference(rdev->sched_scan_req); - if (sched_scan_req && dev == sched_scan_req->dev) - __cfg80211_stop_sched_scan(rdev, false); + list_for_each_entry_safe(pos, tmp, &rdev->sched_scan_req_list, + list) { + if (dev == pos->dev) + cfg80211_stop_sched_scan_req(rdev, pos, false); + } #ifdef CONFIG_CFG80211_WEXT kfree(wdev->wext.ie); @@ -1116,7 +1121,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev; - struct cfg80211_sched_scan_request *sched_scan_req; + struct cfg80211_sched_scan_request *pos, *tmp; if (!wdev) return NOTIFY_DONE; @@ -1193,10 +1198,10 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, ___cfg80211_scan_done(rdev, false); } - sched_scan_req = rtnl_dereference(rdev->sched_scan_req); - if (WARN_ON(sched_scan_req && - sched_scan_req->dev == wdev->netdev)) { - __cfg80211_stop_sched_scan(rdev, false); + list_for_each_entry_safe(pos, tmp, + &rdev->sched_scan_req_list, list) { + if (WARN_ON(pos && pos->dev == wdev->netdev)) + cfg80211_stop_sched_scan_req(rdev, pos, false); } rdev->opencount--; diff --git a/net/wireless/core.h b/net/wireless/core.h index f9b748e3425a..06eaf96053a8 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -74,7 +74,7 @@ struct cfg80211_registered_device { u32 bss_entries; struct cfg80211_scan_request 
*scan_req; /* protected by RTNL */ struct sk_buff *scan_msg; - struct cfg80211_sched_scan_request __rcu *sched_scan_req; + struct list_head sched_scan_req_list; unsigned long suspend_at; struct work_struct scan_done_wk; struct work_struct sched_scan_results_wk; @@ -416,9 +416,16 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, void __cfg80211_scan_done(struct work_struct *wk); void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool send_message); +void cfg80211_add_sched_scan_req(struct cfg80211_registered_device *rdev, + struct cfg80211_sched_scan_request *req); +int cfg80211_sched_scan_req_possible(struct cfg80211_registered_device *rdev, + bool want_multi); void __cfg80211_sched_scan_results(struct work_struct *wk); +int cfg80211_stop_sched_scan_req(struct cfg80211_registered_device *rdev, + struct cfg80211_sched_scan_request *req, + bool driver_initiated); int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev, - bool driver_initiated); + u64 reqid, bool driver_initiated); void cfg80211_upload_connect_keys(struct wireless_dev *wdev); int cfg80211_change_iface(struct cfg80211_registered_device *rdev, struct net_device *dev, enum nl80211_iftype ntype, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 45f5f418e562..ac7e2314f9ec 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -419,6 +419,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { .len = FILS_ERP_MAX_RRK_LEN }, [NL80211_ATTR_FILS_CACHE_ID] = { .len = 2 }, [NL80211_ATTR_PMK] = { .type = NLA_BINARY, .len = PMK_MAX_LEN }, + [NL80211_ATTR_SCHED_SCAN_MULTI] = { .type = NLA_FLAG }, }; /* policy for the key attributes */ @@ -1376,7 +1377,7 @@ static int nl80211_add_commands_unsplit(struct cfg80211_registered_device *rdev, CMD(tdls_mgmt, TDLS_MGMT); CMD(tdls_oper, TDLS_OPER); } - if (rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) + if (rdev->wiphy.max_sched_scan_reqs) CMD(sched_scan_start, 
START_SCHED_SCAN); CMD(probe_client, PROBE_CLIENT); CMD(set_noack_map, SET_NOACK_MAP); @@ -1815,6 +1816,11 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, nla_put_flag(msg, NL80211_ATTR_WIPHY_SELF_MANAGED_REG)) goto nla_put_failure; + if (rdev->wiphy.max_sched_scan_reqs && + nla_put_u32(msg, NL80211_ATTR_SCHED_SCAN_MAX_REQS, + rdev->wiphy.max_sched_scan_reqs)) + goto nla_put_failure; + if (nla_put(msg, NL80211_ATTR_EXT_FEATURES, sizeof(rdev->wiphy.ext_features), rdev->wiphy.ext_features)) @@ -7336,14 +7342,16 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_sched_scan_request *sched_scan_req; + bool want_multi; int err; - if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) || - !rdev->ops->sched_scan_start) + if (!rdev->wiphy.max_sched_scan_reqs || !rdev->ops->sched_scan_start) return -EOPNOTSUPP; - if (rdev->sched_scan_req) - return -EINPROGRESS; + want_multi = info->attrs[NL80211_ATTR_SCHED_SCAN_MULTI]; + err = cfg80211_sched_scan_req_possible(rdev, want_multi); + if (err) + return err; sched_scan_req = nl80211_parse_sched_scan(&rdev->wiphy, wdev, info->attrs, @@ -7353,6 +7361,14 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, if (err) goto out_err; + /* leave request id zero for legacy request + * or if driver does not support multi-scheduled scan + */ + if (want_multi && rdev->wiphy.max_sched_scan_reqs > 1) { + while (!sched_scan_req->reqid) + sched_scan_req->reqid = rdev->wiphy.cookie_counter++; + } + err = rdev_sched_scan_start(rdev, dev, sched_scan_req); if (err) goto out_free; @@ -7363,7 +7379,7 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, if (info->attrs[NL80211_ATTR_SOCKET_OWNER]) sched_scan_req->owner_nlportid = info->snd_portid; - rcu_assign_pointer(rdev->sched_scan_req, sched_scan_req); + cfg80211_add_sched_scan_req(rdev, sched_scan_req); nl80211_send_sched_scan(sched_scan_req, 
NL80211_CMD_START_SCHED_SCAN); return 0; @@ -7377,13 +7393,27 @@ out_err: static int nl80211_stop_sched_scan(struct sk_buff *skb, struct genl_info *info) { + struct cfg80211_sched_scan_request *req; struct cfg80211_registered_device *rdev = info->user_ptr[0]; + u64 cookie; - if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) || - !rdev->ops->sched_scan_stop) + if (!rdev->wiphy.max_sched_scan_reqs || !rdev->ops->sched_scan_stop) return -EOPNOTSUPP; - return __cfg80211_stop_sched_scan(rdev, false); + if (info->attrs[NL80211_ATTR_COOKIE]) { + cookie = nla_get_u64(info->attrs[NL80211_ATTR_COOKIE]); + return __cfg80211_stop_sched_scan(rdev, cookie, false); + } + + req = list_first_or_null_rcu(&rdev->sched_scan_req_list, + struct cfg80211_sched_scan_request, + list); + if (!req || req->reqid || + (req->owner_nlportid && + req->owner_nlportid != info->snd_portid)) + return -ENOENT; + + return cfg80211_stop_sched_scan_req(rdev, req, false); } static int nl80211_start_radar_detection(struct sk_buff *skb, @@ -14883,16 +14913,15 @@ static int nl80211_netlink_notify(struct notifier_block * nb, rcu_read_lock(); list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list) { - struct cfg80211_sched_scan_request *sched_scan_req = - rcu_dereference(rdev->sched_scan_req); - - if (sched_scan_req && notify->portid && - sched_scan_req->owner_nlportid == notify->portid) { - sched_scan_req->owner_nlportid = 0; + struct cfg80211_sched_scan_request *sched_scan_req; - if (rdev->ops->sched_scan_stop && - rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) + list_for_each_entry_rcu(sched_scan_req, + &rdev->sched_scan_req_list, + list) { + if (sched_scan_req->owner_nlportid == notify->portid) { + sched_scan_req->nl_owner_dead = true; schedule_work(&rdev->sched_scan_stop_wk); + } } list_for_each_entry_rcu(wdev, &rdev->wiphy.wdev_list, list) { diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index e4a99989dd06..783f89c3e504 100644 --- a/net/wireless/rdev-ops.h +++ 
b/net/wireless/rdev-ops.h @@ -813,7 +813,7 @@ rdev_sched_scan_start(struct cfg80211_registered_device *rdev, struct cfg80211_sched_scan_request *request) { int ret; - trace_rdev_sched_scan_start(&rdev->wiphy, dev, request); + trace_rdev_sched_scan_start(&rdev->wiphy, dev, request->reqid); ret = rdev->ops->sched_scan_start(&rdev->wiphy, dev, request); trace_rdev_return_int(&rdev->wiphy, ret); return ret; diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 6f4996c0f4df..bd9feed95c1e 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -300,6 +300,70 @@ void cfg80211_scan_done(struct cfg80211_scan_request *request, } EXPORT_SYMBOL(cfg80211_scan_done); +void cfg80211_add_sched_scan_req(struct cfg80211_registered_device *rdev, + struct cfg80211_sched_scan_request *req) +{ + ASSERT_RTNL(); + + list_add_rcu(&req->list, &rdev->sched_scan_req_list); +} + +static void cfg80211_del_sched_scan_req(struct cfg80211_registered_device *rdev, + struct cfg80211_sched_scan_request *req) +{ + ASSERT_RTNL(); + + list_del_rcu(&req->list); + kfree_rcu(req, rcu_head); +} + +static struct cfg80211_sched_scan_request * +cfg80211_find_sched_scan_req(struct cfg80211_registered_device *rdev, u64 reqid) +{ + struct cfg80211_sched_scan_request *pos; + + ASSERT_RTNL(); + + list_for_each_entry(pos, &rdev->sched_scan_req_list, list) { + if (pos->reqid == reqid) + return pos; + } + return ERR_PTR(-ENOENT); +} + +/* + * Determines if a scheduled scan request can be handled. When a legacy + * scheduled scan is running no other scheduled scan is allowed regardless + * whether the request is for legacy or multi-support scan. When a multi-support + * scheduled scan is running a request for legacy scan is not allowed. In this + * case a request for multi-support scan can be handled if resources are + * available, ie. struct wiphy::max_sched_scan_reqs limit is not yet reached. 
+ */ +int cfg80211_sched_scan_req_possible(struct cfg80211_registered_device *rdev, + bool want_multi) +{ + struct cfg80211_sched_scan_request *pos; + int i = 0; + + list_for_each_entry(pos, &rdev->sched_scan_req_list, list) { + /* request id zero means legacy in progress */ + if (!i && !pos->reqid) + return -EINPROGRESS; + i++; + } + + if (i) { + /* no legacy allowed when multi request(s) are active */ + if (!want_multi) + return -EINPROGRESS; + + /* resource limit reached */ + if (i == rdev->wiphy.max_sched_scan_reqs) + return -ENOSPC; + } + return 0; +} + void __cfg80211_sched_scan_results(struct work_struct *wk) { struct cfg80211_registered_device *rdev; @@ -310,10 +374,10 @@ void __cfg80211_sched_scan_results(struct work_struct *wk) rtnl_lock(); - request = rtnl_dereference(rdev->sched_scan_req); + request = cfg80211_find_sched_scan_req(rdev, 0); /* we don't have sched_scan_req anymore if the scan is stopping */ - if (request) { + if (!IS_ERR(request)) { if (request->flags & NL80211_SCAN_FLAG_FLUSH) { /* flush entries from previous scans */ spin_lock_bh(&rdev->bss_lock); @@ -329,10 +393,17 @@ void __cfg80211_sched_scan_results(struct work_struct *wk) void cfg80211_sched_scan_results(struct wiphy *wiphy) { + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + struct cfg80211_sched_scan_request *request; + trace_cfg80211_sched_scan_results(wiphy); /* ignore if we're not scanning */ - if (rcu_access_pointer(wiphy_to_rdev(wiphy)->sched_scan_req)) + rtnl_lock(); + request = cfg80211_find_sched_scan_req(rdev, 0); + rtnl_unlock(); + + if (!IS_ERR(request)) queue_work(cfg80211_wq, &wiphy_to_rdev(wiphy)->sched_scan_results_wk); } @@ -346,7 +417,7 @@ void cfg80211_sched_scan_stopped_rtnl(struct wiphy *wiphy) trace_cfg80211_sched_scan_stopped(wiphy); - __cfg80211_stop_sched_scan(rdev, true); + __cfg80211_stop_sched_scan(rdev, 0, true); } EXPORT_SYMBOL(cfg80211_sched_scan_stopped_rtnl); @@ -358,34 +429,40 @@ void cfg80211_sched_scan_stopped(struct wiphy 
*wiphy) } EXPORT_SYMBOL(cfg80211_sched_scan_stopped); -int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev, - bool driver_initiated) +int cfg80211_stop_sched_scan_req(struct cfg80211_registered_device *rdev, + struct cfg80211_sched_scan_request *req, + bool driver_initiated) { - struct cfg80211_sched_scan_request *sched_scan_req; - struct net_device *dev; - ASSERT_RTNL(); - if (!rdev->sched_scan_req) - return -ENOENT; - - sched_scan_req = rtnl_dereference(rdev->sched_scan_req); - dev = sched_scan_req->dev; - if (!driver_initiated) { - int err = rdev_sched_scan_stop(rdev, dev); + int err = rdev_sched_scan_stop(rdev, req->dev); if (err) return err; } - nl80211_send_sched_scan(sched_scan_req, NL80211_CMD_SCHED_SCAN_STOPPED); + nl80211_send_sched_scan(req, NL80211_CMD_SCHED_SCAN_STOPPED); - RCU_INIT_POINTER(rdev->sched_scan_req, NULL); - kfree_rcu(sched_scan_req, rcu_head); + cfg80211_del_sched_scan_req(rdev, req); return 0; } +int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev, + u64 reqid, bool driver_initiated) +{ + struct cfg80211_sched_scan_request *sched_scan_req; + + ASSERT_RTNL(); + + sched_scan_req = cfg80211_find_sched_scan_req(rdev, reqid); + if (IS_ERR(sched_scan_req)) + return PTR_ERR(sched_scan_req); + + return cfg80211_stop_sched_scan_req(rdev, sched_scan_req, + driver_initiated); +} + void cfg80211_bss_age(struct cfg80211_registered_device *rdev, unsigned long age_secs) { diff --git a/net/wireless/trace.h b/net/wireless/trace.h index fd55786f0462..52935c48b342 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1610,20 +1610,26 @@ DEFINE_EVENT(tx_rx_evt, rdev_set_antenna, TP_ARGS(wiphy, rx, tx) ); -TRACE_EVENT(rdev_sched_scan_start, - TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, - struct cfg80211_sched_scan_request *request), - TP_ARGS(wiphy, netdev, request), +DECLARE_EVENT_CLASS(wiphy_netdev_id_evt, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u64 id), + TP_ARGS(wiphy, 
netdev, id), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY + __field(u64, id) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; + __entry->id = id; ), - TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT, - WIPHY_PR_ARG, NETDEV_PR_ARG) + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", id: %llu", + WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->id) +); + +DEFINE_EVENT(wiphy_netdev_id_evt, rdev_sched_scan_start, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u64 id), + TP_ARGS(wiphy, netdev, id) ); TRACE_EVENT(rdev_tdls_mgmt, -- cgit v1.2.3-71-gd317 From 3007e3529ce1efd9c370a7b81633e45f730ae35b Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Fri, 21 Apr 2017 13:05:01 +0100 Subject: nl80211: add support for BSSIDs in scheduled scan matchsets This patch allows for the scheduled scan request to specify matchsets for specific BSSIDs. Reviewed-by: Hante Meuleman Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Franky Lin Signed-off-by: Arend van Spriel [docs, netlink policy fix] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 6 +++++- include/uapi/linux/nl80211.h | 4 ++++ net/wireless/nl80211.c | 40 ++++++++++++++++++++++++++++++---------- 3 files changed, 39 insertions(+), 11 deletions(-) (limited to 'include/uapi') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 43c0f389c273..4058518e267a 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1613,11 +1613,15 @@ static inline void get_random_mask_addr(u8 *buf, const u8 *addr, const u8 *mask) /** * struct cfg80211_match_set - sets of attributes to match * - * @ssid: SSID to be matched; may be zero-length for no match (RSSI only) + * @ssid: SSID to be matched; may be zero-length in case of BSSID match + * or no match (RSSI only) + * @bssid: BSSID to be matched; may be all-zero BSSID in case of SSID match + * or no match (RSSI only) * @rssi_thold: don't report scan results below this threshold (in s32 dBm) */ struct cfg80211_match_set { struct cfg80211_ssid ssid; + u8 bssid[ETH_ALEN]; s32 
rssi_thold; }; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index f34127d241e5..b8c44b98f12d 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3194,6 +3194,7 @@ enum nl80211_reg_rule_attr { * @__NL80211_SCHED_SCAN_MATCH_ATTR_INVALID: attribute number 0 is reserved * @NL80211_SCHED_SCAN_MATCH_ATTR_SSID: SSID to be used for matching, * only report BSS with matching SSID. + * (This cannot be used together with BSSID.) * @NL80211_SCHED_SCAN_MATCH_ATTR_RSSI: RSSI threshold (in dBm) for reporting a * BSS in scan results. Filtering is turned off if not specified. Note that * if this attribute is in a match set of its own, then it is treated as @@ -3209,6 +3210,8 @@ enum nl80211_reg_rule_attr { * BSS-es in the specified band is to be adjusted before doing * RSSI-based BSS selection. The attribute value is a packed structure * value as specified by &struct nl80211_bss_select_rssi_adjust. + * @NL80211_SCHED_SCAN_MATCH_ATTR_BSSID: BSSID to be used for matching + * (this cannot be used together with SSID). 
* @NL80211_SCHED_SCAN_MATCH_ATTR_MAX: highest scheduled scan filter * attribute number currently defined * @__NL80211_SCHED_SCAN_MATCH_ATTR_AFTER_LAST: internal use @@ -3220,6 +3223,7 @@ enum nl80211_sched_scan_match_attr { NL80211_SCHED_SCAN_MATCH_ATTR_RSSI, NL80211_SCHED_SCAN_MATCH_ATTR_RELATIVE_RSSI, NL80211_SCHED_SCAN_MATCH_ATTR_RSSI_ADJUST, + NL80211_SCHED_SCAN_MATCH_ATTR_BSSID, /* keep last */ __NL80211_SCHED_SCAN_MATCH_ATTR_AFTER_LAST, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index ac7e2314f9ec..dce69a87d4d0 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -497,6 +497,7 @@ static const struct nla_policy nl80211_match_policy[NL80211_SCHED_SCAN_MATCH_ATTR_MAX + 1] = { [NL80211_SCHED_SCAN_MATCH_ATTR_SSID] = { .type = NLA_BINARY, .len = IEEE80211_MAX_SSID_LEN }, + [NL80211_SCHED_SCAN_MATCH_ATTR_BSSID] = { .len = ETH_ALEN }, [NL80211_SCHED_SCAN_MATCH_ATTR_RSSI] = { .type = NLA_U32 }, }; @@ -7036,8 +7037,15 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, NULL); if (err) return ERR_PTR(err); + + /* SSID and BSSID are mutually exclusive */ + if (tb[NL80211_SCHED_SCAN_MATCH_ATTR_SSID] && + tb[NL80211_SCHED_SCAN_MATCH_ATTR_BSSID]) + return ERR_PTR(-EINVAL); + /* add other standalone attributes here */ - if (tb[NL80211_SCHED_SCAN_MATCH_ATTR_SSID]) { + if (tb[NL80211_SCHED_SCAN_MATCH_ATTR_SSID] || + tb[NL80211_SCHED_SCAN_MATCH_ATTR_BSSID]) { n_match_sets++; continue; } @@ -7208,7 +7216,7 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, nla_for_each_nested(attr, attrs[NL80211_ATTR_SCHED_SCAN_MATCH], tmp) { - struct nlattr *ssid, *rssi; + struct nlattr *ssid, *bssid, *rssi; err = nla_parse_nested(tb, NL80211_SCHED_SCAN_MATCH_ATTR_MAX, @@ -7217,7 +7225,8 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, if (err) goto out_free; ssid = tb[NL80211_SCHED_SCAN_MATCH_ATTR_SSID]; - if (ssid) { + bssid = tb[NL80211_SCHED_SCAN_MATCH_ATTR_BSSID]; + if (ssid || 
bssid) { if (WARN_ON(i >= n_match_sets)) { /* this indicates a programming error, * the loop above should have verified @@ -7227,14 +7236,25 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, goto out_free; } - if (nla_len(ssid) > IEEE80211_MAX_SSID_LEN) { - err = -EINVAL; - goto out_free; + if (ssid) { + if (nla_len(ssid) > IEEE80211_MAX_SSID_LEN) { + err = -EINVAL; + goto out_free; + } + memcpy(request->match_sets[i].ssid.ssid, + nla_data(ssid), nla_len(ssid)); + request->match_sets[i].ssid.ssid_len = + nla_len(ssid); + } + if (bssid) { + if (nla_len(bssid) != ETH_ALEN) { + err = -EINVAL; + goto out_free; + } + memcpy(request->match_sets[i].bssid, + nla_data(bssid), ETH_ALEN); } - memcpy(request->match_sets[i].ssid.ssid, - nla_data(ssid), nla_len(ssid)); - request->match_sets[i].ssid.ssid_len = - nla_len(ssid); + /* special attribute - old implementation w/a */ request->match_sets[i].rssi_thold = default_match_rssi; -- cgit v1.2.3-71-gd317 From 1741937d475d91ed95abb37f07e8571e23b9a7fe Mon Sep 17 00:00:00 2001 From: "Dmitry V. 
Levin" Date: Wed, 26 Apr 2017 14:50:00 +0100 Subject: uapi: change the type of struct statx_timestamp.tv_nsec to unsigned The comment asserting that the value of struct statx_timestamp.tv_nsec must be negative when statx_timestamp.tv_sec is negative, is wrong, as could be seen from the following example: #define _FILE_OFFSET_BITS 64 #include <assert.h> #include <fcntl.h> #include <stdio.h> #include <sys/stat.h> #include <unistd.h> #include <asm/unistd.h> #include <linux/stat.h> int main(void) { static const struct timespec ts[2] = { { .tv_nsec = UTIME_OMIT }, { .tv_sec = -2, .tv_nsec = 42 } }; assert(utimensat(AT_FDCWD, ".", ts, 0) == 0); struct stat st; assert(stat(".", &st) == 0); printf("st_mtim.tv_sec = %lld, st_mtim.tv_nsec = %lu\n", (long long) st.st_mtim.tv_sec, (unsigned long) st.st_mtim.tv_nsec); struct statx stx; assert(syscall(__NR_statx, AT_FDCWD, ".", 0, 0, &stx) == 0); printf("stx_mtime.tv_sec = %lld, stx_mtime.tv_nsec = %lu\n", (long long) stx.stx_mtime.tv_sec, (unsigned long) stx.stx_mtime.tv_nsec); return 0; } It expectedly prints: st_mtim.tv_sec = -2, st_mtim.tv_nsec = 42 stx_mtime.tv_sec = -2, stx_mtime.tv_nsec = 42 The more generic comment asserting that the value of struct statx_timestamp.tv_nsec might be negative is confusing to say the least. It contradicts both the struct stat.st_[acm]time_nsec tradition and struct timespec.tv_nsec requirements in utimensat syscall. If statx syscall ever returns a stx_[acm]time containing a negative tv_nsec that cannot be passed unmodified to utimensat syscall, it will cause an immense confusion. Fix this source of confusion by changing the type of struct statx_timestamp.tv_nsec from __s32 to __u32. Fixes: a528d35e8bfc ("statx: Add a system call to make enhanced file info available") Signed-off-by: Dmitry V.
Levin Signed-off-by: David Howells cc: linux-api@vger.kernel.org cc: mtk.manpages@gmail.com Signed-off-by: Al Viro --- include/uapi/linux/stat.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h index d538897b8e08..17b10304c393 100644 --- a/include/uapi/linux/stat.h +++ b/include/uapi/linux/stat.h @@ -48,17 +48,13 @@ * tv_sec holds the number of seconds before (negative) or after (positive) * 00:00:00 1st January 1970 UTC. * - * tv_nsec holds a number of nanoseconds before (0..-999,999,999 if tv_sec is - * negative) or after (0..999,999,999 if tv_sec is positive) the tv_sec time. - * - * Note that if both tv_sec and tv_nsec are non-zero, then the two values must - * either be both positive or both negative. + * tv_nsec holds a number of nanoseconds (0..999,999,999) after the tv_sec time. * * __reserved is held in case we need a yet finer resolution. */ struct statx_timestamp { __s64 tv_sec; - __s32 tv_nsec; + __u32 tv_nsec; __s32 __reserved; }; -- cgit v1.2.3-71-gd317 From 99f906e9ad7b6e79ffeda30f45906a8448b9d6a2 Mon Sep 17 00:00:00 2001 From: Mike Manning Date: Wed, 26 Apr 2017 14:48:09 +0100 Subject: bridge: add per-port broadcast flood flag Support for l2 multicast flood control was added in commit b6cb5ac8331b ("net: bridge: add per-port multicast flood flag"). It allows broadcast as it was introduced specifically for unknown multicast flood control. But as broadcast is a special case of multicast, this may also need to be disabled. For this purpose, introduce a flag to disable the flooding of received l2 broadcasts. This approach is backwards compatible and provides flexibility in filtering for the desired packet types. Cc: Nikolay Aleksandrov Signed-off-by: Mike Manning Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- include/linux/if_bridge.h | 1 + include/uapi/linux/if_link.h | 1 + net/bridge/br_forward.c | 24 +++++++++++++++++------- net/bridge/br_if.c | 2 +- net/bridge/br_netlink.c | 3 +++ net/bridge/br_sysfs_if.c | 2 ++ 6 files changed, 25 insertions(+), 8 deletions(-) (limited to 'include/uapi') diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index c5847dc75a93..0c16866a7aac 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -48,6 +48,7 @@ struct br_ip_list { #define BR_MCAST_FLOOD BIT(11) #define BR_MULTICAST_TO_UNICAST BIT(12) #define BR_VLAN_TUNNEL BIT(13) +#define BR_BCAST_FLOOD BIT(14) #define BR_DEFAULT_AGEING_TIME (300 * HZ) diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 633aa0276d32..8e56ac70e0d1 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -323,6 +323,7 @@ enum { IFLA_BRPORT_MCAST_FLOOD, IFLA_BRPORT_MCAST_TO_UCAST, IFLA_BRPORT_VLAN_TUNNEL, + IFLA_BRPORT_BCAST_FLOOD, __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 902af6ba481c..48fb17417fac 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -183,13 +183,23 @@ void br_flood(struct net_bridge *br, struct sk_buff *skb, struct net_bridge_port *p; list_for_each_entry_rcu(p, &br->port_list, list) { - /* Do not flood unicast traffic to ports that turn it off */ - if (pkt_type == BR_PKT_UNICAST && !(p->flags & BR_FLOOD)) - continue; - /* Do not flood if mc off, except for traffic we originate */ - if (pkt_type == BR_PKT_MULTICAST && - !(p->flags & BR_MCAST_FLOOD) && skb->dev != br->dev) - continue; + /* Do not flood unicast traffic to ports that turn it off, nor + * other traffic if flood off, except for traffic we originate + */ + switch (pkt_type) { + case BR_PKT_UNICAST: + if (!(p->flags & BR_FLOOD)) + continue; + break; + case BR_PKT_MULTICAST: + if (!(p->flags & BR_MCAST_FLOOD) && skb->dev != 
br->dev) + continue; + break; + case BR_PKT_BROADCAST: + if (!(p->flags & BR_BCAST_FLOOD) && skb->dev != br->dev) + continue; + break; + } /* Do not flood to ports that enable proxy ARP */ if (p->flags & BR_PROXYARP) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index f3544d96155c..7f8d05cf9065 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -361,7 +361,7 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br, p->path_cost = port_cost(dev); p->priority = 0x8000 >> BR_PORT_BITS; p->port_no = index; - p->flags = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD; + p->flags = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD; br_init_port(p); br_set_state(p, BR_STATE_DISABLED); br_stp_port_timer_init(p); diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 650986473577..a572db710d4e 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -189,6 +189,8 @@ static int br_port_fill_attrs(struct sk_buff *skb, !!(p->flags & BR_FLOOD)) || nla_put_u8(skb, IFLA_BRPORT_MCAST_FLOOD, !!(p->flags & BR_MCAST_FLOOD)) || + nla_put_u8(skb, IFLA_BRPORT_BCAST_FLOOD, + !!(p->flags & BR_BCAST_FLOOD)) || nla_put_u8(skb, IFLA_BRPORT_PROXYARP, !!(p->flags & BR_PROXYARP)) || nla_put_u8(skb, IFLA_BRPORT_PROXYARP_WIFI, !!(p->flags & BR_PROXYARP_WIFI)) || @@ -683,6 +685,7 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) br_set_port_flag(p, tb, IFLA_BRPORT_UNICAST_FLOOD, BR_FLOOD); br_set_port_flag(p, tb, IFLA_BRPORT_MCAST_FLOOD, BR_MCAST_FLOOD); br_set_port_flag(p, tb, IFLA_BRPORT_MCAST_TO_UCAST, BR_MULTICAST_TO_UNICAST); + br_set_port_flag(p, tb, IFLA_BRPORT_BCAST_FLOOD, BR_BCAST_FLOOD); br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP, BR_PROXYARP); br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP_WIFI, BR_PROXYARP_WIFI); diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 79aee759aba5..5d5d413a6cf8 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -173,6 +173,7 @@ 
BRPORT_ATTR_FLAG(unicast_flood, BR_FLOOD); BRPORT_ATTR_FLAG(proxyarp, BR_PROXYARP); BRPORT_ATTR_FLAG(proxyarp_wifi, BR_PROXYARP_WIFI); BRPORT_ATTR_FLAG(multicast_flood, BR_MCAST_FLOOD); +BRPORT_ATTR_FLAG(broadcast_flood, BR_BCAST_FLOOD); #ifdef CONFIG_BRIDGE_IGMP_SNOOPING static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf) @@ -221,6 +222,7 @@ static const struct brport_attribute *brport_attrs[] = { &brport_attr_proxyarp, &brport_attr_proxyarp_wifi, &brport_attr_multicast_flood, + &brport_attr_broadcast_flood, NULL }; -- cgit v1.2.3-71-gd317 From 2c156ac71c6b2518f3d589190f2a8872a8764faf Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Mon, 27 Mar 2017 15:15:14 +0530 Subject: misc: Add host side PCI driver for PCI test function device Add PCI endpoint test driver that can verify base address register, legacy interrupt/MSI interrupt and read/write/copy buffers between host and device. The corresponding pci-epf-test function driver should be used on the EP side. Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Bjorn Helgaas --- drivers/misc/Kconfig | 7 + drivers/misc/Makefile | 1 + drivers/misc/pci_endpoint_test.c | 534 +++++++++++++++++++++++++++++++++++++++ include/uapi/linux/Kbuild | 1 + include/uapi/linux/pcitest.h | 19 ++ 5 files changed, 562 insertions(+) create mode 100644 drivers/misc/pci_endpoint_test.c create mode 100644 include/uapi/linux/pcitest.h (limited to 'include/uapi') diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index c290990d73ed..527b115c4e23 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -771,6 +771,13 @@ config PANEL_BOOT_MESSAGE endif # PANEL +config PCI_ENDPOINT_TEST + depends on PCI + tristate "PCI Endpoint Test driver" + ---help--- + Enable this configuration option to enable the host side test driver + for PCI Endpoint. 
+ source "drivers/misc/c2port/Kconfig" source "drivers/misc/eeprom/Kconfig" source "drivers/misc/cb710/Kconfig" diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index 7a3ea89339b4..6e139cd70421 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -54,6 +54,7 @@ obj-$(CONFIG_ECHO) += echo/ obj-$(CONFIG_VEXPRESS_SYSCFG) += vexpress-syscfg.o obj-$(CONFIG_CXL_BASE) += cxl/ obj-$(CONFIG_PANEL) += panel.o +obj-$(CONFIG_PCI_ENDPOINT_TEST) += pci_endpoint_test.o lkdtm-$(CONFIG_LKDTM) += lkdtm_core.o lkdtm-$(CONFIG_LKDTM) += lkdtm_bugs.o diff --git a/drivers/misc/pci_endpoint_test.c b/drivers/misc/pci_endpoint_test.c new file mode 100644 index 000000000000..09c10f426b64 --- /dev/null +++ b/drivers/misc/pci_endpoint_test.c @@ -0,0 +1,534 @@ +/** + * Host side test driver to test endpoint functionality + * + * Copyright (C) 2017 Texas Instruments + * Author: Kishon Vijay Abraham I + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 of + * the License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include <linux/crc32.h> +#include <linux/delay.h> +#include <linux/fs.h> +#include <linux/io.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/miscdevice.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/random.h> +#include <linux/slab.h> +#include <linux/pci.h> +#include <linux/pci_ids.h> + +#include <linux/pci_regs.h> + +#include <uapi/linux/pcitest.h> + +#define DRV_MODULE_NAME "pci-endpoint-test" + +#define PCI_ENDPOINT_TEST_MAGIC 0x0 + +#define PCI_ENDPOINT_TEST_COMMAND 0x4 +#define COMMAND_RAISE_LEGACY_IRQ BIT(0) +#define COMMAND_RAISE_MSI_IRQ BIT(1) +#define MSI_NUMBER_SHIFT 2 +/* 6 bits for MSI number */ +#define COMMAND_READ BIT(8) +#define COMMAND_WRITE BIT(9) +#define COMMAND_COPY BIT(10) + +#define PCI_ENDPOINT_TEST_STATUS 0x8 +#define STATUS_READ_SUCCESS BIT(0) +#define STATUS_READ_FAIL BIT(1) +#define STATUS_WRITE_SUCCESS BIT(2) +#define STATUS_WRITE_FAIL BIT(3) +#define STATUS_COPY_SUCCESS BIT(4) +#define STATUS_COPY_FAIL BIT(5) +#define STATUS_IRQ_RAISED BIT(6) +#define STATUS_SRC_ADDR_INVALID BIT(7) +#define STATUS_DST_ADDR_INVALID BIT(8) + +#define PCI_ENDPOINT_TEST_LOWER_SRC_ADDR 0xc +#define PCI_ENDPOINT_TEST_UPPER_SRC_ADDR 0x10 + +#define PCI_ENDPOINT_TEST_LOWER_DST_ADDR 0x14 +#define PCI_ENDPOINT_TEST_UPPER_DST_ADDR 0x18 + +#define PCI_ENDPOINT_TEST_SIZE 0x1c +#define PCI_ENDPOINT_TEST_CHECKSUM 0x20 + +static DEFINE_IDA(pci_endpoint_test_ida); + +#define to_endpoint_test(priv) container_of((priv), struct pci_endpoint_test, \ + miscdev) +enum pci_barno { + BAR_0, + BAR_1, + BAR_2, + BAR_3, + BAR_4, + BAR_5, +}; + +struct pci_endpoint_test { + struct pci_dev *pdev; + void __iomem *base; + void __iomem *bar[6]; + struct completion irq_raised; + int last_irq; + /* mutex to protect the ioctls */ + struct mutex mutex; + struct miscdevice miscdev; +}; + +static int bar_size[] = { 4, 512, 1024, 16384, 131072, 1048576 }; + +static inline u32 pci_endpoint_test_readl(struct pci_endpoint_test *test, + u32 offset) +{ + return readl(test->base + offset); +} + +static inline void pci_endpoint_test_writel(struct pci_endpoint_test *test, + u32 offset, u32 value) +{ + writel(value, test->base + offset); +} + +static inline u32
pci_endpoint_test_bar_readl(struct pci_endpoint_test *test, + int bar, int offset) +{ + return readl(test->bar[bar] + offset); +} + +static inline void pci_endpoint_test_bar_writel(struct pci_endpoint_test *test, + int bar, u32 offset, u32 value) +{ + writel(value, test->bar[bar] + offset); +} + +static irqreturn_t pci_endpoint_test_irqhandler(int irq, void *dev_id) +{ + struct pci_endpoint_test *test = dev_id; + u32 reg; + + reg = pci_endpoint_test_readl(test, PCI_ENDPOINT_TEST_STATUS); + if (reg & STATUS_IRQ_RAISED) { + test->last_irq = irq; + complete(&test->irq_raised); + reg &= ~STATUS_IRQ_RAISED; + } + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_STATUS, + reg); + + return IRQ_HANDLED; +} + +static bool pci_endpoint_test_bar(struct pci_endpoint_test *test, + enum pci_barno barno) +{ + int j; + u32 val; + int size; + + if (!test->bar[barno]) + return false; + + size = bar_size[barno]; + + for (j = 0; j < size; j += 4) + pci_endpoint_test_bar_writel(test, barno, j, 0xA0A0A0A0); + + for (j = 0; j < size; j += 4) { + val = pci_endpoint_test_bar_readl(test, barno, j); + if (val != 0xA0A0A0A0) + return false; + } + + return true; +} + +static bool pci_endpoint_test_legacy_irq(struct pci_endpoint_test *test) +{ + u32 val; + + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_COMMAND, + COMMAND_RAISE_LEGACY_IRQ); + val = wait_for_completion_timeout(&test->irq_raised, + msecs_to_jiffies(1000)); + if (!val) + return false; + + return true; +} + +static bool pci_endpoint_test_msi_irq(struct pci_endpoint_test *test, + u8 msi_num) +{ + u32 val; + struct pci_dev *pdev = test->pdev; + + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_COMMAND, + msi_num << MSI_NUMBER_SHIFT | + COMMAND_RAISE_MSI_IRQ); + val = wait_for_completion_timeout(&test->irq_raised, + msecs_to_jiffies(1000)); + if (!val) + return false; + + if (test->last_irq - pdev->irq == msi_num - 1) + return true; + + return false; +} + +static bool pci_endpoint_test_copy(struct pci_endpoint_test *test, size_t 
size) +{ + bool ret = false; + void *src_addr; + void *dst_addr; + dma_addr_t src_phys_addr; + dma_addr_t dst_phys_addr; + struct pci_dev *pdev = test->pdev; + struct device *dev = &pdev->dev; + u32 src_crc32; + u32 dst_crc32; + + src_addr = dma_alloc_coherent(dev, size, &src_phys_addr, GFP_KERNEL); + if (!src_addr) { + dev_err(dev, "failed to allocate source buffer\n"); + ret = false; + goto err; + } + + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_LOWER_SRC_ADDR, + lower_32_bits(src_phys_addr)); + + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_UPPER_SRC_ADDR, + upper_32_bits(src_phys_addr)); + + get_random_bytes(src_addr, size); + src_crc32 = crc32_le(~0, src_addr, size); + + dst_addr = dma_alloc_coherent(dev, size, &dst_phys_addr, GFP_KERNEL); + if (!dst_addr) { + dev_err(dev, "failed to allocate destination address\n"); + ret = false; + goto err_src_addr; + } + + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_LOWER_DST_ADDR, + lower_32_bits(dst_phys_addr)); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_UPPER_DST_ADDR, + upper_32_bits(dst_phys_addr)); + + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_SIZE, + size); + + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_COMMAND, + 1 << MSI_NUMBER_SHIFT | COMMAND_COPY); + + wait_for_completion(&test->irq_raised); + + dst_crc32 = crc32_le(~0, dst_addr, size); + if (dst_crc32 == src_crc32) + ret = true; + + dma_free_coherent(dev, size, dst_addr, dst_phys_addr); + +err_src_addr: + dma_free_coherent(dev, size, src_addr, src_phys_addr); + +err: + return ret; +} + +static bool pci_endpoint_test_write(struct pci_endpoint_test *test, size_t size) +{ + bool ret = false; + u32 reg; + void *addr; + dma_addr_t phys_addr; + struct pci_dev *pdev = test->pdev; + struct device *dev = &pdev->dev; + u32 crc32; + + addr = dma_alloc_coherent(dev, size, &phys_addr, GFP_KERNEL); + if (!addr) { + dev_err(dev, "failed to allocate address\n"); + ret = false; + goto err; + } + + get_random_bytes(addr, size); + + crc32 = 
crc32_le(~0, addr, size); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_CHECKSUM, + crc32); + + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_LOWER_SRC_ADDR, + lower_32_bits(phys_addr)); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_UPPER_SRC_ADDR, + upper_32_bits(phys_addr)); + + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_SIZE, size); + + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_COMMAND, + 1 << MSI_NUMBER_SHIFT | COMMAND_READ); + + wait_for_completion(&test->irq_raised); + + reg = pci_endpoint_test_readl(test, PCI_ENDPOINT_TEST_STATUS); + if (reg & STATUS_READ_SUCCESS) + ret = true; + + dma_free_coherent(dev, size, addr, phys_addr); + +err: + return ret; +} + +static bool pci_endpoint_test_read(struct pci_endpoint_test *test, size_t size) +{ + bool ret = false; + void *addr; + dma_addr_t phys_addr; + struct pci_dev *pdev = test->pdev; + struct device *dev = &pdev->dev; + u32 crc32; + + addr = dma_alloc_coherent(dev, size, &phys_addr, GFP_KERNEL); + if (!addr) { + dev_err(dev, "failed to allocate destination address\n"); + ret = false; + goto err; + } + + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_LOWER_DST_ADDR, + lower_32_bits(phys_addr)); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_UPPER_DST_ADDR, + upper_32_bits(phys_addr)); + + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_SIZE, size); + + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_COMMAND, + 1 << MSI_NUMBER_SHIFT | COMMAND_WRITE); + + wait_for_completion(&test->irq_raised); + + crc32 = crc32_le(~0, addr, size); + if (crc32 == pci_endpoint_test_readl(test, PCI_ENDPOINT_TEST_CHECKSUM)) + ret = true; + + dma_free_coherent(dev, size, addr, phys_addr); +err: + return ret; +} + +static long pci_endpoint_test_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + int ret = -EINVAL; + enum pci_barno bar; + struct pci_endpoint_test *test = to_endpoint_test(file->private_data); + + mutex_lock(&test->mutex); + switch (cmd) { + case PCITEST_BAR: + bar = arg; 
+ if (bar < 0 || bar > 5) + goto ret; + ret = pci_endpoint_test_bar(test, bar); + break; + case PCITEST_LEGACY_IRQ: + ret = pci_endpoint_test_legacy_irq(test); + break; + case PCITEST_MSI: + ret = pci_endpoint_test_msi_irq(test, arg); + break; + case PCITEST_WRITE: + ret = pci_endpoint_test_write(test, arg); + break; + case PCITEST_READ: + ret = pci_endpoint_test_read(test, arg); + break; + case PCITEST_COPY: + ret = pci_endpoint_test_copy(test, arg); + break; + } + +ret: + mutex_unlock(&test->mutex); + return ret; +} + +static const struct file_operations pci_endpoint_test_fops = { + .owner = THIS_MODULE, + .unlocked_ioctl = pci_endpoint_test_ioctl, +}; + +static int pci_endpoint_test_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + int i; + int err; + int irq; + int id; + char name[20]; + enum pci_barno bar; + void __iomem *base; + struct device *dev = &pdev->dev; + struct pci_endpoint_test *test; + struct miscdevice *misc_device; + + if (pci_is_bridge(pdev)) + return -ENODEV; + + test = devm_kzalloc(dev, sizeof(*test), GFP_KERNEL); + if (!test) + return -ENOMEM; + + test->pdev = pdev; + init_completion(&test->irq_raised); + mutex_init(&test->mutex); + + err = pci_enable_device(pdev); + if (err) { + dev_err(dev, "Cannot enable PCI device\n"); + return err; + } + + err = pci_request_regions(pdev, DRV_MODULE_NAME); + if (err) { + dev_err(dev, "Cannot obtain PCI resources\n"); + goto err_disable_pdev; + } + + pci_set_master(pdev); + + irq = pci_alloc_irq_vectors(pdev, 1, 32, PCI_IRQ_MSI); + if (irq < 0) + dev_err(dev, "failed to get MSI interrupts\n"); + + err = devm_request_irq(dev, pdev->irq, pci_endpoint_test_irqhandler, + IRQF_SHARED, DRV_MODULE_NAME, test); + if (err) { + dev_err(dev, "failed to request IRQ %d\n", pdev->irq); + goto err_disable_msi; + } + + for (i = 1; i < irq; i++) { + err = devm_request_irq(dev, pdev->irq + i, + pci_endpoint_test_irqhandler, + IRQF_SHARED, DRV_MODULE_NAME, test); + if (err) + dev_err(dev, "failed to request 
IRQ %d for MSI %d\n", + pdev->irq + i, i + 1); + } + + for (bar = BAR_0; bar <= BAR_5; bar++) { + base = pci_ioremap_bar(pdev, bar); + if (!base) { + dev_err(dev, "failed to read BAR%d\n", bar); + WARN_ON(bar == BAR_0); + } + test->bar[bar] = base; + } + + test->base = test->bar[0]; + if (!test->base) { + dev_err(dev, "Cannot perform PCI test without BAR0\n"); + goto err_iounmap; + } + + pci_set_drvdata(pdev, test); + + id = ida_simple_get(&pci_endpoint_test_ida, 0, 0, GFP_KERNEL); + if (id < 0) { + dev_err(dev, "unable to get id\n"); + goto err_iounmap; + } + + snprintf(name, sizeof(name), DRV_MODULE_NAME ".%d", id); + misc_device = &test->miscdev; + misc_device->minor = MISC_DYNAMIC_MINOR; + misc_device->name = name; + misc_device->fops = &pci_endpoint_test_fops, + + err = misc_register(misc_device); + if (err) { + dev_err(dev, "failed to register device\n"); + goto err_ida_remove; + } + + return 0; + +err_ida_remove: + ida_simple_remove(&pci_endpoint_test_ida, id); + +err_iounmap: + for (bar = BAR_0; bar <= BAR_5; bar++) { + if (test->bar[bar]) + pci_iounmap(pdev, test->bar[bar]); + } + +err_disable_msi: + pci_disable_msi(pdev); + pci_release_regions(pdev); + +err_disable_pdev: + pci_disable_device(pdev); + + return err; +} + +static void pci_endpoint_test_remove(struct pci_dev *pdev) +{ + int id; + enum pci_barno bar; + struct pci_endpoint_test *test = pci_get_drvdata(pdev); + struct miscdevice *misc_device = &test->miscdev; + + if (sscanf(misc_device->name, DRV_MODULE_NAME ".%d", &id) != 1) + return; + + misc_deregister(&test->miscdev); + ida_simple_remove(&pci_endpoint_test_ida, id); + for (bar = BAR_0; bar <= BAR_5; bar++) { + if (test->bar[bar]) + pci_iounmap(pdev, test->bar[bar]); + } + pci_disable_msi(pdev); + pci_release_regions(pdev); + pci_disable_device(pdev); +} + +static const struct pci_device_id pci_endpoint_test_tbl[] = { + { PCI_DEVICE(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_DRA74x) }, + { PCI_DEVICE(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_DRA72x) }, + { 
} +}; +MODULE_DEVICE_TABLE(pci, pci_endpoint_test_tbl); + +static struct pci_driver pci_endpoint_test_driver = { + .name = DRV_MODULE_NAME, + .id_table = pci_endpoint_test_tbl, + .probe = pci_endpoint_test_probe, + .remove = pci_endpoint_test_remove, +}; +module_pci_driver(pci_endpoint_test_driver); + +MODULE_DESCRIPTION("PCI ENDPOINT TEST HOST DRIVER"); +MODULE_AUTHOR("Kishon Vijay Abraham I "); +MODULE_LICENSE("GPL v2"); diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index dd9820b1c779..baee6db08287 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -333,6 +333,7 @@ header-y += parport.h header-y += patchkey.h header-y += pci.h header-y += pci_regs.h +header-y += pcitest.h header-y += perf_event.h header-y += personality.h header-y += pfkeyv2.h diff --git a/include/uapi/linux/pcitest.h b/include/uapi/linux/pcitest.h new file mode 100644 index 000000000000..a6aa10c45ad1 --- /dev/null +++ b/include/uapi/linux/pcitest.h @@ -0,0 +1,19 @@ +/** + * pcitest.h - PCI test uapi defines + * + * Copyright (C) 2017 Texas Instruments + * Author: Kishon Vijay Abraham I + * + */ + +#ifndef __UAPI_LINUX_PCITEST_H +#define __UAPI_LINUX_PCITEST_H + +#define PCITEST_BAR _IO('P', 0x1) +#define PCITEST_LEGACY_IRQ _IO('P', 0x2) +#define PCITEST_MSI _IOW('P', 0x3, int) +#define PCITEST_WRITE _IOW('P', 0x4, unsigned long) +#define PCITEST_READ _IOW('P', 0x5, unsigned long) +#define PCITEST_COPY _IOW('P', 0x6, unsigned long) + +#endif /* __UAPI_LINUX_PCITEST_H */ -- cgit v1.2.3-71-gd317 From f92faaba11d862ad91139486db24f801aeabd68c Mon Sep 17 00:00:00 2001 From: "Amrani, Ram" Date: Thu, 27 Apr 2017 13:35:32 +0300 Subject: RDMA/qedr: properly check atomic capabilities After checking the path upwards towards root complex, actually check root complex atomic_req capability, and not our own NIC. Verify that the PCIe device control register's atomic egress block is cleared in the path. Verify that the PCIe version is at least 2.
Signed-off-by: Ram Amrani Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qedr/main.c | 77 +++++++++++++++++++++++---------------- include/uapi/linux/pci_regs.h | 1 + 2 files changed, 47 insertions(+), 31 deletions(-) (limited to 'include/uapi') diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index ced0461d6e9f..c64dabe8ae6e 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -340,43 +340,58 @@ static void qedr_remove_sysfiles(struct qedr_dev *dev) static void qedr_pci_set_atomic(struct qedr_dev *dev, struct pci_dev *pdev) { struct pci_dev *bridge; - u32 val; - - dev->atomic_cap = IB_ATOMIC_NONE; + u32 ctl2, cap2; + u16 flags; + int rc; bridge = pdev->bus->self; if (!bridge) - return; - - /* Check whether we are connected directly or via a switch */ - while (bridge && bridge->bus->parent) { - DP_DEBUG(dev, QEDR_MSG_INIT, - "Device is not connected directly to root. bridge->bus->number=%d primary=%d\n", - bridge->bus->number, bridge->bus->primary); - /* Need to check Atomic Op Routing Supported all the way to - * root complex. 
- */ - pcie_capability_read_dword(bridge, PCI_EXP_DEVCAP2, &val); - if (!(val & PCI_EXP_DEVCAP2_ATOMIC_ROUTE)) { - pcie_capability_clear_word(pdev, - PCI_EXP_DEVCTL2, - PCI_EXP_DEVCTL2_ATOMIC_REQ); - return; - } + goto disable; + + /* Check atomic routing support all the way to root complex */ + while (bridge->bus->parent) { + rc = pcie_capability_read_word(bridge, PCI_EXP_FLAGS, &flags); + if (rc || ((flags & PCI_EXP_FLAGS_VERS) < 2)) + goto disable; + + rc = pcie_capability_read_dword(bridge, PCI_EXP_DEVCAP2, &cap2); + if (rc) + goto disable; + + rc = pcie_capability_read_dword(bridge, PCI_EXP_DEVCTL2, &ctl2); + if (rc) + goto disable; + + if (!(cap2 & PCI_EXP_DEVCAP2_ATOMIC_ROUTE) || + (ctl2 & PCI_EXP_DEVCTL2_ATOMIC_EGRESS_BLOCK)) + goto disable; bridge = bridge->bus->parent->self; } - bridge = pdev->bus->self; - /* according to bridge capability */ - pcie_capability_read_dword(bridge, PCI_EXP_DEVCAP2, &val); - if (val & PCI_EXP_DEVCAP2_ATOMIC_COMP64) { - pcie_capability_set_word(pdev, PCI_EXP_DEVCTL2, - PCI_EXP_DEVCTL2_ATOMIC_REQ); - dev->atomic_cap = IB_ATOMIC_GLOB; - } else { - pcie_capability_clear_word(pdev, PCI_EXP_DEVCTL2, - PCI_EXP_DEVCTL2_ATOMIC_REQ); - } + rc = pcie_capability_read_word(bridge, PCI_EXP_FLAGS, &flags); + if (rc || ((flags & PCI_EXP_FLAGS_VERS) < 2)) + goto disable; + + rc = pcie_capability_read_dword(bridge, PCI_EXP_DEVCAP2, &cap2); + if (rc || !(cap2 & PCI_EXP_DEVCAP2_ATOMIC_COMP64)) + goto disable; + + /* Set atomic operations */ + pcie_capability_set_word(pdev, PCI_EXP_DEVCTL2, + PCI_EXP_DEVCTL2_ATOMIC_REQ); + dev->atomic_cap = IB_ATOMIC_GLOB; + + DP_DEBUG(dev, QEDR_MSG_INIT, "Atomic capability enabled\n"); + + return; + +disable: + pcie_capability_clear_word(pdev, PCI_EXP_DEVCTL2, + PCI_EXP_DEVCTL2_ATOMIC_REQ); + dev->atomic_cap = IB_ATOMIC_NONE; + + DP_DEBUG(dev, QEDR_MSG_INIT, "Atomic capability disabled\n"); + } static const struct qed_rdma_ops *qed_ops; diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h 
index 634c9c44ed6c..18a26c16bd80 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -630,6 +630,7 @@ #define PCI_EXP_DEVCTL2_COMP_TIMEOUT 0x000f /* Completion Timeout Value */ #define PCI_EXP_DEVCTL2_ARI 0x0020 /* Alternative Routing-ID */ #define PCI_EXP_DEVCTL2_ATOMIC_REQ 0x0040 /* Set Atomic requests */ +#define PCI_EXP_DEVCTL2_ATOMIC_EGRESS_BLOCK 0x0080 /* Block atomic egress */ #define PCI_EXP_DEVCTL2_IDO_REQ_EN 0x0100 /* Allow IDO for requests */ #define PCI_EXP_DEVCTL2_IDO_CMP_EN 0x0200 /* Allow IDO for completions */ #define PCI_EXP_DEVCTL2_LTR_EN 0x0400 /* Enable LTR mechanism */ -- cgit v1.2.3-71-gd317 From d7b1eeb2ca039d04f1a1fcb241920cb112b4b52a Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Fri, 7 Apr 2017 18:39:07 +0800 Subject: drm/amdgpu:fix race condition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sequence is protected by spinlock so don't access sequence in parameter seq when invoking this function. ~0 means to get the latest sequence number and 0 means none to get.
Change-Id: Ib7a03f3cf5594deeb4ad333cc59b47a6bddfd1ad Signed-off-by: Monk Liu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 3 +++ include/uapi/drm/amdgpu_drm.h | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index cf0500671353..90d1ac8a80f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -273,6 +273,9 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, spin_lock(&ctx->ring_lock); + if (seq == ~0ull) + seq = ctx->rings[ring->idx].sequence - 1; + if (seq >= cring->sequence) { spin_unlock(&ctx->ring_lock); return ERR_PTR(-EINVAL); diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 516a9f285730..92262d81d41e 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -295,7 +295,10 @@ union drm_amdgpu_gem_wait_idle { }; struct drm_amdgpu_wait_cs_in { - /** Command submission handle */ + /* Command submission handle + * handle equals 0 means none to wait for + * handle equal ~0ull meanas wait for the latest sequence number + */ __u64 handle; /** Absolute timeout to wait */ __u64 timeout; -- cgit v1.2.3-71-gd317 From 080b24ebdf230c80fe63e64633c0d52aca5d1a8e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 10 Apr 2017 15:32:43 -0400 Subject: drm/amdgpu: fix spelling in header comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Christian König Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 92262d81d41e..95260e5043af 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -297,7 +297,7 @@ union 
drm_amdgpu_gem_wait_idle { struct drm_amdgpu_wait_cs_in { /* Command submission handle * handle equals 0 means none to wait for - * handle equal ~0ull meanas wait for the latest sequence number + * handle equals ~0ull means wait for the latest sequence number */ __u64 handle; /** Absolute timeout to wait */ -- cgit v1.2.3-71-gd317 From 408bfe7c3c5d036947b509356f494dc6b46025ff Mon Sep 17 00:00:00 2001 From: Junwei Zhang Date: Thu, 27 Apr 2017 11:12:07 +0800 Subject: drm/amdgpu: export more gpu info for gfx9 v2: 64-bit aligned for gpu info v3: squash in wave_front_fix Signed-off-by: Ken Wang Signed-off-by: Junwei Zhang Reviewed-by: Alex Deucher Reviewed-by: Qiang Yu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 ++++ drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 11 +++++++++++ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 3 +++ include/uapi/drm/amdgpu_drm.h | 19 +++++++++++++++++++ 4 files changed, 37 insertions(+) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index a4fc54c70f30..c9f935710d40 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -967,6 +967,9 @@ struct amdgpu_gfx_config { unsigned mc_arb_ramcfg; unsigned gb_addr_config; unsigned num_rbs; + unsigned gs_vgt_table_depth; + unsigned gs_prim_buffer_depth; + unsigned max_gs_waves_per_vgt; uint32_t tile_mode_array[32]; uint32_t macrotile_mode_array[16]; @@ -981,6 +984,7 @@ struct amdgpu_gfx_config { struct amdgpu_cu_info { uint32_t number; /* total active CU number */ uint32_t ao_cu_mask; + uint32_t wave_front_size; uint32_t bitmap[4][4]; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 832be632478f..21f616cdb279 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -546,10 +546,21 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file if (amdgpu_ngg) { 
dev_info.prim_buf_gpu_addr = adev->gfx.ngg.buf[PRIM].gpu_addr; + dev_info.prim_buf_size = adev->gfx.ngg.buf[PRIM].size; dev_info.pos_buf_gpu_addr = adev->gfx.ngg.buf[POS].gpu_addr; + dev_info.pos_buf_size = adev->gfx.ngg.buf[POS].size; dev_info.cntl_sb_buf_gpu_addr = adev->gfx.ngg.buf[CNTL].gpu_addr; + dev_info.cntl_sb_buf_size = adev->gfx.ngg.buf[CNTL].size; dev_info.param_buf_gpu_addr = adev->gfx.ngg.buf[PARAM].gpu_addr; + dev_info.param_buf_size = adev->gfx.ngg.buf[PARAM].size; } + dev_info.wave_front_size = adev->gfx.cu_info.wave_front_size; + dev_info.num_shader_visible_vgprs = adev->gfx.config.max_gprs; + dev_info.num_cu_per_sh = adev->gfx.config.max_cu_per_sh; + dev_info.num_tcc_blocks = adev->gfx.config.max_texture_channel_caches; + dev_info.gs_vgt_table_depth = adev->gfx.config.gs_vgt_table_depth; + dev_info.gs_prim_buffer_depth = adev->gfx.config.gs_prim_buffer_depth; + dev_info.max_gs_waves_per_vgt = adev->gfx.config.max_gs_waves_per_vgt; return copy_to_user(out, &dev_info, min((size_t)size, sizeof(dev_info))) ? 
-EFAULT : 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 9a9cb90e2f5f..210d21c085f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -785,6 +785,9 @@ static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) adev->gfx.config.sc_prim_fifo_size_backend = 0x100; adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; + adev->gfx.config.gs_vgt_table_depth = 32; + adev->gfx.config.gs_prim_buffer_depth = 1792; + adev->gfx.config.max_gs_waves_per_vgt = 32; gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN; break; default: diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 95260e5043af..6c249e5cfb09 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -767,6 +767,25 @@ struct drm_amdgpu_info_device { __u64 cntl_sb_buf_gpu_addr; /* NGG Parameter Cache */ __u64 param_buf_gpu_addr; + __u32 prim_buf_size; + __u32 pos_buf_size; + __u32 cntl_sb_buf_size; + __u32 param_buf_size; + /* wavefront size*/ + __u32 wave_front_size; + /* shader visible vgprs*/ + __u32 num_shader_visible_vgprs; + /* CU per shader array*/ + __u32 num_cu_per_sh; + /* number of tcc blocks*/ + __u32 num_tcc_blocks; + /* gs vgt table depth*/ + __u32 gs_vgt_table_depth; + /* gs primitive buffer depth*/ + __u32 gs_prim_buffer_depth; + /* max gs wavefront per vgt*/ + __u32 max_gs_waves_per_vgt; + __u32 _pad1; }; struct drm_amdgpu_info_hw_ip { -- cgit v1.2.3-71-gd317 From 9c8268def6127a9d9888b822a74becb80dfeab6f Mon Sep 17 00:00:00 2001 From: Joe Richey Date: Thu, 6 Apr 2017 16:14:05 -0700 Subject: fscrypt: Move key structure and constants to uapi This commit exposes the necessary constants and structures for a userspace program to pass filesystem encryption keys into the keyring. 
The fscrypt_key structure was already part of the kernel ABI, this change just makes it so programs no longer have to redeclare these structures (like e4crypt in e2fsprogs currently does). Note that we do not expose the other FS_*_KEY_SIZE constants as they are not necessary. Only XTS is supported for contents_encryption_mode, so currently FS_MAX_KEY_SIZE bytes of key material must always be passed to the kernel. This commit also removes __packed from fscrypt_key as it does not contain any implicit padding and does not refer to an on-disk structure. Signed-off-by: Joe Richey Signed-off-by: Theodore Ts'o --- fs/crypto/fscrypt_private.h | 11 ----------- include/uapi/linux/fs.h | 13 +++++++++++++ 2 files changed, 13 insertions(+), 11 deletions(-) (limited to 'include/uapi') diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index e39696e64494..e08ca6d1ca0f 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -22,10 +22,6 @@ #define FS_AES_256_CBC_KEY_SIZE 32 #define FS_AES_256_CTS_KEY_SIZE 32 #define FS_AES_256_XTS_KEY_SIZE 64 -#define FS_MAX_KEY_SIZE 64 - -#define FS_KEY_DESC_PREFIX "fscrypt:" -#define FS_KEY_DESC_PREFIX_SIZE 8 #define FS_KEY_DERIVATION_NONCE_SIZE 16 @@ -51,13 +47,6 @@ struct fscrypt_context { #define FS_ENCRYPTION_CONTEXT_FORMAT_V1 1 -/* This is passed in from userspace into the kernel keyring */ -struct fscrypt_key { - u32 mode; - u8 raw[FS_MAX_KEY_SIZE]; - u32 size; -} __packed; - /* * A pointer to this structure is stored in the file system's in-core * representation of an inode. 
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 048a85e9f017..9691fda01245 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -285,6 +285,19 @@ struct fscrypt_policy { #define FS_IOC_GET_ENCRYPTION_PWSALT _IOW('f', 20, __u8[16]) #define FS_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct fscrypt_policy) +/* Parameters for passing an encryption key into the kernel keyring */ +#define FS_KEY_DESC_PREFIX "fscrypt:" +#define FS_KEY_DESC_PREFIX_SIZE 8 + +/* Structure that userspace passes to the kernel keyring */ +#define FS_MAX_KEY_SIZE 64 + +struct fscrypt_key { + __u32 mode; + __u8 raw[FS_MAX_KEY_SIZE]; + __u32 size; +}; + /* * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS) * -- cgit v1.2.3-71-gd317 From 960e6994ad9cf10ddd4d3680a2d6cf5159c93a83 Mon Sep 17 00:00:00 2001 From: Joe Richey Date: Thu, 6 Apr 2017 16:14:28 -0700 Subject: fscrypt: Remove __packed from fscrypt_policy This commit removes __packed from fscrypt_policy as it does not contain any implicit padding and does not refer to an on-disk structure. Even though this is a change to a UAPI file, no users will be broken as the structure doesn't change. 
Signed-off-by: Joe Richey Signed-off-by: Theodore Ts'o --- include/uapi/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 9691fda01245..24e61a54feaa 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -279,7 +279,7 @@ struct fscrypt_policy { __u8 filenames_encryption_mode; __u8 flags; __u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE]; -} __packed; +}; #define FS_IOC_SET_ENCRYPTION_POLICY _IOR('f', 19, struct fscrypt_policy) #define FS_IOC_GET_ENCRYPTION_PWSALT _IOW('f', 20, __u8[16]) -- cgit v1.2.3-71-gd317 From 5d4e3443287b28a6e9c3436adbadba2497880bd1 Mon Sep 17 00:00:00 2001 From: Chenbo Feng Date: Wed, 26 Apr 2017 16:41:23 -0700 Subject: bpf: Fix inaccurate helper function description The description inside uapi/linux/bpf.h about bpf_get_socket_uid helper function is no longer valid. It returns overflowuid rather than 0 when failed. Signed-off-by: Chenbo Feng Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e553529929f6..945a1f5f63c5 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -481,8 +481,7 @@ union bpf_attr { * u32 bpf_get_socket_uid(skb) * Get the owner uid of the socket stored inside sk_buff. * @skb: pointer to skb - * Return: uid of the socket owner on success or 0 if the socket pointer - * inside sk_buff is NULL + * Return: uid of the socket owner on success or overflowuid if failed. 
*/ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ -- cgit v1.2.3-71-gd317 From 26202873bb51fafdaa51be3e8de7aab9beb49f70 Mon Sep 17 00:00:00 2001 From: Hans-Christian Noren Egtvedt Date: Sun, 26 Feb 2017 12:56:39 +0100 Subject: avr32: remove support for AVR32 architecture MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch drops support for AVR32 architecture from the Linux kernel. The AVR32 architecture is not keeping up with the development of the kernel, and since it shares so much of the drivers with Atmel ARM SoC, it is starting to hinder these drivers from developing swiftly. Also, all AVR32 AP7 SoC processors are end of lifed from Atmel (now Microchip). Finally, the GCC toolchain is stuck at version 4.2.x, and has not received any patches since the last release from Atmel; 4.2.4-atmel.1.1.3.avr32linux.1. When building kernel v4.10, this toolchain is no longer able to properly link the network stack. Haavard and I have come to the conclusion that we feel keeping AVR32 on life support offers more obstacles for Atmel ARMs than it gives joy to AVR32 users. I also suspect there are very few AVR32 users left today, if anybody at all.
Signed-off-by: Hans-Christian Noren Egtvedt Signed-off-by: Håvard Skinnemoen Signed-off-by: Nicolas Ferre Acked-by: Andy Shevchenko Acked-by: Boris Brezillon --- CREDITS | 8 + MAINTAINERS | 15 - arch/avr32/Kconfig | 288 --- arch/avr32/Kconfig.debug | 9 - arch/avr32/Makefile | 84 - arch/avr32/boards/atngw100/Kconfig | 65 - arch/avr32/boards/atngw100/Kconfig_mrmt | 80 - arch/avr32/boards/atngw100/Makefile | 3 - arch/avr32/boards/atngw100/evklcd10x.c | 178 -- arch/avr32/boards/atngw100/flash.c | 98 - arch/avr32/boards/atngw100/mrmt.c | 382 ---- arch/avr32/boards/atngw100/setup.c | 324 --- arch/avr32/boards/atstk1000/Kconfig | 109 - arch/avr32/boards/atstk1000/Makefile | 5 - arch/avr32/boards/atstk1000/atstk1000.h | 17 - arch/avr32/boards/atstk1000/atstk1002.c | 330 --- arch/avr32/boards/atstk1000/atstk1003.c | 162 -- arch/avr32/boards/atstk1000/atstk1004.c | 164 -- arch/avr32/boards/atstk1000/flash.c | 98 - arch/avr32/boards/atstk1000/setup.c | 127 -- arch/avr32/boards/favr-32/Kconfig | 22 - arch/avr32/boards/favr-32/Makefile | 1 - arch/avr32/boards/favr-32/flash.c | 98 - arch/avr32/boards/favr-32/setup.c | 366 --- arch/avr32/boards/hammerhead/Kconfig | 43 - arch/avr32/boards/hammerhead/Makefile | 1 - arch/avr32/boards/hammerhead/flash.c | 381 ---- arch/avr32/boards/hammerhead/flash.h | 6 - arch/avr32/boards/hammerhead/setup.c | 247 -- arch/avr32/boards/merisc/Kconfig | 5 - arch/avr32/boards/merisc/Makefile | 1 - arch/avr32/boards/merisc/display.c | 65 - arch/avr32/boards/merisc/flash.c | 139 -- arch/avr32/boards/merisc/merisc.h | 18 - arch/avr32/boards/merisc/merisc_sysfs.c | 64 - arch/avr32/boards/merisc/setup.c | 305 --- arch/avr32/boards/mimc200/Makefile | 1 - arch/avr32/boards/mimc200/flash.c | 143 -- arch/avr32/boards/mimc200/setup.c | 236 -- arch/avr32/boot/images/.gitignore | 4 - arch/avr32/boot/images/Makefile | 57 - arch/avr32/boot/u-boot/Makefile | 3 - arch/avr32/boot/u-boot/empty.S | 1 - arch/avr32/boot/u-boot/head.S | 83 - 
arch/avr32/configs/atngw100_defconfig | 142 -- arch/avr32/configs/atngw100_evklcd100_defconfig | 158 -- arch/avr32/configs/atngw100_evklcd101_defconfig | 157 -- arch/avr32/configs/atngw100_mrmt_defconfig | 136 -- arch/avr32/configs/atngw100mkii_defconfig | 144 -- .../avr32/configs/atngw100mkii_evklcd100_defconfig | 161 -- .../avr32/configs/atngw100mkii_evklcd101_defconfig | 160 -- arch/avr32/configs/atstk1002_defconfig | 157 -- arch/avr32/configs/atstk1003_defconfig | 137 -- arch/avr32/configs/atstk1004_defconfig | 135 -- arch/avr32/configs/atstk1006_defconfig | 160 -- arch/avr32/configs/favr-32_defconfig | 143 -- arch/avr32/configs/hammerhead_defconfig | 145 -- arch/avr32/configs/merisc_defconfig | 115 - arch/avr32/configs/mimc200_defconfig | 114 - arch/avr32/include/asm/Kbuild | 23 - arch/avr32/include/asm/addrspace.h | 43 - arch/avr32/include/asm/asm-offsets.h | 1 - arch/avr32/include/asm/asm.h | 102 - arch/avr32/include/asm/atomic.h | 243 -- arch/avr32/include/asm/barrier.h | 22 - arch/avr32/include/asm/bitops.h | 314 --- arch/avr32/include/asm/bug.h | 78 - arch/avr32/include/asm/bugs.h | 15 - arch/avr32/include/asm/cache.h | 38 - arch/avr32/include/asm/cacheflush.h | 132 -- arch/avr32/include/asm/checksum.h | 150 -- arch/avr32/include/asm/cmpxchg.h | 115 - arch/avr32/include/asm/current.h | 15 - arch/avr32/include/asm/dma-mapping.h | 14 - arch/avr32/include/asm/dma.h | 8 - arch/avr32/include/asm/elf.h | 105 - arch/avr32/include/asm/fb.h | 21 - arch/avr32/include/asm/ftrace.h | 1 - arch/avr32/include/asm/gpio.h | 6 - arch/avr32/include/asm/hardirq.h | 6 - arch/avr32/include/asm/hw_irq.h | 9 - arch/avr32/include/asm/io.h | 329 --- arch/avr32/include/asm/irq.h | 24 - arch/avr32/include/asm/irqflags.h | 61 - arch/avr32/include/asm/kdebug.h | 12 - arch/avr32/include/asm/kmap_types.h | 10 - arch/avr32/include/asm/kprobes.h | 54 - arch/avr32/include/asm/linkage.h | 7 - arch/avr32/include/asm/mmu.h | 10 - arch/avr32/include/asm/mmu_context.h | 150 -- 
arch/avr32/include/asm/module.h | 26 - arch/avr32/include/asm/ocd.h | 543 ----- arch/avr32/include/asm/page.h | 104 - arch/avr32/include/asm/pci.h | 8 - arch/avr32/include/asm/pgalloc.h | 102 - arch/avr32/include/asm/pgtable-2level.h | 48 - arch/avr32/include/asm/pgtable.h | 347 --- arch/avr32/include/asm/processor.h | 166 -- arch/avr32/include/asm/ptrace.h | 45 - arch/avr32/include/asm/serial.h | 13 - arch/avr32/include/asm/setup.h | 144 -- arch/avr32/include/asm/shmparam.h | 6 - arch/avr32/include/asm/signal.h | 31 - arch/avr32/include/asm/string.h | 17 - arch/avr32/include/asm/switch_to.h | 49 - arch/avr32/include/asm/syscalls.h | 21 - arch/avr32/include/asm/sysreg.h | 291 --- arch/avr32/include/asm/termios.h | 23 - arch/avr32/include/asm/thread_info.h | 103 - arch/avr32/include/asm/timex.h | 39 - arch/avr32/include/asm/tlb.h | 32 - arch/avr32/include/asm/tlbflush.h | 32 - arch/avr32/include/asm/traps.h | 23 - arch/avr32/include/asm/types.h | 19 - arch/avr32/include/asm/uaccess.h | 337 --- arch/avr32/include/asm/ucontext.h | 12 - arch/avr32/include/asm/unaligned.h | 21 - arch/avr32/include/asm/unistd.h | 44 - arch/avr32/include/asm/user.h | 65 - arch/avr32/include/uapi/asm/Kbuild | 36 - arch/avr32/include/uapi/asm/auxvec.h | 4 - arch/avr32/include/uapi/asm/byteorder.h | 9 - arch/avr32/include/uapi/asm/cachectl.h | 11 - arch/avr32/include/uapi/asm/msgbuf.h | 31 - arch/avr32/include/uapi/asm/posix_types.h | 37 - arch/avr32/include/uapi/asm/ptrace.h | 126 -- arch/avr32/include/uapi/asm/sembuf.h | 25 - arch/avr32/include/uapi/asm/setup.h | 16 - arch/avr32/include/uapi/asm/shmbuf.h | 42 - arch/avr32/include/uapi/asm/sigcontext.h | 34 - arch/avr32/include/uapi/asm/signal.h | 121 - arch/avr32/include/uapi/asm/socket.h | 95 - arch/avr32/include/uapi/asm/sockios.h | 13 - arch/avr32/include/uapi/asm/stat.h | 79 - arch/avr32/include/uapi/asm/swab.h | 35 - arch/avr32/include/uapi/asm/termbits.h | 196 -- arch/avr32/include/uapi/asm/termios.h | 49 - 
arch/avr32/include/uapi/asm/types.h | 13 - arch/avr32/include/uapi/asm/unistd.h | 347 --- arch/avr32/kernel/.gitignore | 1 - arch/avr32/kernel/Makefile | 15 - arch/avr32/kernel/asm-offsets.c | 24 - arch/avr32/kernel/avr32_ksyms.c | 70 - arch/avr32/kernel/cpu.c | 410 ---- arch/avr32/kernel/entry-avr32b.S | 877 -------- arch/avr32/kernel/head.S | 22 - arch/avr32/kernel/irq.c | 28 - arch/avr32/kernel/kprobes.c | 267 --- arch/avr32/kernel/module.c | 291 --- arch/avr32/kernel/nmi_debug.c | 83 - arch/avr32/kernel/ocd.c | 167 -- arch/avr32/kernel/process.c | 358 --- arch/avr32/kernel/ptrace.c | 357 --- arch/avr32/kernel/setup.c | 609 ----- arch/avr32/kernel/signal.c | 288 --- arch/avr32/kernel/stacktrace.c | 56 - arch/avr32/kernel/switch_to.S | 35 - arch/avr32/kernel/syscall-stubs.S | 153 -- arch/avr32/kernel/syscall_table.S | 347 --- arch/avr32/kernel/time.c | 161 -- arch/avr32/kernel/traps.c | 262 --- arch/avr32/kernel/vmlinux.lds.S | 89 - arch/avr32/lib/Makefile | 11 - arch/avr32/lib/__avr32_asr64.S | 31 - arch/avr32/lib/__avr32_lsl64.S | 31 - arch/avr32/lib/__avr32_lsr64.S | 31 - arch/avr32/lib/clear_user.S | 76 - arch/avr32/lib/copy_user.S | 119 - arch/avr32/lib/csum_partial.S | 47 - arch/avr32/lib/csum_partial_copy_generic.S | 99 - arch/avr32/lib/delay.c | 57 - arch/avr32/lib/findbit.S | 185 -- arch/avr32/lib/io-readsb.S | 49 - arch/avr32/lib/io-readsl.S | 24 - arch/avr32/lib/io-readsw.S | 43 - arch/avr32/lib/io-writesb.S | 52 - arch/avr32/lib/io-writesl.S | 20 - arch/avr32/lib/io-writesw.S | 38 - arch/avr32/lib/memcpy.S | 72 - arch/avr32/lib/memset.S | 72 - arch/avr32/lib/strncpy_from_user.S | 60 - arch/avr32/lib/strnlen_user.S | 67 - arch/avr32/mach-at32ap/Kconfig | 31 - arch/avr32/mach-at32ap/Makefile | 8 - arch/avr32/mach-at32ap/at32ap700x.c | 2368 -------------------- arch/avr32/mach-at32ap/clock.c | 334 --- arch/avr32/mach-at32ap/clock.h | 35 - arch/avr32/mach-at32ap/extint.c | 271 --- arch/avr32/mach-at32ap/hmatrix.c | 88 - arch/avr32/mach-at32ap/hsmc.c | 282 
--- arch/avr32/mach-at32ap/hsmc.h | 127 -- arch/avr32/mach-at32ap/include/mach/at32ap700x.h | 245 -- arch/avr32/mach-at32ap/include/mach/board.h | 115 - arch/avr32/mach-at32ap/include/mach/chip.h | 19 - arch/avr32/mach-at32ap/include/mach/cpu.h | 23 - arch/avr32/mach-at32ap/include/mach/gpio.h | 45 - arch/avr32/mach-at32ap/include/mach/hmatrix.h | 55 - arch/avr32/mach-at32ap/include/mach/init.h | 18 - arch/avr32/mach-at32ap/include/mach/io.h | 38 - arch/avr32/mach-at32ap/include/mach/irq.h | 14 - arch/avr32/mach-at32ap/include/mach/pm.h | 27 - arch/avr32/mach-at32ap/include/mach/portmux.h | 30 - arch/avr32/mach-at32ap/include/mach/smc.h | 113 - arch/avr32/mach-at32ap/include/mach/sram.h | 30 - arch/avr32/mach-at32ap/intc.c | 200 -- arch/avr32/mach-at32ap/intc.h | 329 --- arch/avr32/mach-at32ap/pdc.c | 47 - arch/avr32/mach-at32ap/pio.c | 470 ---- arch/avr32/mach-at32ap/pio.h | 180 -- arch/avr32/mach-at32ap/pm-at32ap700x.S | 167 -- arch/avr32/mach-at32ap/pm.c | 243 -- arch/avr32/mach-at32ap/pm.h | 112 - arch/avr32/mach-at32ap/sdramc.h | 76 - arch/avr32/mm/Makefile | 6 - arch/avr32/mm/cache.c | 163 -- arch/avr32/mm/clear_page.S | 25 - arch/avr32/mm/copy_page.S | 28 - arch/avr32/mm/dma-coherent.c | 202 -- arch/avr32/mm/fault.c | 268 --- arch/avr32/mm/init.c | 125 -- arch/avr32/mm/ioremap.c | 93 - arch/avr32/mm/tlb.c | 375 ---- arch/avr32/oprofile/Makefile | 8 - arch/avr32/oprofile/backtrace.c | 81 - arch/avr32/oprofile/op_model_avr32.c | 236 -- include/uapi/linux/elf-em.h | 1 - 226 files changed, 8 insertions(+), 27045 deletions(-) delete mode 100644 arch/avr32/Kconfig delete mode 100644 arch/avr32/Kconfig.debug delete mode 100644 arch/avr32/Makefile delete mode 100644 arch/avr32/boards/atngw100/Kconfig delete mode 100644 arch/avr32/boards/atngw100/Kconfig_mrmt delete mode 100644 arch/avr32/boards/atngw100/Makefile delete mode 100644 arch/avr32/boards/atngw100/evklcd10x.c delete mode 100644 arch/avr32/boards/atngw100/flash.c delete mode 100644 
arch/avr32/boards/atngw100/mrmt.c delete mode 100644 arch/avr32/boards/atngw100/setup.c delete mode 100644 arch/avr32/boards/atstk1000/Kconfig delete mode 100644 arch/avr32/boards/atstk1000/Makefile delete mode 100644 arch/avr32/boards/atstk1000/atstk1000.h delete mode 100644 arch/avr32/boards/atstk1000/atstk1002.c delete mode 100644 arch/avr32/boards/atstk1000/atstk1003.c delete mode 100644 arch/avr32/boards/atstk1000/atstk1004.c delete mode 100644 arch/avr32/boards/atstk1000/flash.c delete mode 100644 arch/avr32/boards/atstk1000/setup.c delete mode 100644 arch/avr32/boards/favr-32/Kconfig delete mode 100644 arch/avr32/boards/favr-32/Makefile delete mode 100644 arch/avr32/boards/favr-32/flash.c delete mode 100644 arch/avr32/boards/favr-32/setup.c delete mode 100644 arch/avr32/boards/hammerhead/Kconfig delete mode 100644 arch/avr32/boards/hammerhead/Makefile delete mode 100644 arch/avr32/boards/hammerhead/flash.c delete mode 100644 arch/avr32/boards/hammerhead/flash.h delete mode 100644 arch/avr32/boards/hammerhead/setup.c delete mode 100644 arch/avr32/boards/merisc/Kconfig delete mode 100644 arch/avr32/boards/merisc/Makefile delete mode 100644 arch/avr32/boards/merisc/display.c delete mode 100644 arch/avr32/boards/merisc/flash.c delete mode 100644 arch/avr32/boards/merisc/merisc.h delete mode 100644 arch/avr32/boards/merisc/merisc_sysfs.c delete mode 100644 arch/avr32/boards/merisc/setup.c delete mode 100644 arch/avr32/boards/mimc200/Makefile delete mode 100644 arch/avr32/boards/mimc200/flash.c delete mode 100644 arch/avr32/boards/mimc200/setup.c delete mode 100644 arch/avr32/boot/images/.gitignore delete mode 100644 arch/avr32/boot/images/Makefile delete mode 100644 arch/avr32/boot/u-boot/Makefile delete mode 100644 arch/avr32/boot/u-boot/empty.S delete mode 100644 arch/avr32/boot/u-boot/head.S delete mode 100644 arch/avr32/configs/atngw100_defconfig delete mode 100644 arch/avr32/configs/atngw100_evklcd100_defconfig delete mode 100644 
arch/avr32/configs/atngw100_evklcd101_defconfig delete mode 100644 arch/avr32/configs/atngw100_mrmt_defconfig delete mode 100644 arch/avr32/configs/atngw100mkii_defconfig delete mode 100644 arch/avr32/configs/atngw100mkii_evklcd100_defconfig delete mode 100644 arch/avr32/configs/atngw100mkii_evklcd101_defconfig delete mode 100644 arch/avr32/configs/atstk1002_defconfig delete mode 100644 arch/avr32/configs/atstk1003_defconfig delete mode 100644 arch/avr32/configs/atstk1004_defconfig delete mode 100644 arch/avr32/configs/atstk1006_defconfig delete mode 100644 arch/avr32/configs/favr-32_defconfig delete mode 100644 arch/avr32/configs/hammerhead_defconfig delete mode 100644 arch/avr32/configs/merisc_defconfig delete mode 100644 arch/avr32/configs/mimc200_defconfig delete mode 100644 arch/avr32/include/asm/Kbuild delete mode 100644 arch/avr32/include/asm/addrspace.h delete mode 100644 arch/avr32/include/asm/asm-offsets.h delete mode 100644 arch/avr32/include/asm/asm.h delete mode 100644 arch/avr32/include/asm/atomic.h delete mode 100644 arch/avr32/include/asm/barrier.h delete mode 100644 arch/avr32/include/asm/bitops.h delete mode 100644 arch/avr32/include/asm/bug.h delete mode 100644 arch/avr32/include/asm/bugs.h delete mode 100644 arch/avr32/include/asm/cache.h delete mode 100644 arch/avr32/include/asm/cacheflush.h delete mode 100644 arch/avr32/include/asm/checksum.h delete mode 100644 arch/avr32/include/asm/cmpxchg.h delete mode 100644 arch/avr32/include/asm/current.h delete mode 100644 arch/avr32/include/asm/dma-mapping.h delete mode 100644 arch/avr32/include/asm/dma.h delete mode 100644 arch/avr32/include/asm/elf.h delete mode 100644 arch/avr32/include/asm/fb.h delete mode 100644 arch/avr32/include/asm/ftrace.h delete mode 100644 arch/avr32/include/asm/gpio.h delete mode 100644 arch/avr32/include/asm/hardirq.h delete mode 100644 arch/avr32/include/asm/hw_irq.h delete mode 100644 arch/avr32/include/asm/io.h delete mode 100644 arch/avr32/include/asm/irq.h delete mode 
100644 arch/avr32/include/asm/irqflags.h delete mode 100644 arch/avr32/include/asm/kdebug.h delete mode 100644 arch/avr32/include/asm/kmap_types.h delete mode 100644 arch/avr32/include/asm/kprobes.h delete mode 100644 arch/avr32/include/asm/linkage.h delete mode 100644 arch/avr32/include/asm/mmu.h delete mode 100644 arch/avr32/include/asm/mmu_context.h delete mode 100644 arch/avr32/include/asm/module.h delete mode 100644 arch/avr32/include/asm/ocd.h delete mode 100644 arch/avr32/include/asm/page.h delete mode 100644 arch/avr32/include/asm/pci.h delete mode 100644 arch/avr32/include/asm/pgalloc.h delete mode 100644 arch/avr32/include/asm/pgtable-2level.h delete mode 100644 arch/avr32/include/asm/pgtable.h delete mode 100644 arch/avr32/include/asm/processor.h delete mode 100644 arch/avr32/include/asm/ptrace.h delete mode 100644 arch/avr32/include/asm/serial.h delete mode 100644 arch/avr32/include/asm/setup.h delete mode 100644 arch/avr32/include/asm/shmparam.h delete mode 100644 arch/avr32/include/asm/signal.h delete mode 100644 arch/avr32/include/asm/string.h delete mode 100644 arch/avr32/include/asm/switch_to.h delete mode 100644 arch/avr32/include/asm/syscalls.h delete mode 100644 arch/avr32/include/asm/sysreg.h delete mode 100644 arch/avr32/include/asm/termios.h delete mode 100644 arch/avr32/include/asm/thread_info.h delete mode 100644 arch/avr32/include/asm/timex.h delete mode 100644 arch/avr32/include/asm/tlb.h delete mode 100644 arch/avr32/include/asm/tlbflush.h delete mode 100644 arch/avr32/include/asm/traps.h delete mode 100644 arch/avr32/include/asm/types.h delete mode 100644 arch/avr32/include/asm/uaccess.h delete mode 100644 arch/avr32/include/asm/ucontext.h delete mode 100644 arch/avr32/include/asm/unaligned.h delete mode 100644 arch/avr32/include/asm/unistd.h delete mode 100644 arch/avr32/include/asm/user.h delete mode 100644 arch/avr32/include/uapi/asm/Kbuild delete mode 100644 arch/avr32/include/uapi/asm/auxvec.h delete mode 100644 
arch/avr32/include/uapi/asm/byteorder.h delete mode 100644 arch/avr32/include/uapi/asm/cachectl.h delete mode 100644 arch/avr32/include/uapi/asm/msgbuf.h delete mode 100644 arch/avr32/include/uapi/asm/posix_types.h delete mode 100644 arch/avr32/include/uapi/asm/ptrace.h delete mode 100644 arch/avr32/include/uapi/asm/sembuf.h delete mode 100644 arch/avr32/include/uapi/asm/setup.h delete mode 100644 arch/avr32/include/uapi/asm/shmbuf.h delete mode 100644 arch/avr32/include/uapi/asm/sigcontext.h delete mode 100644 arch/avr32/include/uapi/asm/signal.h delete mode 100644 arch/avr32/include/uapi/asm/socket.h delete mode 100644 arch/avr32/include/uapi/asm/sockios.h delete mode 100644 arch/avr32/include/uapi/asm/stat.h delete mode 100644 arch/avr32/include/uapi/asm/swab.h delete mode 100644 arch/avr32/include/uapi/asm/termbits.h delete mode 100644 arch/avr32/include/uapi/asm/termios.h delete mode 100644 arch/avr32/include/uapi/asm/types.h delete mode 100644 arch/avr32/include/uapi/asm/unistd.h delete mode 100644 arch/avr32/kernel/.gitignore delete mode 100644 arch/avr32/kernel/Makefile delete mode 100644 arch/avr32/kernel/asm-offsets.c delete mode 100644 arch/avr32/kernel/avr32_ksyms.c delete mode 100644 arch/avr32/kernel/cpu.c delete mode 100644 arch/avr32/kernel/entry-avr32b.S delete mode 100644 arch/avr32/kernel/head.S delete mode 100644 arch/avr32/kernel/irq.c delete mode 100644 arch/avr32/kernel/kprobes.c delete mode 100644 arch/avr32/kernel/module.c delete mode 100644 arch/avr32/kernel/nmi_debug.c delete mode 100644 arch/avr32/kernel/ocd.c delete mode 100644 arch/avr32/kernel/process.c delete mode 100644 arch/avr32/kernel/ptrace.c delete mode 100644 arch/avr32/kernel/setup.c delete mode 100644 arch/avr32/kernel/signal.c delete mode 100644 arch/avr32/kernel/stacktrace.c delete mode 100644 arch/avr32/kernel/switch_to.S delete mode 100644 arch/avr32/kernel/syscall-stubs.S delete mode 100644 arch/avr32/kernel/syscall_table.S delete mode 100644 arch/avr32/kernel/time.c 
delete mode 100644 arch/avr32/kernel/traps.c delete mode 100644 arch/avr32/kernel/vmlinux.lds.S delete mode 100644 arch/avr32/lib/Makefile delete mode 100644 arch/avr32/lib/__avr32_asr64.S delete mode 100644 arch/avr32/lib/__avr32_lsl64.S delete mode 100644 arch/avr32/lib/__avr32_lsr64.S delete mode 100644 arch/avr32/lib/clear_user.S delete mode 100644 arch/avr32/lib/copy_user.S delete mode 100644 arch/avr32/lib/csum_partial.S delete mode 100644 arch/avr32/lib/csum_partial_copy_generic.S delete mode 100644 arch/avr32/lib/delay.c delete mode 100644 arch/avr32/lib/findbit.S delete mode 100644 arch/avr32/lib/io-readsb.S delete mode 100644 arch/avr32/lib/io-readsl.S delete mode 100644 arch/avr32/lib/io-readsw.S delete mode 100644 arch/avr32/lib/io-writesb.S delete mode 100644 arch/avr32/lib/io-writesl.S delete mode 100644 arch/avr32/lib/io-writesw.S delete mode 100644 arch/avr32/lib/memcpy.S delete mode 100644 arch/avr32/lib/memset.S delete mode 100644 arch/avr32/lib/strncpy_from_user.S delete mode 100644 arch/avr32/lib/strnlen_user.S delete mode 100644 arch/avr32/mach-at32ap/Kconfig delete mode 100644 arch/avr32/mach-at32ap/Makefile delete mode 100644 arch/avr32/mach-at32ap/at32ap700x.c delete mode 100644 arch/avr32/mach-at32ap/clock.c delete mode 100644 arch/avr32/mach-at32ap/clock.h delete mode 100644 arch/avr32/mach-at32ap/extint.c delete mode 100644 arch/avr32/mach-at32ap/hmatrix.c delete mode 100644 arch/avr32/mach-at32ap/hsmc.c delete mode 100644 arch/avr32/mach-at32ap/hsmc.h delete mode 100644 arch/avr32/mach-at32ap/include/mach/at32ap700x.h delete mode 100644 arch/avr32/mach-at32ap/include/mach/board.h delete mode 100644 arch/avr32/mach-at32ap/include/mach/chip.h delete mode 100644 arch/avr32/mach-at32ap/include/mach/cpu.h delete mode 100644 arch/avr32/mach-at32ap/include/mach/gpio.h delete mode 100644 arch/avr32/mach-at32ap/include/mach/hmatrix.h delete mode 100644 arch/avr32/mach-at32ap/include/mach/init.h delete mode 100644 
arch/avr32/mach-at32ap/include/mach/io.h delete mode 100644 arch/avr32/mach-at32ap/include/mach/irq.h delete mode 100644 arch/avr32/mach-at32ap/include/mach/pm.h delete mode 100644 arch/avr32/mach-at32ap/include/mach/portmux.h delete mode 100644 arch/avr32/mach-at32ap/include/mach/smc.h delete mode 100644 arch/avr32/mach-at32ap/include/mach/sram.h delete mode 100644 arch/avr32/mach-at32ap/intc.c delete mode 100644 arch/avr32/mach-at32ap/intc.h delete mode 100644 arch/avr32/mach-at32ap/pdc.c delete mode 100644 arch/avr32/mach-at32ap/pio.c delete mode 100644 arch/avr32/mach-at32ap/pio.h delete mode 100644 arch/avr32/mach-at32ap/pm-at32ap700x.S delete mode 100644 arch/avr32/mach-at32ap/pm.c delete mode 100644 arch/avr32/mach-at32ap/pm.h delete mode 100644 arch/avr32/mach-at32ap/sdramc.h delete mode 100644 arch/avr32/mm/Makefile delete mode 100644 arch/avr32/mm/cache.c delete mode 100644 arch/avr32/mm/clear_page.S delete mode 100644 arch/avr32/mm/copy_page.S delete mode 100644 arch/avr32/mm/dma-coherent.c delete mode 100644 arch/avr32/mm/fault.c delete mode 100644 arch/avr32/mm/init.c delete mode 100644 arch/avr32/mm/ioremap.c delete mode 100644 arch/avr32/mm/tlb.c delete mode 100644 arch/avr32/oprofile/Makefile delete mode 100644 arch/avr32/oprofile/backtrace.c delete mode 100644 arch/avr32/oprofile/op_model_avr32.c (limited to 'include/uapi') diff --git a/CREDITS b/CREDITS index c5626bf06264..5d09c26d69cd 100644 --- a/CREDITS +++ b/CREDITS @@ -1034,6 +1034,10 @@ S: 2037 Walnut #6 S: Boulder, Colorado 80302 S: USA +N: Hans-Christian Noren Egtvedt +E: egtvedt@samfundet.no +D: AVR32 architecture maintainer. + N: Heiko Eißfeldt E: heiko@colossus.escape.de heiko@unifix.de D: verify_area stuff, generic SCSI fixes @@ -3398,6 +3402,10 @@ S: Suite 101 S: Markham, Ontario L3R 2Z6 S: Canada +N: Haavard Skinnemoen +M: Haavard Skinnemoen +D: AVR32 architecture port to Linux and maintainer. 
+ N: Rick Sladkey E: jrs@world.std.com D: utility hacker: Emacs, NFS server, mount, kmem-ps, UPS debugger, strace, GDB diff --git a/MAINTAINERS b/MAINTAINERS index 38d3e4ed7208..91c5fdbe5674 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2327,21 +2327,6 @@ S: Maintained F: drivers/auxdisplay/ F: include/linux/cfag12864b.h -AVR32 ARCHITECTURE -M: Haavard Skinnemoen -M: Hans-Christian Egtvedt -W: http://www.atmel.com/products/AVR32/ -W: http://mirror.egtvedt.no/avr32linux.org/ -W: http://avrfreaks.net/ -S: Maintained -F: arch/avr32/ - -AVR32/AT32AP MACHINE SUPPORT -M: Haavard Skinnemoen -M: Hans-Christian Egtvedt -S: Maintained -F: arch/avr32/mach-at32ap/ - AX.25 NETWORK LAYER M: Ralf Baechle L: linux-hams@vger.kernel.org diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig deleted file mode 100644 index 7e75d45e20cd..000000000000 --- a/arch/avr32/Kconfig +++ /dev/null @@ -1,288 +0,0 @@ -config AVR32 - def_bool y - # With EXPERT=n, we get lots of stuff automatically selected - # that we usually don't need on AVR32. - select EXPERT - select HAVE_CLK - select HAVE_EXIT_THREAD - select HAVE_OPROFILE - select HAVE_KPROBES - select VIRT_TO_BUS - select GENERIC_IRQ_PROBE - select GENERIC_ATOMIC64 - select HARDIRQS_SW_RESEND - select GENERIC_IRQ_SHOW - select ARCH_HAVE_CUSTOM_GPIO_H - select ARCH_WANT_IPC_PARSE_VERSION - select ARCH_HAVE_NMI_SAFE_CMPXCHG - select GENERIC_CLOCKEVENTS - select HAVE_MOD_ARCH_SPECIFIC - select MODULES_USE_ELF_RELA - select HAVE_NMI - help - AVR32 is a high-performance 32-bit RISC microprocessor core, - designed for cost-sensitive embedded applications, with particular - emphasis on low power consumption and high code density. - - There is an AVR32 Linux project with a web page at - http://avr32linux.org/. 
- -config STACKTRACE_SUPPORT - def_bool y - -config LOCKDEP_SUPPORT - def_bool y - -config TRACE_IRQFLAGS_SUPPORT - def_bool y - -config RWSEM_GENERIC_SPINLOCK - def_bool y - -config RWSEM_XCHGADD_ALGORITHM - def_bool n - -config ARCH_HAS_ILOG2_U32 - def_bool n - -config ARCH_HAS_ILOG2_U64 - def_bool n - -config GENERIC_HWEIGHT - def_bool y - -config GENERIC_CALIBRATE_DELAY - def_bool y - -config GENERIC_BUG - def_bool y - depends on BUG - -source "init/Kconfig" - -source "kernel/Kconfig.freezer" - -menu "System Type and features" - -config SUBARCH_AVR32B - bool -config MMU - bool -config PERFORMANCE_COUNTERS - bool - -config PLATFORM_AT32AP - bool - select SUBARCH_AVR32B - select MMU - select PERFORMANCE_COUNTERS - select GPIOLIB - select GENERIC_ALLOCATOR - select HAVE_FB_ATMEL - -# -# CPU types -# - -# AP7000 derivatives -config CPU_AT32AP700X - bool - select PLATFORM_AT32AP -config CPU_AT32AP7000 - bool - select CPU_AT32AP700X -config CPU_AT32AP7001 - bool - select CPU_AT32AP700X -config CPU_AT32AP7002 - bool - select CPU_AT32AP700X - -# AP700X boards -config BOARD_ATNGW100_COMMON - bool - select CPU_AT32AP7000 - -choice - prompt "AVR32 board type" - default BOARD_ATSTK1000 - -config BOARD_ATSTK1000 - bool "ATSTK1000 evaluation board" - -config BOARD_ATNGW100_MKI - bool "ATNGW100 Network Gateway" - select BOARD_ATNGW100_COMMON - -config BOARD_ATNGW100_MKII - bool "ATNGW100 mkII Network Gateway" - select BOARD_ATNGW100_COMMON - -config BOARD_HAMMERHEAD - bool "Hammerhead board" - select CPU_AT32AP7000 - select USB_ARCH_HAS_HCD - help - The Hammerhead platform is built around an AVR32 32-bit microcontroller from Atmel. - It offers versatile peripherals, such as ethernet, usb device, usb host etc. - - The board also incorporates a power supply and is a Power over Ethernet (PoE) Powered - Device (PD). - - Additionally, a Cyclone III FPGA from Altera is integrated on the board. The FPGA is - mapped into the 32-bit AVR memory bus. 
The FPGA offers two DDR2 SDRAM interfaces, which - will cover even the most exceptional need of memory bandwidth. Together with the onboard - video decoder the board is ready for video processing. - - For more information see: http://www.miromico.ch/index.php/hammerhead.html - -config BOARD_FAVR_32 - bool "Favr-32 LCD-board" - select CPU_AT32AP7000 - -config BOARD_MERISC - bool "Merisc board" - select CPU_AT32AP7000 - help - Merisc is the family name for a range of AVR32-based boards. - - The boards are designed to be used in a man-machine - interfacing environment, utilizing a touch-based graphical - user interface. They host a vast range of I/O peripherals as - well as a large SDRAM & Flash memory bank. - - For more information see: http://www.martinsson.se/merisc - -config BOARD_MIMC200 - bool "MIMC200 CPU board" - select CPU_AT32AP7000 -endchoice - -source "arch/avr32/boards/atstk1000/Kconfig" -source "arch/avr32/boards/atngw100/Kconfig" -source "arch/avr32/boards/hammerhead/Kconfig" -source "arch/avr32/boards/favr-32/Kconfig" -source "arch/avr32/boards/merisc/Kconfig" - -choice - prompt "Boot loader type" - default LOADER_U_BOOT - -config LOADER_U_BOOT - bool "U-Boot (or similar) bootloader" -endchoice - -source "arch/avr32/mach-at32ap/Kconfig" - -config LOAD_ADDRESS - hex - default 0x10000000 if LOADER_U_BOOT=y && CPU_AT32AP700X=y - -config ENTRY_ADDRESS - hex - default 0x90000000 if LOADER_U_BOOT=y && CPU_AT32AP700X=y - -config PHYS_OFFSET - hex - default 0x10000000 if CPU_AT32AP700X=y - -source "kernel/Kconfig.preempt" - -config QUICKLIST - def_bool y - -config ARCH_HAVE_MEMORY_PRESENT - def_bool n - -config NEED_NODE_MEMMAP_SIZE - def_bool n - -config ARCH_FLATMEM_ENABLE - def_bool y - -config ARCH_DISCONTIGMEM_ENABLE - def_bool n - -config ARCH_SPARSEMEM_ENABLE - def_bool n - -config NODES_SHIFT - int - default "2" - depends on NEED_MULTIPLE_NODES - -source "mm/Kconfig" - -config OWNERSHIP_TRACE - bool "Ownership trace support" - default y - help - Say Y 
to generate an Ownership Trace message on every context switch, - enabling Nexus-compliant debuggers to keep track of the PID of the - currently executing task. - -config NMI_DEBUGGING - bool "NMI Debugging" - default n - help - Say Y here and pass the nmi_debug command-line parameter to - the kernel to turn on NMI debugging. Depending on the value - of the nmi_debug option, various pieces of information will - be dumped to the console when a Non-Maskable Interrupt - happens. - -# FPU emulation goes here - -source "kernel/Kconfig.hz" - -config CMDLINE - string "Default kernel command line" - default "" - help - If you don't have a boot loader capable of passing a command line string - to the kernel, you may specify one here. As a minimum, you should specify - the memory size and the root device (e.g., mem=8M, root=/dev/nfs). - -endmenu - -menu "Power management options" - -source "kernel/power/Kconfig" - -config ARCH_SUSPEND_POSSIBLE - def_bool y - -menu "CPU Frequency scaling" -source "drivers/cpufreq/Kconfig" -endmenu - -endmenu - -menu "Bus options" - -config PCI - bool - -source "drivers/pci/Kconfig" - -source "drivers/pcmcia/Kconfig" - -endmenu - -menu "Executable file formats" -source "fs/Kconfig.binfmt" -endmenu - -source "net/Kconfig" - -source "drivers/Kconfig" - -source "fs/Kconfig" - -source "arch/avr32/Kconfig.debug" - -source "security/Kconfig" - -source "crypto/Kconfig" - -source "lib/Kconfig" diff --git a/arch/avr32/Kconfig.debug b/arch/avr32/Kconfig.debug deleted file mode 100644 index 2283933a9a93..000000000000 --- a/arch/avr32/Kconfig.debug +++ /dev/null @@ -1,9 +0,0 @@ -menu "Kernel hacking" - -config TRACE_IRQFLAGS_SUPPORT - bool - default y - -source "lib/Kconfig.debug" - -endmenu diff --git a/arch/avr32/Makefile b/arch/avr32/Makefile deleted file mode 100644 index dba48a5d5bb9..000000000000 --- a/arch/avr32/Makefile +++ /dev/null @@ -1,84 +0,0 @@ -# -# This file is subject to the terms and conditions of the GNU General Public -# License. 
See the file "COPYING" in the main directory of this archive -# for more details. -# -# Copyright (C) 2004-2006 Atmel Corporation. - -# Default target when executing plain make -.PHONY: all -all: uImage vmlinux.elf - -KBUILD_DEFCONFIG := atstk1002_defconfig - -KBUILD_CFLAGS += -pipe -fno-builtin -mno-pic -D__linux__ -KBUILD_AFLAGS += -mrelax -mno-pic -KBUILD_CFLAGS_MODULE += -mno-relax -LDFLAGS_vmlinux += --relax - -cpuflags-$(CONFIG_PLATFORM_AT32AP) += -march=ap - -KBUILD_CFLAGS += $(cpuflags-y) -KBUILD_AFLAGS += $(cpuflags-y) - -CHECKFLAGS += -D__avr32__ -D__BIG_ENDIAN - -machine-$(CONFIG_PLATFORM_AT32AP) := at32ap -machdirs := $(patsubst %,arch/avr32/mach-%/, $(machine-y)) - -KBUILD_CPPFLAGS += $(patsubst %,-I$(srctree)/%include,$(machdirs)) - -head-$(CONFIG_LOADER_U_BOOT) += arch/avr32/boot/u-boot/head.o -head-y += arch/avr32/kernel/head.o -core-y += $(machdirs) -core-$(CONFIG_BOARD_ATSTK1000) += arch/avr32/boards/atstk1000/ -core-$(CONFIG_BOARD_ATNGW100_COMMON) += arch/avr32/boards/atngw100/ -core-$(CONFIG_BOARD_HAMMERHEAD) += arch/avr32/boards/hammerhead/ -core-$(CONFIG_BOARD_FAVR_32) += arch/avr32/boards/favr-32/ -core-$(CONFIG_BOARD_MERISC) += arch/avr32/boards/merisc/ -core-$(CONFIG_BOARD_MIMC200) += arch/avr32/boards/mimc200/ -core-$(CONFIG_LOADER_U_BOOT) += arch/avr32/boot/u-boot/ -core-y += arch/avr32/kernel/ -core-y += arch/avr32/mm/ -drivers-$(CONFIG_OPROFILE) += arch/avr32/oprofile/ -libs-y += arch/avr32/lib/ - -BOOT_TARGETS := vmlinux.elf vmlinux.bin uImage uImage.srec - -.PHONY: $(BOOT_TARGETS) install - -boot := arch/$(ARCH)/boot/images - - KBUILD_IMAGE := $(boot)/uImage -vmlinux.elf: KBUILD_IMAGE := $(boot)/vmlinux.elf -vmlinux.cso: KBUILD_IMAGE := $(boot)/vmlinux.cso -uImage.srec: KBUILD_IMAGE := $(boot)/uImage.srec -uImage: KBUILD_IMAGE := $(boot)/uImage - -quiet_cmd_listing = LST $@ - cmd_listing = avr32-linux-objdump $(OBJDUMPFLAGS) -lS $< > $@ -quiet_cmd_disasm = DIS $@ - cmd_disasm = avr32-linux-objdump $(OBJDUMPFLAGS) -d $< > $@ - 
-vmlinux.elf vmlinux.bin uImage.srec uImage vmlinux.cso: vmlinux - $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ - -install: vmlinux - $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) $@ - -vmlinux.s: vmlinux - $(call if_changed,disasm) - -vmlinux.lst: vmlinux - $(call if_changed,listing) - -CLEAN_FILES += vmlinux.s vmlinux.lst - -archclean: - $(Q)$(MAKE) $(clean)=$(boot) - -define archhelp - @echo '* vmlinux.elf - ELF image with load address 0' - @echo ' vmlinux.cso - PathFinder CSO image' - @echo '* uImage - Create a bootable image for U-Boot' -endef diff --git a/arch/avr32/boards/atngw100/Kconfig b/arch/avr32/boards/atngw100/Kconfig deleted file mode 100644 index 4e55617ade2d..000000000000 --- a/arch/avr32/boards/atngw100/Kconfig +++ /dev/null @@ -1,65 +0,0 @@ -# NGW100 customization - -if BOARD_ATNGW100_COMMON - -config BOARD_ATNGW100_MKII_LCD - bool "Enable ATNGW100 mkII LCD interface" - depends on BOARD_ATNGW100_MKII - help - This enables the LCD controller (LCDC) in the AT32AP7000. Since the - LCDC is multiplexed with MACB1 (LAN) Ethernet port, only one can be - enabled at a time. - - This choice enables the LCDC and disables the MACB1 interface marked - LAN on the PCB. - -choice - prompt "Select an NGW100 add-on board to support" - default BOARD_ATNGW100_ADDON_NONE - -config BOARD_ATNGW100_ADDON_NONE - bool "None" - -config BOARD_ATNGW100_EVKLCD10X - bool "EVKLCD10X addon board" - depends on BOARD_ATNGW100_MKI || BOARD_ATNGW100_MKII_LCD - help - This enables support for the EVKLCD100 (QVGA) or EVKLCD101 (VGA) - addon board for the NGW100 and NGW100 mkII. By enabling this the LCD - controller and AC97 controller is added as platform devices. - -config BOARD_ATNGW100_MRMT - bool "Mediama RMT1/2 add-on board" - help - This enables support for the Mediama RMT1 or RMT2 board. - RMT provides LCD support, AC97 codec and other - optional peripherals to the Atmel NGW100. 
- - This choice disables the detect pin and the write-protect pin for the - MCI platform device, since it conflicts with the LCD platform device. - The MCI pins can be reenabled by editing the "add device function" but - this may break the setup for other displays that use these pins. - -endchoice - -choice - prompt "LCD panel resolution on EVKLCD10X" - depends on BOARD_ATNGW100_EVKLCD10X - default BOARD_ATNGW100_EVKLCD10X_VGA - -config BOARD_ATNGW100_EVKLCD10X_QVGA - bool "QVGA (320x240)" - -config BOARD_ATNGW100_EVKLCD10X_VGA - bool "VGA (640x480)" - -config BOARD_ATNGW100_EVKLCD10X_POW_QVGA - bool "Powertip QVGA (320x240)" - -endchoice - -if BOARD_ATNGW100_MRMT -source "arch/avr32/boards/atngw100/Kconfig_mrmt" -endif - -endif # BOARD_ATNGW100_COMMON diff --git a/arch/avr32/boards/atngw100/Kconfig_mrmt b/arch/avr32/boards/atngw100/Kconfig_mrmt deleted file mode 100644 index 9a199a207f3c..000000000000 --- a/arch/avr32/boards/atngw100/Kconfig_mrmt +++ /dev/null @@ -1,80 +0,0 @@ -# RMT for NGW100 customization - -choice - prompt "RMT Version" - help - Select the RMTx board version. - -config BOARD_MRMT_REV1 - bool "RMT1" -config BOARD_MRMT_REV2 - bool "RMT2" - -endchoice - -config BOARD_MRMT_AC97 - bool "Enable AC97 CODEC" - help - Enable the UCB1400 AC97 CODEC driver. 
- -choice - prompt "Touchscreen Driver" - default BOARD_MRMT_ADS7846_TS - -config BOARD_MRMT_UCB1400_TS - bool "Use UCB1400 Touchscreen" - -config BOARD_MRMT_ADS7846_TS - bool "Use ADS7846 Touchscreen" - -endchoice - -choice - prompt "RMTx LCD Selection" - default BOARD_MRMT_LCD_DISABLE - -config BOARD_MRMT_LCD_DISABLE - bool "LCD Disabled" - -config BOARD_MRMT_LCD_LQ043T3DX0X - bool "Sharp LQ043T3DX0x or compatible" - help - If using RMT2, be sure to load the resistor pack selectors accordingly - -if BOARD_MRMT_REV2 -config BOARD_MRMT_LCD_KWH043GM08 - bool "Formike KWH043GM08 or compatible" - help - Be sure to load the RMT2 resistor pack selectors accordingly -endif - -endchoice - -if !BOARD_MRMT_LCD_DISABLE -config BOARD_MRMT_BL_PWM - bool "Use PWM control for LCD Backlight" - help - Use PWM driver for controlling LCD Backlight. - Otherwise, LCD Backlight is always on. -endif - -config BOARD_MRMT_RTC_I2C - bool "Use External RTC on I2C Bus" - help - RMT1 has an optional RTC device on the I2C bus. - It is a SII S35390A. Be sure to select the - matching RTC driver. 
- -choice - prompt "Wireless Module on ttyS2" - default BOARD_MRMT_WIRELESS_ZB - -config BOARD_MRMT_WIRELESS_ZB - bool "Use ZigBee/802.15.4 Module" - -config BOARD_MRMT_WIRELESS_BT - bool "Use Bluetooth (HCI) Module" - -config BOARD_MRMT_WIRELESS_NONE - bool "Not Installed" -endchoice diff --git a/arch/avr32/boards/atngw100/Makefile b/arch/avr32/boards/atngw100/Makefile deleted file mode 100644 index f4ebe42a8254..000000000000 --- a/arch/avr32/boards/atngw100/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -obj-y += setup.o flash.o -obj-$(CONFIG_BOARD_ATNGW100_EVKLCD10X) += evklcd10x.o -obj-$(CONFIG_BOARD_ATNGW100_MRMT) += mrmt.o diff --git a/arch/avr32/boards/atngw100/evklcd10x.c b/arch/avr32/boards/atngw100/evklcd10x.c deleted file mode 100644 index 64919b0da7aa..000000000000 --- a/arch/avr32/boards/atngw100/evklcd10x.c +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Board-specific setup code for the ATEVKLCD10X addon board to the ATNGW100 - * Network Gateway - * - * Copyright (C) 2008 Atmel Corporation - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - */ - -#include -#include -#include -#include -#include - -#include