From 8111477663813caa1a4469cfe6afaae36cd04513 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 29 Jun 2017 13:59:24 +0100 Subject: dma-buf/dma-fence: Extract __dma_fence_is_later() Often we have the task of comparing two seqno known to be on the same context, so provide a common __dma_fence_is_later(). Signed-off-by: Chris Wilson Cc: Sumit Semwal Cc: Sean Paul Cc: Gustavo Padovan Reviewed-by: Sean Paul Signed-off-by: Gustavo Padovan Link: http://patchwork.freedesktop.org/patch/msgid/20170629125930.821-1-chris@chris-wilson.co.uk --- include/linux/dma-fence.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index a5195a7d6f77..ac5987989e9a 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -335,6 +335,19 @@ dma_fence_is_signaled(struct dma_fence *fence) return false; } +/** + * __dma_fence_is_later - return if f1 is chronologically later than f2 + * @f1: [in] the first fence's seqno + * @f2: [in] the second fence's seqno from the same context + * + * Returns true if f1 is chronologically later than f2. Both fences must be + * from the same context, since a seqno is not common across contexts. + */ +static inline bool __dma_fence_is_later(u32 f1, u32 f2) +{ + return (int)(f1 - f2) > 0; +} + /** * dma_fence_is_later - return if f1 is chronologically later than f2 * @f1: [in] the first fence from the same context @@ -349,7 +362,7 @@ static inline bool dma_fence_is_later(struct dma_fence *f1, if (WARN_ON(f1->context != f2->context)) return false; - return (int)(f1->seqno - f2->seqno) > 0; + return __dma_fence_is_later(f1->seqno, f2->seqno); } /** -- cgit v1.2.3-71-gd317 From bded747bb432bc5f7ad6d84ea747368b70ed9df2 Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Tue, 30 May 2017 09:42:53 +0300 Subject: net/mlx5: Add raw ethernet local loopback firmware command Add support for raw ethernet local loopback firmware command. Signed-off-by: Huy Nguyen Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/net/ethernet/mellanox/mlx5/core/vport.c | 62 +++++++++++++++++++++++++ include/linux/mlx5/mlx5_ifc.h | 11 +++-- include/linux/mlx5/vport.h | 3 +- 3 files changed, 72 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 5abfec1c3399..d653b0025b13 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -897,6 +897,68 @@ int mlx5_modify_nic_vport_promisc(struct mlx5_core_dev *mdev, } EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_promisc); +enum { + UC_LOCAL_LB, + MC_LOCAL_LB +}; + +int mlx5_nic_vport_update_local_lb(struct mlx5_core_dev *mdev, bool enable) +{ + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + void *in; + int err; + + mlx5_core_dbg(mdev, "%s local_lb\n", enable ? "enable" : "disable"); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, + field_select.disable_mc_local_lb, 1); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.disable_mc_local_lb, !enable); + + MLX5_SET(modify_nic_vport_context_in, in, + field_select.disable_uc_local_lb, 1); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.disable_uc_local_lb, !enable); + + err = mlx5_modify_nic_vport_context(mdev, in, inlen); + + kvfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_nic_vport_update_local_lb); + +int mlx5_nic_vport_query_local_lb(struct mlx5_core_dev *mdev, bool *status) +{ + int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + u32 *out; + int value; + int err; + + out = kzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + err = mlx5_query_nic_vport_context(mdev, 0, out, outlen); + if (err) + goto out; + + value = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.disable_mc_local_lb) << MC_LOCAL_LB; + + value |= MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.disable_uc_local_lb) << UC_LOCAL_LB; + + *status = !value; + +out: + kfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_nic_vport_query_local_lb); + enum mlx5_vport_roce_state { MLX5_VPORT_ROCE_DISABLED = 0, MLX5_VPORT_ROCE_ENABLED = 1, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 87869c04849a..57c75e8b3c19 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1016,7 +1016,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 log_max_wq_sz[0x5]; u8 nic_vport_change_event[0x1]; - u8 reserved_at_3e1[0xa]; + u8 disable_local_lb[0x1]; + u8 reserved_at_3e2[0x9]; u8 log_max_vlan_list[0x5]; u8 reserved_at_3f0[0x3]; u8 log_max_current_mc_list[0x5]; @@ -2562,7 +2563,9 @@ struct mlx5_ifc_rmpc_bits { struct mlx5_ifc_nic_vport_context_bits { u8 reserved_at_0[0x5]; u8 min_wqe_inline_mode[0x3]; - u8 reserved_at_8[0x17]; + u8 reserved_at_8[0x15]; + u8 disable_mc_local_lb[0x1]; + u8 disable_uc_local_lb[0x1]; u8 roce_en[0x1]; u8 arm_change_event[0x1]; @@ -5229,7 +5232,9 @@ struct mlx5_ifc_modify_nic_vport_context_out_bits { }; struct mlx5_ifc_modify_nic_vport_field_select_bits { - u8 reserved_at_0[0x16]; + u8 reserved_at_0[0x14]; + u8 disable_uc_local_lb[0x1]; + u8 disable_mc_local_lb[0x1]; u8 node_guid[0x1]; u8 port_guid[0x1]; u8 min_inline[0x1]; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 656c70b65dd2..aaa0bb9e7655 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -114,5 +114,6 @@ int mlx5_core_modify_hca_vport_context(struct mlx5_core_dev *dev, u8 other_vport, u8 port_num, int vf, struct mlx5_hca_vport_context *req); - +int mlx5_nic_vport_update_local_lb(struct mlx5_core_dev *mdev, bool enable); +int mlx5_nic_vport_query_local_lb(struct mlx5_core_dev *mdev, bool *status); #endif /* __MLX5_VPORT_H__ */ -- cgit v1.2.3-71-gd317 From 4a2da0b8c0782816f3ae6846ae7942fcbb0f8172 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 30 May 2017 10:05:15 +0300 Subject: IB/mlx5: Add debug control parameters for congestion control This patch adds debug control parameters for congestion control which can be read or written through debugfs. They are for reaction point and notification point nodes. These control parameters are as below: +------------------------------+-----------------------------------------+ | Name | Description | |------------------------------+-----------------------------------------| |rp_clamp_tgt_rate | When set target rate is updated to | | | current rate | |------------------------------+-----------------------------------------| |rp_clamp_tgt_rate_ati | When set update target rate based on | | | timer as well | |------------------------------+-----------------------------------------| |rp_time_reset | time between rate increase if no | | | CNP is received unit in usec | |------------------------------+-----------------------------------------| |rp_byte_reset | Number of bytes between rate inease if | | | no CNP is received | |------------------------------+-----------------------------------------| |rp_threshold | Threshold for reaction point rate | | | control | |------------------------------+-----------------------------------------| |rp_ai_rate | Rate for target rate, unit in Mbps | |------------------------------+-----------------------------------------| |rp_hai_rate | Rate for hyper increase state | | | unit in Mbps | |------------------------------+-----------------------------------------| |rp_min_dec_fac | Minimum factor by which the current | | | transmit rate can be changed when | | | processing a CNP, unit is percerntage | |------------------------------+-----------------------------------------| |rp_min_rate | Minimum value for rate limit, | | | unit in Mbps | |------------------------------+-----------------------------------------| |rp_rate_to_set_on_first_cnp | Rate that is set when first CNP is | | | received, unit is Mbps | |------------------------------+-----------------------------------------| |rp_dce_tcp_g | Used to calculate alpha | |------------------------------+-----------------------------------------| |rp_dce_tcp_rtt | Time between updates of alpha value, | | | unit is usec | |------------------------------+-----------------------------------------| |rp_rate_reduce_monitor_period | Minimum time between consecutive rate | | | reductions | |------------------------------+-----------------------------------------| |rp_initial_alpha_value | Initial value of alpha | |------------------------------+-----------------------------------------| |rp_gd | When CNP is received, flow rate is | | | reduced based on gd, rp_gd is given as | | | log2(rp_gd) | |------------------------------+-----------------------------------------| |np_cnp_dscp | dscp code point for generated cnp | |------------------------------+-----------------------------------------| |np_cnp_prio_mode | 802.1p priority for generated cnp | |------------------------------+-----------------------------------------| |np_cnp_prio | cnp priority mode | +------------------------------+-----------------------------------------+ Signed-off-by: Parav Pandit Reviewed-by: Daniel Jurgens Reviewed-by: Eli Cohen Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/Makefile | 2 +- drivers/infiniband/hw/mlx5/cmd.c | 20 ++ drivers/infiniband/hw/mlx5/cmd.h | 4 + drivers/infiniband/hw/mlx5/cong.c | 421 +++++++++++++++++++++++++++++++++++ drivers/infiniband/hw/mlx5/main.c | 9 +- drivers/infiniband/hw/mlx5/mlx5_ib.h | 37 +++ include/linux/mlx5/mlx5_ifc.h | 3 +- 7 files changed, 493 insertions(+), 3 deletions(-) create mode 100644 drivers/infiniband/hw/mlx5/cong.c (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile index 90ad2adc752f..bc6299697dda 100644 --- a/drivers/infiniband/hw/mlx5/Makefile +++ b/drivers/infiniband/hw/mlx5/Makefile @@ -1,4 +1,4 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o -mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o +mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o cong.o mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c index 18d5e1db93ed..470995fa38d2 100644 --- a/drivers/infiniband/hw/mlx5/cmd.c +++ b/drivers/infiniband/hw/mlx5/cmd.c @@ -57,3 +57,23 @@ int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev, MLX5_SET(query_cong_statistics_in, in, clear, reset); return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size); } + +int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point, + void *out, int out_size) +{ + u32 in[MLX5_ST_SZ_DW(query_cong_params_in)] = { }; + + MLX5_SET(query_cong_params_in, in, opcode, + MLX5_CMD_OP_QUERY_CONG_PARAMS); + MLX5_SET(query_cong_params_in, in, cong_protocol, cong_point); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size); +} + +int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *dev, + void *in, int in_size) +{ + u32 out[MLX5_ST_SZ_DW(modify_cong_params_out)] = { }; + + return mlx5_cmd_exec(dev, in, in_size, out, sizeof(out)); +} diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h index fa09228193a6..af4c24596274 100644 --- a/drivers/infiniband/hw/mlx5/cmd.h +++ b/drivers/infiniband/hw/mlx5/cmd.h @@ -39,4 +39,8 @@ int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey); int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev, bool reset, void *out, int out_size); +int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point, + void *out, int out_size); +int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev, + void *in, int in_size); #endif /* MLX5_IB_CMD_H */ diff --git a/drivers/infiniband/hw/mlx5/cong.c b/drivers/infiniband/hw/mlx5/cong.c new file mode 100644 index 000000000000..2d32b519bb61 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/cong.c @@ -0,0 +1,421 @@ +/* + * Copyright (c) 2013-2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include + +#include "mlx5_ib.h" +#include "cmd.h" + +enum mlx5_ib_cong_node_type { + MLX5_IB_RROCE_ECN_RP = 1, + MLX5_IB_RROCE_ECN_NP = 2, +}; + +static const char * const mlx5_ib_dbg_cc_name[] = { + "rp_clamp_tgt_rate", + "rp_clamp_tgt_rate_ati", + "rp_time_reset", + "rp_byte_reset", + "rp_threshold", + "rp_ai_rate", + "rp_hai_rate", + "rp_min_dec_fac", + "rp_min_rate", + "rp_rate_to_set_on_first_cnp", + "rp_dce_tcp_g", + "rp_dce_tcp_rtt", + "rp_rate_reduce_monitor_period", + "rp_initial_alpha_value", + "rp_gd", + "np_cnp_dscp", + "np_cnp_prio_mode", + "np_cnp_prio", +}; + +#define MLX5_IB_RP_CLAMP_TGT_RATE_ATTR BIT(1) +#define MLX5_IB_RP_CLAMP_TGT_RATE_ATI_ATTR BIT(2) +#define MLX5_IB_RP_TIME_RESET_ATTR BIT(3) +#define MLX5_IB_RP_BYTE_RESET_ATTR BIT(4) +#define MLX5_IB_RP_THRESHOLD_ATTR BIT(5) +#define MLX5_IB_RP_AI_RATE_ATTR BIT(7) +#define MLX5_IB_RP_HAI_RATE_ATTR BIT(8) +#define MLX5_IB_RP_MIN_DEC_FAC_ATTR BIT(9) +#define MLX5_IB_RP_MIN_RATE_ATTR BIT(10) +#define MLX5_IB_RP_RATE_TO_SET_ON_FIRST_CNP_ATTR BIT(11) +#define MLX5_IB_RP_DCE_TCP_G_ATTR BIT(12) +#define MLX5_IB_RP_DCE_TCP_RTT_ATTR BIT(13) +#define MLX5_IB_RP_RATE_REDUCE_MONITOR_PERIOD_ATTR BIT(14) +#define MLX5_IB_RP_INITIAL_ALPHA_VALUE_ATTR BIT(15) +#define MLX5_IB_RP_GD_ATTR BIT(16) + +#define MLX5_IB_NP_CNP_DSCP_ATTR BIT(3) +#define MLX5_IB_NP_CNP_PRIO_MODE_ATTR BIT(4) + +static enum mlx5_ib_cong_node_type +mlx5_ib_param_to_node(enum mlx5_ib_dbg_cc_types param_offset) +{ + if (param_offset >= MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE && + param_offset <= MLX5_IB_DBG_CC_RP_GD) + return MLX5_IB_RROCE_ECN_RP; + else + return MLX5_IB_RROCE_ECN_NP; +} + +static u32 mlx5_get_cc_param_val(void *field, int offset) +{ + switch (offset) { + case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + clamp_tgt_rate); + case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE_ATI: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + clamp_tgt_rate_after_time_inc); + case MLX5_IB_DBG_CC_RP_TIME_RESET: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + rpg_time_reset); + case MLX5_IB_DBG_CC_RP_BYTE_RESET: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + rpg_byte_reset); + case MLX5_IB_DBG_CC_RP_THRESHOLD: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + rpg_threshold); + case MLX5_IB_DBG_CC_RP_AI_RATE: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + rpg_ai_rate); + case MLX5_IB_DBG_CC_RP_HAI_RATE: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + rpg_hai_rate); + case MLX5_IB_DBG_CC_RP_MIN_DEC_FAC: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + rpg_min_dec_fac); + case MLX5_IB_DBG_CC_RP_MIN_RATE: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + rpg_min_rate); + case MLX5_IB_DBG_CC_RP_RATE_TO_SET_ON_FIRST_CNP: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + rate_to_set_on_first_cnp); + case MLX5_IB_DBG_CC_RP_DCE_TCP_G: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + dce_tcp_g); + case MLX5_IB_DBG_CC_RP_DCE_TCP_RTT: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + dce_tcp_rtt); + case MLX5_IB_DBG_CC_RP_RATE_REDUCE_MONITOR_PERIOD: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + rate_reduce_monitor_period); + case MLX5_IB_DBG_CC_RP_INITIAL_ALPHA_VALUE: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + initial_alpha_value); + case MLX5_IB_DBG_CC_RP_GD: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + rpg_gd); + case MLX5_IB_DBG_CC_NP_CNP_DSCP: + return MLX5_GET(cong_control_r_roce_ecn_np, field, + cnp_dscp); + case MLX5_IB_DBG_CC_NP_CNP_PRIO_MODE: + return MLX5_GET(cong_control_r_roce_ecn_np, field, + cnp_prio_mode); + case MLX5_IB_DBG_CC_NP_CNP_PRIO: + return MLX5_GET(cong_control_r_roce_ecn_np, field, + cnp_802p_prio); + default: + return 0; + } +} + +static void mlx5_ib_set_cc_param_mask_val(void *field, int offset, + u32 var, u32 *attr_mask) +{ + switch (offset) { + case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE: + *attr_mask |= MLX5_IB_RP_CLAMP_TGT_RATE_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + clamp_tgt_rate, var); + break; + case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE_ATI: + *attr_mask |= MLX5_IB_RP_CLAMP_TGT_RATE_ATI_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + clamp_tgt_rate_after_time_inc, var); + break; + case MLX5_IB_DBG_CC_RP_TIME_RESET: + *attr_mask |= MLX5_IB_RP_TIME_RESET_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + rpg_time_reset, var); + break; + case MLX5_IB_DBG_CC_RP_BYTE_RESET: + *attr_mask |= MLX5_IB_RP_BYTE_RESET_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + rpg_byte_reset, var); + break; + case MLX5_IB_DBG_CC_RP_THRESHOLD: + *attr_mask |= MLX5_IB_RP_THRESHOLD_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + rpg_threshold, var); + break; + case MLX5_IB_DBG_CC_RP_AI_RATE: + *attr_mask |= MLX5_IB_RP_AI_RATE_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + rpg_ai_rate, var); + break; + case MLX5_IB_DBG_CC_RP_HAI_RATE: + *attr_mask |= MLX5_IB_RP_HAI_RATE_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + rpg_hai_rate, var); + break; + case MLX5_IB_DBG_CC_RP_MIN_DEC_FAC: + *attr_mask |= MLX5_IB_RP_MIN_DEC_FAC_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + rpg_min_dec_fac, var); + break; + case MLX5_IB_DBG_CC_RP_MIN_RATE: + *attr_mask |= MLX5_IB_RP_MIN_RATE_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + rpg_min_rate, var); + break; + case MLX5_IB_DBG_CC_RP_RATE_TO_SET_ON_FIRST_CNP: + *attr_mask |= MLX5_IB_RP_RATE_TO_SET_ON_FIRST_CNP_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + rate_to_set_on_first_cnp, var); + break; + case MLX5_IB_DBG_CC_RP_DCE_TCP_G: + *attr_mask |= MLX5_IB_RP_DCE_TCP_G_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + dce_tcp_g, var); + break; + case MLX5_IB_DBG_CC_RP_DCE_TCP_RTT: + *attr_mask |= MLX5_IB_RP_DCE_TCP_RTT_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + dce_tcp_rtt, var); + break; + case MLX5_IB_DBG_CC_RP_RATE_REDUCE_MONITOR_PERIOD: + *attr_mask |= MLX5_IB_RP_RATE_REDUCE_MONITOR_PERIOD_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + rate_reduce_monitor_period, var); + break; + case MLX5_IB_DBG_CC_RP_INITIAL_ALPHA_VALUE: + *attr_mask |= MLX5_IB_RP_INITIAL_ALPHA_VALUE_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + initial_alpha_value, var); + break; + case MLX5_IB_DBG_CC_RP_GD: + *attr_mask |= MLX5_IB_RP_GD_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + rpg_gd, var); + break; + case MLX5_IB_DBG_CC_NP_CNP_DSCP: + *attr_mask |= MLX5_IB_NP_CNP_DSCP_ATTR; + MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_dscp, var); + break; + case MLX5_IB_DBG_CC_NP_CNP_PRIO_MODE: + *attr_mask |= MLX5_IB_NP_CNP_PRIO_MODE_ATTR; + MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_prio_mode, var); + break; + case MLX5_IB_DBG_CC_NP_CNP_PRIO: + *attr_mask |= MLX5_IB_NP_CNP_PRIO_MODE_ATTR; + MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_prio_mode, 0); + MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_802p_prio, var); + break; + } +} + +static int mlx5_ib_get_cc_params(struct mlx5_ib_dev *dev, int offset, u32 *var) +{ + int outlen = MLX5_ST_SZ_BYTES(query_cong_params_out); + void *out; + void *field; + int err; + enum mlx5_ib_cong_node_type node; + + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + node = mlx5_ib_param_to_node(offset); + + err = mlx5_cmd_query_cong_params(dev->mdev, node, out, outlen); + if (err) + goto free; + + field = MLX5_ADDR_OF(query_cong_params_out, out, congestion_parameters); + *var = mlx5_get_cc_param_val(field, offset); + +free: + kvfree(out); + return err; +} + +static int mlx5_ib_set_cc_params(struct mlx5_ib_dev *dev, int offset, u32 var) +{ + int inlen = MLX5_ST_SZ_BYTES(modify_cong_params_in); + void *in; + void *field; + enum mlx5_ib_cong_node_type node; + u32 attr_mask = 0; + int err; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_cong_params_in, in, opcode, + MLX5_CMD_OP_MODIFY_CONG_PARAMS); + + node = mlx5_ib_param_to_node(offset); + MLX5_SET(modify_cong_params_in, in, cong_protocol, node); + + field = MLX5_ADDR_OF(modify_cong_params_in, in, congestion_parameters); + mlx5_ib_set_cc_param_mask_val(field, offset, var, &attr_mask); + + field = MLX5_ADDR_OF(modify_cong_params_in, in, field_select); + MLX5_SET(field_select_r_roce_rp, field, field_select_r_roce_rp, + attr_mask); + + err = mlx5_cmd_modify_cong_params(dev->mdev, in, inlen); + kvfree(in); + return err; +} + +static ssize_t set_param(struct file *filp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct mlx5_ib_dbg_param *param = filp->private_data; + int offset = param->offset; + char lbuf[11] = { }; + u32 var; + int ret; + + if (count > sizeof(lbuf)) + return -EINVAL; + + if (copy_from_user(lbuf, buf, count)) + return -EFAULT; + + lbuf[sizeof(lbuf) - 1] = '\0'; + + if (kstrtou32(lbuf, 0, &var)) + return -EINVAL; + + ret = mlx5_ib_set_cc_params(param->dev, offset, var); + return ret ? ret : count; +} + +static ssize_t get_param(struct file *filp, char __user *buf, size_t count, + loff_t *pos) +{ + struct mlx5_ib_dbg_param *param = filp->private_data; + int offset = param->offset; + u32 var = 0; + int ret; + char lbuf[11]; + + if (*pos) + return 0; + + ret = mlx5_ib_get_cc_params(param->dev, offset, &var); + if (ret) + return ret; + + ret = snprintf(lbuf, sizeof(lbuf), "%d\n", var); + if (ret < 0) + return ret; + + if (copy_to_user(buf, lbuf, ret)) + return -EFAULT; + + *pos += ret; + return ret; +} + +static const struct file_operations dbg_cc_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = set_param, + .read = get_param, +}; + +void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev) +{ + if (!mlx5_debugfs_root || + !dev->dbg_cc_params || + !dev->dbg_cc_params->root) + return; + + debugfs_remove_recursive(dev->dbg_cc_params->root); + kfree(dev->dbg_cc_params); + dev->dbg_cc_params = NULL; +} + +int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev) +{ + struct mlx5_ib_dbg_cc_params *dbg_cc_params; + int i; + + if (!mlx5_debugfs_root) + goto out; + + if (!MLX5_CAP_GEN(dev->mdev, cc_query_allowed) || + !MLX5_CAP_GEN(dev->mdev, cc_modify_allowed)) + goto out; + + dbg_cc_params = kzalloc(sizeof(*dbg_cc_params), GFP_KERNEL); + if (!dbg_cc_params) + goto out; + + dev->dbg_cc_params = dbg_cc_params; + + dbg_cc_params->root = debugfs_create_dir("cc_params", + dev->mdev->priv.dbg_root); + if (!dbg_cc_params->root) + goto err; + + for (i = 0; i < MLX5_IB_DBG_CC_MAX; i++) { + dbg_cc_params->params[i].offset = i; + dbg_cc_params->params[i].dev = dev; + dbg_cc_params->params[i].dentry = + debugfs_create_file(mlx5_ib_dbg_cc_name[i], + 0600, dbg_cc_params->root, + &dbg_cc_params->params[i], + &dbg_cc_fops); + if (!dbg_cc_params->params[i].dentry) + goto err; + } +out: return 0; + +err: + mlx5_ib_warn(dev, "cong debugfs failure\n"); + mlx5_ib_cleanup_cong_debugfs(dev); + /* + * We don't want to fail driver if debugfs failed to initialize, + * so we are not forwarding error to the user. + */ + return 0; +} diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index bfd9117da10c..a903728627f1 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3838,9 +3838,13 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) goto err_odp; } + err = mlx5_ib_init_cong_debugfs(dev); + if (err) + goto err_cnt; + dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev); if (!dev->mdev->priv.uar) - goto err_cnt; + goto err_cong; err = mlx5_alloc_bfreg(dev->mdev, &dev->bfreg, false, false); if (err) @@ -3889,6 +3893,8 @@ err_uar_page: mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar); err_cnt: + mlx5_ib_cleanup_cong_debugfs(dev); +err_cong: if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) mlx5_ib_dealloc_counters(dev); @@ -3923,6 +3929,7 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg); mlx5_free_bfreg(dev->mdev, &dev->bfreg); mlx5_put_uars_page(dev->mdev, mdev->priv.uar); + mlx5_ib_cleanup_cong_debugfs(dev); if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) mlx5_ib_dealloc_counters(dev); destroy_umrc_res(dev); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 5fb4547bbcb8..f0682f383b52 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -619,6 +619,39 @@ struct mlx5_roce { enum ib_port_state last_port_state; }; +struct mlx5_ib_dbg_param { + int offset; + struct mlx5_ib_dev *dev; + struct dentry *dentry; +}; + +enum mlx5_ib_dbg_cc_types { + MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE, + MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE_ATI, + MLX5_IB_DBG_CC_RP_TIME_RESET, + MLX5_IB_DBG_CC_RP_BYTE_RESET, + MLX5_IB_DBG_CC_RP_THRESHOLD, + MLX5_IB_DBG_CC_RP_AI_RATE, + MLX5_IB_DBG_CC_RP_HAI_RATE, + MLX5_IB_DBG_CC_RP_MIN_DEC_FAC, + MLX5_IB_DBG_CC_RP_MIN_RATE, + MLX5_IB_DBG_CC_RP_RATE_TO_SET_ON_FIRST_CNP, + MLX5_IB_DBG_CC_RP_DCE_TCP_G, + MLX5_IB_DBG_CC_RP_DCE_TCP_RTT, + MLX5_IB_DBG_CC_RP_RATE_REDUCE_MONITOR_PERIOD, + MLX5_IB_DBG_CC_RP_INITIAL_ALPHA_VALUE, + MLX5_IB_DBG_CC_RP_GD, + MLX5_IB_DBG_CC_NP_CNP_DSCP, + MLX5_IB_DBG_CC_NP_CNP_PRIO_MODE, + MLX5_IB_DBG_CC_NP_CNP_PRIO, + MLX5_IB_DBG_CC_MAX, +}; + +struct mlx5_ib_dbg_cc_params { + struct dentry *root; + struct mlx5_ib_dbg_param params[MLX5_IB_DBG_CC_MAX]; +}; + struct mlx5_ib_dev { struct ib_device ib_dev; struct mlx5_core_dev *mdev; @@ -655,6 +688,7 @@ struct mlx5_ib_dev { struct mlx5_ib_port *port; struct mlx5_sq_bfreg bfreg; struct mlx5_sq_bfreg fp_bfreg; + struct mlx5_ib_dbg_cc_params *dbg_cc_params; /* protect the user_td */ struct mutex lb_mutex; @@ -909,6 +943,9 @@ __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num, int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num, int index, enum ib_gid_type *gid_type); +void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev); +int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev); + /* GSI QP helper functions */ struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 57c75e8b3c19..3e697f672de5 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1188,7 +1188,8 @@ struct mlx5_ifc_cong_control_r_roce_ecn_np_bits { u8 reserved_at_c0[0x12]; u8 cnp_dscp[0x6]; - u8 reserved_at_d8[0x5]; + u8 reserved_at_d8[0x4]; + u8 cnp_prio_mode[0x1]; u8 cnp_802p_prio[0x3]; u8 reserved_at_e0[0x720]; -- cgit v1.2.3-71-gd317 From 7ecf6d8ff154e6f7471ee537a400d43a5f3b1c57 Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Tue, 30 May 2017 10:18:24 +0300 Subject: IB/mlx5: Restore IB guid/policy for virtual functions When a user sets port_guid, node_guid or policy of an IB virtual function, save this information in "struct mlx5_vf_context". This information will be restored later when pci_resume is called. To make sure this works, one can use aer-inject to generate PCI errors on mlx5 devices and verify if relevant fields are restored after PCI resume. Signed-off-by: Bodong Wang Reviewed-by: Eli Cohen Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/ib_virt.c | 9 ++++++ drivers/net/ethernet/mellanox/mlx5/core/sriov.c | 42 +++++++++++++++++++++++++ include/linux/mlx5/driver.h | 17 +++++----- 3 files changed, 61 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/ib_virt.c b/drivers/infiniband/hw/mlx5/ib_virt.c index c1b9de800fe5..649a3364f838 100644 --- a/drivers/infiniband/hw/mlx5/ib_virt.c +++ b/drivers/infiniband/hw/mlx5/ib_virt.c @@ -96,6 +96,7 @@ int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf, struct mlx5_ib_dev *dev = to_mdev(device); struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_hca_vport_context *in; + struct mlx5_vf_context *vfs_ctx = mdev->priv.sriov.vfs_ctx; int err; in = kzalloc(sizeof(*in), GFP_KERNEL); @@ -109,6 +110,8 @@ int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf, } in->field_select = MLX5_HCA_VPORT_SEL_STATE_POLICY; err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in); + if (!err) + vfs_ctx[vf].policy = in->policy; out: kfree(in); @@ -151,6 +154,7 @@ static int set_vf_node_guid(struct ib_device *device, int vf, u8 port, u64 guid) struct mlx5_ib_dev *dev = to_mdev(device); struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_hca_vport_context *in; + struct mlx5_vf_context *vfs_ctx = mdev->priv.sriov.vfs_ctx; int err; in = kzalloc(sizeof(*in), GFP_KERNEL); @@ -160,6 +164,8 @@ static int set_vf_node_guid(struct ib_device *device, int vf, u8 port, u64 guid) in->field_select = MLX5_HCA_VPORT_SEL_NODE_GUID; in->node_guid = guid; err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in); + if (!err) + vfs_ctx[vf].node_guid = guid; kfree(in); return err; } @@ -169,6 +175,7 @@ static int set_vf_port_guid(struct ib_device *device, int vf, u8 port, u64 guid) struct mlx5_ib_dev *dev = to_mdev(device); struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_hca_vport_context *in; + struct mlx5_vf_context *vfs_ctx = mdev->priv.sriov.vfs_ctx; int err; in = kzalloc(sizeof(*in), GFP_KERNEL); @@ -178,6 +185,8 @@ static int set_vf_port_guid(struct ib_device *device, int vf, u8 port, u64 guid) in->field_select = MLX5_HCA_VPORT_SEL_PORT_GUID; in->port_guid = guid; err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in); + if (!err) + vfs_ctx[vf].port_guid = guid; kfree(in); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index bcdf7779c48d..090b29e05a6a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -32,6 +32,7 @@ #include #include +#include #include "mlx5_core.h" #ifdef CONFIG_MLX5_CORE_EN #include "eswitch.h" @@ -44,6 +45,38 @@ bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev) return !!sriov->num_vfs; } +static int sriov_restore_guids(struct mlx5_core_dev *dev, int vf) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + struct mlx5_hca_vport_context *in; + int err = 0; + + /* Restore sriov guid and policy settings */ + if (sriov->vfs_ctx[vf].node_guid || + sriov->vfs_ctx[vf].port_guid || + sriov->vfs_ctx[vf].policy != MLX5_POLICY_INVALID) { + in = kzalloc(sizeof(*in), GFP_KERNEL); + if (!in) + return -ENOMEM; + + in->node_guid = sriov->vfs_ctx[vf].node_guid; + in->port_guid = sriov->vfs_ctx[vf].port_guid; + in->policy = sriov->vfs_ctx[vf].policy; + in->field_select = + !!(in->port_guid) * MLX5_HCA_VPORT_SEL_PORT_GUID | + !!(in->node_guid) * MLX5_HCA_VPORT_SEL_NODE_GUID | + !!(in->policy) * MLX5_HCA_VPORT_SEL_STATE_POLICY; + + err = mlx5_core_modify_hca_vport_context(dev, 1, 1, vf + 1, in); + if (err) + mlx5_core_warn(dev, "modify vport context failed, unable to restore VF %d settings\n", vf); + + kfree(in); + } + + return err; +} + static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) { struct mlx5_core_sriov *sriov = &dev->priv.sriov; @@ -74,6 +107,15 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) } sriov->vfs_ctx[vf].enabled = 1; sriov->enabled_vfs++; + if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) { + err = sriov_restore_guids(dev, vf); + if (err) { + mlx5_core_warn(dev, + "failed to restore VF %d settings, err %d\n", + vf, err); + continue; + } + } mlx5_core_dbg(dev, "successfully enabled VF* %d\n", vf); } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index df6ce59a1f95..54221be5f69e 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -162,6 +162,13 @@ enum dbg_rsc_type { MLX5_DBG_RSC_CQ, }; +enum port_state_policy { + MLX5_POLICY_DOWN = 0, + MLX5_POLICY_UP = 1, + MLX5_POLICY_FOLLOW = 2, + MLX5_POLICY_INVALID = 0xffffffff +}; + struct mlx5_field_desc { struct dentry *dent; int i; @@ -525,6 +532,9 @@ struct mlx5_mkey_table { struct mlx5_vf_context { int enabled; + u64 port_guid; + u64 node_guid; + enum port_state_policy policy; }; struct mlx5_core_sriov { @@ -842,13 +852,6 @@ struct mlx5_pas { u8 log_sz; }; -enum port_state_policy { - MLX5_POLICY_DOWN = 0, - MLX5_POLICY_UP = 1, - MLX5_POLICY_FOLLOW = 2, - MLX5_POLICY_INVALID = 0xffffffff -}; - enum phy_port_state { MLX5_AAA_111 }; -- cgit v1.2.3-71-gd317 From c1e0bfc1312d0e06bdb24e6e4e7e10b0b4313ec6 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 30 May 2017 10:29:11 +0300 Subject: net/mlx5: Introduce set delay drop command Add support to SET_DELAY_DROP command. This command will be used in downstream patches for delay packet drop. The timeout value should be indicated by delay_drop_timeout field. Packet processing will be delayed till timeout value passed or until more WQEs are posted. Setting this value to 0 disables the feature. Signed-off-by: Maor Gottlieb Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/net/ethernet/mellanox/mlx5/core/qp.c | 14 ++++++++++++++ include/linux/mlx5/mlx5_ifc.h | 25 ++++++++++++++++++++++++- include/linux/mlx5/qp.h | 3 +++ 3 files changed, 41 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c index 340f281c9801..db9e665ab104 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -242,6 +242,20 @@ int mlx5_core_destroy_qp(struct mlx5_core_dev *dev, } EXPORT_SYMBOL_GPL(mlx5_core_destroy_qp); +int mlx5_core_set_delay_drop(struct mlx5_core_dev *dev, + u32 timeout_usec) +{ + u32 out[MLX5_ST_SZ_DW(set_delay_drop_params_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(set_delay_drop_params_in)] = {0}; + + MLX5_SET(set_delay_drop_params_in, in, opcode, + MLX5_CMD_OP_SET_DELAY_DROP_PARAMS); + MLX5_SET(set_delay_drop_params_in, in, delay_drop_timeout, + timeout_usec / 100); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} +EXPORT_SYMBOL_GPL(mlx5_core_set_delay_drop); + struct mbox_info { u32 *in; u32 *out; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 3e697f672de5..40c05d29d5bb 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -200,6 +200,7 @@ enum { MLX5_CMD_OP_QUERY_SQ = 0x907, MLX5_CMD_OP_CREATE_RQ = 0x908, MLX5_CMD_OP_MODIFY_RQ = 0x909, + MLX5_CMD_OP_SET_DELAY_DROP_PARAMS = 0x910, MLX5_CMD_OP_DESTROY_RQ = 0x90a, MLX5_CMD_OP_QUERY_RQ = 0x90b, MLX5_CMD_OP_CREATE_RMP = 0x90c, @@ -840,7 +841,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 retransmission_q_counters[0x1]; u8 reserved_at_183[0x1]; u8 modify_rq_counter_set_id[0x1]; - u8 reserved_at_185[0x1]; + u8 rq_delay_drop[0x1]; u8 max_qp_cnt[0xa]; u8 pkey_table_size[0x10]; @@ -5853,6 +5854,28 @@ struct mlx5_ifc_destroy_rq_in_bits { u8 reserved_at_60[0x20]; }; +struct mlx5_ifc_set_delay_drop_params_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x20]; + + u8 reserved_at_60[0x10]; + u8 delay_drop_timeout[0x10]; +}; + +struct mlx5_ifc_set_delay_drop_params_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + struct mlx5_ifc_destroy_rmp_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 6f41270d80c0..fff4ec13f620 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -561,6 +561,9 @@ int mlx5_core_destroy_qp(struct mlx5_core_dev *dev, int mlx5_core_qp_query(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, u32 *out, int outlen); +int mlx5_core_set_delay_drop(struct mlx5_core_dev *dev, + u32 timeout_usec); + int mlx5_core_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn); int mlx5_core_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn); void mlx5_init_qp_table(struct mlx5_core_dev *dev); -- cgit v1.2.3-71-gd317 From 246ac9814c5b2c0e9916dca5fbf8d6a40245fad1 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 30 May 2017 10:29:12 +0300 Subject: net/mlx5: Introduce general notification event When delay drop timeout is expired, the firmware raises general notification event of DELAY_DROP_TIMEOUT subtype. In addition the feature is disable so the driver have to reactivate the timeout. Signed-off-by: Maor Gottlieb Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 23 +++++++++++++++++++++++ include/linux/mlx5/device.h | 5 +++++ include/linux/mlx5/driver.h | 1 + include/linux/mlx5/mlx5_ifc.h | 3 ++- 4 files changed, 31 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index af51a5d2b912..849417425811 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -161,6 +161,8 @@ static const char *eqe_type_str(u8 type) return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE"; case MLX5_EVENT_TYPE_FPGA_ERROR: return "MLX5_EVENT_TYPE_FPGA_ERROR"; + case MLX5_EVENT_TYPE_GENERAL_EVENT: + return "MLX5_EVENT_TYPE_GENERAL_EVENT"; default: return "Unrecognized event"; } @@ -378,6 +380,20 @@ int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 token, EXPORT_SYMBOL_GPL(mlx5_core_page_fault_resume); #endif +static void general_event_handler(struct mlx5_core_dev *dev, + struct mlx5_eqe *eqe) +{ + switch (eqe->sub_type) { + case MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT: + if (dev->event) + dev->event(dev, MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT, 0); + break; + default: + mlx5_core_dbg(dev, "General event with unrecognized subtype: sub_type %d\n", + eqe->sub_type); + } +} + static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr) { struct mlx5_eq *eq = eq_ptr; @@ -486,6 +502,9 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr) mlx5_fpga_event(dev, eqe->type, &eqe->data.raw); break; + case MLX5_EVENT_TYPE_GENERAL_EVENT: + general_event_handler(dev, eqe); + break; default: mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n", eqe->type, eq->eqn); @@ -693,6 +712,10 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev) mlx5_core_is_pf(dev)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE); + if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH && + MLX5_CAP_GEN(dev, general_notification_event)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_GENERAL_EVENT); + if (MLX5_CAP_GEN(dev, port_module_event)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_PORT_MODULE_EVENT); else diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index f31a0b5377e1..a47b9ab9f2c9 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -290,6 +290,7 @@ enum mlx5_event { MLX5_EVENT_TYPE_GPIO_EVENT = 0x15, MLX5_EVENT_TYPE_PORT_MODULE_EVENT = 0x16, MLX5_EVENT_TYPE_REMOTE_CONFIG = 0x19, + MLX5_EVENT_TYPE_GENERAL_EVENT = 0x22, MLX5_EVENT_TYPE_PPS_EVENT = 0x25, MLX5_EVENT_TYPE_DB_BF_CONGESTION = 0x1a, @@ -304,6 +305,10 @@ enum mlx5_event { MLX5_EVENT_TYPE_FPGA_ERROR = 0x20, }; +enum { + MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT = 0x1, +}; + enum { MLX5_PORT_CHANGE_SUBTYPE_DOWN = 1, MLX5_PORT_CHANGE_SUBTYPE_ACTIVE = 4, diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 54221be5f69e..758ef40f9316 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -192,6 +192,7 @@ enum mlx5_dev_event { MLX5_DEV_EVENT_GUID_CHANGE, MLX5_DEV_EVENT_CLIENT_REREG, MLX5_DEV_EVENT_PPS, + MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT, }; enum mlx5_port_status { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 40c05d29d5bb..4bc57647fad8 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -874,7 +874,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 max_tc[0x4]; u8 reserved_at_1d0[0x1]; u8 dcbx[0x1]; - u8 reserved_at_1d2[0x3]; + u8 general_notification_event[0x1]; + u8 reserved_at_1d3[0x2]; u8 fpga[0x1]; u8 rol_s[0x1]; u8 rol_g[0x1]; -- cgit v1.2.3-71-gd317 From 03404e8ae652e02a5e3388224836cef53d7a0988 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 30 May 2017 10:29:13 +0300 Subject: IB/mlx5: Add support to dropless RQ RQs that were configured for "delay drop" will prevent packet drops when their WQEs are depleted. Marking an RQ to be drop-less is done by setting delay_drop_en in RQ context using CREATE_RQ command. Since this feature is globally activated/deactivated by using the SET_DELAY_DROP command on all the marked RQs, we activated/deactivated it according to the number of RQs with 'delay_drop' enabled. When timeout is expired, then the feature is deactivated. Therefore the driver handles the delay drop timeout event and reactivate it. Signed-off-by: Maor Gottlieb Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 60 +++++++++++++++++++++++++++++++++--- drivers/infiniband/hw/mlx5/mlx5_ib.h | 19 ++++++++++++ drivers/infiniband/hw/mlx5/qp.c | 37 ++++++++++++++++++++++ include/linux/mlx5/mlx5_ifc.h | 2 +- 4 files changed, 113 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index a903728627f1..ad4b12decc23 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -697,6 +697,10 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->device_cap_flags |= IB_DEVICE_UD_TSO; } + if (MLX5_CAP_GEN(dev->mdev, rq_delay_drop) && + MLX5_CAP_GEN(dev->mdev, general_notification_event)) + props->raw_packet_caps |= IB_RAW_PACKET_CAP_DELAY_DROP; + if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) && MLX5_CAP_ETH(dev->mdev, scatter_fcs)) { /* Legacy bit to support old userspace libraries */ @@ -2752,6 +2756,24 @@ static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev) spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags); } +static void delay_drop_handler(struct work_struct *work) +{ + int err; + struct mlx5_ib_delay_drop *delay_drop = + container_of(work, struct mlx5_ib_delay_drop, + delay_drop_work); + + mutex_lock(&delay_drop->lock); + err = mlx5_core_set_delay_drop(delay_drop->dev->mdev, + delay_drop->timeout); + if (err) { + mlx5_ib_warn(delay_drop->dev, "Failed to set delay drop, timeout=%u\n", + delay_drop->timeout); + delay_drop->activate = false; + } + mutex_unlock(&delay_drop->lock); +} + static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, enum mlx5_dev_event event, unsigned long param) { @@ -2804,8 +2826,11 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, ibev.event = IB_EVENT_CLIENT_REREGISTER; port = (u8)param; break; + case MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT: + schedule_work(&ibdev->delay_drop.delay_drop_work); + goto out; default: - return; + goto out; } ibev.device = &ibdev->ib_dev; @@ -2813,7 +2838,7 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, if (port < 1 || port > ibdev->num_ports) { mlx5_ib_warn(ibdev, "warning: event on port %d\n", port); - return; + goto out; } if (ibdev->ib_active) @@ -2821,6 +2846,9 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, if (fatal) ibdev->ib_active = false; + +out: + return; } static int set_has_smi_cap(struct mlx5_ib_dev *dev) @@ -3623,6 +3651,26 @@ mlx5_ib_alloc_rdma_netdev(struct ib_device *hca, return netdev; } +static void cancel_delay_drop(struct mlx5_ib_dev *dev) +{ + if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP)) + return; + + cancel_work_sync(&dev->delay_drop.delay_drop_work); +} + +static void init_delay_drop(struct mlx5_ib_dev *dev) +{ + if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP)) + return; + + mutex_init(&dev->delay_drop.lock); + dev->delay_drop.dev = dev; + dev->delay_drop.activate = false; + dev->delay_drop.timeout = MLX5_MAX_DELAY_DROP_TIMEOUT_MS * 1000; + INIT_WORK(&dev->delay_drop.delay_drop_work, delay_drop_handler); +} + static void *mlx5_ib_add(struct mlx5_core_dev *mdev) { struct mlx5_ib_dev *dev; @@ -3862,11 +3910,13 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) if (err) goto err_dev; + init_delay_drop(dev); + for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) { err = device_create_file(&dev->ib_dev.dev, mlx5_class_attributes[i]); if (err) - goto err_umrc; + goto err_delay_drop; } if ((MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && @@ -3877,7 +3927,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) return dev; -err_umrc: +err_delay_drop: + cancel_delay_drop(dev); destroy_umrc_res(dev); err_dev: @@ -3924,6 +3975,7 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) struct mlx5_ib_dev *dev = context; enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1); + cancel_delay_drop(dev); mlx5_remove_netdev_notifier(dev); ib_unregister_device(&dev->ib_dev); mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index f0682f383b52..097f12dc65a6 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -247,6 +247,10 @@ struct mlx5_ib_wq { void *qend; }; +enum mlx5_ib_wq_flags { + MLX5_IB_WQ_FLAGS_DELAY_DROP = 0x1, +}; + struct mlx5_ib_rwq { struct ib_wq ibwq; struct mlx5_core_qp core_qp; @@ -264,6 +268,7 @@ struct mlx5_ib_rwq { u32 wqe_count; u32 wqe_shift; int wq_sig; + u32 create_flags; /* Use enum mlx5_ib_wq_flags */ }; enum { @@ -652,6 +657,19 @@ struct mlx5_ib_dbg_cc_params { struct mlx5_ib_dbg_param params[MLX5_IB_DBG_CC_MAX]; }; +enum { + MLX5_MAX_DELAY_DROP_TIMEOUT_MS = 100, +}; + +struct mlx5_ib_delay_drop { + struct mlx5_ib_dev *dev; + struct work_struct delay_drop_work; + /* serialize setting of delay drop */ + struct mutex lock; + u32 timeout; + bool activate; +}; + struct mlx5_ib_dev { struct ib_device ib_dev; struct mlx5_core_dev *mdev; @@ -688,6 +706,7 @@ struct mlx5_ib_dev { struct mlx5_ib_port *port; struct mlx5_sq_bfreg bfreg; struct mlx5_sq_bfreg fp_bfreg; + struct mlx5_ib_delay_drop delay_drop; struct mlx5_ib_dbg_cc_params *dbg_cc_params; /* protect the user_td */ diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 0889ff367c86..939553d5c25f 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4597,6 +4597,24 @@ static void mlx5_ib_wq_event(struct mlx5_core_qp *core_qp, int type) } } +static int set_delay_drop(struct mlx5_ib_dev *dev) +{ + int err = 0; + + mutex_lock(&dev->delay_drop.lock); + if (dev->delay_drop.activate) + goto out; + + err = mlx5_core_set_delay_drop(dev->mdev, dev->delay_drop.timeout); + if (err) + goto out; + + dev->delay_drop.activate = true; +out: + mutex_unlock(&dev->delay_drop.lock); + return err; +} + static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd, struct ib_wq_init_attr *init_attr) { @@ -4651,9 +4669,28 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd, } MLX5_SET(rqc, rqc, scatter_fcs, 1); } + if (init_attr->create_flags & IB_WQ_FLAGS_DELAY_DROP) { + if (!(dev->ib_dev.attrs.raw_packet_caps & + IB_RAW_PACKET_CAP_DELAY_DROP)) { + mlx5_ib_dbg(dev, "Delay drop is not supported\n"); + err = -EOPNOTSUPP; + goto out; + } + MLX5_SET(rqc, rqc, delay_drop_en, 1); + } rq_pas0 = (__be64 *)MLX5_ADDR_OF(wq, wq, pas); mlx5_ib_populate_pas(dev, rwq->umem, rwq->page_shift, rq_pas0, 0); err = mlx5_core_create_rq_tracked(dev->mdev, in, inlen, &rwq->core_qp); + if (!err && init_attr->create_flags & IB_WQ_FLAGS_DELAY_DROP) { + err = set_delay_drop(dev); + if (err) { + mlx5_ib_warn(dev, "Failed to enable delay drop err=%d\n", + err); + mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp); + } else { + rwq->create_flags |= MLX5_IB_WQ_FLAGS_DELAY_DROP; + } + } out: kvfree(in); return err; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 4bc57647fad8..f350688a12fa 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -2519,7 +2519,7 @@ enum { struct mlx5_ifc_rqc_bits { u8 rlky[0x1]; - u8 reserved_at_1[0x1]; + u8 delay_drop_en[0x1]; u8 scatter_fcs[0x1]; u8 vsd[0x1]; u8 mem_rq_type[0x4]; -- cgit v1.2.3-71-gd317 From 4ce749bd94f697772ac2be4fb7e7a92726e94bfb Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 8 Jun 2017 16:15:10 +0300 Subject: net/mlx5: Report enhanced capabilities for IPoIB Report 'ipoib_enhanced_offloads' capabilities from the core layer, it will be used in the next patch from this series. Signed-off-by: Yishai Hadas Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/net/ethernet/mellanox/mlx5/core/fw.c | 6 ++++++ include/linux/mlx5/device.h | 6 +++++- 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index fa33d59ab485..2c71557d1cee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -120,6 +120,12 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) return err; } + if (MLX5_CAP_GEN(dev, ipoib_enhanced_offloads)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_IPOIB_ENHANCED_OFFLOADS); + if (err) + return err; + } + if (MLX5_CAP_GEN(dev, pg)) { err = mlx5_core_get_caps(dev, MLX5_CAP_ODP); if (err) diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index a47b9ab9f2c9..c13d71deaeca 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -973,7 +973,7 @@ enum mlx5_cap_type { MLX5_CAP_ATOMIC, MLX5_CAP_ROCE, MLX5_CAP_IPOIB_OFFLOADS, - MLX5_CAP_EOIB_OFFLOADS, + MLX5_CAP_IPOIB_ENHANCED_OFFLOADS, MLX5_CAP_FLOW_TABLE, MLX5_CAP_ESWITCH_FLOW_TABLE, MLX5_CAP_ESWITCH, @@ -1016,6 +1016,10 @@ enum mlx5_mcam_feature_groups { MLX5_GET(per_protocol_networking_offload_caps,\ mdev->caps.hca_max[MLX5_CAP_ETHERNET_OFFLOADS], cap) +#define MLX5_CAP_IPOIB_ENHANCED(mdev, cap) \ + MLX5_GET(per_protocol_networking_offload_caps,\ + mdev->caps.hca_cur[MLX5_CAP_IPOIB_ENHANCED_OFFLOADS], cap) + #define MLX5_CAP_ROCE(mdev, cap) \ MLX5_GET(roce_cap, mdev->caps.hca_cur[MLX5_CAP_ROCE], cap) -- cgit v1.2.3-71-gd317 From 3fffc82ad6c78fcc9d5d4eca089f00db14ab0358 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 12 Jun 2017 10:36:16 +0300 Subject: IB/mlx5: Fix existence check for extended address vector The extended address vector is the highest bit in be32 variable, but it was compared with the lowest. This patch fixes the endianness of that check and removes already declared define. Fixes: 17d2f88f92ce ("IB/mlx5: Add ODP atomics support") Reviewed-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/odp.c | 2 +- include/linux/mlx5/qp.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index ae0746754008..3d701c7a4c91 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -939,7 +939,7 @@ static int mlx5_ib_mr_initiator_pfault_handler( if (qp->ibqp.qp_type != IB_QPT_RC) { av = *wqe; - if (av->dqp_dct & be32_to_cpu(MLX5_WQE_AV_EXT)) + if (av->dqp_dct & cpu_to_be32(MLX5_EXTENDED_UD_AV)) *wqe += sizeof(struct mlx5_av); else *wqe += sizeof(struct mlx5_base_av); diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index fff4ec13f620..66d19b611fe4 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -212,7 +212,6 @@ struct mlx5_wqe_ctrl_seg { #define MLX5_WQE_CTRL_OPCODE_MASK 0xff #define MLX5_WQE_CTRL_WQE_INDEX_MASK 0x00ffff00 #define MLX5_WQE_CTRL_WQE_INDEX_SHIFT 8 -#define MLX5_WQE_AV_EXT 0x80000000 enum { MLX5_ETH_WQE_L3_INNER_CSUM = 1 << 4, -- cgit v1.2.3-71-gd317 From 58dcb60a226ee48cc66c96c27b751f06ec2bc5a9 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 19 Jun 2017 07:19:37 +0300 Subject: IB/mlx5: Expose extended error counters This patch adds below requester and responder side error counters, which will be exposed by hardware counters interface and are supported as part of query Q counters command extension. +---------------------------+-------------------------------------+ | Name | Description | |---------------------------+-------------------------------------| |resp_local_length_error | Number of times responder detected | | | local length errors | |---------------------------+-------------------------------------| |resp_cqe_error | Number of CQEs completed with error | | | at responder | |---------------------------+-------------------------------------| |req_cqe_error | Number of CQEs completed with error | | | at requester | |---------------------------+-------------------------------------| |req_remote_invalid_request | Number of times requester detected | | | remote invalid request error | |---------------------------+-------------------------------------| |req_remote_access_error | Number of times requester detected | | | remote access error | |---------------------------+-------------------------------------| |resp_remote_access_error | Number of times responder detected | | | remote access error | |---------------------------+-------------------------------------| |resp_cqe_flush_error | Number of CQEs completed with | | | flushed with error at responder | |---------------------------+-------------------------------------| |req_cqe_flush_error | Number of CQEs completed with | | | flushed with error at requester | +---------------------------+-------------------------------------+ Signed-off-by: Parav Pandit Reviewed-by: Daniel Jurgens Reviewed-by: Eli Cohen Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 22 ++++++++++++++++++++ include/linux/mlx5/mlx5_ifc.h | 44 +++++++++++++++++++++++++++++++++++++-- 2 files changed, 64 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 2757d445b042..9dd9759459fb 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3432,6 +3432,17 @@ static const struct mlx5_ib_counter cong_cnts[] = { INIT_CONG_COUNTER(np_cnp_sent), }; +static const struct mlx5_ib_counter extended_err_cnts[] = { + INIT_Q_COUNTER(resp_local_length_error), + INIT_Q_COUNTER(resp_cqe_error), + INIT_Q_COUNTER(req_cqe_error), + INIT_Q_COUNTER(req_remote_invalid_request), + INIT_Q_COUNTER(req_remote_access_errors), + INIT_Q_COUNTER(resp_remote_access_errors), + INIT_Q_COUNTER(resp_cqe_flush_error), + INIT_Q_COUNTER(req_cqe_flush_error), +}; + static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev) { unsigned int i; @@ -3456,6 +3467,10 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev, if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) num_counters += ARRAY_SIZE(retrans_q_cnts); + + if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) + num_counters += ARRAY_SIZE(extended_err_cnts); + cnts->num_q_counters = num_counters; if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { @@ -3505,6 +3520,13 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev, } } + if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) { + for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) { + names[j] = extended_err_cnts[i].name; + offsets[j] = extended_err_cnts[i].offset; + } + } + if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) { names[j] = cong_cnts[i].name; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index f350688a12fa..5bae70eb25af 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -858,7 +858,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 pcam_reg[0x1]; u8 local_ca_ack_delay[0x5]; u8 port_module_event[0x1]; - u8 reserved_at_1b1[0x1]; + u8 enhanced_error_q_counters[0x1]; u8 ports_check[0x1]; u8 reserved_at_1b3[0x1]; u8 disable_link_up[0x1]; @@ -3953,7 +3953,47 @@ struct mlx5_ifc_query_q_counter_out_bits { u8 local_ack_timeout_err[0x20]; - u8 reserved_at_320[0x4e0]; + u8 reserved_at_320[0xa0]; + + u8 resp_local_length_error[0x20]; + + u8 req_local_length_error[0x20]; + + u8 resp_local_qp_error[0x20]; + + u8 local_operation_error[0x20]; + + u8 resp_local_protection[0x20]; + + u8 req_local_protection[0x20]; + + u8 resp_cqe_error[0x20]; + + u8 req_cqe_error[0x20]; + + u8 req_mw_binding[0x20]; + + u8 req_bad_response[0x20]; + + u8 req_remote_invalid_request[0x20]; + + u8 resp_remote_invalid_request[0x20]; + + u8 req_remote_access_errors[0x20]; + + u8 resp_remote_access_errors[0x20]; + + u8 req_remote_operation_errors[0x20]; + + u8 req_transport_retries_exceeded[0x20]; + + u8 cq_overflow[0x20]; + + u8 resp_cqe_flush_error[0x20]; + + u8 req_cqe_flush_error[0x20]; + + u8 reserved_at_620[0x1e0]; }; struct mlx5_ifc_query_q_counter_in_bits { -- cgit v1.2.3-71-gd317 From f3301870161ca293cd14b20a802c5646da02407f Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Wed, 21 Jun 2017 09:29:36 +0300 Subject: (IB, net)/mlx4: Add resource utilization support Adding visibility of resource usage of QPs, CQs and counters used by virtual functions. This feature will be used to give the PF administrator more data while debugging VF status. Usage info was added to ALLOC_RES command, to notify the PF if the resource which is being reserved or allocated for the VF will be used by kernel driver or by user verbs. Updated reservation and allocation functions of QP, CQ and counter with additional usage parameter. Signed-off-by: Moshe Shemesh Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/cq.c | 2 ++ drivers/infiniband/hw/mlx4/main.c | 6 ++++-- drivers/infiniband/hw/mlx4/qp.c | 13 +++++++++---- drivers/net/ethernet/mellanox/mlx4/cq.c | 7 ++++--- drivers/net/ethernet/mellanox/mlx4/en_cq.c | 1 + drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 3 ++- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 6 ++++-- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 3 ++- drivers/net/ethernet/mellanox/mlx4/main.c | 7 ++++--- drivers/net/ethernet/mellanox/mlx4/qp.c | 5 +++-- include/linux/mlx4/device.h | 12 ++++++++++-- 11 files changed, 45 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index ff931c580557..95382faa7ad1 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -218,6 +218,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, goto err_mtt; uar = &to_mucontext(context)->uar; + cq->mcq.usage = MLX4_RES_USAGE_USER_VERBS; } else { err = mlx4_db_alloc(dev->dev, &cq->db, 1); if (err) @@ -233,6 +234,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, goto err_db; uar = &dev->priv_uar; + cq->mcq.usage = MLX4_RES_USAGE_DRIVER; } if (dev->eq_table) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index e8c290edb1e1..0944e224c0df 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2779,7 +2779,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) allocated = 0; if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i + 1) == IB_LINK_LAYER_ETHERNET) { - err = mlx4_counter_alloc(ibdev->dev, &counter_index); + err = mlx4_counter_alloc(ibdev->dev, &counter_index, + MLX4_RES_USAGE_DRIVER); /* if failed to allocate a new counter, use default */ if (err) counter_index = @@ -2834,7 +2835,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->steer_qpn_count = MLX4_IB_UC_MAX_NUM_QPS; err = mlx4_qp_reserve_range(dev, ibdev->steer_qpn_count, MLX4_IB_UC_STEER_QPN_ALIGN, - &ibdev->steer_qpn_base, 0); + &ibdev->steer_qpn_base, 0, + MLX4_RES_USAGE_DRIVER); if (err) goto err_counter; diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index d1caa39a3943..247b9132e9de 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -769,6 +769,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (err) goto err_mtt; } + qp->mqp.usage = MLX4_RES_USAGE_USER_VERBS; } else { err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, qp_has_rq(init_attr), qp, 0); @@ -841,6 +842,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, err = -ENOMEM; goto err_wrid; } + qp->mqp.usage = MLX4_RES_USAGE_DRIVER; } if (sqpn) { @@ -860,13 +862,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, (init_attr->cap.max_send_wr ? MLX4_RESERVE_ETH_BF_QP : 0) | (init_attr->cap.max_recv_wr ? - MLX4_RESERVE_A0_QP : 0)); + MLX4_RESERVE_A0_QP : 0), + qp->mqp.usage); else if (qp->flags & MLX4_IB_QP_NETIF) err = mlx4_ib_steer_qp_alloc(dev, 1, &qpn); else err = mlx4_qp_reserve_range(dev->dev, 1, 1, - &qpn, 0); + &qpn, 0, qp->mqp.usage); if (err) goto err_proxy; } @@ -1218,7 +1221,9 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, if (udata) return ERR_PTR(-EINVAL); if (init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI) { - int res = mlx4_qp_reserve_range(to_mdev(pd->device)->dev, 1, 1, &sqpn, 0); + int res = mlx4_qp_reserve_range(to_mdev(pd->device)->dev, + 1, 1, &sqpn, 0, + MLX4_RES_USAGE_DRIVER); if (res) return ERR_PTR(res); @@ -1581,7 +1586,7 @@ static int create_qp_lb_counter(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) !(dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_LB_SRC_CHK)) return 0; - err = mlx4_counter_alloc(dev->dev, &tmp_idx); + err = mlx4_counter_alloc(dev->dev, &tmp_idx, MLX4_RES_USAGE_DRIVER); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c index c56a511b918e..72eb50cd5ecd 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/cq.c @@ -241,13 +241,14 @@ err_out: return err; } -static int mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn) +static int mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn, u8 usage) { + u32 in_modifier = RES_CQ | (((u32)usage & 3) << 30); u64 out_param; int err; if (mlx4_is_mfunc(dev)) { - err = mlx4_cmd_imm(dev, 0, &out_param, RES_CQ, + err = mlx4_cmd_imm(dev, 0, &out_param, in_modifier, RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); if (err) @@ -303,7 +304,7 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, cq->vector = vector; - err = mlx4_cq_alloc_icm(dev, &cq->cqn); + err = mlx4_cq_alloc_icm(dev, &cq->cqn, cq->usage); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index 85fe17e4dcfb..f849eec21824 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -140,6 +140,7 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, (cq->type == RX && priv->hwtstamp_config.rx_filter)) timestamp_en = 1; + cq->mcq.usage = MLX4_RES_USAGE_DRIVER; err = mlx4_cq_alloc(mdev->dev, cq->size, &cq->wqres.mtt, &mdev->priv_uar, cq->wqres.db.dma, &cq->mcq, cq->vector, 0, timestamp_en); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 3a291fc1780a..e3e6d9fa69fd 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -651,7 +651,8 @@ static int mlx4_en_get_qp(struct mlx4_en_priv *priv) return 0; } - err = mlx4_qp_reserve_range(dev, 1, 1, qpn, MLX4_RESERVE_A0_QP); + err = mlx4_qp_reserve_range(dev, 1, 1, qpn, MLX4_RESERVE_A0_QP, + MLX4_RES_USAGE_DRIVER); en_dbg(DRV, priv, "Reserved qp %d\n", *qpn); if (err) { en_err(priv, "Failed to reserve qp for mac registration\n"); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 436f7689a032..ad1ffd5857cb 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -1081,7 +1081,8 @@ int mlx4_en_create_drop_qp(struct mlx4_en_priv *priv) u32 qpn; err = mlx4_qp_reserve_range(priv->mdev->dev, 1, 1, &qpn, - MLX4_RESERVE_A0_QP); + MLX4_RESERVE_A0_QP, + MLX4_RES_USAGE_DRIVER); if (err) { en_err(priv, "Failed reserving drop qpn\n"); return err; @@ -1127,7 +1128,8 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv) flags = priv->rx_ring_num == 1 ? MLX4_RESERVE_A0_QP : 0; err = mlx4_qp_reserve_range(mdev->dev, priv->rx_ring_num, priv->rx_ring_num, - &rss_map->base_qpn, flags); + &rss_map->base_qpn, flags, + MLX4_RES_USAGE_DRIVER); if (err) { en_err(priv, "Failed reserving %d qps\n", priv->rx_ring_num); return err; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 73faa3d77921..a81db2582555 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -105,7 +105,8 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, (unsigned long long) ring->sp_wqres.buf.direct.map); err = mlx4_qp_reserve_range(mdev->dev, 1, 1, &ring->qpn, - MLX4_RESERVE_ETH_BF_QP); + MLX4_RESERVE_ETH_BF_QP, + MLX4_RES_USAGE_DRIVER); if (err) { en_err(priv, "failed reserving qp for TX ring\n"); goto err_hwq_res; diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index a27c9c13a36e..fb2591d0e735 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -2475,7 +2475,7 @@ static int mlx4_allocate_default_counters(struct mlx4_dev *dev) priv->def_counter[port] = -1; for (port = 0; port < dev->caps.num_ports; port++) { - err = mlx4_counter_alloc(dev, &idx); + err = mlx4_counter_alloc(dev, &idx, MLX4_RES_USAGE_DRIVER); if (!err || err == -ENOSPC) { priv->def_counter[port] = idx; @@ -2517,13 +2517,14 @@ int __mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx) return 0; } -int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx) +int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx, u8 usage) { + u32 in_modifier = RES_COUNTER | (((u32)usage & 3) << 30); u64 out_param; int err; if (mlx4_is_mfunc(dev)) { - err = mlx4_cmd_imm(dev, 0, &out_param, RES_COUNTER, + err = mlx4_cmd_imm(dev, 0, &out_param, in_modifier, RES_OP_RESERVE, MLX4_CMD_ALLOC_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); if (!err) diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index 26747212526b..5e5b4475b85e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -245,8 +245,9 @@ int __mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, } int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, - int *base, u8 flags) + int *base, u8 flags, u8 usage) { + u32 in_modifier = RES_QP | (((u32)usage & 3) << 30); u64 in_param = 0; u64 out_param; int err; @@ -258,7 +259,7 @@ int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, set_param_l(&in_param, (((u32)flags) << 24) | (u32)cnt); set_param_h(&in_param, align); err = mlx4_cmd_imm(dev, in_param, &out_param, - RES_QP, RES_OP_RESERVE, + in_modifier, RES_OP_RESERVE, MLX4_CMD_ALLOC_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); if (err) diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index aad5d81dfb44..3607da001ad3 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -428,6 +428,12 @@ enum mlx4_steer_type { MLX4_NUM_STEERS }; +enum mlx4_resource_usage { + MLX4_RES_USAGE_NONE, + MLX4_RES_USAGE_DRIVER, + MLX4_RES_USAGE_USER_VERBS, +}; + enum { MLX4_NUM_FEXCH = 64 * 1024, }; @@ -748,6 +754,7 @@ struct mlx4_cq { } tasklet_ctx; int reset_notify_added; struct list_head reset_notify; + u8 usage; }; struct mlx4_qp { @@ -757,6 +764,7 @@ struct mlx4_qp { atomic_t refcount; struct completion free; + u8 usage; }; struct mlx4_srq { @@ -1120,7 +1128,7 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt, unsigned vector, int collapsed, int timestamp_en); void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq); int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, - int *base, u8 flags); + int *base, u8 flags, u8 usage); void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt); int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp); @@ -1417,7 +1425,7 @@ int mlx4_get_phys_port_id(struct mlx4_dev *dev); int mlx4_wol_read(struct mlx4_dev *dev, u64 *config, int port); int mlx4_wol_write(struct mlx4_dev *dev, u64 config, int port); -int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx); +int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx, u8 usage); void mlx4_counter_free(struct mlx4_dev *dev, u32 idx); int mlx4_get_default_counter_index(struct mlx4_dev *dev, int port); -- cgit v1.2.3-71-gd317 From 6ce31263c9758c85e7d326cdc65c31fc521dc63d Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 20 Jul 2017 14:51:07 +0200 Subject: dma-fence: Don't BUG_ON when not absolutely needed It makes debugging a massive pain. Signed-off-by: Daniel Vetter Cc: Sumit Semwal Cc: Gustavo Padovan Cc: linux-media@vger.kernel.org Cc: linaro-mm-sig@lists.linaro.org Reviewed-by: Lucas Stach Acked-by: Sumit Semwal Acked-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20170720125107.26693-1-daniel.vetter@ffwll.ch Signed-off-by: Daniel Vetter --- drivers/dma-buf/dma-fence.c | 4 ++-- include/linux/dma-fence.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c index 57da14c15987..eee8377b8b3a 100644 --- a/drivers/dma-buf/dma-fence.c +++ b/drivers/dma-buf/dma-fence.c @@ -48,7 +48,7 @@ static atomic64_t dma_fence_context_counter = ATOMIC64_INIT(0); */ u64 dma_fence_context_alloc(unsigned num) { - BUG_ON(!num); + WARN_ON(!num); return atomic64_add_return(num, &dma_fence_context_counter) - num; } EXPORT_SYMBOL(dma_fence_context_alloc); @@ -177,7 +177,7 @@ void dma_fence_release(struct kref *kref) trace_dma_fence_destroy(fence); - BUG_ON(!list_empty(&fence->cb_list)); + WARN_ON(!list_empty(&fence->cb_list)); if (fence->ops->release) fence->ops->release(fence); diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index ac5987989e9a..c01506e8609e 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -429,8 +429,8 @@ int dma_fence_get_status(struct dma_fence *fence); static inline void dma_fence_set_error(struct dma_fence *fence, int error) { - BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)); - BUG_ON(error >= 0 || error < -MAX_ERRNO); + WARN_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)); + WARN_ON(error >= 0 || error < -MAX_ERRNO); fence->error = error; } -- cgit v1.2.3-71-gd317 From db1fc97ca0c0d3fdeeadf314d99a26188438940a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Jul 2017 22:29:51 +0100 Subject: dma-buf/sync_file: Allow multiple sync_files to wrap a single dma-fence Up until recently sync_file were create to export a single dma-fence to userspace, and so we could canabalise a bit insie dma-fence to mark whether or not we had enable polling for the sync_file itself. However, with the advent of syncobj, we do allow userspace to create multiple sync_files for a single dma-fence. (Similarly, that the sw-sync validation framework also started returning multiple sync-files wrapping a single dma-fence for a syncpt also triggering the problem.) This patch reverts my suggestion in commit e24165537312 ("dma-buf/sync_file: only enable fence signalling on poll()") to use a single bit in the shared dma-fence and restores the sync_file->flags for tracking the bits individually. Reported-by: Gustavo Padovan Fixes: f1e8c67123cf ("dma-buf/sw-sync: Use an rbtree to sort fences in the timeline") Fixes: e9083420bbac ("drm: introduce sync objects (v4)") Signed-off-by: Chris Wilson Cc: Sumit Semwal Cc: Sean Paul Cc: Gustavo Padovan Cc: dri-devel@lists.freedesktop.org Cc: # v4.13-rc1+ Signed-off-by: Gustavo Padovan Link: http://patchwork.freedesktop.org/patch/msgid/20170728212951.7818-1-chris@chris-wilson.co.uk --- drivers/dma-buf/sync_file.c | 5 +++-- include/linux/sync_file.h | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c index d7e219d2669d..66fb40d0ebdb 100644 --- a/drivers/dma-buf/sync_file.c +++ b/drivers/dma-buf/sync_file.c @@ -304,7 +304,7 @@ static int sync_file_release(struct inode *inode, struct file *file) { struct sync_file *sync_file = file->private_data; - if (test_bit(POLL_ENABLED, &sync_file->fence->flags)) + if (test_bit(POLL_ENABLED, &sync_file->flags)) dma_fence_remove_callback(sync_file->fence, &sync_file->cb); dma_fence_put(sync_file->fence); kfree(sync_file); @@ -318,7 +318,8 @@ static unsigned int sync_file_poll(struct file *file, poll_table *wait) poll_wait(file, &sync_file->wq, wait); - if (!test_and_set_bit(POLL_ENABLED, &sync_file->fence->flags)) { + if (list_empty(&sync_file->cb.node) && + !test_and_set_bit(POLL_ENABLED, &sync_file->flags)) { if (dma_fence_add_callback(sync_file->fence, &sync_file->cb, fence_check_cb_func) < 0) wake_up_all(&sync_file->wq); diff --git a/include/linux/sync_file.h b/include/linux/sync_file.h index 5726107963b2..0ad87c434ae6 100644 --- a/include/linux/sync_file.h +++ b/include/linux/sync_file.h @@ -43,12 +43,13 @@ struct sync_file { #endif wait_queue_head_t wq; + unsigned long flags; struct dma_fence *fence; struct dma_fence_cb cb; }; -#define POLL_ENABLED DMA_FENCE_FLAG_USER_BITS +#define POLL_ENABLED 0 struct sync_file *sync_file_create(struct dma_fence *fence); struct dma_fence *sync_file_get_fence(int fd); -- cgit v1.2.3-71-gd317 From 78249c4215840edb95447ec6867b69a7ac1d7a0d Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 13 Jul 2017 11:09:38 +0300 Subject: mlx5: convert to generic pci_alloc_irq_vectors Now that we have a generic code to allocate an array of irq vectors and even correctly spread their affinity, correctly handle cpu hotplug events and more, were much better off using it. Reviewed-by: Christoph Hellwig Acked-by: Leon Romanovsky Signed-off-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 9 ++--- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/health.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/main.c | 39 +++++++++------------- .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 1 - include/linux/mlx5/driver.h | 1 - 7 files changed, 20 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 1eac5003084f..d0e572df3a1b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -397,7 +397,7 @@ static void mlx5e_enable_async_events(struct mlx5e_priv *priv) static void mlx5e_disable_async_events(struct mlx5e_priv *priv) { clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state); - synchronize_irq(mlx5_get_msix_vec(priv->mdev, MLX5_EQ_VEC_ASYNC)); + synchronize_irq(pci_irq_vector(priv->mdev->pdev, MLX5_EQ_VEC_ASYNC)); } static inline int mlx5e_get_wqe_mtt_sz(void) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index af51a5d2b912..8a09d7197d70 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -585,7 +585,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, name, pci_name(dev->pdev)); eq->eqn = MLX5_GET(create_eq_out, out, eq_number); - eq->irqn = priv->msix_arr[vecidx].vector; + eq->irqn = pci_irq_vector(dev->pdev, vecidx); eq->dev = dev; eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET; err = request_irq(eq->irqn, handler, 0, @@ -620,7 +620,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, return 0; err_irq: - free_irq(priv->msix_arr[vecidx].vector, eq); + free_irq(eq->irqn, eq); err_eq: mlx5_cmd_destroy_eq(dev, eq->eqn); @@ -661,11 +661,6 @@ int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) } EXPORT_SYMBOL_GPL(mlx5_destroy_unmap_eq); -u32 mlx5_get_msix_vec(struct mlx5_core_dev *dev, int vecidx) -{ - return dev->priv.msix_arr[MLX5_EQ_VEC_ASYNC].vector; -} - int mlx5_eq_init(struct mlx5_core_dev *dev) { int err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 89bfda419efe..1ce2543e3889 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1585,7 +1585,7 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) /* Mark this vport as disabled to discard new events */ vport->enabled = false; - synchronize_irq(mlx5_get_msix_vec(esw->dev, MLX5_EQ_VEC_ASYNC)); + synchronize_irq(pci_irq_vector(esw->dev->pdev, MLX5_EQ_VEC_ASYNC)); /* Wait for current already scheduled events to complete */ flush_workqueue(esw->work_queue); /* Disable events from this vport */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 4b6b03d6297f..8aea0a065e56 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -81,7 +81,7 @@ static void trigger_cmd_completions(struct mlx5_core_dev *dev) u64 vector; /* wait for pending handlers to complete */ - synchronize_irq(dev->priv.msix_arr[MLX5_EQ_VEC_CMD].vector); + synchronize_irq(pci_irq_vector(dev->pdev, MLX5_EQ_VEC_CMD)); spin_lock_irqsave(&dev->cmd.alloc_lock, flags); vector = ~dev->cmd.bitmask & ((1ul << (1 << dev->cmd.log_sz)) - 1); if (!vector) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index c065132b956d..d2fd55e5c68b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -312,13 +312,12 @@ static void release_bar(struct pci_dev *pdev) pci_release_regions(pdev); } -static int mlx5_enable_msix(struct mlx5_core_dev *dev) +static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev) { struct mlx5_priv *priv = &dev->priv; struct mlx5_eq_table *table = &priv->eq_table; int num_eqs = 1 << MLX5_CAP_GEN(dev, log_max_eq); int nvec; - int i; nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + MLX5_EQ_VEC_COMP_BASE; @@ -326,17 +325,13 @@ static int mlx5_enable_msix(struct mlx5_core_dev *dev) if (nvec <= MLX5_EQ_VEC_COMP_BASE) return -ENOMEM; - priv->msix_arr = kcalloc(nvec, sizeof(*priv->msix_arr), GFP_KERNEL); - priv->irq_info = kcalloc(nvec, sizeof(*priv->irq_info), GFP_KERNEL); - if (!priv->msix_arr || !priv->irq_info) + if (!priv->irq_info) goto err_free_msix; - for (i = 0; i < nvec; i++) - priv->msix_arr[i].entry = i; - - nvec = pci_enable_msix_range(dev->pdev, priv->msix_arr, - MLX5_EQ_VEC_COMP_BASE + 1, nvec); + nvec = pci_alloc_irq_vectors(dev->pdev, + MLX5_EQ_VEC_COMP_BASE + 1, nvec, + PCI_IRQ_MSIX); if (nvec < 0) return nvec; @@ -346,17 +341,15 @@ static int mlx5_enable_msix(struct mlx5_core_dev *dev) err_free_msix: kfree(priv->irq_info); - kfree(priv->msix_arr); return -ENOMEM; } -static void mlx5_disable_msix(struct mlx5_core_dev *dev) +static void mlx5_free_irq_vectors(struct mlx5_core_dev *dev) { struct mlx5_priv *priv = &dev->priv; - pci_disable_msix(dev->pdev); + pci_free_irq_vectors(dev->pdev); kfree(priv->irq_info); - kfree(priv->msix_arr); } struct mlx5_reg_host_endianness { @@ -615,8 +608,7 @@ u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev) static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) { struct mlx5_priv *priv = &mdev->priv; - struct msix_entry *msix = priv->msix_arr; - int irq = msix[i + MLX5_EQ_VEC_COMP_BASE].vector; + int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i); if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) { mlx5_core_warn(mdev, "zalloc_cpumask_var failed"); @@ -636,8 +628,7 @@ static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i) { struct mlx5_priv *priv = &mdev->priv; - struct msix_entry *msix = priv->msix_arr; - int irq = msix[i + MLX5_EQ_VEC_COMP_BASE].vector; + int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i); irq_set_affinity_hint(irq, NULL); free_cpumask_var(priv->irq_info[i].mask); @@ -760,8 +751,8 @@ static int alloc_comp_eqs(struct mlx5_core_dev *dev) } #ifdef CONFIG_RFS_ACCEL - irq_cpu_rmap_add(dev->rmap, - dev->priv.msix_arr[i + MLX5_EQ_VEC_COMP_BASE].vector); + irq_cpu_rmap_add(dev->rmap, pci_irq_vector(dev->pdev, + MLX5_EQ_VEC_COMP_BASE + i)); #endif snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i); err = mlx5_create_map_eq(dev, eq, @@ -1119,9 +1110,9 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto err_stop_poll; } - err = mlx5_enable_msix(dev); + err = mlx5_alloc_irq_vectors(dev); if (err) { - dev_err(&pdev->dev, "enable msix failed\n"); + dev_err(&pdev->dev, "alloc irq vectors failed\n"); goto err_cleanup_once; } @@ -1220,7 +1211,7 @@ err_put_uars: mlx5_put_uars_page(dev, priv->uar); err_disable_msix: - mlx5_disable_msix(dev); + mlx5_free_irq_vectors(dev); err_cleanup_once: if (boot) @@ -1287,7 +1278,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, free_comp_eqs(dev); mlx5_stop_eqs(dev); mlx5_put_uars_page(dev, priv->uar); - mlx5_disable_msix(dev); + mlx5_free_irq_vectors(dev); if (cleanup) mlx5_cleanup_once(dev); mlx5_stop_health_poll(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 6a3d6bef7dd4..ba1d494b016d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -110,7 +110,6 @@ int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, u32 element_id); int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev); u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev); -u32 mlx5_get_msix_vec(struct mlx5_core_dev *dev, int vecidx); struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn); void mlx5_cq_tasklet_cb(unsigned long data); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index df6ce59a1f95..5bac7f53b4f9 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -597,7 +597,6 @@ struct mlx5_port_module_event_stats { struct mlx5_priv { char name[MLX5_MAX_NAME_LEN]; struct mlx5_eq_table eq_table; - struct msix_entry *msix_arr; struct mlx5_irq_info *irq_info; /* pages stuff */ -- cgit v1.2.3-71-gd317 From a435393acafbf0ecff4deb3e3cb554b34f0d0664 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 13 Jul 2017 11:09:40 +0300 Subject: mlx5: move affinity hints assignments to generic code generic api takes care of spreading affinity similar to what mlx5 open coded (and even handles better asymmetric configurations). Ask the generic API to spread affinity for us, and feed him pre_vectors that do not participate in affinity settings (which is an improvement to what we had before). The affinity assignments should match what mlx5 tried to do earlier but now we do not set affinity to async, cmd and pages dedicated vectors. Also, remove mlx5e_get_cpu and introduce mlx5e_get_node (used for allocation purposes) and mlx5_get_vector_affinity (for indirection table construction) as they provide the needed information. Luckily, we have generic helpers to get cpumask and node given a irq vector. mlx5_get_vector_affinity will be used by mlx5_ib in a subsequent patch. Reviewed-by: Christoph Hellwig Acked-by: Leon Romanovsky Signed-off-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 - drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 45 +++++++------- drivers/net/ethernet/mellanox/mlx5/core/main.c | 75 ++--------------------- include/linux/mlx5/driver.h | 7 ++- 4 files changed, 35 insertions(+), 93 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index e1b7ddfecd01..909123243a85 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -587,7 +587,6 @@ struct mlx5e_channel { struct mlx5_core_dev *mdev; struct mlx5e_tstamp *tstamp; int ix; - int cpu; }; struct mlx5e_channels { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 2c4e41833e55..fb647561c592 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -71,6 +71,11 @@ struct mlx5e_channel_param { struct mlx5e_cq_param icosq_cq; }; +static int mlx5e_get_node(struct mlx5e_priv *priv, int ix) +{ + return pci_irq_get_node(priv->mdev->pdev, MLX5_EQ_VEC_COMP_BASE + ix); +} + static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) { return MLX5_CAP_GEN(mdev, striding_rq) && @@ -444,16 +449,17 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int wq_sz = mlx5_wq_ll_get_size(&rq->wq); int mtt_sz = mlx5e_get_wqe_mtt_sz(); int mtt_alloc = mtt_sz + MLX5_UMR_ALIGN - 1; + int node = mlx5e_get_node(c->priv, c->ix); int i; rq->mpwqe.info = kzalloc_node(wq_sz * sizeof(*rq->mpwqe.info), - GFP_KERNEL, cpu_to_node(c->cpu)); + GFP_KERNEL, node); if (!rq->mpwqe.info) goto err_out; /* We allocate more than mtt_sz as we will align the pointer */ - rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, GFP_KERNEL, - cpu_to_node(c->cpu)); + rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, + GFP_KERNEL, node); if (unlikely(!rq->mpwqe.mtt_no_align)) goto err_free_wqe_info; @@ -561,7 +567,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, int err; int i; - rqp->wq.db_numa_node = cpu_to_node(c->cpu); + rqp->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix); err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->wq, &rq->wq_ctrl); @@ -628,7 +634,8 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, default: /* MLX5_WQ_TYPE_LINKED_LIST */ rq->wqe.frag_info = kzalloc_node(wq_sz * sizeof(*rq->wqe.frag_info), - GFP_KERNEL, cpu_to_node(c->cpu)); + GFP_KERNEL, + mlx5e_get_node(c->priv, c->ix)); if (!rq->wqe.frag_info) { err = -ENOMEM; goto err_rq_wq_destroy; @@ -993,13 +1000,13 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c, sq->uar_map = mdev->mlx5e_res.bfreg.map; sq->min_inline_mode = params->tx_min_inline_mode; - param->wq.db_numa_node = cpu_to_node(c->cpu); + param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix); err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, &sq->wq, &sq->wq_ctrl); if (err) return err; sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; - err = mlx5e_alloc_xdpsq_db(sq, cpu_to_node(c->cpu)); + err = mlx5e_alloc_xdpsq_db(sq, mlx5e_get_node(c->priv, c->ix)); if (err) goto err_sq_wq_destroy; @@ -1047,13 +1054,13 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c, sq->channel = c; sq->uar_map = mdev->mlx5e_res.bfreg.map; - param->wq.db_numa_node = cpu_to_node(c->cpu); + param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix); err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, &sq->wq, &sq->wq_ctrl); if (err) return err; sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; - err = mlx5e_alloc_icosq_db(sq, cpu_to_node(c->cpu)); + err = mlx5e_alloc_icosq_db(sq, mlx5e_get_node(c->priv, c->ix)); if (err) goto err_sq_wq_destroy; @@ -1119,13 +1126,13 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, if (MLX5_IPSEC_DEV(c->priv->mdev)) set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state); - param->wq.db_numa_node = cpu_to_node(c->cpu); + param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix); err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, &sq->wq, &sq->wq_ctrl); if (err) return err; sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; - err = mlx5e_alloc_txqsq_db(sq, cpu_to_node(c->cpu)); + err = mlx5e_alloc_txqsq_db(sq, mlx5e_get_node(c->priv, c->ix)); if (err) goto err_sq_wq_destroy; @@ -1497,8 +1504,8 @@ static int mlx5e_alloc_cq(struct mlx5e_channel *c, struct mlx5_core_dev *mdev = c->priv->mdev; int err; - param->wq.buf_numa_node = cpu_to_node(c->cpu); - param->wq.db_numa_node = cpu_to_node(c->cpu); + param->wq.buf_numa_node = mlx5e_get_node(c->priv, c->ix); + param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix); param->eq_ix = c->ix; err = mlx5e_alloc_cq_common(mdev, param, cq); @@ -1597,11 +1604,6 @@ static void mlx5e_close_cq(struct mlx5e_cq *cq) mlx5e_free_cq(cq); } -static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix) -{ - return cpumask_first(priv->mdev->priv.irq_info[ix].mask); -} - static int mlx5e_open_tx_cqs(struct mlx5e_channel *c, struct mlx5e_params *params, struct mlx5e_channel_param *cparam) @@ -1750,11 +1752,10 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, { struct mlx5e_cq_moder icocq_moder = {0, 0}; struct net_device *netdev = priv->netdev; - int cpu = mlx5e_get_cpu(priv, ix); struct mlx5e_channel *c; int err; - c = kzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu)); + c = kzalloc_node(sizeof(*c), GFP_KERNEL, mlx5e_get_node(priv, ix)); if (!c) return -ENOMEM; @@ -1762,7 +1763,6 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, c->mdev = priv->mdev; c->tstamp = &priv->tstamp; c->ix = ix; - c->cpu = cpu; c->pdev = &priv->mdev->pdev->dev; c->netdev = priv->netdev; c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key); @@ -1848,7 +1848,8 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c) for (tc = 0; tc < c->num_tc; tc++) mlx5e_activate_txqsq(&c->sq[tc]); mlx5e_activate_rq(&c->rq); - netif_set_xps_queue(c->netdev, get_cpu_mask(c->cpu), c->ix); + netif_set_xps_queue(c->netdev, + mlx5_get_vector_affinity(c->priv->mdev, c->ix), c->ix); } static void mlx5e_deactivate_channel(struct mlx5e_channel *c) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index d2fd55e5c68b..e464e8179655 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -316,6 +316,9 @@ static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev) { struct mlx5_priv *priv = &dev->priv; struct mlx5_eq_table *table = &priv->eq_table; + struct irq_affinity irqdesc = { + .pre_vectors = MLX5_EQ_VEC_COMP_BASE, + }; int num_eqs = 1 << MLX5_CAP_GEN(dev, log_max_eq); int nvec; @@ -329,9 +332,10 @@ static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev) if (!priv->irq_info) goto err_free_msix; - nvec = pci_alloc_irq_vectors(dev->pdev, + nvec = pci_alloc_irq_vectors_affinity(dev->pdev, MLX5_EQ_VEC_COMP_BASE + 1, nvec, - PCI_IRQ_MSIX); + PCI_IRQ_MSIX | PCI_IRQ_AFFINITY, + &irqdesc); if (nvec < 0) return nvec; @@ -605,63 +609,6 @@ u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev) return (u64)timer_l | (u64)timer_h1 << 32; } -static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) -{ - struct mlx5_priv *priv = &mdev->priv; - int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i); - - if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) { - mlx5_core_warn(mdev, "zalloc_cpumask_var failed"); - return -ENOMEM; - } - - cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node), - priv->irq_info[i].mask); - - if (IS_ENABLED(CONFIG_SMP) && - irq_set_affinity_hint(irq, priv->irq_info[i].mask)) - mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq); - - return 0; -} - -static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i) -{ - struct mlx5_priv *priv = &mdev->priv; - int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i); - - irq_set_affinity_hint(irq, NULL); - free_cpumask_var(priv->irq_info[i].mask); -} - -static int mlx5_irq_set_affinity_hints(struct mlx5_core_dev *mdev) -{ - int err; - int i; - - for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) { - err = mlx5_irq_set_affinity_hint(mdev, i); - if (err) - goto err_out; - } - - return 0; - -err_out: - for (i--; i >= 0; i--) - mlx5_irq_clear_affinity_hint(mdev, i); - - return err; -} - -static void mlx5_irq_clear_affinity_hints(struct mlx5_core_dev *mdev) -{ - int i; - - for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) - mlx5_irq_clear_affinity_hint(mdev, i); -} - int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, unsigned int *irqn) { @@ -1134,12 +1081,6 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto err_stop_eqs; } - err = mlx5_irq_set_affinity_hints(dev); - if (err) { - dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n"); - goto err_affinity_hints; - } - err = mlx5_init_fs(dev); if (err) { dev_err(&pdev->dev, "Failed to init flow steering\n"); @@ -1199,9 +1140,6 @@ err_sriov: mlx5_cleanup_fs(dev); err_fs: - mlx5_irq_clear_affinity_hints(dev); - -err_affinity_hints: free_comp_eqs(dev); err_stop_eqs: @@ -1274,7 +1212,6 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, mlx5_eswitch_detach(dev->priv.eswitch); #endif mlx5_cleanup_fs(dev); - mlx5_irq_clear_affinity_hints(dev); free_comp_eqs(dev); mlx5_stop_eqs(dev); mlx5_put_uars_page(dev, priv->uar); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 5bac7f53b4f9..579731842c94 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -534,7 +534,6 @@ struct mlx5_core_sriov { }; struct mlx5_irq_info { - cpumask_var_t mask; char name[MLX5_MAX_IRQ_NAME]; }; @@ -1184,4 +1183,10 @@ enum { MLX5_TRIGGERED_CMD_COMP = (u64)1 << 32, }; +static inline const struct cpumask * +mlx5_get_vector_affinity(struct mlx5_core_dev *dev, int vector) +{ + return pci_irq_get_affinity(dev->pdev, MLX5_EQ_VEC_COMP_BASE + vector); +} + #endif /* MLX5_DRIVER_H */ -- cgit v1.2.3-71-gd317 From 24c5dc6610e8a3764fcb885cc3284c12ff1513de Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 13 Jul 2017 11:09:43 +0300 Subject: block: Add rdma affinity based queue mapping helper Like pci and virtio, we add a rdma helper for affinity spreading. This achieves optimal mq affinity assignments according to the underlying rdma device affinity maps. Reviewed-by: Jens Axboe Reviewed-by: Christoph Hellwig Reviewed-by: Max Gurtovoy Signed-off-by: Sagi Grimberg Signed-off-by: Doug Ledford --- block/Kconfig | 5 +++++ block/Makefile | 1 + block/blk-mq-rdma.c | 52 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/blk-mq-rdma.h | 10 +++++++++ 4 files changed, 68 insertions(+) create mode 100644 block/blk-mq-rdma.c create mode 100644 include/linux/blk-mq-rdma.h (limited to 'include/linux') diff --git a/block/Kconfig b/block/Kconfig index 89cd28f8d051..3ab42bbb06d5 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -206,4 +206,9 @@ config BLK_MQ_VIRTIO depends on BLOCK && VIRTIO default y +config BLK_MQ_RDMA + bool + depends on BLOCK && INFINIBAND + default y + source block/Kconfig.iosched diff --git a/block/Makefile b/block/Makefile index 2b281cf258a0..9396ebc85d24 100644 --- a/block/Makefile +++ b/block/Makefile @@ -29,6 +29,7 @@ obj-$(CONFIG_BLK_CMDLINE_PARSER) += cmdline-parser.o obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o obj-$(CONFIG_BLK_MQ_PCI) += blk-mq-pci.o obj-$(CONFIG_BLK_MQ_VIRTIO) += blk-mq-virtio.o +obj-$(CONFIG_BLK_MQ_RDMA) += blk-mq-rdma.o obj-$(CONFIG_BLK_DEV_ZONED) += blk-zoned.o obj-$(CONFIG_BLK_WBT) += blk-wbt.o obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o diff --git a/block/blk-mq-rdma.c b/block/blk-mq-rdma.c new file mode 100644 index 000000000000..996167f1de18 --- /dev/null +++ b/block/blk-mq-rdma.c @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2017 Sagi Grimberg. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ +#include +#include +#include + +/** + * blk_mq_rdma_map_queues - provide a default queue mapping for rdma device + * @set: tagset to provide the mapping for + * @dev: rdma device associated with @set. + * @first_vec: first interrupt vectors to use for queues (usually 0) + * + * This function assumes the rdma device @dev has at least as many available + * interrupt vetors as @set has queues. It will then query it's affinity mask + * and built queue mapping that maps a queue to the CPUs that have irq affinity + * for the corresponding vector. + * + * In case either the driver passed a @dev with less vectors than + * @set->nr_hw_queues, or @dev does not provide an affinity mask for a + * vector, we fallback to the naive mapping. + */ +int blk_mq_rdma_map_queues(struct blk_mq_tag_set *set, + struct ib_device *dev, int first_vec) +{ + const struct cpumask *mask; + unsigned int queue, cpu; + + for (queue = 0; queue < set->nr_hw_queues; queue++) { + mask = ib_get_vector_affinity(dev, first_vec + queue); + if (!mask) + goto fallback; + + for_each_cpu(cpu, mask) + set->mq_map[cpu] = queue; + } + + return 0; + +fallback: + return blk_mq_map_queues(set); +} +EXPORT_SYMBOL_GPL(blk_mq_rdma_map_queues); diff --git a/include/linux/blk-mq-rdma.h b/include/linux/blk-mq-rdma.h new file mode 100644 index 000000000000..b4ade198007d --- /dev/null +++ b/include/linux/blk-mq-rdma.h @@ -0,0 +1,10 @@ +#ifndef _LINUX_BLK_MQ_RDMA_H +#define _LINUX_BLK_MQ_RDMA_H + +struct blk_mq_tag_set; +struct ib_device; + +int blk_mq_rdma_map_queues(struct blk_mq_tag_set *set, + struct ib_device *dev, int first_vec); + +#endif /* _LINUX_BLK_MQ_RDMA_H */ -- cgit v1.2.3-71-gd317 From 7faf952a3030d304334fe527be339b63e9e2745f Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 10 Aug 2017 13:01:48 -0400 Subject: dma-buf: add reservation_object_copy_fences (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allows us to copy all the fences in a reservation object to another one. v2: handle NULL src_list Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Link: https://patchwork.freedesktop.org/patch/msgid/1502384509-10465-2-git-send-email-alexander.deucher@amd.com --- drivers/dma-buf/reservation.c | 60 +++++++++++++++++++++++++++++++++++++++++++ include/linux/reservation.h | 3 +++ 2 files changed, 63 insertions(+) (limited to 'include/linux') diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c index 87f8f5747500..d4881f91c43b 100644 --- a/drivers/dma-buf/reservation.c +++ b/drivers/dma-buf/reservation.c @@ -261,6 +261,66 @@ void reservation_object_add_excl_fence(struct reservation_object *obj, } EXPORT_SYMBOL(reservation_object_add_excl_fence); +/** +* reservation_object_copy_fences - Copy all fences from src to dst. +* @dst: the destination reservation object +* @src: the source reservation object +* +* Copy all fences from src to dst. Both src->lock as well as dst-lock must be +* held. +*/ +int reservation_object_copy_fences(struct reservation_object *dst, + struct reservation_object *src) +{ + struct reservation_object_list *src_list, *dst_list; + struct dma_fence *old, *new; + size_t size; + unsigned i; + + src_list = reservation_object_get_list(src); + + if (src_list) { + size = offsetof(typeof(*src_list), + shared[src_list->shared_count]); + dst_list = kmalloc(size, GFP_KERNEL); + if (!dst_list) + return -ENOMEM; + + dst_list->shared_count = src_list->shared_count; + dst_list->shared_max = src_list->shared_count; + for (i = 0; i < src_list->shared_count; ++i) + dst_list->shared[i] = + dma_fence_get(src_list->shared[i]); + } else { + dst_list = NULL; + } + + kfree(dst->staged); + dst->staged = NULL; + + src_list = reservation_object_get_list(dst); + + old = reservation_object_get_excl(dst); + new = reservation_object_get_excl(src); + + dma_fence_get(new); + + preempt_disable(); + write_seqcount_begin(&dst->seq); + /* write_seqcount_begin provides the necessary memory barrier */ + RCU_INIT_POINTER(dst->fence_excl, new); + RCU_INIT_POINTER(dst->fence, dst_list); + write_seqcount_end(&dst->seq); + preempt_enable(); + + if (src_list) + kfree_rcu(src_list, rcu); + dma_fence_put(old); + + return 0; +} +EXPORT_SYMBOL(reservation_object_copy_fences); + /** * reservation_object_get_fences_rcu - Get an object's shared and exclusive * fences without update side lock held diff --git a/include/linux/reservation.h b/include/linux/reservation.h index 156cfd330b66..21fc84d82d41 100644 --- a/include/linux/reservation.h +++ b/include/linux/reservation.h @@ -254,6 +254,9 @@ int reservation_object_get_fences_rcu(struct reservation_object *obj, unsigned *pshared_count, struct dma_fence ***pshared); +int reservation_object_copy_fences(struct reservation_object *dst, + struct reservation_object *src); + long reservation_object_wait_timeout_rcu(struct reservation_object *obj, bool wait_all, bool intr, unsigned long timeout); -- cgit v1.2.3-71-gd317 From 2926a2aa5c14fb2add75e6584845b1c03022235f Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 14 Aug 2017 17:19:26 +0200 Subject: iommu: Fix wrong freeing of iommu_device->dev The struct iommu_device has a 'struct device' embedded into it, not as a pointer, but the whole struct. In the conversion of the iommu drivers to use struct iommu_device it was forgotten that the relase function for that struct device simply calls kfree() on the pointer. This frees memory that was never allocated and causes memory corruption. To fix this issue, use a pointer to struct device instead of embedding the whole struct. This needs some updates in the iommu sysfs code as well as the Intel VT-d and AMD IOMMU driver. Reported-by: Sebastian Ott Fixes: 39ab9555c241 ('iommu: Add sysfs bindings for struct iommu_device') Cc: stable@vger.kernel.org # >= v4.11 Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu_types.h | 4 +++- drivers/iommu/intel-iommu.c | 4 +++- drivers/iommu/iommu-sysfs.c | 32 ++++++++++++++++++++------------ include/linux/iommu.h | 12 +++++++++++- 4 files changed, 37 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index 294a409e283b..d6b873b57054 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -574,7 +574,9 @@ struct amd_iommu { static inline struct amd_iommu *dev_to_amd_iommu(struct device *dev) { - return container_of(dev, struct amd_iommu, iommu.dev); + struct iommu_device *iommu = dev_to_iommu_device(dev); + + return container_of(iommu, struct amd_iommu, iommu); } #define ACPIHID_UID_LEN 256 diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 687f18f65cea..3e8636f1220e 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -4736,7 +4736,9 @@ static void intel_disable_iommus(void) static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev) { - return container_of(dev, struct intel_iommu, iommu.dev); + struct iommu_device *iommu_dev = dev_to_iommu_device(dev); + + return container_of(iommu_dev, struct intel_iommu, iommu); } static ssize_t intel_iommu_show_version(struct device *dev, diff --git a/drivers/iommu/iommu-sysfs.c b/drivers/iommu/iommu-sysfs.c index c58351ed61c1..36d1a7ce7fc4 100644 --- a/drivers/iommu/iommu-sysfs.c +++ b/drivers/iommu/iommu-sysfs.c @@ -62,32 +62,40 @@ int iommu_device_sysfs_add(struct iommu_device *iommu, va_list vargs; int ret; - device_initialize(&iommu->dev); + iommu->dev = kzalloc(sizeof(*iommu->dev), GFP_KERNEL); + if (!iommu->dev) + return -ENOMEM; - iommu->dev.class = &iommu_class; - iommu->dev.parent = parent; - iommu->dev.groups = groups; + device_initialize(iommu->dev); + + iommu->dev->class = &iommu_class; + iommu->dev->parent = parent; + iommu->dev->groups = groups; va_start(vargs, fmt); - ret = kobject_set_name_vargs(&iommu->dev.kobj, fmt, vargs); + ret = kobject_set_name_vargs(&iommu->dev->kobj, fmt, vargs); va_end(vargs); if (ret) goto error; - ret = device_add(&iommu->dev); + ret = device_add(iommu->dev); if (ret) goto error; + dev_set_drvdata(iommu->dev, iommu); + return 0; error: - put_device(&iommu->dev); + put_device(iommu->dev); return ret; } void iommu_device_sysfs_remove(struct iommu_device *iommu) { - device_unregister(&iommu->dev); + dev_set_drvdata(iommu->dev, NULL); + device_unregister(iommu->dev); + iommu->dev = NULL; } /* * IOMMU drivers can indicate a device is managed by a given IOMMU using @@ -102,14 +110,14 @@ int iommu_device_link(struct iommu_device *iommu, struct device *link) if (!iommu || IS_ERR(iommu)) return -ENODEV; - ret = sysfs_add_link_to_group(&iommu->dev.kobj, "devices", + ret = sysfs_add_link_to_group(&iommu->dev->kobj, "devices", &link->kobj, dev_name(link)); if (ret) return ret; - ret = sysfs_create_link_nowarn(&link->kobj, &iommu->dev.kobj, "iommu"); + ret = sysfs_create_link_nowarn(&link->kobj, &iommu->dev->kobj, "iommu"); if (ret) - sysfs_remove_link_from_group(&iommu->dev.kobj, "devices", + sysfs_remove_link_from_group(&iommu->dev->kobj, "devices", dev_name(link)); return ret; @@ -121,5 +129,5 @@ void iommu_device_unlink(struct iommu_device *iommu, struct device *link) return; sysfs_remove_link(&link->kobj, "iommu"); - sysfs_remove_link_from_group(&iommu->dev.kobj, "devices", dev_name(link)); + sysfs_remove_link_from_group(&iommu->dev->kobj, "devices", dev_name(link)); } diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 2cb54adc4a33..176f7569d874 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -240,7 +240,7 @@ struct iommu_device { struct list_head list; const struct iommu_ops *ops; struct fwnode_handle *fwnode; - struct device dev; + struct device *dev; }; int iommu_device_register(struct iommu_device *iommu); @@ -265,6 +265,11 @@ static inline void iommu_device_set_fwnode(struct iommu_device *iommu, iommu->fwnode = fwnode; } +static inline struct iommu_device *dev_to_iommu_device(struct device *dev) +{ + return (struct iommu_device *)dev_get_drvdata(dev); +} + #define IOMMU_GROUP_NOTIFY_ADD_DEVICE 1 /* Device added */ #define IOMMU_GROUP_NOTIFY_DEL_DEVICE 2 /* Pre Device removed */ #define IOMMU_GROUP_NOTIFY_BIND_DRIVER 3 /* Pre Driver bind */ @@ -589,6 +594,11 @@ static inline void iommu_device_set_fwnode(struct iommu_device *iommu, { } +static inline struct iommu_device *dev_to_iommu_device(struct device *dev) +{ + return NULL; +} + static inline void iommu_device_unregister(struct iommu_device *iommu) { } -- cgit v1.2.3-71-gd317 From 739a6d5d640a9811beef6a828253ee184dd431c5 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Fri, 11 Aug 2017 16:49:12 +0300 Subject: drm: omapdrm: Remove omapdrm platform data The omapdrm platform data are not used anymore, remove them. Signed-off-by: Laurent Pinchart Reviewed-by: Tomi Valkeinen Signed-off-by: Tomi Valkeinen --- include/linux/platform_data/omap_drm.h | 53 ---------------------------------- 1 file changed, 53 deletions(-) delete mode 100644 include/linux/platform_data/omap_drm.h (limited to 'include/linux') diff --git a/include/linux/platform_data/omap_drm.h b/include/linux/platform_data/omap_drm.h deleted file mode 100644 index f4e4a237ebd2..000000000000 --- a/include/linux/platform_data/omap_drm.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * DRM/KMS platform data for TI OMAP platforms - * - * Copyright (C) 2012 Texas Instruments - * Author: Rob Clark - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see . - */ - -#ifndef __PLATFORM_DATA_OMAP_DRM_H__ -#define __PLATFORM_DATA_OMAP_DRM_H__ - -/* - * Optional platform data to configure the default configuration of which - * pipes/overlays/CRTCs are used.. if this is not provided, then instead the - * first CONFIG_DRM_OMAP_NUM_CRTCS are used, and they are each connected to - * one manager, with priority given to managers that are connected to - * detected devices. Remaining overlays are used as video planes. This - * should be a good default behavior for most cases, but yet there still - * might be times when you wish to do something different. - */ -struct omap_kms_platform_data { - /* overlays to use as CRTCs: */ - int ovl_cnt; - const int *ovl_ids; - - /* overlays to use as video planes: */ - int pln_cnt; - const int *pln_ids; - - int mgr_cnt; - const int *mgr_ids; - - int dev_cnt; - const char **dev_names; -}; - -struct omap_drm_platform_data { - uint32_t omaprev; - struct omap_kms_platform_data *kms_pdata; -}; - -#endif /* __PLATFORM_DATA_OMAP_DRM_H__ */ -- cgit v1.2.3-71-gd317 From 92d50fc1602ecef44babe411c475344e55e1cdd9 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 19 Jul 2017 15:01:06 +0200 Subject: PCI/IB: add support for pci driver attribute groups Some drivers (specifically the nes IB driver), want to create a lot of sysfs driver attributes. Instead of open-coding the creation and removal of these files (and getting it wrong btw), it's a better idea to let the driver core handle all of this logic for us. So add a new field to the pci driver structure, **groups, that allows pci drivers to specify an attribute group list it wishes to have created when it is registered with the driver core. Big bonus is now the driver doesn't race with userspace when the sysfs files are created vs. when the kobject is announced, so any script/tool that actually wanted to use these files will not have to poll waiting for them to show up. Cc: Faisal Latif Cc: Doug Ledford Cc: Sean Hefty Cc: Hal Rosenstock Cc: Bjorn Helgaas Signed-off-by: Greg Kroah-Hartman Acked-by: Bjorn Helgaas Signed-off-by: Doug Ledford --- drivers/infiniband/hw/nes/nes.c | 67 ++++++++++++++--------------------------- drivers/pci/pci-driver.c | 1 + include/linux/pci.h | 1 + 3 files changed, 25 insertions(+), 44 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index a30aa6527f7e..84162e9ae5c0 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -808,13 +808,6 @@ static void nes_remove(struct pci_dev *pcidev) } -static struct pci_driver nes_pci_driver = { - .name = DRV_NAME, - .id_table = nes_pci_table, - .probe = nes_probe, - .remove = nes_remove, -}; - static ssize_t adapter_show(struct device_driver *ddp, char *buf) { unsigned int devfn = 0xffffffff; @@ -1156,35 +1149,29 @@ static DRIVER_ATTR_RW(idx_addr); static DRIVER_ATTR_RW(idx_data); static DRIVER_ATTR_RW(wqm_quanta); -static int nes_create_driver_sysfs(struct pci_driver *drv) -{ - int error; - error = driver_create_file(&drv->driver, &driver_attr_adapter); - error |= driver_create_file(&drv->driver, &driver_attr_eeprom_cmd); - error |= driver_create_file(&drv->driver, &driver_attr_eeprom_data); - error |= driver_create_file(&drv->driver, &driver_attr_flash_cmd); - error |= driver_create_file(&drv->driver, &driver_attr_flash_data); - error |= driver_create_file(&drv->driver, &driver_attr_nonidx_addr); - error |= driver_create_file(&drv->driver, &driver_attr_nonidx_data); - error |= driver_create_file(&drv->driver, &driver_attr_idx_addr); - error |= driver_create_file(&drv->driver, &driver_attr_idx_data); - error |= driver_create_file(&drv->driver, &driver_attr_wqm_quanta); - return error; -} +static struct attribute *nes_attrs[] = { + &driver_attr_adapter.attr, + &driver_attr_eeprom_cmd.attr, + &driver_attr_eeprom_data.attr, + &driver_attr_flash_cmd.attr, + &driver_attr_flash_data.attr, + &driver_attr_nonidx_addr.attr, + &driver_attr_nonidx_data.attr, + &driver_attr_idx_addr.attr, + &driver_attr_idx_data.attr, + &driver_attr_wqm_quanta.attr, + NULL, +}; +ATTRIBUTE_GROUPS(nes); + +static struct pci_driver nes_pci_driver = { + .name = DRV_NAME, + .id_table = nes_pci_table, + .probe = nes_probe, + .remove = nes_remove, + .groups = nes_groups, +}; -static void nes_remove_driver_sysfs(struct pci_driver *drv) -{ - driver_remove_file(&drv->driver, &driver_attr_adapter); - driver_remove_file(&drv->driver, &driver_attr_eeprom_cmd); - driver_remove_file(&drv->driver, &driver_attr_eeprom_data); - driver_remove_file(&drv->driver, &driver_attr_flash_cmd); - driver_remove_file(&drv->driver, &driver_attr_flash_data); - driver_remove_file(&drv->driver, &driver_attr_nonidx_addr); - driver_remove_file(&drv->driver, &driver_attr_nonidx_data); - driver_remove_file(&drv->driver, &driver_attr_idx_addr); - driver_remove_file(&drv->driver, &driver_attr_idx_data); - driver_remove_file(&drv->driver, &driver_attr_wqm_quanta); -} /** * nes_init_module - module initialization entry point @@ -1192,20 +1179,13 @@ static void nes_remove_driver_sysfs(struct pci_driver *drv) static int __init nes_init_module(void) { int retval; - int retval1; retval = nes_cm_start(); if (retval) { printk(KERN_ERR PFX "Unable to start NetEffect iWARP CM.\n"); return retval; } - retval = pci_register_driver(&nes_pci_driver); - if (retval >= 0) { - retval1 = nes_create_driver_sysfs(&nes_pci_driver); - if (retval1 < 0) - printk(KERN_ERR PFX "Unable to create NetEffect sys files.\n"); - } - return retval; + return pci_register_driver(&nes_pci_driver); } @@ -1215,7 +1195,6 @@ static int __init nes_init_module(void) static void __exit nes_exit_module(void) { nes_cm_stop(); - nes_remove_driver_sysfs(&nes_pci_driver); pci_unregister_driver(&nes_pci_driver); } diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index d51e8738f9c2..4450feaf5c00 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -1307,6 +1307,7 @@ int __pci_register_driver(struct pci_driver *drv, struct module *owner, drv->driver.bus = &pci_bus_type; drv->driver.owner = owner; drv->driver.mod_name = mod_name; + drv->driver.groups = drv->groups; spin_lock_init(&drv->dynids.lock); INIT_LIST_HEAD(&drv->dynids.list); diff --git a/include/linux/pci.h b/include/linux/pci.h index 4869e66dd659..c421af60f2df 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -729,6 +729,7 @@ struct pci_driver { void (*shutdown) (struct pci_dev *dev); int (*sriov_configure) (struct pci_dev *dev, int num_vfs); /* PF pdev */ const struct pci_error_handlers *err_handler; + const struct attribute_group **groups; struct device_driver driver; struct pci_dynids dynids; }; -- cgit v1.2.3-71-gd317 From f00fd7ae4f409abb7b2e5d099248832548199f0c Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Sun, 30 Jul 2017 16:14:42 -0600 Subject: PATCH] iio: Fix some documentation warnings The kerneldoc description for the trig_readonly field of struct iio_dev lacked a colon, leading to this doc build warning: ./include/linux/iio/iio.h:603: warning: No description found for parameter 'trig_readonly' A similar issue for iio_trigger_set_immutable() in trigger.h yielded: ./include/linux/iio/trigger.h:151: warning: No description found for parameter 'indio_dev' ./include/linux/iio/trigger.h:151: warning: No description found for parameter 'trig' Fix the formatting and silence the warnings. Signed-off-by: Jonathan Corbet Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 2 +- include/linux/iio/trigger.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index d68bec297a45..c380daa40c0e 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -535,7 +535,7 @@ struct iio_buffer_setup_ops { * @scan_timestamp: [INTERN] set if any buffers have requested timestamp * @scan_index_timestamp:[INTERN] cache of the index to the timestamp * @trig: [INTERN] current device trigger (buffer modes) - * @trig_readonly [INTERN] mark the current trigger immutable + * @trig_readonly: [INTERN] mark the current trigger immutable * @pollfunc: [DRIVER] function run on trigger being received * @pollfunc_event: [DRIVER] function run on events trigger being received * @channels: [DRIVER] channel specification structure table diff --git a/include/linux/iio/trigger.h b/include/linux/iio/trigger.h index ea08302f2d7b..7142d8d6e470 100644 --- a/include/linux/iio/trigger.h +++ b/include/linux/iio/trigger.h @@ -144,8 +144,8 @@ void devm_iio_trigger_unregister(struct device *dev, /** * iio_trigger_set_immutable() - set an immutable trigger on destination * - * @indio_dev - IIO device structure containing the device - * @trig - trigger to assign to device + * @indio_dev: IIO device structure containing the device + * @trig: trigger to assign to device * **/ int iio_trigger_set_immutable(struct iio_dev *indio_dev, struct iio_trigger *trig); -- cgit v1.2.3-71-gd317 From 67dfabe3a01be6b0cc4c0056aa546f0279ed6279 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Mon, 21 Aug 2017 16:54:01 +0200 Subject: mfd: da9052: Add register details for TSI Add register details an channels definition for using the TSI registers in the hwmon driver. Signed-off-by: Sebastian Reichel Signed-off-by: Lee Jones --- include/linux/mfd/da9052/da9052.h | 6 ++++++ include/linux/mfd/da9052/reg.h | 11 ++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/da9052/da9052.h b/include/linux/mfd/da9052/da9052.h index ce9230af09c2..ae5b663836d0 100644 --- a/include/linux/mfd/da9052/da9052.h +++ b/include/linux/mfd/da9052/da9052.h @@ -45,6 +45,12 @@ #define DA9052_ADC_TJUNC 8 #define DA9052_ADC_VBBAT 9 +/* TSI channel has its own 4 channel mux */ +#define DA9052_ADC_TSI_XP 70 +#define DA9052_ADC_TSI_XN 71 +#define DA9052_ADC_TSI_YP 72 +#define DA9052_ADC_TSI_YN 73 + #define DA9052_IRQ_DCIN 0 #define DA9052_IRQ_VBUS 1 #define DA9052_IRQ_DCINREM 2 diff --git a/include/linux/mfd/da9052/reg.h b/include/linux/mfd/da9052/reg.h index 5010f978725c..76780ea8849c 100644 --- a/include/linux/mfd/da9052/reg.h +++ b/include/linux/mfd/da9052/reg.h @@ -690,7 +690,10 @@ /* TSI CONTROL REGISTER B BITS */ #define DA9052_TSICONTB_ADCREF 0X80 #define DA9052_TSICONTB_TSIMAN 0X40 -#define DA9052_TSICONTB_TSIMUX 0X30 +#define DA9052_TSICONTB_TSIMUX_XP 0X00 +#define DA9052_TSICONTB_TSIMUX_YP 0X10 +#define DA9052_TSICONTB_TSIMUX_XN 0X20 +#define DA9052_TSICONTB_TSIMUX_YN 0X30 #define DA9052_TSICONTB_TSISEL3 0X08 #define DA9052_TSICONTB_TSISEL2 0X04 #define DA9052_TSICONTB_TSISEL1 0X02 @@ -705,8 +708,14 @@ /* TSI CO-ORDINATE LSB RESULT REGISTER BITS */ #define DA9052_TSILSB_PENDOWN 0X40 #define DA9052_TSILSB_TSIZL 0X30 +#define DA9052_TSILSB_TSIZL_SHIFT 4 +#define DA9052_TSILSB_TSIZL_BITS 2 #define DA9052_TSILSB_TSIYL 0X0C +#define DA9052_TSILSB_TSIYL_SHIFT 2 +#define DA9052_TSILSB_TSIYL_BITS 2 #define DA9052_TSILSB_TSIXL 0X03 +#define DA9052_TSILSB_TSIXL_SHIFT 0 +#define DA9052_TSILSB_TSIXL_BITS 2 /* TSI Z MEASUREMENT MSB RESULT REGISTER BIT */ #define DA9052_TSIZMSB_TSIZM 0XFF -- cgit v1.2.3-71-gd317 From 18c90df9f2c00cb35ab8ba747aa0f742ee6bbf6a Mon Sep 17 00:00:00 2001 From: Romain Perier Date: Tue, 22 Aug 2017 13:46:59 +0200 Subject: mlx5: Replace PCI pool old API The PCI pool API is deprecated. This commit replaces the PCI pool old API by the appropriate function with the DMA pool API. Signed-off-by: Romain Perier Reviewed-by: Peter Senna Tschudin Acked-by: Doug Ledford Tested-by: Doug Ledford Signed-off-by: Doug Ledford --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 11 ++++++----- include/linux/mlx5/driver.h | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index f5a2c605749f..7a40e8790f75 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -1095,7 +1095,7 @@ static struct mlx5_cmd_mailbox *alloc_cmd_box(struct mlx5_core_dev *dev, if (!mailbox) return ERR_PTR(-ENOMEM); - mailbox->buf = pci_pool_zalloc(dev->cmd.pool, flags, + mailbox->buf = dma_pool_zalloc(dev->cmd.pool, flags, &mailbox->dma); if (!mailbox->buf) { mlx5_core_dbg(dev, "failed allocation\n"); @@ -1110,7 +1110,7 @@ static struct mlx5_cmd_mailbox *alloc_cmd_box(struct mlx5_core_dev *dev, static void free_cmd_box(struct mlx5_core_dev *dev, struct mlx5_cmd_mailbox *mailbox) { - pci_pool_free(dev->cmd.pool, mailbox->buf, mailbox->dma); + dma_pool_free(dev->cmd.pool, mailbox->buf, mailbox->dma); kfree(mailbox); } @@ -1759,7 +1759,8 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev) return -EINVAL; } - cmd->pool = pci_pool_create("mlx5_cmd", dev->pdev, size, align, 0); + cmd->pool = dma_pool_create("mlx5_cmd", &dev->pdev->dev, size, align, + 0); if (!cmd->pool) return -ENOMEM; @@ -1849,7 +1850,7 @@ err_free_page: free_cmd_page(dev, cmd); err_free_pool: - pci_pool_destroy(cmd->pool); + dma_pool_destroy(cmd->pool); return err; } @@ -1863,6 +1864,6 @@ void mlx5_cmd_cleanup(struct mlx5_core_dev *dev) destroy_workqueue(cmd->wq); destroy_msg_cache(dev); free_cmd_page(dev, cmd); - pci_pool_destroy(cmd->pool); + dma_pool_destroy(cmd->pool); } EXPORT_SYMBOL(mlx5_cmd_cleanup); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 099fe311f272..db40bc4055c7 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -299,7 +299,7 @@ struct mlx5_cmd { struct semaphore pages_sem; int mode; struct mlx5_cmd_work_ent *ent_arr[MLX5_MAX_COMMANDS]; - struct pci_pool *pool; + struct dma_pool *pool; struct mlx5_cmd_debug dbg; struct cmd_msg_cache cache[MLX5_NUM_COMMAND_CACHES]; int checksum_disabled; -- cgit v1.2.3-71-gd317 From cd0a137acbb66208368353723f5f1480995cf1c4 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 22 Aug 2017 15:12:14 -0700 Subject: net: core: Specify skb_pad()/skb_put_padto() SKB freeing Rename skb_pad() into __skb_pad() and make it take a third argument: free_on_error which controls whether kfree_skb() should be called or not, skb_pad() directly makes use of it and passes true to preserve its existing behavior. Do exactly the same thing with __skb_put_padto() and skb_put_padto(). Suggested-by: David Miller Signed-off-by: Florian Fainelli Reviewed-by: Woojung Huh Signed-off-by: David S. Miller --- include/linux/skbuff.h | 41 +++++++++++++++++++++++++++++++++++++---- net/core/skbuff.c | 13 ++++++++----- 2 files changed, 45 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index dbe29b6c9bd6..d67a8182e5eb 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -973,7 +973,23 @@ int __must_check skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg int __must_check skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len); int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer); -int skb_pad(struct sk_buff *skb, int pad); +int __skb_pad(struct sk_buff *skb, int pad, bool free_on_error); + +/** + * skb_pad - zero pad the tail of an skb + * @skb: buffer to pad + * @pad: space to pad + * + * Ensure that a buffer is followed by a padding area that is zero + * filled. Used by network drivers which may DMA or transfer data + * beyond the buffer end onto the wire. + * + * May return error in out of memory cases. The skb is freed on error. + */ +static inline int skb_pad(struct sk_buff *skb, int pad) +{ + return __skb_pad(skb, pad, true); +} #define dev_kfree_skb(a) consume_skb(a) int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, @@ -2825,25 +2841,42 @@ static inline int skb_padto(struct sk_buff *skb, unsigned int len) * skb_put_padto - increase size and pad an skbuff up to a minimal size * @skb: buffer to pad * @len: minimal length + * @free_on_error: free buffer on error * * Pads up a buffer to ensure the trailing bytes exist and are * blanked. If the buffer already contains sufficient data it * is untouched. Otherwise it is extended. Returns zero on - * success. The skb is freed on error. + * success. The skb is freed on error if @free_on_error is true. */ -static inline int skb_put_padto(struct sk_buff *skb, unsigned int len) +static inline int __skb_put_padto(struct sk_buff *skb, unsigned int len, + bool free_on_error) { unsigned int size = skb->len; if (unlikely(size < len)) { len -= size; - if (skb_pad(skb, len)) + if (__skb_pad(skb, len, free_on_error)) return -ENOMEM; __skb_put(skb, len); } return 0; } +/** + * skb_put_padto - increase size and pad an skbuff up to a minimal size + * @skb: buffer to pad + * @len: minimal length + * + * Pads up a buffer to ensure the trailing bytes exist and are + * blanked. If the buffer already contains sufficient data it + * is untouched. Otherwise it is extended. Returns zero on + * success. The skb is freed on error. + */ +static inline int skb_put_padto(struct sk_buff *skb, unsigned int len) +{ + return __skb_put_padto(skb, len, true); +} + static inline int skb_add_data(struct sk_buff *skb, struct iov_iter *from, int copy) { diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f990eb8b30a9..e07556606284 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1363,18 +1363,20 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, EXPORT_SYMBOL(skb_copy_expand); /** - * skb_pad - zero pad the tail of an skb + * __skb_pad - zero pad the tail of an skb * @skb: buffer to pad * @pad: space to pad + * @free_on_error: free buffer on error * * Ensure that a buffer is followed by a padding area that is zero * filled. Used by network drivers which may DMA or transfer data * beyond the buffer end onto the wire. * - * May return error in out of memory cases. The skb is freed on error. + * May return error in out of memory cases. The skb is freed on error + * if @free_on_error is true. */ -int skb_pad(struct sk_buff *skb, int pad) +int __skb_pad(struct sk_buff *skb, int pad, bool free_on_error) { int err; int ntail; @@ -1403,10 +1405,11 @@ int skb_pad(struct sk_buff *skb, int pad) return 0; free_skb: - kfree_skb(skb); + if (free_on_error) + kfree_skb(skb); return err; } -EXPORT_SYMBOL(skb_pad); +EXPORT_SYMBOL(__skb_pad); /** * pskb_put - add data to the tail of a potentially fragmented buffer -- cgit v1.2.3-71-gd317 From 50b4d485528d1dbe0bd249f2073140e3444f4a7b Mon Sep 17 00:00:00 2001 From: Benjamin Block Date: Thu, 24 Aug 2017 01:57:56 +0200 Subject: bsg-lib: fix kernel panic resulting from missing allocation of reply-buffer Since we split the scsi_request out of struct request bsg fails to provide a reply-buffer for the drivers. This was done via the pointer for sense-data, that is not preallocated anymore. Failing to allocate/assign it results in illegal dereferences because LLDs use this pointer unquestioned. An example panic on s390x, using the zFCP driver, looks like this (I had debugging on, otherwise NULL-pointer dereferences wouldn't even panic on s390x): Unable to handle kernel pointer dereference in virtual kernel address space Failing address: 6b6b6b6b6b6b6000 TEID: 6b6b6b6b6b6b6403 Fault in home space mode while using kernel ASCE. AS:0000000001590007 R3:0000000000000024 Oops: 0038 ilc:2 [#1] PREEMPT SMP DEBUG_PAGEALLOC Modules linked in: CPU: 2 PID: 0 Comm: swapper/2 Not tainted 4.12.0-bsg-regression+ #3 Hardware name: IBM 2964 N96 702 (z/VM 6.4.0) task: 0000000065cb0100 task.stack: 0000000065cb4000 Krnl PSW : 0704e00180000000 000003ff801e4156 (zfcp_fc_ct_els_job_handler+0x16/0x58 [zfcp]) R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:2 PM:0 RI:0 EA:3 Krnl GPRS: 0000000000000001 000000005fa9d0d0 000000005fa9d078 0000000000e16866 000003ff00000290 6b6b6b6b6b6b6b6b 0000000059f78f00 000000000000000f 00000000593a0958 00000000593a0958 0000000060d88800 000000005ddd4c38 0000000058b50100 07000000659cba08 000003ff801e8556 00000000659cb9a8 Krnl Code: 000003ff801e4146: e31020500004 lg %r1,80(%r2) 000003ff801e414c: 58402040 l %r4,64(%r2) #000003ff801e4150: e35020200004 lg %r5,32(%r2) >000003ff801e4156: 50405004 st %r4,4(%r5) 000003ff801e415a: e54c50080000 mvhi 8(%r5),0 000003ff801e4160: e33010280012 lt %r3,40(%r1) 000003ff801e4166: a718fffb lhi %r1,-5 000003ff801e416a: 1803 lr %r0,%r3 Call Trace: ([<000003ff801e8556>] zfcp_fsf_req_complete+0x726/0x768 [zfcp]) [<000003ff801ea82a>] zfcp_fsf_reqid_check+0x102/0x180 [zfcp] [<000003ff801eb980>] zfcp_qdio_int_resp+0x230/0x278 [zfcp] [<00000000009b91b6>] qdio_kick_handler+0x2ae/0x2c8 [<00000000009b9e3e>] __tiqdio_inbound_processing+0x406/0xc10 [<00000000001684c2>] tasklet_action+0x15a/0x1d8 [<0000000000bd28ec>] __do_softirq+0x3ec/0x848 [<00000000001675a4>] irq_exit+0x74/0xf8 [<000000000010dd6a>] do_IRQ+0xba/0xf0 [<0000000000bd19e8>] io_int_handler+0x104/0x2d4 [<00000000001033b6>] enabled_wait+0xb6/0x188 ([<000000000010339e>] enabled_wait+0x9e/0x188) [<000000000010396a>] arch_cpu_idle+0x32/0x50 [<0000000000bd0112>] default_idle_call+0x52/0x68 [<00000000001cd0fa>] do_idle+0x102/0x188 [<00000000001cd41e>] cpu_startup_entry+0x3e/0x48 [<0000000000118c64>] smp_start_secondary+0x11c/0x130 [<0000000000bd2016>] restart_int_handler+0x62/0x78 [<0000000000000000>] (null) INFO: lockdep is turned off. Last Breaking-Event-Address: [<000003ff801e41d6>] zfcp_fc_ct_job_handler+0x3e/0x48 [zfcp] Kernel panic - not syncing: Fatal exception in interrupt This patch moves bsg-lib to allocate and setup struct bsg_job ahead of time, including the allocation of a buffer for the reply-data. This means, struct bsg_job is not allocated separately anymore, but as part of struct request allocation - similar to struct scsi_cmd. Reflect this in the function names that used to handle creation/destruction of struct bsg_job. Reported-by: Steffen Maier Suggested-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Signed-off-by: Benjamin Block Fixes: 82ed4db499b8 ("block: split scsi_request out of struct request") Cc: #4.11+ Signed-off-by: Jens Axboe --- block/bsg-lib.c | 74 +++++++++++++++++++++++++++++-------------------- include/linux/blkdev.h | 1 - include/linux/bsg-lib.h | 2 ++ 3 files changed, 46 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/block/bsg-lib.c b/block/bsg-lib.c index c4513b23f57a..dd56d7460cb9 100644 --- a/block/bsg-lib.c +++ b/block/bsg-lib.c @@ -29,26 +29,25 @@ #include /** - * bsg_destroy_job - routine to teardown/delete a bsg job + * bsg_teardown_job - routine to teardown a bsg job * @job: bsg_job that is to be torn down */ -static void bsg_destroy_job(struct kref *kref) +static void bsg_teardown_job(struct kref *kref) { struct bsg_job *job = container_of(kref, struct bsg_job, kref); struct request *rq = job->req; - blk_end_request_all(rq, BLK_STS_OK); - put_device(job->dev); /* release reference for the request */ kfree(job->request_payload.sg_list); kfree(job->reply_payload.sg_list); - kfree(job); + + blk_end_request_all(rq, BLK_STS_OK); } void bsg_job_put(struct bsg_job *job) { - kref_put(&job->kref, bsg_destroy_job); + kref_put(&job->kref, bsg_teardown_job); } EXPORT_SYMBOL_GPL(bsg_job_put); @@ -100,7 +99,7 @@ EXPORT_SYMBOL_GPL(bsg_job_done); */ static void bsg_softirq_done(struct request *rq) { - struct bsg_job *job = rq->special; + struct bsg_job *job = blk_mq_rq_to_pdu(rq); bsg_job_put(job); } @@ -122,33 +121,20 @@ static int bsg_map_buffer(struct bsg_buffer *buf, struct request *req) } /** - * bsg_create_job - create the bsg_job structure for the bsg request + * bsg_prepare_job - create the bsg_job structure for the bsg request * @dev: device that is being sent the bsg request * @req: BSG request that needs a job structure */ -static int bsg_create_job(struct device *dev, struct request *req) +static int bsg_prepare_job(struct device *dev, struct request *req) { struct request *rsp = req->next_rq; - struct request_queue *q = req->q; struct scsi_request *rq = scsi_req(req); - struct bsg_job *job; + struct bsg_job *job = blk_mq_rq_to_pdu(req); int ret; - BUG_ON(req->special); - - job = kzalloc(sizeof(struct bsg_job) + q->bsg_job_size, GFP_KERNEL); - if (!job) - return -ENOMEM; - - req->special = job; - job->req = req; - if (q->bsg_job_size) - job->dd_data = (void *)&job[1]; job->request = rq->cmd; job->request_len = rq->cmd_len; - job->reply = rq->sense; - job->reply_len = SCSI_SENSE_BUFFERSIZE; /* Size of sense buffer - * allocated */ + if (req->bio) { ret = bsg_map_buffer(&job->request_payload, req); if (ret) @@ -187,7 +173,6 @@ static void bsg_request_fn(struct request_queue *q) { struct device *dev = q->queuedata; struct request *req; - struct bsg_job *job; int ret; if (!get_device(dev)) @@ -199,7 +184,7 @@ static void bsg_request_fn(struct request_queue *q) break; spin_unlock_irq(q->queue_lock); - ret = bsg_create_job(dev, req); + ret = bsg_prepare_job(dev, req); if (ret) { scsi_req(req)->result = ret; blk_end_request_all(req, BLK_STS_OK); @@ -207,8 +192,7 @@ static void bsg_request_fn(struct request_queue *q) continue; } - job = req->special; - ret = q->bsg_job_fn(job); + ret = q->bsg_job_fn(blk_mq_rq_to_pdu(req)); spin_lock_irq(q->queue_lock); if (ret) break; @@ -219,6 +203,35 @@ static void bsg_request_fn(struct request_queue *q) spin_lock_irq(q->queue_lock); } +static int bsg_init_rq(struct request_queue *q, struct request *req, gfp_t gfp) +{ + struct bsg_job *job = blk_mq_rq_to_pdu(req); + struct scsi_request *sreq = &job->sreq; + + memset(job, 0, sizeof(*job)); + + scsi_req_init(sreq); + sreq->sense_len = SCSI_SENSE_BUFFERSIZE; + sreq->sense = kzalloc(sreq->sense_len, gfp); + if (!sreq->sense) + return -ENOMEM; + + job->req = req; + job->reply = sreq->sense; + job->reply_len = sreq->sense_len; + job->dd_data = job + 1; + + return 0; +} + +static void bsg_exit_rq(struct request_queue *q, struct request *req) +{ + struct bsg_job *job = blk_mq_rq_to_pdu(req); + struct scsi_request *sreq = &job->sreq; + + kfree(sreq->sense); +} + /** * bsg_setup_queue - Create and add the bsg hooks so we can receive requests * @dev: device to attach bsg device to @@ -235,7 +248,9 @@ struct request_queue *bsg_setup_queue(struct device *dev, char *name, q = blk_alloc_queue(GFP_KERNEL); if (!q) return ERR_PTR(-ENOMEM); - q->cmd_size = sizeof(struct scsi_request); + q->cmd_size = sizeof(struct bsg_job) + dd_job_size; + q->init_rq_fn = bsg_init_rq; + q->exit_rq_fn = bsg_exit_rq; q->request_fn = bsg_request_fn; ret = blk_init_allocated_queue(q); @@ -243,7 +258,6 @@ struct request_queue *bsg_setup_queue(struct device *dev, char *name, goto out_cleanup_queue; q->queuedata = dev; - q->bsg_job_size = dd_job_size; q->bsg_job_fn = job_fn; queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q); queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 25f6a0cb27d3..2a5d52fa90f5 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -568,7 +568,6 @@ struct request_queue { #if defined(CONFIG_BLK_DEV_BSG) bsg_job_fn *bsg_job_fn; - int bsg_job_size; struct bsg_class_device bsg_dev; #endif diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h index e34dde2da0ef..637a20cfb237 100644 --- a/include/linux/bsg-lib.h +++ b/include/linux/bsg-lib.h @@ -24,6 +24,7 @@ #define _BLK_BSG_ #include +#include struct request; struct device; @@ -37,6 +38,7 @@ struct bsg_buffer { }; struct bsg_job { + struct scsi_request sreq; struct device *dev; struct request *req; -- cgit v1.2.3-71-gd317 From 8b7ff7f3b301de52924cb2cf3fed47b181893116 Mon Sep 17 00:00:00 2001 From: Ilya Lesokhin Date: Thu, 17 Aug 2017 15:52:29 +0300 Subject: IB/mlx5: Enable UMR for MRs created with reg_create This patch is the first step in decoupling UMR usage and allocation from the MR cache. The only functional change in this patch is to enables UMR for MRs created with reg_create. This change fixes a bug where ODP memory regions that were not allocated from the MR cache did not have UMR enabled. Signed-off-by: Ilya Lesokhin Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 +- drivers/infiniband/hw/mlx5/mr.c | 33 ++++++++++++++------------------- include/linux/mlx5/driver.h | 2 +- 3 files changed, 16 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 7ac991070020..b3380e8beacf 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -503,7 +503,7 @@ struct mlx5_ib_mr { struct mlx5_shared_mr_info *smr_info; struct list_head list; int order; - int umred; + bool allocated_from_cache; int npages; struct mlx5_ib_dev *dev; u32 out[MLX5_ST_SZ_DW(create_mkey_out)]; diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index a0eb2f96179a..bc87016021e3 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -48,8 +48,7 @@ enum { #define MLX5_UMR_ALIGN 2048 static int clean_mr(struct mlx5_ib_mr *mr); -static int max_umr_order(struct mlx5_ib_dev *dev); -static int use_umr(struct mlx5_ib_dev *dev, int order); +static int mr_cache_max_order(struct mlx5_ib_dev *dev); static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) @@ -184,7 +183,7 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num) break; } mr->order = ent->order; - mr->umred = 1; + mr->allocated_from_cache = 1; mr->dev = dev; MLX5_SET(mkc, mkc, free, 1); @@ -497,7 +496,7 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order) int i; c = order2idx(dev, order); - last_umr_cache_entry = order2idx(dev, max_umr_order(dev)); + last_umr_cache_entry = order2idx(dev, mr_cache_max_order(dev)); if (c < 0 || c > last_umr_cache_entry) { mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c); return NULL; @@ -677,12 +676,12 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func); queue_work(cache->wq, &ent->work); - if (i > MAX_UMR_CACHE_ENTRY) { + if (i > MR_CACHE_LAST_STD_ENTRY) { mlx5_odp_init_mr_cache_entry(ent); continue; } - if (!use_umr(dev, ent->order)) + if (ent->order > mr_cache_max_order(dev)) continue; ent->page = PAGE_SHIFT; @@ -819,18 +818,13 @@ static int get_octo_len(u64 addr, u64 len, int page_size) return (npages + 1) / 2; } -static int max_umr_order(struct mlx5_ib_dev *dev) +static int mr_cache_max_order(struct mlx5_ib_dev *dev) { if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) - return MAX_UMR_CACHE_ENTRY + 2; + return MR_CACHE_LAST_STD_ENTRY + 2; return MLX5_MAX_UMR_SHIFT; } -static int use_umr(struct mlx5_ib_dev *dev, int order) -{ - return order <= max_umr_order(dev); -} - static int mr_umem_get(struct ib_pd *pd, u64 start, u64 length, int access_flags, struct ib_umem **umem, int *npages, int *page_shift, int *ncont, @@ -1149,6 +1143,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ)); MLX5_SET(mkc, mkc, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE)); MLX5_SET(mkc, mkc, lr, 1); + MLX5_SET(mkc, mkc, umr_en, 1); MLX5_SET64(mkc, mkc, start_addr, virt_addr); MLX5_SET64(mkc, mkc, len, length); @@ -1231,7 +1226,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (err < 0) return ERR_PTR(err); - if (use_umr(dev, order)) { + if (order <= mr_cache_max_order(dev)) { mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift, order, access_flags); if (PTR_ERR(mr) == -EAGAIN) { @@ -1355,7 +1350,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, /* * UMR can't be used - MKey needs to be replaced. */ - if (mr->umred) { + if (mr->allocated_from_cache) { err = unreg_umr(dev, mr); if (err) mlx5_ib_warn(dev, "Failed to unregister MR\n"); @@ -1373,7 +1368,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, if (IS_ERR(mr)) return PTR_ERR(mr); - mr->umred = 0; + mr->allocated_from_cache = 0; } else { /* * Send a UMR WQE @@ -1461,7 +1456,7 @@ mlx5_free_priv_descs(struct mlx5_ib_mr *mr) static int clean_mr(struct mlx5_ib_mr *mr) { struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); - int umred = mr->umred; + int allocated_from_cache = mr->allocated_from_cache; int err; if (mr->sig) { @@ -1479,7 +1474,7 @@ static int clean_mr(struct mlx5_ib_mr *mr) mlx5_free_priv_descs(mr); - if (!umred) { + if (!allocated_from_cache) { err = destroy_mkey(dev, mr); if (err) { mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n", @@ -1490,7 +1485,7 @@ static int clean_mr(struct mlx5_ib_mr *mr) mlx5_mr_cache_free(dev, mr); } - if (!umred) + if (!allocated_from_cache) kfree(mr); return 0; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index db40bc4055c7..99d88624ad07 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1093,7 +1093,7 @@ enum { }; enum { - MAX_UMR_CACHE_ENTRY = 20, + MR_CACHE_LAST_STD_ENTRY = 20, MLX5_IMR_MTT_CACHE_ENTRY, MLX5_IMR_KSM_CACHE_ENTRY, MAX_MR_CACHE_ENTRIES -- cgit v1.2.3-71-gd317 From a550ddfc543e250798048cf4eabe721cd85ac724 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 17 Aug 2017 15:52:33 +0300 Subject: IB/mlx5: Add support for multi underlay QP Set underlay QPN as part of flow rule when it's applicable. There is one root flow table in the NIC RX namespace and all the underlay QPs steer the traffic to this flow table. In order to prevent QP to get traffic which is not target to its underlay QP, we need to set the underlay QP number as part of the steering matching. Note: When multicast traffic is sent the QPN filtering is done by the firmware as some early step. Adding the QPN match on the flow table entry is wrong as by that time the target QPN holds the multicast address (e.g. FF(s)) and it won't match. Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 49 +++++++++++++++++++++++++++++++++------ include/linux/mlx5/mlx5_ifc.h | 8 +++++-- 2 files changed, 48 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 07789450ec42..8f7e46090f9a 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2125,7 +2125,7 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, * it won't fall into the multicast flow steering table and this rule * could steal other multicast packets. */ -static bool flow_is_multicast_only(struct ib_flow_attr *ib_attr) +static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr) { union ib_flow_spec *flow_spec; @@ -2337,10 +2337,31 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, return err ? ERR_PTR(err) : prio; } -static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, - struct mlx5_ib_flow_prio *ft_prio, - const struct ib_flow_attr *flow_attr, - struct mlx5_flow_destination *dst) +static void set_underlay_qp(struct mlx5_ib_dev *dev, + struct mlx5_flow_spec *spec, + u32 underlay_qpn) +{ + void *misc_params_c = MLX5_ADDR_OF(fte_match_param, + spec->match_criteria, + misc_parameters); + void *misc_params_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + + if (underlay_qpn && + MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, + ft_field_support.bth_dst_qp)) { + MLX5_SET(fte_match_set_misc, + misc_params_v, bth_dst_qp, underlay_qpn); + MLX5_SET(fte_match_set_misc, + misc_params_c, bth_dst_qp, 0xffffff); + } +} + +static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *ft_prio, + const struct ib_flow_attr *flow_attr, + struct mlx5_flow_destination *dst, + u32 underlay_qpn) { struct mlx5_flow_table *ft = ft_prio->flow_table; struct mlx5_ib_flow_handler *handler; @@ -2376,6 +2397,9 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, ib_flow += ((union ib_flow_spec *)ib_flow)->size; } + if (!flow_is_multicast_only(flow_attr)) + set_underlay_qp(dev, spec, underlay_qpn); + spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria); if (is_drop) { flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; @@ -2415,6 +2439,14 @@ free: return err ? ERR_PTR(err) : handler; } +static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *ft_prio, + const struct ib_flow_attr *flow_attr, + struct mlx5_flow_destination *dst) +{ + return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0); +} + static struct mlx5_ib_flow_handler *create_dont_trap_rule(struct mlx5_ib_dev *dev, struct mlx5_ib_flow_prio *ft_prio, struct ib_flow_attr *flow_attr, @@ -2551,6 +2583,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, struct mlx5_ib_flow_prio *ft_prio_tx = NULL; struct mlx5_ib_flow_prio *ft_prio; int err; + int underlay_qpn; if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) return ERR_PTR(-ENOMEM); @@ -2591,8 +2624,10 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, handler = create_dont_trap_rule(dev, ft_prio, flow_attr, dst); } else { - handler = create_flow_rule(dev, ft_prio, flow_attr, - dst); + underlay_qpn = (mqp->flags & MLX5_IB_QP_UNDERLAY) ? + mqp->underlay_qpn : 0; + handler = _create_flow_rule(dev, ft_prio, flow_attr, + dst, underlay_qpn); } } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 5bae70eb25af..6563500c85de 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -295,8 +295,10 @@ struct mlx5_ifc_flow_table_fields_supported_bits { u8 inner_tcp_dport[0x1]; u8 inner_tcp_flags[0x1]; u8 reserved_at_37[0x9]; + u8 reserved_at_40[0x1a]; + u8 bth_dst_qp[0x1]; - u8 reserved_at_40[0x40]; + u8 reserved_at_5b[0x25]; }; struct mlx5_ifc_flow_table_prop_layout_bits { @@ -432,7 +434,9 @@ struct mlx5_ifc_fte_match_set_misc_bits { u8 reserved_at_100[0xc]; u8 inner_ipv6_flow_label[0x14]; - u8 reserved_at_120[0xe0]; + u8 reserved_at_120[0x28]; + u8 bth_dst_qp[0x18]; + u8 reserved_at_160[0xa0]; }; struct mlx5_ifc_cmd_pas_bits { -- cgit v1.2.3-71-gd317 From 795b609c8b59f8f20fa9d72bf8b4ae3b8aa5582c Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Thu, 17 Aug 2017 15:52:34 +0300 Subject: IB/mlx5: Allow posting multi packet send WQEs if hardware supports Set the field to allow posting multi packet send WQEs if hardware supports this feature. This doesn't mean the send WQEs will be for multi packet unless the send WQE was prepared according to multi packet send WQE format. User space shall use flag MLX5_IB_ALLOW_MPW to check if hardware supports MPW and allows MPW in SQ context. Signed-off-by: Bodong Wang Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 5 +++-- drivers/infiniband/hw/mlx5/qp.c | 2 ++ include/linux/mlx5/mlx5_ifc.h | 2 +- include/uapi/rdma/mlx5-abi.h | 5 +++++ 4 files changed, 11 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 8f7e46090f9a..ba0a97d6f677 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -802,8 +802,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (field_avail(typeof(resp), mlx5_ib_support_multi_pkt_send_wqes, uhw->outlen)) { - resp.mlx5_ib_support_multi_pkt_send_wqes = - MLX5_CAP_ETH(mdev, multi_pkt_send_wqe); + if (MLX5_CAP_ETH(mdev, multi_pkt_send_wqe)) + resp.mlx5_ib_support_multi_pkt_send_wqes = + MLX5_IB_ALLOW_MPW; resp.response_length += sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes); } diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 656773196f27..d6df88a78d5e 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1083,6 +1083,8 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, sqc = MLX5_ADDR_OF(create_sq_in, in, ctx); MLX5_SET(sqc, sqc, flush_in_error_en, 1); + if (MLX5_CAP_ETH(dev->mdev, multi_pkt_send_wqe)) + MLX5_SET(sqc, sqc, allow_multi_pkt_send_wqe, 1); MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); MLX5_SET(sqc, sqc, user_index, MLX5_GET(qpc, qpc, user_index)); MLX5_SET(sqc, sqc, cqn, MLX5_GET(qpc, qpc, cqn_snd)); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 6563500c85de..6865e60ba473 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -2445,7 +2445,7 @@ struct mlx5_ifc_sqc_bits { u8 cd_master[0x1]; u8 fre[0x1]; u8 flush_in_error_en[0x1]; - u8 reserved_at_4[0x1]; + u8 allow_multi_pkt_send_wqe[0x1]; u8 min_wqe_inline_mode[0x3]; u8 state[0x4]; u8 reg_umr[0x1]; diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index 64d398e662cd..a61a512e5747 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -168,6 +168,11 @@ struct mlx5_packet_pacing_caps { __u32 reserved; }; +enum mlx5_ib_mpw_caps { + MPW_RESERVED = 1 << 0, + MLX5_IB_ALLOW_MPW = 1 << 1, +}; + enum mlx5_ib_sw_parsing_offloads { MLX5_IB_SW_PARSING = 1 << 0, MLX5_IB_SW_PARSING_CSUM = 1 << 1, -- cgit v1.2.3-71-gd317 From 050da902adde8faf6b1bef15ac4876ae145358f4 Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Thu, 17 Aug 2017 15:52:35 +0300 Subject: IB/mlx5: Report mlx5 enhanced multi packet WQE capability Expose enhanced multi packet WQE capability to user space through query_device by uhw. Signed-off-by: Bodong Wang Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 5 +++++ include/linux/mlx5/mlx5_ifc.h | 2 +- include/uapi/rdma/mlx5-abi.h | 1 + 3 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index ba0a97d6f677..62e6298810e7 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -805,6 +805,11 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (MLX5_CAP_ETH(mdev, multi_pkt_send_wqe)) resp.mlx5_ib_support_multi_pkt_send_wqes = MLX5_IB_ALLOW_MPW; + + if (MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe)) + resp.mlx5_ib_support_multi_pkt_send_wqes |= + MLX5_IB_SUPPORT_EMPW; + resp.response_length += sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes); } diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 6865e60ba473..4eff0b8a1482 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -604,7 +604,7 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 rss_ind_tbl_cap[0x4]; u8 reg_umr_sq[0x1]; u8 scatter_fcs[0x1]; - u8 reserved_at_1a[0x1]; + u8 enhanced_multi_pkt_send_wqe[0x1]; u8 tunnel_lso_const_out_ip_id[0x1]; u8 reserved_at_1c[0x2]; u8 tunnel_statless_gre[0x1]; diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index a61a512e5747..1791bf123ba9 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -171,6 +171,7 @@ struct mlx5_packet_pacing_caps { enum mlx5_ib_mpw_caps { MPW_RESERVED = 1 << 0, MLX5_IB_ALLOW_MPW = 1 << 1, + MLX5_IB_SUPPORT_EMPW = 1 << 2, }; enum mlx5_ib_sw_parsing_offloads { -- cgit v1.2.3-71-gd317 From 0cc3b0ec23ce4c69e1e890ed2b8d2fa932b14aad Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 27 Aug 2017 12:12:25 -0700 Subject: Clarify (and fix) MAX_LFS_FILESIZE macros We have a MAX_LFS_FILESIZE macro that is meant to be filled in by filesystems (and other IO targets) that know they are 64-bit clean and don't have any 32-bit limits in their IO path. It turns out that our 32-bit value for that limit was bogus. On 32-bit, the VM layer is limited by the page cache to only 32-bit index values, but our logic for that was confusing and actually wrong. We used to define that value to (((loff_t)PAGE_SIZE << (BITS_PER_LONG-1))-1) which is actually odd in several ways: it limits the index to 31 bits, and then it limits files so that they can't have data in that last byte of a page that has the highest 31-bit index (ie page index 0x7fffffff). Neither of those limitations make sense. The index is actually the full 32 bit unsigned value, and we can use that whole full page. So the maximum size of the file would logically be "PAGE_SIZE << BITS_PER_LONG". However, we do wan tto avoid the maximum index, because we have code that iterates over the page indexes, and we don't want that code to overflow. So the maximum size of a file on a 32-bit host should actually be one page less than the full 32-bit index. So the actual limit is ULONG_MAX << PAGE_SHIFT. That means that we will not actually be using the page of that last index (ULONG_MAX), but we can grow a file up to that limit. The wrong value of MAX_LFS_FILESIZE actually caused problems for Doug Nazar, who was still using a 32-bit host, but with a 9.7TB 2 x RAID5 volume. It turns out that our old MAX_LFS_FILESIZE was 8TiB (well, one byte less), but the actual true VM limit is one page less than 16TiB. This was invisible until commit c2a9737f45e2 ("vfs,mm: fix a dead loop in truncate_inode_pages_range()"), which started applying that MAX_LFS_FILESIZE limit to block devices too. NOTE! On 64-bit, the page index isn't a limiter at all, and the limit is actually just the offset type itself (loff_t), which is signed. But for clarity, on 64-bit, just use the maximum signed value, and don't make people have to count the number of 'f' characters in the hex constant. So just use LLONG_MAX for the 64-bit case. That was what the value had been before too, just written out as a hex constant. Fixes: c2a9737f45e2 ("vfs,mm: fix a dead loop in truncate_inode_pages_range()") Reported-and-tested-by: Doug Nazar Cc: Andreas Dilger Cc: Mark Fasheh Cc: Joel Becker Cc: Dave Kleikamp Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- include/linux/fs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 6e1fd5d21248..cbfe127bccf8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -907,9 +907,9 @@ static inline struct file *get_file(struct file *f) /* Page cache limit. The filesystems should put that into their s_maxbytes limits, otherwise bad things can happen in VM. */ #if BITS_PER_LONG==32 -#define MAX_LFS_FILESIZE (((loff_t)PAGE_SIZE << (BITS_PER_LONG-1))-1) +#define MAX_LFS_FILESIZE ((loff_t)ULONG_MAX << PAGE_SHIFT) #elif BITS_PER_LONG==64 -#define MAX_LFS_FILESIZE ((loff_t)0x7fffffffffffffffLL) +#define MAX_LFS_FILESIZE ((loff_t)LLONG_MAX) #endif #define FL_POSIX 1 -- cgit v1.2.3-71-gd317 From 604407890ecf624c2fb41013c82b22aade59b455 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 9 Aug 2017 11:32:11 -0700 Subject: dm: fix printk() rate limiting code Using the same rate limiting state for different kinds of messages is wrong because this can cause a high frequency message to suppress a report of a low frequency message. Hence use a unique rate limiting state per message type. Fixes: 71a16736a15e ("dm: use local printk ratelimit") Cc: stable@vger.kernel.org Signed-off-by: Bart Van Assche Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 10 ---------- include/linux/device-mapper.h | 41 ++++++++++++----------------------------- 2 files changed, 12 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 8298670757e9..d669fddd9290 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -27,16 +27,6 @@ #define DM_MSG_PREFIX "core" -#ifdef CONFIG_PRINTK -/* - * ratelimit state to be used in DMXXX_LIMIT(). - */ -DEFINE_RATELIMIT_STATE(dm_ratelimit_state, - DEFAULT_RATELIMIT_INTERVAL, - DEFAULT_RATELIMIT_BURST); -EXPORT_SYMBOL(dm_ratelimit_state); -#endif - /* * Cookies are numeric values sent with CHANGE and REMOVE * uevents while resuming, removing or renaming the device. diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 1473455d0341..4f2b3b2076c4 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -549,46 +549,29 @@ void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size); *---------------------------------------------------------------*/ #define DM_NAME "device-mapper" -#ifdef CONFIG_PRINTK -extern struct ratelimit_state dm_ratelimit_state; - -#define dm_ratelimit() __ratelimit(&dm_ratelimit_state) -#else -#define dm_ratelimit() 0 -#endif +#define DM_RATELIMIT(pr_func, fmt, ...) \ +do { \ + static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, \ + DEFAULT_RATELIMIT_BURST); \ + \ + if (__ratelimit(&rs)) \ + pr_func(DM_FMT(fmt), ##__VA_ARGS__); \ +} while (0) #define DM_FMT(fmt) DM_NAME ": " DM_MSG_PREFIX ": " fmt "\n" #define DMCRIT(fmt, ...) pr_crit(DM_FMT(fmt), ##__VA_ARGS__) #define DMERR(fmt, ...) pr_err(DM_FMT(fmt), ##__VA_ARGS__) -#define DMERR_LIMIT(fmt, ...) \ -do { \ - if (dm_ratelimit()) \ - DMERR(fmt, ##__VA_ARGS__); \ -} while (0) - +#define DMERR_LIMIT(fmt, ...) DM_RATELIMIT(pr_err, fmt, ##__VA_ARGS__) #define DMWARN(fmt, ...) pr_warn(DM_FMT(fmt), ##__VA_ARGS__) -#define DMWARN_LIMIT(fmt, ...) \ -do { \ - if (dm_ratelimit()) \ - DMWARN(fmt, ##__VA_ARGS__); \ -} while (0) - +#define DMWARN_LIMIT(fmt, ...) DM_RATELIMIT(pr_warn, fmt, ##__VA_ARGS__) #define DMINFO(fmt, ...) pr_info(DM_FMT(fmt), ##__VA_ARGS__) -#define DMINFO_LIMIT(fmt, ...) \ -do { \ - if (dm_ratelimit()) \ - DMINFO(fmt, ##__VA_ARGS__); \ -} while (0) +#define DMINFO_LIMIT(fmt, ...) DM_RATELIMIT(pr_info, fmt, ##__VA_ARGS__) #ifdef CONFIG_DM_DEBUG #define DMDEBUG(fmt, ...) printk(KERN_DEBUG DM_FMT(fmt), ##__VA_ARGS__) -#define DMDEBUG_LIMIT(fmt, ...) \ -do { \ - if (dm_ratelimit()) \ - DMDEBUG(fmt, ##__VA_ARGS__); \ -} while (0) +#define DMDEBUG_LIMIT(fmt, ...) DM_RATELIMIT(pr_debug, fmt, ##__VA_ARGS__) #else #define DMDEBUG(fmt, ...) no_printk(fmt, ##__VA_ARGS__) #define DMDEBUG_LIMIT(fmt, ...) no_printk(fmt, ##__VA_ARGS__) -- cgit v1.2.3-71-gd317 From 35f0b6a779b8b7a98faefd7c1c660b4dac9a5c26 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 28 Aug 2017 08:28:08 +0200 Subject: libata: quirk read log on no-name M.2 SSD Ido reported that reading the log page on his systems fails, so quirk it as it won't support ZBC or security protocols. Signed-off-by: Christoph Hellwig Reported-by: Ido Schimmel Tested-by: Ido Schimmel Signed-off-by: Tejun Heo --- drivers/ata/libata-core.c | 4 ++++ include/linux/libata.h | 1 + 2 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index fa7dd4394c02..697f5f896b19 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2079,6 +2079,8 @@ unsigned int ata_read_log_page(struct ata_device *dev, u8 log, */ if (ap_flags & ATA_FLAG_NO_LOG_PAGE) return AC_ERR_DEV; + if (dev->horkage & ATA_HORKAGE_NO_LOG_PAGE) + return AC_ERR_DEV; retry: ata_tf_init(dev, &tf); @@ -4578,6 +4580,8 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "WDC WD3000JD-*", NULL, ATA_HORKAGE_WD_BROKEN_LPM }, { "WDC WD3200JD-*", NULL, ATA_HORKAGE_WD_BROKEN_LPM }, + + { "M.2 (S42) 3ME3", NULL, ATA_HORKAGE_NO_LOG_PAGE }, /* End Marker */ { } }; diff --git a/include/linux/libata.h b/include/linux/libata.h index 931c32f1f18d..9e927ae7fced 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -438,6 +438,7 @@ enum { ATA_HORKAGE_NO_DMA_LOG = (1 << 23), /* don't use DMA for log read */ ATA_HORKAGE_NOTRIM = (1 << 24), /* don't use TRIM */ ATA_HORKAGE_MAX_SEC_1024 = (1 << 25), /* Limit max sects to 1024 */ + ATA_HORKAGE_NO_LOG_PAGE = (1 << 26), /* Doesn't like Get Log Page */ /* DMA mask for user DMA control: User visible values; DO NOT renumber */ -- cgit v1.2.3-71-gd317 From 75e8387685f6c65feb195a4556110b58f852b848 Mon Sep 17 00:00:00 2001 From: Zhou Chengming Date: Fri, 25 Aug 2017 21:49:37 +0800 Subject: perf/ftrace: Fix double traces of perf on ftrace:function When running perf on the ftrace:function tracepoint, there is a bug which can be reproduced by: perf record -e ftrace:function -a sleep 20 & perf record -e ftrace:function ls perf script ls 10304 [005] 171.853235: ftrace:function: perf_output_begin ls 10304 [005] 171.853237: ftrace:function: perf_output_begin ls 10304 [005] 171.853239: ftrace:function: task_tgid_nr_ns ls 10304 [005] 171.853240: ftrace:function: task_tgid_nr_ns ls 10304 [005] 171.853242: ftrace:function: __task_pid_nr_ns ls 10304 [005] 171.853244: ftrace:function: __task_pid_nr_ns We can see that all the function traces are doubled. The problem is caused by the inconsistency of the register function perf_ftrace_event_register() with the probe function perf_ftrace_function_call(). The former registers one probe for every perf_event. And the latter handles all perf_events on the current cpu. So when two perf_events on the current cpu, the traces of them will be doubled. So this patch adds an extra parameter "event" for perf_tp_event, only send sample data to this event when it's not NULL. Signed-off-by: Zhou Chengming Reviewed-by: Jiri Olsa Acked-by: Steven Rostedt (VMware) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: acme@kernel.org Cc: alexander.shishkin@linux.intel.com Cc: huawei.libin@huawei.com Link: http://lkml.kernel.org/r/1503668977-12526-1-git-send-email-zhouchengming1@huawei.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 2 +- include/linux/trace_events.h | 4 ++-- kernel/events/core.c | 13 +++++++++---- kernel/trace/trace_event_perf.c | 4 +++- kernel/trace/trace_kprobe.c | 4 ++-- kernel/trace/trace_syscalls.c | 4 ++-- kernel/trace/trace_uprobe.c | 2 +- 7 files changed, 20 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index b14095bcf4bb..c00cd4b02f32 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1201,7 +1201,7 @@ extern void perf_event_init(void); extern void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size, struct pt_regs *regs, struct hlist_head *head, int rctx, - struct task_struct *task); + struct task_struct *task, struct perf_event *event); extern void perf_bp_event(struct perf_event *event, void *data); #ifndef perf_misc_flags diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 536c80ff7ad9..5012b524283d 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -508,9 +508,9 @@ void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx, static inline void perf_trace_buf_submit(void *raw_data, int size, int rctx, u16 type, u64 count, struct pt_regs *regs, void *head, - struct task_struct *task) + struct task_struct *task, struct perf_event *event) { - perf_tp_event(type, count, raw_data, size, regs, head, rctx, task); + perf_tp_event(type, count, raw_data, size, regs, head, rctx, task, event); } #endif diff --git a/kernel/events/core.c b/kernel/events/core.c index ce131d25622a..03ac9c8b02fb 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7906,16 +7906,15 @@ void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx, } } perf_tp_event(call->event.type, count, raw_data, size, regs, head, - rctx, task); + rctx, task, NULL); } EXPORT_SYMBOL_GPL(perf_trace_run_bpf_submit); void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size, struct pt_regs *regs, struct hlist_head *head, int rctx, - struct task_struct *task) + struct task_struct *task, struct perf_event *event) { struct perf_sample_data data; - struct perf_event *event; struct perf_raw_record raw = { .frag = { @@ -7929,9 +7928,15 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size, perf_trace_buf_update(record, event_type); - hlist_for_each_entry_rcu(event, head, hlist_entry) { + /* Use the given event instead of the hlist */ + if (event) { if (perf_tp_event_match(event, &data, regs)) perf_swevent_event(event, count, &data, regs); + } else { + hlist_for_each_entry_rcu(event, head, hlist_entry) { + if (perf_tp_event_match(event, &data, regs)) + perf_swevent_event(event, count, &data, regs); + } } /* diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 562fa69df5d3..13ba2d3f6a91 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -306,6 +306,7 @@ static void perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ops, struct pt_regs *pt_regs) { + struct perf_event *event; struct ftrace_entry *entry; struct hlist_head *head; struct pt_regs regs; @@ -329,8 +330,9 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip, entry->ip = ip; entry->parent_ip = parent_ip; + event = container_of(ops, struct perf_event, ftrace_ops); perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, TRACE_FN, - 1, ®s, head, NULL); + 1, ®s, head, NULL, event); #undef ENTRY_SIZE } diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index c9b5aa10fbf9..8a907e12b6b9 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1200,7 +1200,7 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) memset(&entry[1], 0, dsize); store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize); perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs, - head, NULL); + head, NULL, NULL); } NOKPROBE_SYMBOL(kprobe_perf_func); @@ -1236,7 +1236,7 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, entry->ret_ip = (unsigned long)ri->ret_addr; store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize); perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs, - head, NULL); + head, NULL, NULL); } NOKPROBE_SYMBOL(kretprobe_perf_func); #endif /* CONFIG_PERF_EVENTS */ diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 5e10395da88e..74d9a86eccc0 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -596,7 +596,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) (unsigned long *)&rec->args); perf_trace_buf_submit(rec, size, rctx, sys_data->enter_event->event.type, 1, regs, - head, NULL); + head, NULL, NULL); } static int perf_sysenter_enable(struct trace_event_call *call) @@ -667,7 +667,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) rec->nr = syscall_nr; rec->ret = syscall_get_return_value(current, regs); perf_trace_buf_submit(rec, size, rctx, sys_data->exit_event->event.type, - 1, regs, head, NULL); + 1, regs, head, NULL, NULL); } static int perf_sysexit_enable(struct trace_event_call *call) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index a7581fec9681..4525e0271a53 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -1156,7 +1156,7 @@ static void __uprobe_perf_func(struct trace_uprobe *tu, } perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs, - head, NULL); + head, NULL, NULL); out: preempt_enable(); } -- cgit v1.2.3-71-gd317 From 6e44636aeab19259f804c8abca57a95ddc01df66 Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Tue, 15 Aug 2017 11:59:02 +0300 Subject: net/mlx5: Update HW layout definitions * add offload_type field to mlx5_ifc_qpc_bits * update mlx5_ifc_xrqc_bits layout Signed-off-by: Artemy Kovalyov Reviewed-by: Yossi Itigin Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/linux/mlx5/mlx5_ifc.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 4eff0b8a1482..e27283ab3667 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -2022,6 +2022,10 @@ enum { MLX5_QPC_PM_STATE_MIGRATED = 0x3, }; +enum { + MLX5_QPC_OFFLOAD_TYPE_RNDV = 0x1, +}; + enum { MLX5_QPC_END_PADDING_MODE_SCATTER_AS_IS = 0x0, MLX5_QPC_END_PADDING_MODE_PAD_TO_CACHE_LINE_ALIGNMENT = 0x1, @@ -2065,7 +2069,8 @@ struct mlx5_ifc_qpc_bits { u8 st[0x8]; u8 reserved_at_10[0x3]; u8 pm_state[0x2]; - u8 reserved_at_15[0x7]; + u8 reserved_at_15[0x3]; + u8 offload_type[0x4]; u8 end_padding_mode[0x2]; u8 reserved_at_1e[0x2]; @@ -3010,7 +3015,7 @@ struct mlx5_ifc_xrqc_bits { struct mlx5_ifc_tag_matching_topology_context_bits tag_matching_topology_context; - u8 reserved_at_180[0x880]; + u8 reserved_at_180[0x280]; struct mlx5_ifc_wq_bits wq; }; -- cgit v1.2.3-71-gd317 From 5b3ec3fcb6bbe081279c73fb574af8c72f14cea0 Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Thu, 17 Aug 2017 15:52:10 +0300 Subject: net/mlx5: Add XRQ support Add support to new XRQ(eXtended shared Receive Queue) hardware object. It supports SRQ semantics with addition of extended receive buffers topologies and offloads. Currently supports tag matching topology and rendezvouz offload. Signed-off-by: Artemy Kovalyov Reviewed-by: Yossi Itigin Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/net/ethernet/mellanox/mlx5/core/srq.c | 150 ++++++++++++++++++++++++-- include/linux/mlx5/driver.h | 1 + include/linux/mlx5/srq.h | 5 + 3 files changed, 146 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c index f774de6f5fcb..7673da04efa4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/srq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/srq.c @@ -435,16 +435,128 @@ out: return err; } +static int create_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *in) +{ + u32 create_out[MLX5_ST_SZ_DW(create_xrq_out)] = {0}; + void *create_in; + void *xrqc; + void *wq; + int pas_size; + int inlen; + int err; + + pas_size = get_pas_size(in); + inlen = MLX5_ST_SZ_BYTES(create_xrq_in) + pas_size; + create_in = kvzalloc(inlen, GFP_KERNEL); + if (!create_in) + return -ENOMEM; + + xrqc = MLX5_ADDR_OF(create_xrq_in, create_in, xrq_context); + wq = MLX5_ADDR_OF(xrqc, xrqc, wq); + + set_wq(wq, in); + memcpy(MLX5_ADDR_OF(xrqc, xrqc, wq.pas), in->pas, pas_size); + + if (in->type == IB_SRQT_TM) { + MLX5_SET(xrqc, xrqc, topology, MLX5_XRQC_TOPOLOGY_TAG_MATCHING); + if (in->flags & MLX5_SRQ_FLAG_RNDV) + MLX5_SET(xrqc, xrqc, offload, MLX5_XRQC_OFFLOAD_RNDV); + MLX5_SET(xrqc, xrqc, + tag_matching_topology_context.log_matching_list_sz, + in->tm_log_list_size); + } + MLX5_SET(xrqc, xrqc, user_index, in->user_index); + MLX5_SET(xrqc, xrqc, cqn, in->cqn); + MLX5_SET(create_xrq_in, create_in, opcode, MLX5_CMD_OP_CREATE_XRQ); + err = mlx5_cmd_exec(dev, create_in, inlen, create_out, + sizeof(create_out)); + kvfree(create_in); + if (!err) + srq->srqn = MLX5_GET(create_xrq_out, create_out, xrqn); + + return err; +} + +static int destroy_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq) +{ + u32 in[MLX5_ST_SZ_DW(destroy_xrq_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(destroy_xrq_out)] = {0}; + + MLX5_SET(destroy_xrq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRQ); + MLX5_SET(destroy_xrq_in, in, xrqn, srq->srqn); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +static int arm_xrq_cmd(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq, + u16 lwm) +{ + u32 out[MLX5_ST_SZ_DW(arm_rq_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(arm_rq_in)] = {0}; + + MLX5_SET(arm_rq_in, in, opcode, MLX5_CMD_OP_ARM_RQ); + MLX5_SET(arm_rq_in, in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_XRQ); + MLX5_SET(arm_rq_in, in, srq_number, srq->srqn); + MLX5_SET(arm_rq_in, in, lwm, lwm); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +static int query_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *out) +{ + u32 in[MLX5_ST_SZ_DW(query_xrq_in)] = {0}; + u32 *xrq_out; + int outlen = MLX5_ST_SZ_BYTES(query_xrq_out); + void *xrqc; + int err; + + xrq_out = kvzalloc(outlen, GFP_KERNEL); + if (!xrq_out) + return -ENOMEM; + + MLX5_SET(query_xrq_in, in, opcode, MLX5_CMD_OP_QUERY_XRQ); + MLX5_SET(query_xrq_in, in, xrqn, srq->srqn); + + err = mlx5_cmd_exec(dev, in, sizeof(in), xrq_out, outlen); + if (err) + goto out; + + xrqc = MLX5_ADDR_OF(query_xrq_out, xrq_out, xrq_context); + get_wq(MLX5_ADDR_OF(xrqc, xrqc, wq), out); + if (MLX5_GET(xrqc, xrqc, state) != MLX5_XRQC_STATE_GOOD) + out->flags |= MLX5_SRQ_FLAG_ERR; + out->tm_next_tag = + MLX5_GET(xrqc, xrqc, + tag_matching_topology_context.append_next_index); + out->tm_hw_phase_cnt = + MLX5_GET(xrqc, xrqc, + tag_matching_topology_context.hw_phase_cnt); + out->tm_sw_phase_cnt = + MLX5_GET(xrqc, xrqc, + tag_matching_topology_context.sw_phase_cnt); + +out: + kvfree(xrq_out); + return err; +} + static int create_srq_split(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, struct mlx5_srq_attr *in) { if (!dev->issi) return create_srq_cmd(dev, srq, in); - else if (srq->common.res == MLX5_RES_XSRQ) + switch (srq->common.res) { + case MLX5_RES_XSRQ: return create_xrc_srq_cmd(dev, srq, in); - else + case MLX5_RES_XRQ: + return create_xrq_cmd(dev, srq, in); + default: return create_rmp_cmd(dev, srq, in); + } } static int destroy_srq_split(struct mlx5_core_dev *dev, @@ -452,10 +564,14 @@ static int destroy_srq_split(struct mlx5_core_dev *dev, { if (!dev->issi) return destroy_srq_cmd(dev, srq); - else if (srq->common.res == MLX5_RES_XSRQ) + switch (srq->common.res) { + case MLX5_RES_XSRQ: return destroy_xrc_srq_cmd(dev, srq); - else + case MLX5_RES_XRQ: + return destroy_xrq_cmd(dev, srq); + default: return destroy_rmp_cmd(dev, srq); + } } int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, @@ -464,10 +580,16 @@ int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, int err; struct mlx5_srq_table *table = &dev->priv.srq_table; - if (in->type == IB_SRQT_XRC) + switch (in->type) { + case IB_SRQT_XRC: srq->common.res = MLX5_RES_XSRQ; - else + break; + case IB_SRQT_TM: + srq->common.res = MLX5_RES_XRQ; + break; + default: srq->common.res = MLX5_RES_SRQ; + } err = create_srq_split(dev, srq, in); if (err) @@ -528,10 +650,14 @@ int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, { if (!dev->issi) return query_srq_cmd(dev, srq, out); - else if (srq->common.res == MLX5_RES_XSRQ) + switch (srq->common.res) { + case MLX5_RES_XSRQ: return query_xrc_srq_cmd(dev, srq, out); - else + case MLX5_RES_XRQ: + return query_xrq_cmd(dev, srq, out); + default: return query_rmp_cmd(dev, srq, out); + } } EXPORT_SYMBOL(mlx5_core_query_srq); @@ -540,10 +666,14 @@ int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, { if (!dev->issi) return arm_srq_cmd(dev, srq, lwm, is_srq); - else if (srq->common.res == MLX5_RES_XSRQ) + switch (srq->common.res) { + case MLX5_RES_XSRQ: return arm_xrc_srq_cmd(dev, srq, lwm); - else + case MLX5_RES_XRQ: + return arm_xrq_cmd(dev, srq, lwm); + default: return arm_rmp_cmd(dev, srq, lwm); + } } EXPORT_SYMBOL(mlx5_core_arm_srq); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 99d88624ad07..c33e6f7a1afb 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -418,6 +418,7 @@ enum mlx5_res_type { MLX5_RES_SQ = MLX5_EVENT_QUEUE_TYPE_SQ, MLX5_RES_SRQ = 3, MLX5_RES_XSRQ = 4, + MLX5_RES_XRQ = 5, }; struct mlx5_core_rsc_common { diff --git a/include/linux/mlx5/srq.h b/include/linux/mlx5/srq.h index 1cde0fd53f90..24ff23e27c8a 100644 --- a/include/linux/mlx5/srq.h +++ b/include/linux/mlx5/srq.h @@ -38,6 +38,7 @@ enum { MLX5_SRQ_FLAG_ERR = (1 << 0), MLX5_SRQ_FLAG_WQ_SIG = (1 << 1), + MLX5_SRQ_FLAG_RNDV = (1 << 2), }; struct mlx5_srq_attr { @@ -56,6 +57,10 @@ struct mlx5_srq_attr { u32 user_index; u64 db_record; __be64 *pas; + u32 tm_log_list_size; + u32 tm_next_tag; + u32 tm_hw_phase_cnt; + u32 tm_sw_phase_cnt; }; struct mlx5_core_dev; -- cgit v1.2.3-71-gd317 From e8f11db956aa09c1618051a7aaf367d6810d8d8c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 29 Aug 2017 14:42:06 +0200 Subject: libata: check for trusted computing in IDENTIFY DEVICE data ATA-8 and later mirrors the TRUSTED COMPUTING SUPPORTED bit in word 48 of the IDENTIFY DEVICE data. Check this before issuing a READ LOG PAGE command to avoid issues with buggy devices. The only downside is that we can't support Security Send / Receive for a device with an older revision due to the conflicting use of this field in earlier specifications. tj: The reason we need this is because some devices which don't support READ LOG PAGE lock up after getting issued that command. Signed-off-by: Christoph Hellwig Tested-by: David Ahern Signed-off-by: Tejun Heo --- drivers/ata/libata-core.c | 3 +++ include/linux/ata.h | 10 +++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 697f5f896b19..ca57b03ab950 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2413,6 +2413,9 @@ static void ata_dev_config_trusted(struct ata_device *dev) u64 trusted_cap; unsigned int err; + if (!ata_id_has_trusted(dev->id)) + return; + if (!ata_identify_page_supported(dev, ATA_LOG_SECURITY)) { ata_dev_warn(dev, "Security Log not supported\n"); diff --git a/include/linux/ata.h b/include/linux/ata.h index e65ae4b2ed48..c7a353825450 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -60,7 +60,8 @@ enum { ATA_ID_FW_REV = 23, ATA_ID_PROD = 27, ATA_ID_MAX_MULTSECT = 47, - ATA_ID_DWORD_IO = 48, + ATA_ID_DWORD_IO = 48, /* before ATA-8 */ + ATA_ID_TRUSTED = 48, /* ATA-8 and later */ ATA_ID_CAPABILITY = 49, ATA_ID_OLD_PIO_MODES = 51, ATA_ID_OLD_DMA_MODES = 52, @@ -889,6 +890,13 @@ static inline bool ata_id_has_dword_io(const u16 *id) return id[ATA_ID_DWORD_IO] & (1 << 0); } +static inline bool ata_id_has_trusted(const u16 *id) +{ + if (ata_id_major_version(id) <= 7) + return false; + return id[ATA_ID_TRUSTED] & (1 << 0); +} + static inline bool ata_id_has_unload(const u16 *id) { if (ata_id_major_version(id) >= 7 && -- cgit v1.2.3-71-gd317 From 2aca392398384f8a2f339f4c661a1699f4f2e2eb Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 29 Aug 2017 08:36:58 -0700 Subject: Revert "libata: quirk read log on no-name M.2 SSD" This reverts commit 35f0b6a779b8b7a98faefd7c1c660b4dac9a5c26. We now conditionalize issuing of READ LOG PAGE on the TRUSTED COMPUTING SUPPORTED bit in the identity data and this shouldn't be necessary. Signed-off-by: Tejun Heo --- drivers/ata/libata-core.c | 4 ---- include/linux/libata.h | 1 - 2 files changed, 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index ca57b03ab950..1945a8ea2099 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2079,8 +2079,6 @@ unsigned int ata_read_log_page(struct ata_device *dev, u8 log, */ if (ap_flags & ATA_FLAG_NO_LOG_PAGE) return AC_ERR_DEV; - if (dev->horkage & ATA_HORKAGE_NO_LOG_PAGE) - return AC_ERR_DEV; retry: ata_tf_init(dev, &tf); @@ -4583,8 +4581,6 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "WDC WD3000JD-*", NULL, ATA_HORKAGE_WD_BROKEN_LPM }, { "WDC WD3200JD-*", NULL, ATA_HORKAGE_WD_BROKEN_LPM }, - - { "M.2 (S42) 3ME3", NULL, ATA_HORKAGE_NO_LOG_PAGE }, /* End Marker */ { } }; diff --git a/include/linux/libata.h b/include/linux/libata.h index 9e927ae7fced..931c32f1f18d 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -438,7 +438,6 @@ enum { ATA_HORKAGE_NO_DMA_LOG = (1 << 23), /* don't use DMA for log read */ ATA_HORKAGE_NOTRIM = (1 << 24), /* don't use TRIM */ ATA_HORKAGE_MAX_SEC_1024 = (1 << 25), /* Limit max sects to 1024 */ - ATA_HORKAGE_NO_LOG_PAGE = (1 << 26), /* Doesn't like Get Log Page */ /* DMA mask for user DMA control: User visible values; DO NOT renumber */ -- cgit v1.2.3-71-gd317 From 223694b9ae8bfba99f3528d49d07a740af6ff95a Mon Sep 17 00:00:00 2001 From: Changpeng Liu Date: Thu, 31 Aug 2017 11:22:49 +0800 Subject: nvme: fix the definition of the doorbell buffer config support bit NVMe 1.3 specification defines the Optional Admin Command Support feature flags, bit 8 set to '1' then the controller supports the Doorbell Buffer Config command. Bit 7 is used for Virtualization Mangement command. Signed-off-by: Changpeng Liu Reviewed-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Fixes: f9f38e33 ("nvme: improve performance for virtual NVMe devices") Cc: stable@vger.kernel.org --- include/linux/nvme.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 25d8225dbd04..8efff888bd9b 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -254,7 +254,7 @@ enum { NVME_CTRL_VWC_PRESENT = 1 << 0, NVME_CTRL_OACS_SEC_SUPP = 1 << 0, NVME_CTRL_OACS_DIRECTIVES = 1 << 5, - NVME_CTRL_OACS_DBBUF_SUPP = 1 << 7, + NVME_CTRL_OACS_DBBUF_SUPP = 1 << 8, }; struct nvme_lbaf { -- cgit v1.2.3-71-gd317 From b3cb5388499c5e219324bfe7da2e46cbad82bfcf Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Tue, 8 Aug 2017 13:17:00 -0500 Subject: net/mlx5: Skip mlx5_unload_one if mlx5_load_one fails There is an issue where the firmware fails during mlx5_load_one, the health_care timer detects the issue and schedules a health_care call. Then the mlx5_load_one detects the issue, cleans up and quits. Then the health_care starts and calls mlx5_unload_one to clean up the resources that no longer exist and causes kernel panic. The root cause is that the bit MLX5_INTERFACE_STATE_DOWN is not set after mlx5_load_one fails. The solution is removing the bit MLX5_INTERFACE_STATE_DOWN and quit mlx5_unload_one if the bit MLX5_INTERFACE_STATE_UP is not set. The bit MLX5_INTERFACE_STATE_DOWN is redundant and we can use MLX5_INTERFACE_STATE_UP instead. Fixes: 5fc7197d3a25 ("net/mlx5: Add pci shutdown callback") Signed-off-by: Huy Nguyen Reviewed-by: Daniel Jurgens Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 4 +--- include/linux/mlx5/driver.h | 5 ++--- 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index c065132b956d..4cdb414aa2d5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1186,7 +1186,6 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, } } - clear_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state); set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); out: mutex_unlock(&dev->intf_state_mutex); @@ -1261,7 +1260,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, mlx5_drain_health_recovery(dev); mutex_lock(&dev->intf_state_mutex); - if (test_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state)) { + if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n", __func__); if (cleanup) @@ -1270,7 +1269,6 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, } clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); - set_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state); if (mlx5_device_registered(dev)) mlx5_detach_device(dev); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index df6ce59a1f95..918f5e644506 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -673,9 +673,8 @@ enum mlx5_device_state { }; enum mlx5_interface_state { - MLX5_INTERFACE_STATE_DOWN = BIT(0), - MLX5_INTERFACE_STATE_UP = BIT(1), - MLX5_INTERFACE_STATE_SHUTDOWN = BIT(2), + MLX5_INTERFACE_STATE_UP = BIT(0), + MLX5_INTERFACE_STATE_SHUTDOWN = BIT(1), }; enum mlx5_pci_status { -- cgit v1.2.3-71-gd317 From 10a8d00707082955b177164d4b4e758ffcbd4017 Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Wed, 9 Aug 2017 10:03:40 -0500 Subject: net/mlx5: Remove the flag MLX5_INTERFACE_STATE_SHUTDOWN MLX5_INTERFACE_STATE_SHUTDOWN is not used in the code. Fixes: 5fc7197d3a25 ("net/mlx5: Add pci shutdown callback") Signed-off-by: Huy Nguyen Reviewed-by: Daniel Jurgens Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 -- include/linux/mlx5/driver.h | 1 - 2 files changed, 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 4cdb414aa2d5..16885827367b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1563,8 +1563,6 @@ static void shutdown(struct pci_dev *pdev) int err; dev_info(&pdev->dev, "Shutdown was called\n"); - /* Notify mlx5 clients that the kernel is being shut down */ - set_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &dev->intf_state); err = mlx5_try_fast_unload(dev); if (err) mlx5_unload_one(dev, priv, false); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 918f5e644506..205d82d4c468 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -674,7 +674,6 @@ enum mlx5_device_state { enum mlx5_interface_state { MLX5_INTERFACE_STATE_UP = BIT(0), - MLX5_INTERFACE_STATE_SHUTDOWN = BIT(1), }; enum mlx5_pci_status { -- cgit v1.2.3-71-gd317 From a27bfcab5c1c3a0df61b68e85fc5e4cade83559f Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 31 Aug 2017 09:47:22 -0600 Subject: genalloc: Fix an incorrect kerneldoc comment The kerneldoc comment for the genpool_algo_t typedef was incomplete and incorrectly formatted, leading to a raft of warnings during the docs build. Fix it appropriately. Signed-off-by: Jonathan Corbet --- include/linux/genalloc.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h index 29d4385903d4..6dfec4d638df 100644 --- a/include/linux/genalloc.h +++ b/include/linux/genalloc.h @@ -38,12 +38,13 @@ struct device_node; struct gen_pool; /** - * Allocation callback function type definition + * typedef genpool_algo_t: Allocation callback function type definition * @map: Pointer to bitmap * @size: The bitmap size in bits * @start: The bitnumber to start searching at * @nr: The number of zeroed bits we're looking for - * @data: optional additional data used by @genpool_algo_t + * @data: optional additional data used by the callback + * @pool: the pool being allocated from */ typedef unsigned long (*genpool_algo_t)(unsigned long *map, unsigned long size, -- cgit v1.2.3-71-gd317 From 4f59c718521a0f00b6589da6b8fcea2dc296026d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 8 Jul 2017 11:40:39 -0400 Subject: teach SYSCALL_DEFINE/COMPAT_SYSCALL_DEFINE to handle __bitwise arguments Signed-off-by: Al Viro --- arch/s390/include/asm/compat.h | 5 +++-- include/linux/compat.h | 2 +- include/linux/syscalls.h | 9 +++++---- 3 files changed, 9 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h index b9300f8aee10..07a82bc933a7 100644 --- a/arch/s390/include/asm/compat.h +++ b/arch/s390/include/asm/compat.h @@ -8,11 +8,12 @@ #include #include -#define __TYPE_IS_PTR(t) (!__builtin_types_compatible_p(typeof(0?(t)0:0ULL), u64)) +#define __TYPE_IS_PTR(t) (!__builtin_types_compatible_p( \ + typeof(0?(__force t)0:0ULL), u64)) #define __SC_DELOUSE(t,v) ({ \ BUILD_BUG_ON(sizeof(t) > 4 && !__TYPE_IS_PTR(t)); \ - (t)(__TYPE_IS_PTR(t) ? ((v) & 0x7fffffff) : (v)); \ + (__force t)(__TYPE_IS_PTR(t) ? ((v) & 0x7fffffff) : (v)); \ }) #define PSW32_MASK_PER 0x40000000UL diff --git a/include/linux/compat.h b/include/linux/compat.h index 5a6a109b4a50..e5d3fbe24f7d 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -27,7 +27,7 @@ #endif #ifndef __SC_DELOUSE -#define __SC_DELOUSE(t,v) ((t)(unsigned long)(v)) +#define __SC_DELOUSE(t,v) ((__force t)(unsigned long)(v)) #endif #define COMPAT_SYSCALL_DEFINE0(name) \ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 3cb15ea48aee..0bc1d2e8cc17 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -100,11 +100,12 @@ union bpf_attr; #define __MAP(n,...) __MAP##n(__VA_ARGS__) #define __SC_DECL(t, a) t a -#define __TYPE_IS_L(t) (__same_type((t)0, 0L)) -#define __TYPE_IS_UL(t) (__same_type((t)0, 0UL)) -#define __TYPE_IS_LL(t) (__same_type((t)0, 0LL) || __same_type((t)0, 0ULL)) +#define __TYPE_AS(t, v) __same_type((__force t)0, v) +#define __TYPE_IS_L(t) (__TYPE_AS(t, 0L)) +#define __TYPE_IS_UL(t) (__TYPE_AS(t, 0UL)) +#define __TYPE_IS_LL(t) (__TYPE_AS(t, 0LL) || __TYPE_AS(t, 0ULL)) #define __SC_LONG(t, a) __typeof(__builtin_choose_expr(__TYPE_IS_LL(t), 0LL, 0L)) a -#define __SC_CAST(t, a) (t) a +#define __SC_CAST(t, a) (__force t) a #define __SC_ARGS(t, a) a #define __SC_TEST(t, a) (void)BUILD_BUG_ON_ZERO(!__TYPE_IS_LL(t) && sizeof(t) > sizeof(long)) -- cgit v1.2.3-71-gd317 From ddef7ed2b5cbafae692d1d580bb5a07808926a9c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 6 Jul 2017 18:58:37 +0200 Subject: annotate RWF_... flags [AV: added missing annotations in syscalls.h/compat.h] Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/nfsd/vfs.c | 2 +- fs/read_write.c | 50 ++++++++++++++++++++++---------------------- include/linux/compat.h | 16 ++++++++++++-- include/linux/fs.h | 12 ++++++----- include/linux/syscalls.h | 4 ++-- include/uapi/linux/aio_abi.h | 21 ++++++++++--------- include/uapi/linux/fs.h | 28 ++++++++++++++++++------- 7 files changed, 80 insertions(+), 53 deletions(-) (limited to 'include/linux') diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 38d0383dc7f9..bc69d40c4e8b 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -969,7 +969,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, int use_wgather; loff_t pos = offset; unsigned int pflags = current->flags; - int flags = 0; + rwf_t flags = 0; if (test_bit(RQ_LOCAL, &rqstp->rq_flags)) /* diff --git a/fs/read_write.c b/fs/read_write.c index 0cc7033aa413..61b58c7b6531 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -33,7 +33,7 @@ const struct file_operations generic_ro_fops = { EXPORT_SYMBOL(generic_ro_fops); -static inline int unsigned_offsets(struct file *file) +static inline bool unsigned_offsets(struct file *file) { return file->f_mode & FMODE_UNSIGNED_OFFSET; } @@ -633,7 +633,7 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to) EXPORT_SYMBOL(iov_shorten); static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, - loff_t *ppos, int type, int flags) + loff_t *ppos, int type, rwf_t flags) { struct kiocb kiocb; ssize_t ret; @@ -655,7 +655,7 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, /* Do it by hand, with file-ops */ static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter, - loff_t *ppos, int type, int flags) + loff_t *ppos, int type, rwf_t flags) { ssize_t ret = 0; @@ -871,7 +871,7 @@ out: #endif static ssize_t do_iter_read(struct file *file, struct iov_iter *iter, - loff_t *pos, int flags) + loff_t *pos, rwf_t flags) { size_t tot_len; ssize_t ret = 0; @@ -899,7 +899,7 @@ out: } ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos, - int flags) + rwf_t flags) { if (!file->f_op->read_iter) return -EINVAL; @@ -908,7 +908,7 @@ ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos, EXPORT_SYMBOL(vfs_iter_read); static ssize_t do_iter_write(struct file *file, struct iov_iter *iter, - loff_t *pos, int flags) + loff_t *pos, rwf_t flags) { size_t tot_len; ssize_t ret = 0; @@ -935,7 +935,7 @@ static ssize_t do_iter_write(struct file *file, struct iov_iter *iter, } ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos, - int flags) + rwf_t flags) { if (!file->f_op->write_iter) return -EINVAL; @@ -944,7 +944,7 @@ ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos, EXPORT_SYMBOL(vfs_iter_write); ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, - unsigned long vlen, loff_t *pos, int flags) + unsigned long vlen, loff_t *pos, rwf_t flags) { struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov = iovstack; @@ -962,7 +962,7 @@ ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, EXPORT_SYMBOL(vfs_readv); ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, - unsigned long vlen, loff_t *pos, int flags) + unsigned long vlen, loff_t *pos, rwf_t flags) { struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov = iovstack; @@ -981,7 +981,7 @@ ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, EXPORT_SYMBOL(vfs_writev); static ssize_t do_readv(unsigned long fd, const struct iovec __user *vec, - unsigned long vlen, int flags) + unsigned long vlen, rwf_t flags) { struct fd f = fdget_pos(fd); ssize_t ret = -EBADF; @@ -1001,7 +1001,7 @@ static ssize_t do_readv(unsigned long fd, const struct iovec __user *vec, } static ssize_t do_writev(unsigned long fd, const struct iovec __user *vec, - unsigned long vlen, int flags) + unsigned long vlen, rwf_t flags) { struct fd f = fdget_pos(fd); ssize_t ret = -EBADF; @@ -1027,7 +1027,7 @@ static inline loff_t pos_from_hilo(unsigned long high, unsigned long low) } static ssize_t do_preadv(unsigned long fd, const struct iovec __user *vec, - unsigned long vlen, loff_t pos, int flags) + unsigned long vlen, loff_t pos, rwf_t flags) { struct fd f; ssize_t ret = -EBADF; @@ -1050,7 +1050,7 @@ static ssize_t do_preadv(unsigned long fd, const struct iovec __user *vec, } static ssize_t do_pwritev(unsigned long fd, const struct iovec __user *vec, - unsigned long vlen, loff_t pos, int flags) + unsigned long vlen, loff_t pos, rwf_t flags) { struct fd f; ssize_t ret = -EBADF; @@ -1094,7 +1094,7 @@ SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec, SYSCALL_DEFINE6(preadv2, unsigned long, fd, const struct iovec __user *, vec, unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h, - int, flags) + rwf_t, flags) { loff_t pos = pos_from_hilo(pos_h, pos_l); @@ -1114,7 +1114,7 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec, SYSCALL_DEFINE6(pwritev2, unsigned long, fd, const struct iovec __user *, vec, unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h, - int, flags) + rwf_t, flags) { loff_t pos = pos_from_hilo(pos_h, pos_l); @@ -1127,7 +1127,7 @@ SYSCALL_DEFINE6(pwritev2, unsigned long, fd, const struct iovec __user *, vec, #ifdef CONFIG_COMPAT static size_t compat_readv(struct file *file, const struct compat_iovec __user *vec, - unsigned long vlen, loff_t *pos, int flags) + unsigned long vlen, loff_t *pos, rwf_t flags) { struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov = iovstack; @@ -1147,7 +1147,7 @@ static size_t compat_readv(struct file *file, static size_t do_compat_readv(compat_ulong_t fd, const struct compat_iovec __user *vec, - compat_ulong_t vlen, int flags) + compat_ulong_t vlen, rwf_t flags) { struct fd f = fdget_pos(fd); ssize_t ret; @@ -1173,7 +1173,7 @@ COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd, static long do_compat_preadv64(unsigned long fd, const struct compat_iovec __user *vec, - unsigned long vlen, loff_t pos, int flags) + unsigned long vlen, loff_t pos, rwf_t flags) { struct fd f; ssize_t ret; @@ -1211,7 +1211,7 @@ COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd, #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64V2 COMPAT_SYSCALL_DEFINE5(preadv64v2, unsigned long, fd, const struct compat_iovec __user *,vec, - unsigned long, vlen, loff_t, pos, int, flags) + unsigned long, vlen, loff_t, pos, rwf_t, flags) { return do_compat_preadv64(fd, vec, vlen, pos, flags); } @@ -1220,7 +1220,7 @@ COMPAT_SYSCALL_DEFINE5(preadv64v2, unsigned long, fd, COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_t, fd, const struct compat_iovec __user *,vec, compat_ulong_t, vlen, u32, pos_low, u32, pos_high, - int, flags) + rwf_t, flags) { loff_t pos = ((loff_t)pos_high << 32) | pos_low; @@ -1232,7 +1232,7 @@ COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_t, fd, static size_t compat_writev(struct file *file, const struct compat_iovec __user *vec, - unsigned long vlen, loff_t *pos, int flags) + unsigned long vlen, loff_t *pos, rwf_t flags) { struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov = iovstack; @@ -1254,7 +1254,7 @@ static size_t compat_writev(struct file *file, static size_t do_compat_writev(compat_ulong_t fd, const struct compat_iovec __user* vec, - compat_ulong_t vlen, int flags) + compat_ulong_t vlen, rwf_t flags) { struct fd f = fdget_pos(fd); ssize_t ret; @@ -1279,7 +1279,7 @@ COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd, static long do_compat_pwritev64(unsigned long fd, const struct compat_iovec __user *vec, - unsigned long vlen, loff_t pos, int flags) + unsigned long vlen, loff_t pos, rwf_t flags) { struct fd f; ssize_t ret; @@ -1317,7 +1317,7 @@ COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd, #ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64V2 COMPAT_SYSCALL_DEFINE5(pwritev64v2, unsigned long, fd, const struct compat_iovec __user *,vec, - unsigned long, vlen, loff_t, pos, int, flags) + unsigned long, vlen, loff_t, pos, rwf_t, flags) { return do_compat_pwritev64(fd, vec, vlen, pos, flags); } @@ -1325,7 +1325,7 @@ COMPAT_SYSCALL_DEFINE5(pwritev64v2, unsigned long, fd, COMPAT_SYSCALL_DEFINE6(pwritev2, compat_ulong_t, fd, const struct compat_iovec __user *,vec, - compat_ulong_t, vlen, u32, pos_low, u32, pos_high, int, flags) + compat_ulong_t, vlen, u32, pos_low, u32, pos_high, rwf_t, flags) { loff_t pos = ((loff_t)pos_high << 32) | pos_low; diff --git a/include/linux/compat.h b/include/linux/compat.h index e5d3fbe24f7d..3fc433303d7a 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -365,10 +365,10 @@ asmlinkage ssize_t compat_sys_pwritev(compat_ulong_t fd, compat_ulong_t vlen, u32 pos_low, u32 pos_high); asmlinkage ssize_t compat_sys_preadv2(compat_ulong_t fd, const struct compat_iovec __user *vec, - compat_ulong_t vlen, u32 pos_low, u32 pos_high, int flags); + compat_ulong_t vlen, u32 pos_low, u32 pos_high, rwf_t flags); asmlinkage ssize_t compat_sys_pwritev2(compat_ulong_t fd, const struct compat_iovec __user *vec, - compat_ulong_t vlen, u32 pos_low, u32 pos_high, int flags); + compat_ulong_t vlen, u32 pos_low, u32 pos_high, rwf_t flags); #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64 asmlinkage long compat_sys_preadv64(unsigned long fd, @@ -382,6 +382,18 @@ asmlinkage long compat_sys_pwritev64(unsigned long fd, unsigned long vlen, loff_t pos); #endif +#ifdef __ARCH_WANT_COMPAT_SYS_PREADV64V2 +asmlinkage long compat_sys_readv64v2(unsigned long fd, + const struct compat_iovec __user *vec, + unsigned long vlen, loff_t pos, rwf_t flags); +#endif + +#ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64V2 +asmlinkage long compat_sys_pwritev64v2(unsigned long fd, + const struct compat_iovec __user *vec, + unsigned long vlen, loff_t pos, rwf_t flags); +#endif + asmlinkage long compat_sys_lseek(unsigned int, compat_off_t, unsigned int); asmlinkage long compat_sys_execve(const char __user *filename, const compat_uptr_t __user *argv, diff --git a/include/linux/fs.h b/include/linux/fs.h index cbfe127bccf8..2625fc47c7e5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -72,6 +72,8 @@ extern int leases_enable, lease_break_time; extern int sysctl_protected_symlinks; extern int sysctl_protected_hardlinks; +typedef __kernel_rwf_t rwf_t; + struct buffer_head; typedef int (get_block_t)(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create); @@ -1758,9 +1760,9 @@ extern ssize_t __vfs_write(struct file *, const char __user *, size_t, loff_t *) extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *); extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t vfs_readv(struct file *, const struct iovec __user *, - unsigned long, loff_t *, int); + unsigned long, loff_t *, rwf_t); extern ssize_t vfs_writev(struct file *, const struct iovec __user *, - unsigned long, loff_t *, int); + unsigned long, loff_t *, rwf_t); extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, loff_t, size_t, unsigned int); extern int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in, @@ -2874,9 +2876,9 @@ extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *); extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t); ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos, - int flags); + rwf_t flags); ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos, - int flags); + rwf_t flags); /* fs/block_dev.c */ extern ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to); @@ -3143,7 +3145,7 @@ static inline int iocb_flags(struct file *file) return res; } -static inline int kiocb_set_rw_flags(struct kiocb *ki, int flags) +static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags) { if (unlikely(flags & ~RWF_SUPPORTED)) return -EOPNOTSUPP; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 0bc1d2e8cc17..138c94535864 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -579,12 +579,12 @@ asmlinkage long sys_preadv(unsigned long fd, const struct iovec __user *vec, unsigned long vlen, unsigned long pos_l, unsigned long pos_h); asmlinkage long sys_preadv2(unsigned long fd, const struct iovec __user *vec, unsigned long vlen, unsigned long pos_l, unsigned long pos_h, - int flags); + rwf_t flags); asmlinkage long sys_pwritev(unsigned long fd, const struct iovec __user *vec, unsigned long vlen, unsigned long pos_l, unsigned long pos_h); asmlinkage long sys_pwritev2(unsigned long fd, const struct iovec __user *vec, unsigned long vlen, unsigned long pos_l, unsigned long pos_h, - int flags); + rwf_t flags); asmlinkage long sys_getcwd(char __user *buf, unsigned long size); asmlinkage long sys_mkdir(const char __user *pathname, umode_t mode); asmlinkage long sys_chdir(const char __user *filename); diff --git a/include/uapi/linux/aio_abi.h b/include/uapi/linux/aio_abi.h index a2d4a8ac94ca..a04adbc70ddf 100644 --- a/include/uapi/linux/aio_abi.h +++ b/include/uapi/linux/aio_abi.h @@ -28,6 +28,7 @@ #define __LINUX__AIO_ABI_H #include +#include #include typedef __kernel_ulong_t aio_context_t; @@ -62,14 +63,6 @@ struct io_event { __s64 res2; /* secondary result */ }; -#if defined(__BYTE_ORDER) ? __BYTE_ORDER == __LITTLE_ENDIAN : defined(__LITTLE_ENDIAN) -#define PADDED(x,y) x, y -#elif defined(__BYTE_ORDER) ? __BYTE_ORDER == __BIG_ENDIAN : defined(__BIG_ENDIAN) -#define PADDED(x,y) y, x -#else -#error edit for your odd byteorder. -#endif - /* * we always use a 64bit off_t when communicating * with userland. its up to libraries to do the @@ -79,8 +72,16 @@ struct io_event { struct iocb { /* these are internal to the kernel/libc. */ __u64 aio_data; /* data to be returned in event's data */ - __u32 PADDED(aio_key, aio_rw_flags); - /* the kernel sets aio_key to the req # */ + +#if defined(__BYTE_ORDER) ? __BYTE_ORDER == __LITTLE_ENDIAN : defined(__LITTLE_ENDIAN) + __u32 aio_key; /* the kernel sets aio_key to the req # */ + __kernel_rwf_t aio_rw_flags; /* RWF_* flags */ +#elif defined(__BYTE_ORDER) ? __BYTE_ORDER == __BIG_ENDIAN : defined(__BIG_ENDIAN) + __kernel_rwf_t aio_rw_flags; /* RWF_* flags */ + __u32 aio_key; /* the kernel sets aio_key to the req # */ +#else +#error edit for your odd byteorder. +#endif /* common fields */ __u16 aio_lio_opcode; /* see IOCB_CMD_ above */ diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index b7495d05e8de..56235dddea7d 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -358,13 +358,25 @@ struct fscrypt_key { #define SYNC_FILE_RANGE_WRITE 2 #define SYNC_FILE_RANGE_WAIT_AFTER 4 -/* flags for preadv2/pwritev2: */ -#define RWF_HIPRI 0x00000001 /* high priority request, poll if possible */ -#define RWF_DSYNC 0x00000002 /* per-IO O_DSYNC */ -#define RWF_SYNC 0x00000004 /* per-IO O_SYNC */ -#define RWF_NOWAIT 0x00000008 /* per-IO, return -EAGAIN if operation would block */ - -#define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC |\ - RWF_NOWAIT) +/* + * Flags for preadv2/pwritev2: + */ + +typedef int __bitwise __kernel_rwf_t; + +/* high priority request, poll if possible */ +#define RWF_HIPRI ((__force __kernel_rwf_t)0x00000001) + +/* per-IO O_DSYNC */ +#define RWF_DSYNC ((__force __kernel_rwf_t)0x00000002) + +/* per-IO O_SYNC */ +#define RWF_SYNC ((__force __kernel_rwf_t)0x00000004) + +/* per-IO, return -EAGAIN if operation would block */ +#define RWF_NOWAIT ((__force __kernel_rwf_t)0x00000008) + +/* mask of flags supported by the kernel */ +#define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT) #endif /* _UAPI_LINUX_FS_H */ -- cgit v1.2.3-71-gd317 From f58e76c1c551c7577b25a6fe493d82f5214331b7 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 23 Aug 2017 15:29:10 -0700 Subject: : Fix copy_in_user() declaration copy_in_user() copies data from user-space address @from to user- space address @to. Hence declare both @from and @to as user-space pointers. Fixes: commit d597580d3737 ("generic ...copy_..._user primitives") Signed-off-by: Bart Van Assche Cc: Signed-off-by: Al Viro --- include/linux/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index acdd6f915a8d..20ef8e6ec2db 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -156,7 +156,7 @@ copy_to_user(void __user *to, const void *from, unsigned long n) } #ifdef CONFIG_COMPAT static __always_inline unsigned long __must_check -copy_in_user(void __user *to, const void *from, unsigned long n) +copy_in_user(void __user *to, const void __user *from, unsigned long n) { might_fault(); if (access_ok(VERIFY_WRITE, to, n) && access_ok(VERIFY_READ, from, n)) -- cgit v1.2.3-71-gd317 From a4d1a885251382250ec315482bdd8ca52dd61e6a Mon Sep 17 00:00:00 2001 From: Jérôme Glisse Date: Thu, 31 Aug 2017 17:17:26 -0400 Subject: dax: update to new mmu_notifier semantic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace all mmu_notifier_invalidate_page() calls by *_invalidate_range() and make sure it is bracketed by calls to *_invalidate_range_start()/end(). Note that because we can not presume the pmd value or pte value we have to assume the worst and unconditionaly report an invalidation as happening. Signed-off-by: Jérôme Glisse Cc: Dan Williams Cc: Ross Zwisler Cc: Bernhard Held Cc: Adam Borowski Cc: Andrea Arcangeli Cc: Radim Krčmář Cc: Wanpeng Li Cc: Paolo Bonzini Cc: Takashi Iwai Cc: Nadav Amit Cc: Mike Galbraith Cc: Kirill A. Shutemov Cc: axie Cc: Andrew Morton Signed-off-by: Linus Torvalds --- fs/dax.c | 19 +++++++++++-------- include/linux/mm.h | 1 + mm/memory.c | 26 +++++++++++++++++++++----- 3 files changed, 33 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/fs/dax.c b/fs/dax.c index 865d42c63e23..ab925dc6647a 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -646,11 +646,10 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping, pte_t pte, *ptep = NULL; pmd_t *pmdp = NULL; spinlock_t *ptl; - bool changed; i_mmap_lock_read(mapping); vma_interval_tree_foreach(vma, &mapping->i_mmap, index, index) { - unsigned long address; + unsigned long address, start, end; cond_resched(); @@ -658,8 +657,13 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping, continue; address = pgoff_address(index, vma); - changed = false; - if (follow_pte_pmd(vma->vm_mm, address, &ptep, &pmdp, &ptl)) + + /* + * Note because we provide start/end to follow_pte_pmd it will + * call mmu_notifier_invalidate_range_start() on our behalf + * before taking any lock. + */ + if (follow_pte_pmd(vma->vm_mm, address, &start, &end, &ptep, &pmdp, &ptl)) continue; if (pmdp) { @@ -676,7 +680,7 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping, pmd = pmd_wrprotect(pmd); pmd = pmd_mkclean(pmd); set_pmd_at(vma->vm_mm, address, pmdp, pmd); - changed = true; + mmu_notifier_invalidate_range(vma->vm_mm, start, end); unlock_pmd: spin_unlock(ptl); #endif @@ -691,13 +695,12 @@ unlock_pmd: pte = pte_wrprotect(pte); pte = pte_mkclean(pte); set_pte_at(vma->vm_mm, address, ptep, pte); - changed = true; + mmu_notifier_invalidate_range(vma->vm_mm, start, end); unlock_pte: pte_unmap_unlock(ptep, ptl); } - if (changed) - mmu_notifier_invalidate_page(vma->vm_mm, address); + mmu_notifier_invalidate_range_end(vma->vm_mm, start, end); } i_mmap_unlock_read(mapping); } diff --git a/include/linux/mm.h b/include/linux/mm.h index 46b9ac5e8569..c1f6c95f3496 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1260,6 +1260,7 @@ int copy_page_range(struct mm_struct *dst, struct mm_struct *src, void unmap_mapping_range(struct address_space *mapping, loff_t const holebegin, loff_t const holelen, int even_cows); int follow_pte_pmd(struct mm_struct *mm, unsigned long address, + unsigned long *start, unsigned long *end, pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp); int follow_pfn(struct vm_area_struct *vma, unsigned long address, unsigned long *pfn); diff --git a/mm/memory.c b/mm/memory.c index fe2fba27ded2..56e48e4593cb 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4008,7 +4008,8 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) #endif /* __PAGETABLE_PMD_FOLDED */ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address, - pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp) + unsigned long *start, unsigned long *end, + pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp) { pgd_t *pgd; p4d_t *p4d; @@ -4035,17 +4036,29 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address, if (!pmdpp) goto out; + if (start && end) { + *start = address & PMD_MASK; + *end = *start + PMD_SIZE; + mmu_notifier_invalidate_range_start(mm, *start, *end); + } *ptlp = pmd_lock(mm, pmd); if (pmd_huge(*pmd)) { *pmdpp = pmd; return 0; } spin_unlock(*ptlp); + if (start && end) + mmu_notifier_invalidate_range_end(mm, *start, *end); } if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) goto out; + if (start && end) { + *start = address & PAGE_MASK; + *end = *start + PAGE_SIZE; + mmu_notifier_invalidate_range_start(mm, *start, *end); + } ptep = pte_offset_map_lock(mm, pmd, address, ptlp); if (!pte_present(*ptep)) goto unlock; @@ -4053,6 +4066,8 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address, return 0; unlock: pte_unmap_unlock(ptep, *ptlp); + if (start && end) + mmu_notifier_invalidate_range_end(mm, *start, *end); out: return -EINVAL; } @@ -4064,20 +4079,21 @@ static inline int follow_pte(struct mm_struct *mm, unsigned long address, /* (void) is needed to make gcc happy */ (void) __cond_lock(*ptlp, - !(res = __follow_pte_pmd(mm, address, ptepp, NULL, - ptlp))); + !(res = __follow_pte_pmd(mm, address, NULL, NULL, + ptepp, NULL, ptlp))); return res; } int follow_pte_pmd(struct mm_struct *mm, unsigned long address, + unsigned long *start, unsigned long *end, pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp) { int res; /* (void) is needed to make gcc happy */ (void) __cond_lock(*ptlp, - !(res = __follow_pte_pmd(mm, address, ptepp, pmdpp, - ptlp))); + !(res = __follow_pte_pmd(mm, address, start, end, + ptepp, pmdpp, ptlp))); return res; } EXPORT_SYMBOL(follow_pte_pmd); -- cgit v1.2.3-71-gd317 From 5f32b265400de723ab0db23101a75ac073bdd980 Mon Sep 17 00:00:00 2001 From: Jérôme Glisse Date: Thu, 31 Aug 2017 17:17:38 -0400 Subject: mm/mmu_notifier: kill invalidate_page MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The invalidate_page callback suffered from two pitfalls. First it used to happen after the page table lock was release and thus a new page might have setup before the call to invalidate_page() happened. This is in a weird way fixed by commit c7ab0d2fdc84 ("mm: convert try_to_unmap_one() to use page_vma_mapped_walk()") that moved the callback under the page table lock but this also broke several existing users of the mmu_notifier API that assumed they could sleep inside this callback. The second pitfall was invalidate_page() being the only callback not taking a range of address in respect to invalidation but was giving an address and a page. Lots of the callback implementers assumed this could never be THP and thus failed to invalidate the appropriate range for THP. By killing this callback we unify the mmu_notifier callback API to always take a virtual address range as input. Finally this also simplifies the end user life as there is now two clear choices: - invalidate_range_start()/end() callback (which allow you to sleep) - invalidate_range() where you can not sleep but happen right after page table update under page table lock Signed-off-by: Jérôme Glisse Cc: Bernhard Held Cc: Adam Borowski Cc: Andrea Arcangeli Cc: Radim Krčmář Cc: Wanpeng Li Cc: Paolo Bonzini Cc: Takashi Iwai Cc: Nadav Amit Cc: Mike Galbraith Cc: Kirill A. Shutemov Cc: axie Cc: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmu_notifier.h | 25 ------------------------- mm/mmu_notifier.c | 14 -------------- 2 files changed, 39 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index c91b3bcd158f..7b2e31b1745a 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -94,17 +94,6 @@ struct mmu_notifier_ops { unsigned long address, pte_t pte); - /* - * Before this is invoked any secondary MMU is still ok to - * read/write to the page previously pointed to by the Linux - * pte because the page hasn't been freed yet and it won't be - * freed until this returns. If required set_page_dirty has to - * be called internally to this method. - */ - void (*invalidate_page)(struct mmu_notifier *mn, - struct mm_struct *mm, - unsigned long address); - /* * invalidate_range_start() and invalidate_range_end() must be * paired and are called only when the mmap_sem and/or the @@ -220,8 +209,6 @@ extern int __mmu_notifier_test_young(struct mm_struct *mm, unsigned long address); extern void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address, pte_t pte); -extern void __mmu_notifier_invalidate_page(struct mm_struct *mm, - unsigned long address); extern void __mmu_notifier_invalidate_range_start(struct mm_struct *mm, unsigned long start, unsigned long end); extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm, @@ -268,13 +255,6 @@ static inline void mmu_notifier_change_pte(struct mm_struct *mm, __mmu_notifier_change_pte(mm, address, pte); } -static inline void mmu_notifier_invalidate_page(struct mm_struct *mm, - unsigned long address) -{ - if (mm_has_notifiers(mm)) - __mmu_notifier_invalidate_page(mm, address); -} - static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm, unsigned long start, unsigned long end) { @@ -442,11 +422,6 @@ static inline void mmu_notifier_change_pte(struct mm_struct *mm, { } -static inline void mmu_notifier_invalidate_page(struct mm_struct *mm, - unsigned long address) -{ -} - static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm, unsigned long start, unsigned long end) { diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c index 54ca54562928..314285284e6e 100644 --- a/mm/mmu_notifier.c +++ b/mm/mmu_notifier.c @@ -174,20 +174,6 @@ void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address, srcu_read_unlock(&srcu, id); } -void __mmu_notifier_invalidate_page(struct mm_struct *mm, - unsigned long address) -{ - struct mmu_notifier *mn; - int id; - - id = srcu_read_lock(&srcu); - hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) { - if (mn->ops->invalidate_page) - mn->ops->invalidate_page(mn, mm, address); - } - srcu_read_unlock(&srcu, id); -} - void __mmu_notifier_invalidate_range_start(struct mm_struct *mm, unsigned long start, unsigned long end) { -- cgit v1.2.3-71-gd317 From c03567a8e8d5cf2aaca40e605c48f319dc2ead57 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Thu, 31 Aug 2017 16:15:33 -0700 Subject: include/linux/compiler.h: don't perform compiletime_assert with -O0 Commit c7acec713d14 ("kernel.h: handle pointers to arrays better in container_of()") made use of __compiletime_assert() from container_of() thus increasing the usage of this macro, allowing developers to notice type conflicts in usage of container_of() at compile time. However, the implementation of __compiletime_assert relies on compiler optimizations to report an error. This means that if a developer uses "-O0" with any code that performs container_of(), the compiler will always report an error regardless of whether there is an actual problem in the code. This patch disables compile_time_assert when optimizations are disabled to allow such code to compile with CFLAGS="-O0". Example compilation failure: ./include/linux/compiler.h:547:38: error: call to `__compiletime_assert_94' declared with attribute error: pointer type mismatch in container_of() _compiletime_assert(condition, msg, __compiletime_assert_, __LINE__) ^ ./include/linux/compiler.h:530:4: note: in definition of macro `__compiletime_assert' prefix ## suffix(); \ ^~~~~~ ./include/linux/compiler.h:547:2: note: in expansion of macro `_compiletime_assert' _compiletime_assert(condition, msg, __compiletime_assert_, __LINE__) ^~~~~~~~~~~~~~~~~~~ ./include/linux/build_bug.h:46:37: note: in expansion of macro `compiletime_assert' #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg) ^~~~~~~~~~~~~~~~~~ ./include/linux/kernel.h:860:2: note: in expansion of macro `BUILD_BUG_ON_MSG' BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) && \ ^~~~~~~~~~~~~~~~ [akpm@linux-foundation.org: use do{}while(0), per Michal] Link: http://lkml.kernel.org/r/20170829230114.11662-1-joe@ovn.org Fixes: c7acec713d14c6c ("kernel.h: handle pointers to arrays better in container_of()") Signed-off-by: Joe Stringer Cc: Ian Abbott Cc: Arnd Bergmann Cc: Michal Nazarewicz Cc: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index eca8ad75e28b..043b60de041e 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -517,7 +517,8 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s # define __compiletime_error_fallback(condition) do { } while (0) #endif -#define __compiletime_assert(condition, msg, prefix, suffix) \ +#ifdef __OPTIMIZE__ +# define __compiletime_assert(condition, msg, prefix, suffix) \ do { \ bool __cond = !(condition); \ extern void prefix ## suffix(void) __compiletime_error(msg); \ @@ -525,6 +526,9 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s prefix ## suffix(); \ __compiletime_error_fallback(__cond); \ } while (0) +#else +# define __compiletime_assert(condition, msg, prefix, suffix) do { } while (0) +#endif #define _compiletime_assert(condition, msg, prefix, suffix) \ __compiletime_assert(condition, msg, prefix, suffix) -- cgit v1.2.3-71-gd317 From 25cc72a33835ed8a6f53180a822cadab855852ac Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Fri, 1 Sep 2017 10:52:31 +0200 Subject: mlxsw: spectrum: Forbid linking to devices that have uppers The mlxsw driver relies on NETDEV_CHANGEUPPER events to configure the device in case a port is enslaved to a master netdev such as bridge or bond. Since the driver ignores events unrelated to its ports and their uppers, it's possible to engineer situations in which the device's data path differs from the kernel's. One example to such a situation is when a port is enslaved to a bond that is already enslaved to a bridge. When the bond was enslaved the driver ignored the event - as the bond wasn't one of its uppers - and therefore a bridge port instance isn't created in the device. Until such configurations are supported forbid them by checking that the upper device doesn't have uppers of its own. Fixes: 0d65fc13042f ("mlxsw: spectrum: Implement LAG port join/leave") Signed-off-by: Ido Schimmel Reported-by: Nogah Frankel Tested-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 6 ++++++ include/linux/netdevice.h | 2 ++ net/core/dev.c | 3 ++- 3 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 60bf8f27cc00..c6a3e61b53bd 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -4139,6 +4139,8 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, return -EINVAL; if (!info->linking) break; + if (netdev_has_any_upper_dev(upper_dev)) + return -EINVAL; if (netif_is_lag_master(upper_dev) && !mlxsw_sp_master_lag_check(mlxsw_sp, upper_dev, info->upper_info)) @@ -4258,6 +4260,10 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev, upper_dev = info->upper_dev; if (!netif_is_bridge_master(upper_dev)) return -EINVAL; + if (!info->linking) + break; + if (netdev_has_any_upper_dev(upper_dev)) + return -EINVAL; break; case NETDEV_CHANGEUPPER: upper_dev = info->upper_dev; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 779b23595596..c99ba7914c0a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3866,6 +3866,8 @@ int netdev_walk_all_upper_dev_rcu(struct net_device *dev, bool netdev_has_upper_dev_all_rcu(struct net_device *dev, struct net_device *upper_dev); +bool netdev_has_any_upper_dev(struct net_device *dev); + void *netdev_lower_get_next_private(struct net_device *dev, struct list_head **iter); void *netdev_lower_get_next_private_rcu(struct net_device *dev, diff --git a/net/core/dev.c b/net/core/dev.c index 818dfa6e7ab5..86b4b0a79e7a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5668,12 +5668,13 @@ EXPORT_SYMBOL(netdev_has_upper_dev_all_rcu); * Find out if a device is linked to an upper device and return true in case * it is. The caller must hold the RTNL lock. */ -static bool netdev_has_any_upper_dev(struct net_device *dev) +bool netdev_has_any_upper_dev(struct net_device *dev) { ASSERT_RTNL(); return !list_empty(&dev->adj_list.upper); } +EXPORT_SYMBOL(netdev_has_any_upper_dev); /** * netdev_master_upper_dev_get - Get master upper device -- cgit v1.2.3-71-gd317