From 5930e8d0ab3689f1e239566443ca8f53e45e01cc Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 15 Oct 2013 16:55:22 +0200 Subject: net/mlx4: Fix typo, move similar defs to same location Small code cleanup: 1. change MLX4_DEV_CAP_FLAGS2_REASSIGN_MAC_EN to MLX4_DEV_CAP_FLAG2_REASSIGN_MAC_EN 2. put MLX4_SET_PORT_PRIO2TC and MLX4_SET_PORT_SCHEDULER in the same enum with the other MLX4_SET_PORT_yyy Signed-off-by: Or Gerlitz Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- include/linux/mlx4/cmd.h | 6 ++---- include/linux/mlx4/device.h | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'include/linux/mlx4') diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index cd1fdf75103b..8df61bc5da00 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -154,10 +154,6 @@ enum { MLX4_CMD_QUERY_IF_STAT = 0X54, MLX4_CMD_SET_IF_STAT = 0X55, - /* set port opcode modifiers */ - MLX4_SET_PORT_PRIO2TC = 0x8, - MLX4_SET_PORT_SCHEDULER = 0x9, - /* register/delete flow steering network rules */ MLX4_QP_FLOW_STEERING_ATTACH = 0x65, MLX4_QP_FLOW_STEERING_DETACH = 0x66, @@ -182,6 +178,8 @@ enum { MLX4_SET_PORT_VLAN_TABLE = 0x3, MLX4_SET_PORT_PRIO_MAP = 0x4, MLX4_SET_PORT_GID_TABLE = 0x5, + MLX4_SET_PORT_PRIO2TC = 0x8, + MLX4_SET_PORT_SCHEDULER = 0x9, }; enum { diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 24ce6bdd540e..9ad0c18495ad 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -155,7 +155,7 @@ enum { MLX4_DEV_CAP_FLAG2_RSS_TOP = 1LL << 1, MLX4_DEV_CAP_FLAG2_RSS_XOR = 1LL << 2, MLX4_DEV_CAP_FLAG2_FS_EN = 1LL << 3, - MLX4_DEV_CAP_FLAGS2_REASSIGN_MAC_EN = 1LL << 4, + MLX4_DEV_CAP_FLAG2_REASSIGN_MAC_EN = 1LL << 4, MLX4_DEV_CAP_FLAG2_TS = 1LL << 5, MLX4_DEV_CAP_FLAG2_VLAN_CONTROL = 1LL << 6, MLX4_DEV_CAP_FLAG2_FSM = 1LL << 7, -- cgit v1.2.3-71-gd317 From acddd5dd44d4fd9b45dd5ee69cd8b183052b1cdc Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Sun, 3 Nov 2013 10:03:18 
+0200 Subject: net/mlx4_core: Fix reg/unreg vlan/mac to conform to the firmware spec The functions mlx4_register_vlan, mlx4_unregister_vlan, mlx4_register_mac, mlx4_unregister_mac all made illegal use of the out_param in multifunc mode to pass the port number. The firmware spec specifies that the port number should be passed in bits 8..15 of the input-modifier field for ALLOC_RES and FREE_RES (sections 20.15.1 and 20.15.2). For MAC register/unregister, this patch contains workarounds so that guests running previous kernels continue to work on a new Hypervisor, and guests running the new kernel will continue to work on old hypervisors. Vlan registration capability is still not operational in multifunction mode, since the vlan wrapper functions are not implemented in this patch. Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/port.c | 45 +++++++++++++++------- .../net/ethernet/mellanox/mlx4/resource_tracker.c | 28 ++++++++------ include/linux/mlx4/device.h | 1 + 3 files changed, 49 insertions(+), 25 deletions(-) (limited to 'include/linux/mlx4') diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index d3d3106f588f..9433c1f6b0d4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -178,13 +178,24 @@ EXPORT_SYMBOL_GPL(__mlx4_register_mac); int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac) { u64 out_param = 0; - int err; + int err = -EINVAL; if (mlx4_is_mfunc(dev)) { - set_param_l(&out_param, port); - err = mlx4_cmd_imm(dev, mac, &out_param, RES_MAC, - RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES, - MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); + if (!(dev->flags & MLX4_FLAG_OLD_REG_MAC)) { + err = mlx4_cmd_imm(dev, mac, &out_param, + ((u32) port) << 8 | (u32) RES_MAC, + RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); + } + if (err && err == 
-EINVAL && mlx4_is_slave(dev)) { + /* retry using old REG_MAC format */ + set_param_l(&out_param, port); + err = mlx4_cmd_imm(dev, mac, &out_param, RES_MAC, + RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); + if (!err) + dev->flags |= MLX4_FLAG_OLD_REG_MAC; + } if (err) return err; @@ -231,10 +242,18 @@ void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac) u64 out_param = 0; if (mlx4_is_mfunc(dev)) { - set_param_l(&out_param, port); - (void) mlx4_cmd_imm(dev, mac, &out_param, RES_MAC, - RES_OP_RESERVE_AND_MAP, MLX4_CMD_FREE_RES, - MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); + if (!(dev->flags & MLX4_FLAG_OLD_REG_MAC)) { + (void) mlx4_cmd_imm(dev, mac, &out_param, + ((u32) port) << 8 | (u32) RES_MAC, + RES_OP_RESERVE_AND_MAP, MLX4_CMD_FREE_RES, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); + } else { + /* use old unregister mac format */ + set_param_l(&out_param, port); + (void) mlx4_cmd_imm(dev, mac, &out_param, RES_MAC, + RES_OP_RESERVE_AND_MAP, MLX4_CMD_FREE_RES, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); + } return; } __mlx4_unregister_mac(dev, port, mac); @@ -374,8 +393,8 @@ int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index) return -EINVAL; if (mlx4_is_mfunc(dev)) { - set_param_l(&out_param, port); - err = mlx4_cmd_imm(dev, vlan, &out_param, RES_VLAN, + err = mlx4_cmd_imm(dev, vlan, &out_param, + ((u32) port) << 8 | (u32) RES_VLAN, RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); if (!err) @@ -418,8 +437,8 @@ void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index) u64 out_param = 0; if (mlx4_is_mfunc(dev)) { - set_param_l(&out_param, port); - (void) mlx4_cmd_imm(dev, index, &out_param, RES_VLAN, + (void) mlx4_cmd_imm(dev, index, &out_param, + ((u32) port) << 8 | (u32) RES_VLAN, RES_OP_RESERVE_AND_MAP, MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c 
b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index dd6876321116..a5aa3be554fe 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -1443,7 +1443,7 @@ static void rem_slave_macs(struct mlx4_dev *dev, int slave) } static int mac_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, - u64 in_param, u64 *out_param) + u64 in_param, u64 *out_param, int in_port) { int err = -EINVAL; int port; @@ -1452,7 +1452,7 @@ static int mac_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, if (op != RES_OP_RESERVE_AND_MAP) return err; - port = get_param_l(out_param); + port = !in_port ? get_param_l(out_param) : in_port; mac = in_param; err = __mlx4_register_mac(dev, port, mac); @@ -1470,7 +1470,7 @@ static int mac_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, } static int vlan_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, - u64 in_param, u64 *out_param) + u64 in_param, u64 *out_param, int port) { return 0; } @@ -1528,7 +1528,7 @@ int mlx4_ALLOC_RES_wrapper(struct mlx4_dev *dev, int slave, int err; int alop = vhcr->op_modifier; - switch (vhcr->in_modifier) { + switch (vhcr->in_modifier & 0xFF) { case RES_QP: err = qp_alloc_res(dev, slave, vhcr->op_modifier, alop, vhcr->in_param, &vhcr->out_param); @@ -1556,12 +1556,14 @@ int mlx4_ALLOC_RES_wrapper(struct mlx4_dev *dev, int slave, case RES_MAC: err = mac_alloc_res(dev, slave, vhcr->op_modifier, alop, - vhcr->in_param, &vhcr->out_param); + vhcr->in_param, &vhcr->out_param, + (vhcr->in_modifier >> 8) & 0xFF); break; case RES_VLAN: err = vlan_alloc_res(dev, slave, vhcr->op_modifier, alop, - vhcr->in_param, &vhcr->out_param); + vhcr->in_param, &vhcr->out_param, + (vhcr->in_modifier >> 8) & 0xFF); break; case RES_COUNTER: @@ -1730,14 +1732,14 @@ static int srq_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, } static int mac_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, - u64 in_param, 
u64 *out_param) + u64 in_param, u64 *out_param, int in_port) { int port; int err = 0; switch (op) { case RES_OP_RESERVE_AND_MAP: - port = get_param_l(out_param); + port = !in_port ? get_param_l(out_param) : in_port; mac_del_from_slave(dev, slave, in_param, port); __mlx4_unregister_mac(dev, port, in_param); break; @@ -1751,7 +1753,7 @@ static int mac_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, } static int vlan_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, - u64 in_param, u64 *out_param) + u64 in_param, u64 *out_param, int port) { return 0; } @@ -1803,7 +1805,7 @@ int mlx4_FREE_RES_wrapper(struct mlx4_dev *dev, int slave, int err = -EINVAL; int alop = vhcr->op_modifier; - switch (vhcr->in_modifier) { + switch (vhcr->in_modifier & 0xFF) { case RES_QP: err = qp_free_res(dev, slave, vhcr->op_modifier, alop, vhcr->in_param); @@ -1831,12 +1833,14 @@ int mlx4_FREE_RES_wrapper(struct mlx4_dev *dev, int slave, case RES_MAC: err = mac_free_res(dev, slave, vhcr->op_modifier, alop, - vhcr->in_param, &vhcr->out_param); + vhcr->in_param, &vhcr->out_param, + (vhcr->in_modifier >> 8) & 0xFF); break; case RES_VLAN: err = vlan_free_res(dev, slave, vhcr->op_modifier, alop, - vhcr->in_param, &vhcr->out_param); + vhcr->in_param, &vhcr->out_param, + (vhcr->in_modifier >> 8) & 0xFF); break; case RES_COUNTER: diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 9ad0c18495ad..297a16309f00 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -54,6 +54,7 @@ enum { MLX4_FLAG_MASTER = 1 << 2, MLX4_FLAG_SLAVE = 1 << 3, MLX4_FLAG_SRIOV = 1 << 4, + MLX4_FLAG_OLD_REG_MAC = 1 << 6, }; enum { -- cgit v1.2.3-71-gd317 From 2009d0059c084288f060b1ffe3d14229588acb67 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Sun, 3 Nov 2013 10:03:19 +0200 Subject: net/mlx4_en: Use vlan id instead of vlan index for unregistration Use of vlan_index created problems unregistering vlans on guests. 
In addition, tools delete vlan by tag, not by index; let's follow that. Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 2 +- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 6 +---- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 2 +- drivers/net/ethernet/mellanox/mlx4/port.c | 27 ++++++++++++---------- .../net/ethernet/mellanox/mlx4/resource_tracker.c | 2 +- include/linux/mlx4/device.h | 2 +- 6 files changed, 20 insertions(+), 21 deletions(-) (limited to 'include/linux/mlx4') diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index ae8eb4c4fb6c..887d62576f54 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -1687,7 +1687,7 @@ static void mlx4_master_deactivate_admin_state(struct mlx4_priv *priv, int slave vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; if (NO_INDX != vp_oper->vlan_idx) { __mlx4_unregister_vlan(&priv->dev, - port, vp_oper->vlan_idx); + port, vp_oper->state.default_vlan); vp_oper->vlan_idx = NO_INDX; } if (NO_INDX != vp_oper->mac_idx) { diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 85d91665d400..b5554121aca4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -417,7 +417,6 @@ static int mlx4_en_vlan_rx_kill_vid(struct net_device *dev, struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; int err; - int idx; en_dbg(HW, priv, "Killing VID:%d\n", vid); @@ -425,10 +424,7 @@ static int mlx4_en_vlan_rx_kill_vid(struct net_device *dev, /* Remove VID from port VLAN filter */ mutex_lock(&mdev->state_lock); - if (!mlx4_find_cached_vlan(mdev->dev, priv->port, vid, &idx)) - mlx4_unregister_vlan(mdev->dev, priv->port, idx); - else - en_dbg(HW, priv, "could not find vid %d in cache\n", vid); + 
mlx4_unregister_vlan(mdev->dev, priv->port, vid); if (mdev->device_up && priv->port_up) { err = mlx4_SET_VLAN_FLTR(mdev->dev, priv); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 348bb8c7d9a7..f2ad4f61f58c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -1111,7 +1111,7 @@ int mlx4_change_port_types(struct mlx4_dev *dev, void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table); void mlx4_init_vlan_table(struct mlx4_dev *dev, struct mlx4_vlan_table *table); -void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index); +void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan); int __mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index); int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port, int pkey_tbl_sz); diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index 9433c1f6b0d4..caaa15470395 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -406,23 +406,26 @@ int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index) } EXPORT_SYMBOL_GPL(mlx4_register_vlan); -void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index) +void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan) { struct mlx4_vlan_table *table = &mlx4_priv(dev)->port[port].vlan_table; + int index; - if (index < MLX4_VLAN_REGULAR) { - mlx4_warn(dev, "Trying to free special vlan index %d\n", index); - return; + mutex_lock(&table->mutex); + if (mlx4_find_cached_vlan(dev, port, vlan, &index)) { + mlx4_warn(dev, "vlan 0x%x is not in the vlan table\n", vlan); + goto out; } - mutex_lock(&table->mutex); - if (!table->refs[index]) { - mlx4_warn(dev, "No vlan entry for index %d\n", index); + if (index < MLX4_VLAN_REGULAR) { + mlx4_warn(dev, "Trying to free special vlan index %d\n", index); goto out; } + if 
(--table->refs[index]) { - mlx4_dbg(dev, "Have more references for index %d," - "no need to modify vlan table\n", index); + mlx4_dbg(dev, "Have %d more references for index %d," + "no need to modify vlan table\n", table->refs[index], + index); goto out; } table->entries[index] = 0; @@ -432,19 +435,19 @@ out: mutex_unlock(&table->mutex); } -void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index) +void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan) { u64 out_param = 0; if (mlx4_is_mfunc(dev)) { - (void) mlx4_cmd_imm(dev, index, &out_param, + (void) mlx4_cmd_imm(dev, vlan, &out_param, ((u32) port) << 8 | (u32) RES_VLAN, RES_OP_RESERVE_AND_MAP, MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); return; } - __mlx4_unregister_vlan(dev, port, index); + __mlx4_unregister_vlan(dev, port, vlan); } EXPORT_SYMBOL_GPL(mlx4_unregister_vlan); diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index a5aa3be554fe..993a2ef13866 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -4085,7 +4085,7 @@ void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work) if (work->flags & MLX4_VF_IMMED_VLAN_FLAG_VLAN && !errors && NO_INDX != work->orig_vlan_ix) __mlx4_unregister_vlan(&work->priv->dev, work->port, - work->orig_vlan_ix); + work->orig_vlan_id); out: kfree(work); return; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 297a16309f00..e2e92885bdc1 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1079,7 +1079,7 @@ int mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw, u8 *pg, u16 *ratelimit); int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx); int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index); -void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index); +void 
mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan); int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list, int npages, u64 iova, u32 *lkey, u32 *rkey); -- cgit v1.2.3-71-gd317 From 5a0d0a6161aecbbc76e4c1d2b82e4c7cef88bb29 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Sun, 3 Nov 2013 10:03:23 +0200 Subject: mlx4: Structures and init/teardown for VF resource quotas This is step #1 for implementing SRIOV resource quotas for VFs. Quotas are implemented per resource type for VFs and the PF, to prevent any entity from simply grabbing all the resources for itself and leaving the other entities unable to obtain such resources. Resources which are allocated using quotas: QPs, CQs, SRQs, MPTs, MTTs, MAC, VLAN, and Counters. The quota system works as follows: Each entity (VF or PF) is given a max number of a given resource (its quota), and a guaranteed minimum number for each resource (starvation prevention). For QPs, CQs, SRQs, MPTs and MTTs: 50% of the available quantity for the resource is divided equally among the PF and all the active VFs (i.e., the number of VFs in the mlx4_core module parameter "num_vfs"). This 50% represents the "guaranteed minimum" pool. The other 50% is the "free pool", allocated on a first-come-first-serve basis. For each VF/PF, resources are first allocated from its "guaranteed-minimum" pool. When that pool is exhausted, the driver attempts to allocate from the resource "free-pool". The quota (i.e., max) for the VFs and the PF is: The free-pool amount (50% of the real max) + the guaranteed minimum For MACs: Guarantee 2 MACs per VF/PF per port. As a result, since we have only 128 MACs per port, reduce the allowable number of VFs from 64 to 63. Any remaining MACs are put into a free pool. For VLANs: For the PF, the per-port quota is 128 and guarantee is 64 (to allow the PF to register at least a VLAN per VF in VST mode). For the VFs, the per-port quota is 64 and the guarantee is 0. 
We assume that VGT VFs are trusted not to abuse the VLAN resource. For Counters: For all functions (PF and VFs), the quota is 128 and the guarantee is 0. In this patch, we define the needed structures, which are added to the resource-tracker struct. In addition, we do initialization for the resource quota, and adjust the query_device response to use quotas rather than resource maxima. As part of the implementation, we introduce a new field in mlx4_dev: quotas. This field holds the resource quotas used to report maxima to the upper layers (ib_core, via query_device). The HCA maxima of these values are passed to the VFs (via QUERY_HCA) so that they may continue to use these in handling QPs, CQs, SRQs and MPTs. Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx4/main.c | 8 +- drivers/net/ethernet/mellanox/mlx4/fw.c | 11 +- drivers/net/ethernet/mellanox/mlx4/main.c | 32 +++-- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 17 +++ drivers/net/ethernet/mellanox/mlx4/qp.c | 3 +- .../net/ethernet/mellanox/mlx4/resource_tracker.c | 157 ++++++++++++++++++++- include/linux/mlx4/device.h | 17 +++ 7 files changed, 222 insertions(+), 23 deletions(-) (limited to 'include/linux/mlx4') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index f0612645de99..7567437dbd34 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -177,18 +177,18 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->max_mr_size = ~0ull; props->page_size_cap = dev->dev->caps.page_size_cap; - props->max_qp = dev->dev->caps.num_qps - dev->dev->caps.reserved_qps; + props->max_qp = dev->dev->quotas.qp; props->max_qp_wr = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE; props->max_sge = min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg); - props->max_cq = dev->dev->caps.num_cqs - dev->dev->caps.reserved_cqs; + props->max_cq = dev->dev->quotas.cq; props->max_cqe = 
dev->dev->caps.max_cqes; - props->max_mr = dev->dev->caps.num_mpts - dev->dev->caps.reserved_mrws; + props->max_mr = dev->dev->quotas.mpt; props->max_pd = dev->dev->caps.num_pds - dev->dev->caps.reserved_pds; props->max_qp_rd_atom = dev->dev->caps.max_qp_dest_rdma; props->max_qp_init_rd_atom = dev->dev->caps.max_qp_init_rdma; props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp; - props->max_srq = dev->dev->caps.num_srqs - dev->dev->caps.reserved_srqs; + props->max_srq = dev->dev->quotas.srq; props->max_srq_wr = dev->dev->caps.max_srq_wqes - 1; props->max_srq_sge = dev->dev->caps.max_srq_sge; props->max_fast_reg_page_list_len = MLX4_MAX_FAST_REG_PAGES; diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index c151e7a6710a..f8c88c3ad9fc 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -177,6 +177,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { + struct mlx4_priv *priv = mlx4_priv(dev); u8 field; u32 size; int err = 0; @@ -250,13 +251,13 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, field = 0; /* protected FMR support not available as yet */ MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FMR_OFFSET); - size = dev->caps.num_qps; + size = priv->mfunc.master.res_tracker.res_alloc[RES_QP].quota[slave]; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP_QUOTA_OFFSET); - size = dev->caps.num_srqs; + size = priv->mfunc.master.res_tracker.res_alloc[RES_SRQ].quota[slave]; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET); - size = dev->caps.num_cqs; + size = priv->mfunc.master.res_tracker.res_alloc[RES_CQ].quota[slave]; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET); size = dev->caps.num_eqs; @@ -265,10 +266,10 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, size = dev->caps.reserved_eqs; MLX4_PUT(outbox->buf, size, 
QUERY_FUNC_CAP_RESERVED_EQ_OFFSET); - size = dev->caps.num_mpts; + size = priv->mfunc.master.res_tracker.res_alloc[RES_MPT].quota[slave]; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET); - size = dev->caps.num_mtts; + size = priv->mfunc.master.res_tracker.res_alloc[RES_MTT].quota[slave]; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET); size = dev->caps.num_mgms + dev->caps.num_amgms; diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 179d26709c94..7d2628dfdc29 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -562,13 +562,17 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) } dev->caps.num_ports = func_cap.num_ports; - dev->caps.num_qps = func_cap.qp_quota; - dev->caps.num_srqs = func_cap.srq_quota; - dev->caps.num_cqs = func_cap.cq_quota; - dev->caps.num_eqs = func_cap.max_eq; - dev->caps.reserved_eqs = func_cap.reserved_eq; - dev->caps.num_mpts = func_cap.mpt_quota; - dev->caps.num_mtts = func_cap.mtt_quota; + dev->quotas.qp = func_cap.qp_quota; + dev->quotas.srq = func_cap.srq_quota; + dev->quotas.cq = func_cap.cq_quota; + dev->quotas.mpt = func_cap.mpt_quota; + dev->quotas.mtt = func_cap.mtt_quota; + dev->caps.num_qps = 1 << hca_param.log_num_qps; + dev->caps.num_srqs = 1 << hca_param.log_num_srqs; + dev->caps.num_cqs = 1 << hca_param.log_num_cqs; + dev->caps.num_mpts = 1 << hca_param.log_mpt_sz; + dev->caps.num_eqs = func_cap.max_eq; + dev->caps.reserved_eqs = func_cap.reserved_eq; dev->caps.num_pds = MLX4_NUM_PDS; dev->caps.num_mgms = 0; dev->caps.num_amgms = 0; @@ -2102,9 +2106,15 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data) "aborting.\n"); return err; } - if (num_vfs > MLX4_MAX_NUM_VF) { - printk(KERN_ERR "There are more VF's (%d) than allowed(%d)\n", - num_vfs, MLX4_MAX_NUM_VF); + + /* Due to requirement that all VFs and the PF are *guaranteed* 2 MACS + * per port, we must limit the number of VFs 
to 63 (since their are + * 128 MACs) + */ + if (num_vfs >= MLX4_MAX_NUM_VF) { + dev_err(&pdev->dev, + "Requested more VF's (%d) than allowed (%d)\n", + num_vfs, MLX4_MAX_NUM_VF - 1); return -EINVAL; } @@ -2322,6 +2332,8 @@ slave_start: if (err) goto err_steer; + mlx4_init_quotas(dev); + for (port = 1; port <= dev->caps.num_ports; port++) { err = mlx4_init_port_info(dev, port); if (err) diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 97941269bc14..e7eb86ecc6ea 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -504,12 +504,27 @@ struct slave_list { struct list_head res_list[MLX4_NUM_OF_RESOURCE_TYPE]; }; +struct resource_allocator { + union { + int res_reserved; + int res_port_rsvd[MLX4_MAX_PORTS]; + }; + union { + int res_free; + int res_port_free[MLX4_MAX_PORTS]; + }; + int *quota; + int *allocated; + int *guaranteed; +}; + struct mlx4_resource_tracker { spinlock_t lock; /* tree for each resources */ struct rb_root res_tree[MLX4_NUM_OF_RESOURCE_TYPE]; /* num_of_slave's lists, one per slave */ struct slave_list *slave_list; + struct resource_allocator res_alloc[MLX4_NUM_OF_RESOURCE_TYPE]; }; #define SLAVE_EVENT_EQ_SIZE 128 @@ -1253,4 +1268,6 @@ static inline spinlock_t *mlx4_tlock(struct mlx4_dev *dev) void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work); +void mlx4_init_quotas(struct mlx4_dev *dev); + #endif /* MLX4_H */ diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index e891b058c1be..2715e61dbb74 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -480,8 +480,7 @@ int mlx4_init_qp_table(struct mlx4_dev *dev) */ err = mlx4_bitmap_init(&qp_table->bitmap, dev->caps.num_qps, - (1 << 23) - 1, dev->phys_caps.base_sqpn + 8 + - 16 * MLX4_MFUNC_MAX * !!mlx4_is_master(dev), + (1 << 23) - 1, mlx4_num_reserved_sqps(dev), reserved_from_top); if (err) return 
err; diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 35863889bec0..cc5d6d0aad16 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -284,10 +284,59 @@ static const char *ResourceType(enum mlx4_resource rt) } static void rem_slave_vlans(struct mlx4_dev *dev, int slave); +static inline void initialize_res_quotas(struct mlx4_dev *dev, + struct resource_allocator *res_alloc, + enum mlx4_resource res_type, + int vf, int num_instances) +{ + res_alloc->guaranteed[vf] = num_instances / (2 * (dev->num_vfs + 1)); + res_alloc->quota[vf] = (num_instances / 2) + res_alloc->guaranteed[vf]; + if (vf == mlx4_master_func_num(dev)) { + res_alloc->res_free = num_instances; + if (res_type == RES_MTT) { + /* reserved mtts will be taken out of the PF allocation */ + res_alloc->res_free += dev->caps.reserved_mtts; + res_alloc->guaranteed[vf] += dev->caps.reserved_mtts; + res_alloc->quota[vf] += dev->caps.reserved_mtts; + } + } +} + +void mlx4_init_quotas(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int pf; + + /* quotas for VFs are initialized in mlx4_slave_cap */ + if (mlx4_is_slave(dev)) + return; + + if (!mlx4_is_mfunc(dev)) { + dev->quotas.qp = dev->caps.num_qps - dev->caps.reserved_qps - + mlx4_num_reserved_sqps(dev); + dev->quotas.cq = dev->caps.num_cqs - dev->caps.reserved_cqs; + dev->quotas.srq = dev->caps.num_srqs - dev->caps.reserved_srqs; + dev->quotas.mtt = dev->caps.num_mtts - dev->caps.reserved_mtts; + dev->quotas.mpt = dev->caps.num_mpts - dev->caps.reserved_mrws; + return; + } + + pf = mlx4_master_func_num(dev); + dev->quotas.qp = + priv->mfunc.master.res_tracker.res_alloc[RES_QP].quota[pf]; + dev->quotas.cq = + priv->mfunc.master.res_tracker.res_alloc[RES_CQ].quota[pf]; + dev->quotas.srq = + priv->mfunc.master.res_tracker.res_alloc[RES_SRQ].quota[pf]; + dev->quotas.mtt = + 
priv->mfunc.master.res_tracker.res_alloc[RES_MTT].quota[pf]; + dev->quotas.mpt = + priv->mfunc.master.res_tracker.res_alloc[RES_MPT].quota[pf]; +} int mlx4_init_resource_tracker(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); - int i; + int i, j; int t; priv->mfunc.master.res_tracker.slave_list = @@ -308,8 +357,104 @@ int mlx4_init_resource_tracker(struct mlx4_dev *dev) for (i = 0 ; i < MLX4_NUM_OF_RESOURCE_TYPE; i++) priv->mfunc.master.res_tracker.res_tree[i] = RB_ROOT; + for (i = 0; i < MLX4_NUM_OF_RESOURCE_TYPE; i++) { + struct resource_allocator *res_alloc = + &priv->mfunc.master.res_tracker.res_alloc[i]; + res_alloc->quota = kmalloc((dev->num_vfs + 1) * sizeof(int), GFP_KERNEL); + res_alloc->guaranteed = kmalloc((dev->num_vfs + 1) * sizeof(int), GFP_KERNEL); + if (i == RES_MAC || i == RES_VLAN) + res_alloc->allocated = kzalloc(MLX4_MAX_PORTS * + (dev->num_vfs + 1) * sizeof(int), + GFP_KERNEL); + else + res_alloc->allocated = kzalloc((dev->num_vfs + 1) * sizeof(int), GFP_KERNEL); + + if (!res_alloc->quota || !res_alloc->guaranteed || + !res_alloc->allocated) + goto no_mem_err; + + for (t = 0; t < dev->num_vfs + 1; t++) { + switch (i) { + case RES_QP: + initialize_res_quotas(dev, res_alloc, RES_QP, + t, dev->caps.num_qps - + dev->caps.reserved_qps - + mlx4_num_reserved_sqps(dev)); + break; + case RES_CQ: + initialize_res_quotas(dev, res_alloc, RES_CQ, + t, dev->caps.num_cqs - + dev->caps.reserved_cqs); + break; + case RES_SRQ: + initialize_res_quotas(dev, res_alloc, RES_SRQ, + t, dev->caps.num_srqs - + dev->caps.reserved_srqs); + break; + case RES_MPT: + initialize_res_quotas(dev, res_alloc, RES_MPT, + t, dev->caps.num_mpts - + dev->caps.reserved_mrws); + break; + case RES_MTT: + initialize_res_quotas(dev, res_alloc, RES_MTT, + t, dev->caps.num_mtts - + dev->caps.reserved_mtts); + break; + case RES_MAC: + if (t == mlx4_master_func_num(dev)) { + res_alloc->quota[t] = MLX4_MAX_MAC_NUM; + res_alloc->guaranteed[t] = 2; + for (j = 0; j < 
MLX4_MAX_PORTS; j++) + res_alloc->res_port_free[j] = MLX4_MAX_MAC_NUM; + } else { + res_alloc->quota[t] = MLX4_MAX_MAC_NUM; + res_alloc->guaranteed[t] = 2; + } + break; + case RES_VLAN: + if (t == mlx4_master_func_num(dev)) { + res_alloc->quota[t] = MLX4_MAX_VLAN_NUM; + res_alloc->guaranteed[t] = MLX4_MAX_VLAN_NUM / 2; + for (j = 0; j < MLX4_MAX_PORTS; j++) + res_alloc->res_port_free[j] = + res_alloc->quota[t]; + } else { + res_alloc->quota[t] = MLX4_MAX_VLAN_NUM / 2; + res_alloc->guaranteed[t] = 0; + } + break; + case RES_COUNTER: + res_alloc->quota[t] = dev->caps.max_counters; + res_alloc->guaranteed[t] = 0; + if (t == mlx4_master_func_num(dev)) + res_alloc->res_free = res_alloc->quota[t]; + break; + default: + break; + } + if (i == RES_MAC || i == RES_VLAN) { + for (j = 0; j < MLX4_MAX_PORTS; j++) + res_alloc->res_port_rsvd[j] += + res_alloc->guaranteed[t]; + } else { + res_alloc->res_reserved += res_alloc->guaranteed[t]; + } + } + } spin_lock_init(&priv->mfunc.master.res_tracker.lock); - return 0 ; + return 0; + +no_mem_err: + for (i = 0; i < MLX4_NUM_OF_RESOURCE_TYPE; i++) { + kfree(priv->mfunc.master.res_tracker.res_alloc[i].allocated); + priv->mfunc.master.res_tracker.res_alloc[i].allocated = NULL; + kfree(priv->mfunc.master.res_tracker.res_alloc[i].guaranteed); + priv->mfunc.master.res_tracker.res_alloc[i].guaranteed = NULL; + kfree(priv->mfunc.master.res_tracker.res_alloc[i].quota); + priv->mfunc.master.res_tracker.res_alloc[i].quota = NULL; + } + return -ENOMEM; } void mlx4_free_resource_tracker(struct mlx4_dev *dev, @@ -333,6 +478,14 @@ void mlx4_free_resource_tracker(struct mlx4_dev *dev, } if (type != RES_TR_FREE_SLAVES_ONLY) { + for (i = 0; i < MLX4_NUM_OF_RESOURCE_TYPE; i++) { + kfree(priv->mfunc.master.res_tracker.res_alloc[i].allocated); + priv->mfunc.master.res_tracker.res_alloc[i].allocated = NULL; + kfree(priv->mfunc.master.res_tracker.res_alloc[i].guaranteed); + priv->mfunc.master.res_tracker.res_alloc[i].guaranteed = NULL; + 
kfree(priv->mfunc.master.res_tracker.res_alloc[i].quota); + priv->mfunc.master.res_tracker.res_alloc[i].quota = NULL; + } kfree(priv->mfunc.master.res_tracker.slave_list); priv->mfunc.master.res_tracker.slave_list = NULL; } diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index e2e92885bdc1..f6f59271f857 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -641,12 +641,23 @@ struct mlx4_counter { __be64 tx_bytes; }; +struct mlx4_quotas { + int qp; + int cq; + int srq; + int mpt; + int mtt; + int counter; + int xrcd; +}; + struct mlx4_dev { struct pci_dev *pdev; unsigned long flags; unsigned long num_slaves; struct mlx4_caps caps; struct mlx4_phys_caps phys_caps; + struct mlx4_quotas quotas; struct radix_tree_root qp_table_tree; u8 rev_id; char board_id[MLX4_BOARD_ID_LEN]; @@ -772,6 +783,12 @@ static inline int mlx4_is_master(struct mlx4_dev *dev) return dev->flags & MLX4_FLAG_MASTER; } +static inline int mlx4_num_reserved_sqps(struct mlx4_dev *dev) +{ + return dev->phys_caps.base_sqpn + 8 + + 16 * MLX4_MFUNC_MAX * !!mlx4_is_master(dev); +} + static inline int mlx4_is_qp_reserved(struct mlx4_dev *dev, u32 qpn) { return (qpn < dev->phys_caps.base_sqpn + 8 + -- cgit v1.2.3-71-gd317 From 6e7136ed7793fa4948b0192dcd6862d12a50d67c Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Thu, 7 Nov 2013 12:19:53 +0200 Subject: net/mlx4_core: ICM pages are allocated on device NUMA node This is done to optimize FW/HW access to host memory. Signed-off-by: Yevgeny Petrilin Signed-off-by: Eugenia Emantayev Reviewed-by: Hadar Hen Zion Signed-off-by: Amir Vadai Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/icm.c | 42 ++++++++++++++++++++++--------- drivers/net/ethernet/mellanox/mlx4/main.c | 1 + include/linux/mlx4/device.h | 1 + 3 files changed, 32 insertions(+), 12 deletions(-) (limited to 'include/linux/mlx4') diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c index 31d02649be41..5fbf4924c272 100644 --- a/drivers/net/ethernet/mellanox/mlx4/icm.c +++ b/drivers/net/ethernet/mellanox/mlx4/icm.c @@ -93,13 +93,17 @@ void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm, int coherent) kfree(icm); } -static int mlx4_alloc_icm_pages(struct scatterlist *mem, int order, gfp_t gfp_mask) +static int mlx4_alloc_icm_pages(struct scatterlist *mem, int order, + gfp_t gfp_mask, int node) { struct page *page; - page = alloc_pages(gfp_mask, order); - if (!page) - return -ENOMEM; + page = alloc_pages_node(node, gfp_mask, order); + if (!page) { + page = alloc_pages(gfp_mask, order); + if (!page) + return -ENOMEM; + } sg_set_page(mem, page, PAGE_SIZE << order, 0); return 0; @@ -130,9 +134,15 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, /* We use sg_set_buf for coherent allocs, which assumes low memory */ BUG_ON(coherent && (gfp_mask & __GFP_HIGHMEM)); - icm = kmalloc(sizeof *icm, gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN)); - if (!icm) - return NULL; + icm = kmalloc_node(sizeof(*icm), + gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN), + dev->numa_node); + if (!icm) { + icm = kmalloc(sizeof(*icm), + gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN)); + if (!icm) + return NULL; + } icm->refcount = 0; INIT_LIST_HEAD(&icm->chunk_list); @@ -141,10 +151,17 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, while (npages > 0) { if (!chunk) { - chunk = kmalloc(sizeof *chunk, - gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN)); - if (!chunk) - goto fail; + chunk = kmalloc_node(sizeof(*chunk), + gfp_mask & ~(__GFP_HIGHMEM | + __GFP_NOWARN), + dev->numa_node); + if (!chunk) { 
+ chunk = kmalloc(sizeof(*chunk), + gfp_mask & ~(__GFP_HIGHMEM | + __GFP_NOWARN)); + if (!chunk) + goto fail; + } sg_init_table(chunk->mem, MLX4_ICM_CHUNK_LEN); chunk->npages = 0; @@ -161,7 +178,8 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, cur_order, gfp_mask); else ret = mlx4_alloc_icm_pages(&chunk->mem[chunk->npages], - cur_order, gfp_mask); + cur_order, gfp_mask, + dev->numa_node); if (ret) { if (--cur_order < 0) diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 7d2628dfdc29..5789ea2c934d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -2191,6 +2191,7 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data) mutex_init(&priv->bf_mutex); dev->rev_id = pdev->revision; + dev->numa_node = dev_to_node(&pdev->dev); /* Detect if this device is a virtual function */ if (pci_dev_data & MLX4_PCI_DEV_IS_VF) { /* When acting as pf, we normally skip vfs unless explicitly diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index f6f59271f857..4cf0b0153639 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -662,6 +662,7 @@ struct mlx4_dev { u8 rev_id; char board_id[MLX4_BOARD_ID_LEN]; int num_vfs; + int numa_node; int oper_log_mgm_entry_size; u64 regid_promisc_array[MLX4_MAX_PORTS + 1]; u64 regid_allmulti_array[MLX4_MAX_PORTS + 1]; -- cgit v1.2.3-71-gd317 From 163561a4e2f8af44e96453bc10c7a4f9bcc736e1 Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Thu, 7 Nov 2013 12:19:54 +0200 Subject: net/mlx4_en: Datapath structures are allocated per NUMA node For each RX/TX ring and its CQ, allocation is done on a NUMA node that corresponds to the core that the data structure should operate on. The assumption is that the core number is reflected by the ring index. The affected allocations are the ring/CQ data structures, the TX/RX info and the shared HW/SW buffer. 
For TX rings, each core has rings of all UPs. Signed-off-by: Yevgeny Petrilin Signed-off-by: Eugenia Emantayev Reviewed-by: Hadar Hen Zion Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_cq.c | 17 ++++++++++--- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 12 ++++++--- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 23 +++++++++++------ drivers/net/ethernet/mellanox/mlx4/en_tx.c | 34 +++++++++++++++++--------- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 6 ++--- drivers/net/ethernet/mellanox/mlx4/pd.c | 11 ++++++--- include/linux/mlx4/device.h | 2 +- 7 files changed, 71 insertions(+), 34 deletions(-) (limited to 'include/linux/mlx4') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index d203f11b9edf..3a098cc4d349 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -45,16 +45,20 @@ static void mlx4_en_cq_event(struct mlx4_cq *cq, enum mlx4_event event) int mlx4_en_create_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq, - int entries, int ring, enum cq_type mode) + int entries, int ring, enum cq_type mode, + int node) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_cq *cq; int err; - cq = kzalloc(sizeof(*cq), GFP_KERNEL); + cq = kzalloc_node(sizeof(*cq), GFP_KERNEL, node); if (!cq) { - en_err(priv, "Failed to allocate CQ structure\n"); - return -ENOMEM; + cq = kzalloc(sizeof(*cq), GFP_KERNEL); + if (!cq) { + en_err(priv, "Failed to allocate CQ structure\n"); + return -ENOMEM; + } } cq->size = entries; @@ -64,8 +68,13 @@ int mlx4_en_create_cq(struct mlx4_en_priv *priv, cq->is_tx = mode; spin_lock_init(&cq->lock); + /* Allocate HW buffers on provided NUMA node. + * dev->numa_node is used in mtt range allocation flow. 
+ */ + set_dev_node(&mdev->dev->pdev->dev, node); err = mlx4_alloc_hwq_res(mdev->dev, &cq->wqres, cq->buf_size, 2 * PAGE_SIZE); + set_dev_node(&mdev->dev->pdev->dev, mdev->dev->numa_node); if (err) goto err_cq; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index f430788cc4fe..e72d8a112a6b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1895,6 +1895,7 @@ int mlx4_en_alloc_resources(struct mlx4_en_priv *priv) struct mlx4_en_port_profile *prof = priv->prof; int i; int err; + int node; err = mlx4_qp_reserve_range(priv->mdev->dev, priv->tx_ring_num, 256, &priv->base_tx_qpn); if (err) { @@ -1904,23 +1905,26 @@ int mlx4_en_alloc_resources(struct mlx4_en_priv *priv) /* Create tx Rings */ for (i = 0; i < priv->tx_ring_num; i++) { + node = cpu_to_node(i % num_online_cpus()); if (mlx4_en_create_cq(priv, &priv->tx_cq[i], - prof->tx_ring_size, i, TX)) + prof->tx_ring_size, i, TX, node)) goto err; if (mlx4_en_create_tx_ring(priv, &priv->tx_ring[i], priv->base_tx_qpn + i, - prof->tx_ring_size, TXBB_SIZE)) + prof->tx_ring_size, TXBB_SIZE, node)) goto err; } /* Create rx Rings */ for (i = 0; i < priv->rx_ring_num; i++) { + node = cpu_to_node(i % num_online_cpus()); if (mlx4_en_create_cq(priv, &priv->rx_cq[i], - prof->rx_ring_size, i, RX)) + prof->rx_ring_size, i, RX, node)) goto err; if (mlx4_en_create_rx_ring(priv, &priv->rx_ring[i], - prof->rx_ring_size, priv->stride)) + prof->rx_ring_size, priv->stride, + node)) goto err; } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 1c45f88776c5..07a1d0fbae47 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -320,17 +320,20 @@ static void mlx4_en_free_rx_buf(struct mlx4_en_priv *priv, int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring **pring, - u32 size, u16 stride) + 
u32 size, u16 stride, int node) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_rx_ring *ring; int err = -ENOMEM; int tmp; - ring = kzalloc(sizeof(*ring), GFP_KERNEL); + ring = kzalloc_node(sizeof(*ring), GFP_KERNEL, node); if (!ring) { - en_err(priv, "Failed to allocate RX ring structure\n"); - return -ENOMEM; + ring = kzalloc(sizeof(*ring), GFP_KERNEL); + if (!ring) { + en_err(priv, "Failed to allocate RX ring structure\n"); + return -ENOMEM; + } } ring->prod = 0; @@ -343,17 +346,23 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, tmp = size * roundup_pow_of_two(MLX4_EN_MAX_RX_FRAGS * sizeof(struct mlx4_en_rx_alloc)); - ring->rx_info = vmalloc(tmp); + ring->rx_info = vmalloc_node(tmp, node); if (!ring->rx_info) { - err = -ENOMEM; - goto err_ring; + ring->rx_info = vmalloc(tmp); + if (!ring->rx_info) { + err = -ENOMEM; + goto err_ring; + } } en_dbg(DRV, priv, "Allocated rx_info ring at addr:%p size:%d\n", ring->rx_info, tmp); + /* Allocate HW buffers on provided NUMA node */ + set_dev_node(&mdev->dev->pdev->dev, node); err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size, 2 * PAGE_SIZE); + set_dev_node(&mdev->dev->pdev->dev, mdev->dev->numa_node); if (err) goto err_info; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index d4e4cf30a720..f54ebd5a1702 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -55,17 +55,20 @@ MODULE_PARM_DESC(inline_thold, "threshold for using inline data"); int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring **pring, int qpn, u32 size, - u16 stride) + u16 stride, int node) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_tx_ring *ring; int tmp; int err; - ring = kzalloc(sizeof(*ring), GFP_KERNEL); + ring = kzalloc_node(sizeof(*ring), GFP_KERNEL, node); if (!ring) { - en_err(priv, "Failed allocating TX ring\n"); - return -ENOMEM; + ring = kzalloc(sizeof(*ring), 
GFP_KERNEL); + if (!ring) { + en_err(priv, "Failed allocating TX ring\n"); + return -ENOMEM; + } } ring->size = size; @@ -75,24 +78,33 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, inline_thold = min(inline_thold, MAX_INLINE); tmp = size * sizeof(struct mlx4_en_tx_info); - ring->tx_info = vmalloc(tmp); + ring->tx_info = vmalloc_node(tmp, node); if (!ring->tx_info) { - err = -ENOMEM; - goto err_ring; + ring->tx_info = vmalloc(tmp); + if (!ring->tx_info) { + err = -ENOMEM; + goto err_ring; + } } en_dbg(DRV, priv, "Allocated tx_info ring at addr:%p size:%d\n", ring->tx_info, tmp); - ring->bounce_buf = kmalloc(MAX_DESC_SIZE, GFP_KERNEL); + ring->bounce_buf = kmalloc_node(MAX_DESC_SIZE, GFP_KERNEL, node); if (!ring->bounce_buf) { - err = -ENOMEM; - goto err_info; + ring->bounce_buf = kmalloc(MAX_DESC_SIZE, GFP_KERNEL); + if (!ring->bounce_buf) { + err = -ENOMEM; + goto err_info; + } } ring->buf_size = ALIGN(size * ring->stride, MLX4_EN_PAGE_SIZE); + /* Allocate HW buffers on provided NUMA node */ + set_dev_node(&mdev->dev->pdev->dev, node); err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size, 2 * PAGE_SIZE); + set_dev_node(&mdev->dev->pdev->dev, mdev->dev->numa_node); if (err) { en_err(priv, "Failed allocating hwq resources\n"); goto err_bounce; @@ -118,7 +130,7 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, } ring->qp.event = mlx4_en_sqp_event; - err = mlx4_bf_alloc(mdev->dev, &ring->bf); + err = mlx4_bf_alloc(mdev->dev, &ring->bf, node); if (err) { en_dbg(DRV, priv, "working without blueflame (%d)", err); ring->bf.uar = &mdev->priv_uar; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index b2547ae07dfa..f3758de59c05 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -705,7 +705,7 @@ void mlx4_en_free_resources(struct mlx4_en_priv *priv); int mlx4_en_alloc_resources(struct mlx4_en_priv *priv); int 
mlx4_en_create_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq, - int entries, int ring, enum cq_type mode); + int entries, int ring, enum cq_type mode, int node); void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq); int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, int cq_idx); @@ -719,7 +719,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev); int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring **pring, - int qpn, u32 size, u16 stride); + int qpn, u32 size, u16 stride, int node); void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring **pring); int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv, @@ -730,7 +730,7 @@ void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv, int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring **pring, - u32 size, u16 stride); + u32 size, u16 stride, int node); void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring **pring, u32 size, u16 stride); diff --git a/drivers/net/ethernet/mellanox/mlx4/pd.c b/drivers/net/ethernet/mellanox/mlx4/pd.c index 00f223acada7..84cfb40bf451 100644 --- a/drivers/net/ethernet/mellanox/mlx4/pd.c +++ b/drivers/net/ethernet/mellanox/mlx4/pd.c @@ -168,7 +168,7 @@ void mlx4_uar_free(struct mlx4_dev *dev, struct mlx4_uar *uar) } EXPORT_SYMBOL_GPL(mlx4_uar_free); -int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf) +int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf, int node) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_uar *uar; @@ -186,10 +186,13 @@ int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf) err = -ENOMEM; goto out; } - uar = kmalloc(sizeof *uar, GFP_KERNEL); + uar = kmalloc_node(sizeof(*uar), GFP_KERNEL, node); if (!uar) { - err = -ENOMEM; - goto out; + uar = kmalloc(sizeof(*uar), GFP_KERNEL); + if (!uar) { + err = -ENOMEM; + goto out; + } } err = mlx4_uar_alloc(dev, uar); if (err) diff 
--git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 4cf0b0153639..7d3a523160ba 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -835,7 +835,7 @@ void mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn); int mlx4_uar_alloc(struct mlx4_dev *dev, struct mlx4_uar *uar); void mlx4_uar_free(struct mlx4_dev *dev, struct mlx4_uar *uar); -int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf); +int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf, int node); void mlx4_bf_free(struct mlx4_dev *dev, struct mlx4_bf *bf); int mlx4_mtt_init(struct mlx4_dev *dev, int npages, int page_shift, -- cgit v1.2.3-71-gd317