From 6e7333d315a768170a59ac771297ee0551bdddbf Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Mon, 1 Feb 2016 18:51:05 -0500 Subject: net: add rx_nohandler stat counter This adds an rx_nohandler stat counter, along with a sysfs statistics node, and copies the counter out via netlink as well. CC: "David S. Miller" CC: Eric Dumazet CC: Jiri Pirko CC: Daniel Borkmann CC: Tom Herbert CC: Jay Vosburgh CC: Veaceslav Falico CC: Andy Gospodarek CC: netdev@vger.kernel.org Signed-off-by: Jarod Wilson Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 289c2314d766..78a20cec2a0a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1397,6 +1397,8 @@ enum netdev_priv_flags { * do not use this in drivers * @tx_dropped: Dropped packets by core network, * do not use this in drivers + * @rx_nohandler: nohandler dropped packets by core network on + * inactive devices, do not use this in drivers * * @wireless_handlers: List of functions to handle Wireless Extensions, * instead of ioctl, @@ -1611,6 +1613,7 @@ struct net_device { atomic_long_t rx_dropped; atomic_long_t tx_dropped; + atomic_long_t rx_nohandler; #ifdef CONFIG_WIRELESS_EXT const struct iw_handler_def * wireless_handlers; -- cgit v1.2.3-71-gd317 From ba905f5e2f63d86ed4cfbd3d9096fb28d156f1ee Mon Sep 17 00:00:00 2001 From: Kim Jones Date: Tue, 2 Feb 2016 03:51:16 +0000 Subject: ethtool: Declare netdev_rss_key as __read_mostly. netdev_rss_key is written to once and thereafter is read by drivers when they are initialising. The fact that it is mostly read and not written to makes it a candidate for a __read_mostly declaration. Signed-off-by: Kim Jones Signed-off-by: Alan Carey Acked-by: Rami Rosen Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 2 +- net/core/ethtool.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 78a20cec2a0a..219f53c30cb3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3744,7 +3744,7 @@ void netdev_lower_state_changed(struct net_device *lower_dev, /* RSS keys are 40 or 52 bytes long */ #define NETDEV_RSS_KEY_LEN 52 -extern u8 netdev_rss_key[NETDEV_RSS_KEY_LEN]; +extern u8 netdev_rss_key[NETDEV_RSS_KEY_LEN] __read_mostly; void netdev_rss_key_fill(void *buffer, size_t len); int dev_get_nest_level(struct net_device *dev, diff --git a/net/core/ethtool.c b/net/core/ethtool.c index daf04709dd3c..453c803f1c87 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -632,7 +632,7 @@ static int ethtool_copy_validate_indir(u32 *indir, void __user *useraddr, return 0; } -u8 netdev_rss_key[NETDEV_RSS_KEY_LEN]; +u8 netdev_rss_key[NETDEV_RSS_KEY_LEN] __read_mostly; void netdev_rss_key_fill(void *buffer, size_t len) { -- cgit v1.2.3-71-gd317 From d4ab4286276fcd6c155bafdf4422b712068d2516 Mon Sep 17 00:00:00 2001 From: "Keller, Jacob E" Date: Mon, 8 Feb 2016 16:05:03 -0800 Subject: ethtool: correctly ensure {GS}CHANNELS doesn't conflict with GS{RXFH} Ethernet drivers implementing both {GS}RXFH and {GS}CHANNELS ethtool ops incorrectly allow SCHANNELS when it would conflict with the settings from SRXFH. This occurs because it is not possible for drivers to understand whether their Rx flow indirection table has been configured or is in the default state. In addition, drivers currently behave in various ways when increasing the number of Rx channels. Some drivers will always destroy the Rx flow indirection table when this occurs, whether it has been set by the user or not. Other drivers will attempt to preserve the table even if the user has never modified it from the default driver settings. 
Neither of these situations is desirable because each leads to unexpected behavior or loss of user configuration. The correct behavior is to simply return -EINVAL when SCHANNELS would conflict with the current Rx flow table settings. However, it should only do so if the current settings were modified by the user. If we required that the new settings never conflict with the current (default) Rx flow settings, we would force users to first reduce their Rx flow settings and then reduce the number of Rx channels. This patch proposes a solution implemented in net/core/ethtool.c which ensures that all drivers behave correctly. It checks whether the RXFH table has been configured to non-default settings, and stores this information in a private netdev flag. When the number of channels is requested to change, it first ensures that the current Rx flow table is not going to assign flows to now disabled channels. Signed-off-by: Jacob Keller Signed-off-by: David S. Miller --- include/linux/netdevice.h | 8 +++++++ net/core/ethtool.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 219f53c30cb3..0499569c256d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1291,6 +1291,7 @@ struct net_device_ops { * @IFF_OPENVSWITCH: device is a Open vSwitch master * @IFF_L3MDEV_SLAVE: device is enslaved to an L3 master device * @IFF_TEAM: device is a team device + * @IFF_RXFH_CONFIGURED: device has had Rx Flow indirection table configured */ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, @@ -1318,6 +1319,7 @@ enum netdev_priv_flags { IFF_OPENVSWITCH = 1<<22, IFF_L3MDEV_SLAVE = 1<<23, IFF_TEAM = 1<<24, + IFF_RXFH_CONFIGURED = 1<<25, }; #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN @@ -1345,6 +1347,7 @@ enum netdev_priv_flags { #define IFF_OPENVSWITCH IFF_OPENVSWITCH #define IFF_L3MDEV_SLAVE IFF_L3MDEV_SLAVE #define IFF_TEAM
IFF_TEAM +#define IFF_RXFH_CONFIGURED IFF_RXFH_CONFIGURED /** * struct net_device - The DEVICE structure. @@ -4048,6 +4051,11 @@ static inline bool netif_is_lag_port(const struct net_device *dev) return netif_is_bond_slave(dev) || netif_is_team_port(dev); } +static inline bool netif_is_rxfh_configured(const struct net_device *dev) +{ + return dev->priv_flags & IFF_RXFH_CONFIGURED; +} + /* This device needs to keep skb dst for qdisc enqueue or ndo_start_xmit() */ static inline void netif_keep_dst(struct net_device *dev) { diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 453c803f1c87..379bdc59b1c8 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -642,6 +642,37 @@ void netdev_rss_key_fill(void *buffer, size_t len) } EXPORT_SYMBOL(netdev_rss_key_fill); +static int ethtool_get_max_rxfh_channel(struct net_device *dev, u32 *max) +{ + u32 dev_size, current_max = 0; + u32 *indir; + int ret; + + if (!dev->ethtool_ops->get_rxfh_indir_size || + !dev->ethtool_ops->get_rxfh) + return -EOPNOTSUPP; + dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev); + if (dev_size == 0) + return -EOPNOTSUPP; + + indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER); + if (!indir) + return -ENOMEM; + + ret = dev->ethtool_ops->get_rxfh(dev, indir, NULL, NULL); + if (ret) + goto out; + + while (dev_size--) + current_max = max(current_max, indir[dev_size]); + + *max = current_max; + +out: + kfree(indir); + return ret; +} + static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev, void __user *useraddr) { @@ -738,6 +769,14 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev, } ret = ops->set_rxfh(dev, indir, NULL, ETH_RSS_HASH_NO_CHANGE); + if (ret) + goto out; + + /* indicate whether rxfh was set to default */ + if (user_size == 0) + dev->priv_flags &= ~IFF_RXFH_CONFIGURED; + else + dev->priv_flags |= IFF_RXFH_CONFIGURED; out: kfree(indir); @@ -897,6 +936,14 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, 
} ret = ops->set_rxfh(dev, indir, hkey, rxfh.hfunc); + if (ret) + goto out; + + /* indicate whether rxfh was set to default */ + if (rxfh.indir_size == 0) + dev->priv_flags &= ~IFF_RXFH_CONFIGURED; + else if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) + dev->priv_flags |= IFF_RXFH_CONFIGURED; out: kfree(rss_config); @@ -1228,6 +1275,7 @@ static noinline_for_stack int ethtool_set_channels(struct net_device *dev, void __user *useraddr) { struct ethtool_channels channels; + u32 max_rx_in_use = 0; if (!dev->ethtool_ops->set_channels) return -EOPNOTSUPP; @@ -1235,6 +1283,13 @@ static noinline_for_stack int ethtool_set_channels(struct net_device *dev, if (copy_from_user(&channels, useraddr, sizeof(channels))) return -EFAULT; + /* ensure the new Rx count fits within the configured Rx flow + * indirection table settings */ + if (netif_is_rxfh_configured(dev) && + !ethtool_get_max_rxfh_channel(dev, &max_rx_in_use) && + (channels.combined_count + channels.rx_count) <= max_rx_in_use) + return -EINVAL; + return dev->ethtool_ops->set_channels(dev, &channels); } -- cgit v1.2.3-71-gd317 From e4c6734eaab90695db0ea8456307790cb0c1ccb5 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 16 Feb 2016 21:16:15 -0800 Subject: net: rework ndo tc op to consume additional qdisc handle parameter The ndo_setup_tc() op was added to support drivers offloading tx qdiscs however only support for mqprio was ever added. So we only ever added support for passing the number of traffic classes to the driver. This patch generalizes the ndo_setup_tc op so that a handle can be provided to indicate if the offload is for ingress or egress or potentially even child qdiscs. CC: Murali Karicheri CC: Shradha Shah CC: Or Gerlitz CC: Ariel Elior CC: Jeff Kirsher CC: Bruce Allan CC: Jesse Brandeburg CC: Don Skidmore Signed-off-by: John Fastabend Acked-by: Jiri Pirko Acked-by: Jamal Hadi Salim Signed-off-by: David S. 
Miller --- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 5 ++++- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 7 +++++++ drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h | 1 + drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 5 ++++- drivers/net/ethernet/intel/fm10k/fm10k_netdev.c | 10 +++++++++- drivers/net/ethernet/intel/i40e/i40e.h | 2 +- drivers/net/ethernet/intel/i40e/i40e_fcoe.c | 2 +- drivers/net/ethernet/intel/i40e/i40e_main.c | 17 ++++++++++++----- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 11 ++++++++++- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 12 ++++++++++-- drivers/net/ethernet/sfc/efx.h | 2 +- drivers/net/ethernet/sfc/tx.c | 5 ++++- drivers/net/ethernet/ti/netcp_core.c | 5 ++++- include/linux/netdevice.h | 3 ++- net/sched/sch_mqprio.c | 5 +++-- 16 files changed, 74 insertions(+), 20 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 8a9b493566c9..9955cae3cabc 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -1626,12 +1626,15 @@ static void xgbe_poll_controller(struct net_device *netdev) } #endif /* End CONFIG_NET_POLL_CONTROLLER */ -static int xgbe_setup_tc(struct net_device *netdev, u8 tc) +static int xgbe_setup_tc(struct net_device *netdev, u32 handle, u8 tc) { struct xgbe_prv_data *pdata = netdev_priv(netdev); unsigned int offset, queue; u8 i; + if (handle != TC_H_ROOT) + return -EINVAL; + if (tc && (tc != pdata->hw_feat.tc_cnt)) return -EINVAL; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 9e42bcaf9917..b262cba34dfa 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -4272,6 +4272,13 @@ int bnx2x_setup_tc(struct net_device *dev, u8 num_tc) return 0; } +int __bnx2x_setup_tc(struct net_device 
*dev, u32 handle, u8 num_tc) +{ + if (handle != TC_H_ROOT) + return -EINVAL; + return bnx2x_setup_tc(dev, num_tc); +} + /* called with rtnl_lock */ int bnx2x_change_mac_addr(struct net_device *dev, void *p) { diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h index 37369865ca6d..60a4109dcdeb 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h @@ -486,6 +486,7 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev); /* setup_tc callback */ int bnx2x_setup_tc(struct net_device *dev, u8 num_tc); +int __bnx2x_setup_tc(struct net_device *dev, u32 handle, u8 num_tc); int bnx2x_get_vf_config(struct net_device *dev, int vf, struct ifla_vf_info *ivi); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index c5845252c920..81fc51c4ec2b 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -13061,7 +13061,7 @@ static const struct net_device_ops bnx2x_netdev_ops = { #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = poll_bnx2x, #endif - .ndo_setup_tc = bnx2x_setup_tc, + .ndo_setup_tc = __bnx2x_setup_tc, #ifdef CONFIG_BNX2X_SRIOV .ndo_set_vf_mac = bnx2x_set_vf_mac, .ndo_set_vf_vlan = bnx2x_set_vf_vlan, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 5dc89e527e7d..ff08faf44ee5 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -5370,10 +5370,13 @@ static int bnxt_change_mtu(struct net_device *dev, int new_mtu) return 0; } -static int bnxt_setup_tc(struct net_device *dev, u8 tc) +static int bnxt_setup_tc(struct net_device *dev, u32 handle, u8 tc) { struct bnxt *bp = netdev_priv(dev); + if (handle != TC_H_ROOT) + return -EINVAL; + if (tc > bp->max_tc) { netdev_err(dev, "too many traffic classes 
requested: %d Max supported is %d\n", tc, bp->max_tc); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c index 662569d5b7c0..12701a492325 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c @@ -1204,6 +1204,14 @@ err_queueing_scheme: return err; } +static int __fm10k_setup_tc(struct net_device *dev, u32 handle, u8 tc) +{ + if (handle != TC_H_ROOT) + return -EINVAL; + + return fm10k_setup_tc(dev, tc); +} + static int fm10k_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) { switch (cmd) { @@ -1386,7 +1394,7 @@ static const struct net_device_ops fm10k_netdev_ops = { .ndo_vlan_rx_kill_vid = fm10k_vlan_rx_kill_vid, .ndo_set_rx_mode = fm10k_set_rx_mode, .ndo_get_stats64 = fm10k_get_stats64, - .ndo_setup_tc = fm10k_setup_tc, + .ndo_setup_tc = __fm10k_setup_tc, .ndo_set_vf_mac = fm10k_ndo_set_vf_mac, .ndo_set_vf_vlan = fm10k_ndo_set_vf_vlan, .ndo_set_vf_rate = fm10k_ndo_set_vf_bw, diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 53ed3bdd8363..ef9ca075d5e5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -788,7 +788,7 @@ struct i40e_mac_filter *i40e_find_mac(struct i40e_vsi *vsi, u8 *macaddr, bool is_vf, bool is_netdev); #ifdef I40E_FCOE int i40e_close(struct net_device *netdev); -int i40e_setup_tc(struct net_device *netdev, u8 tc); +int __i40e_setup_tc(struct net_device *netdev, u32 handle, u8 tc); void i40e_netpoll(struct net_device *netdev); int i40e_fcoe_enable(struct net_device *netdev); int i40e_fcoe_disable(struct net_device *netdev); diff --git a/drivers/net/ethernet/intel/i40e/i40e_fcoe.c b/drivers/net/ethernet/intel/i40e/i40e_fcoe.c index 579a46ca82df..7c66ce416ec7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_fcoe.c +++ b/drivers/net/ethernet/intel/i40e/i40e_fcoe.c @@ -1457,7 +1457,7 @@ static const struct net_device_ops 
i40e_fcoe_netdev_ops = { .ndo_tx_timeout = i40e_tx_timeout, .ndo_vlan_rx_add_vid = i40e_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = i40e_vlan_rx_kill_vid, - .ndo_setup_tc = i40e_setup_tc, + .ndo_setup_tc = __i40e_setup_tc, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = i40e_netpoll, diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 320b0491abd9..abcb6c152186 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -5253,11 +5253,7 @@ void i40e_down(struct i40e_vsi *vsi) * @netdev: net device to configure * @tc: number of traffic classes to enable **/ -#ifdef I40E_FCOE -int i40e_setup_tc(struct net_device *netdev, u8 tc) -#else static int i40e_setup_tc(struct net_device *netdev, u8 tc) -#endif { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; @@ -5310,6 +5306,17 @@ exit: return ret; } +#ifdef I40E_FCOE +int __i40e_setup_tc(struct net_device *netdev, u32 handle, u8 tc) +#else +static int __i40e_setup_tc(struct net_device *netdev, u32 handle, u8 tc) +#endif +{ + if (handle != TC_H_ROOT) + return -EINVAL; + return i40e_setup_tc(netdev, tc); +} + /** * i40e_open - Called when a network interface is made active * @netdev: network interface device structure @@ -8951,7 +8958,7 @@ static const struct net_device_ops i40e_netdev_ops = { #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = i40e_netpoll, #endif - .ndo_setup_tc = i40e_setup_tc, + .ndo_setup_tc = __i40e_setup_tc, #ifdef I40E_FCOE .ndo_fcoe_enable = i40e_fcoe_enable, .ndo_fcoe_disable = i40e_fcoe_disable, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 0c701b8438b6..1ba714efd78c 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -8200,6 +8200,15 @@ int ixgbe_setup_tc(struct net_device *dev, u8 tc) return 0; } +int 
__ixgbe_setup_tc(struct net_device *dev, u32 handle, u8 tc) +{ + /* Only support egress tc setup for now */ + if (handle != TC_H_ROOT) + return -EINVAL; + + return ixgbe_setup_tc(dev, tc); +} + #ifdef CONFIG_PCI_IOV void ixgbe_sriov_reinit(struct ixgbe_adapter *adapter) { @@ -8658,7 +8667,7 @@ static const struct net_device_ops ixgbe_netdev_ops = { .ndo_get_vf_config = ixgbe_ndo_get_vf_config, .ndo_get_stats64 = ixgbe_get_stats64, #ifdef CONFIG_IXGBE_DCB - .ndo_setup_tc = ixgbe_setup_tc, + .ndo_setup_tc = __ixgbe_setup_tc, #endif #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = ixgbe_netpoll, diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 0c7e3f69a73b..d5c6c16b9457 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -69,6 +69,14 @@ int mlx4_en_setup_tc(struct net_device *dev, u8 up) return 0; } +static int __mlx4_en_setup_tc(struct net_device *dev, u32 handle, u8 up) +{ + if (handle != TC_H_ROOT) + return -EINVAL; + + return mlx4_en_setup_tc(dev, up); +} + #ifdef CONFIG_RFS_ACCEL struct mlx4_en_filter { @@ -2466,7 +2474,7 @@ static const struct net_device_ops mlx4_netdev_ops = { #endif .ndo_set_features = mlx4_en_set_features, .ndo_fix_features = mlx4_en_fix_features, - .ndo_setup_tc = mlx4_en_setup_tc, + .ndo_setup_tc = __mlx4_en_setup_tc, #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = mlx4_en_filter_rfs, #endif @@ -2504,7 +2512,7 @@ static const struct net_device_ops mlx4_netdev_ops_master = { #endif .ndo_set_features = mlx4_en_set_features, .ndo_fix_features = mlx4_en_fix_features, - .ndo_setup_tc = mlx4_en_setup_tc, + .ndo_setup_tc = __mlx4_en_setup_tc, #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = mlx4_en_filter_rfs, #endif diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h index 10827476bc0b..7815fa09b15d 100644 --- a/drivers/net/ethernet/sfc/efx.h +++ b/drivers/net/ethernet/sfc/efx.h @@ -32,7 +32,7 
@@ netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb, struct net_device *net_dev); netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb); void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index); -int efx_setup_tc(struct net_device *net_dev, u8 num_tc); +int efx_setup_tc(struct net_device *net_dev, u32 handle, u8 num_tc); unsigned int efx_tx_max_skb_descs(struct efx_nic *efx); extern unsigned int efx_piobuf_size; extern bool efx_separate_tx_channels; diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index f7a0ec1bca97..8f1d53e2aca7 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -562,7 +562,7 @@ void efx_init_tx_queue_core_txq(struct efx_tx_queue *tx_queue) efx->n_tx_channels : 0)); } -int efx_setup_tc(struct net_device *net_dev, u8 num_tc) +int efx_setup_tc(struct net_device *net_dev, u32 handle, u8 num_tc) { struct efx_nic *efx = netdev_priv(net_dev); struct efx_channel *channel; @@ -570,6 +570,9 @@ int efx_setup_tc(struct net_device *net_dev, u8 num_tc) unsigned tc; int rc; + if (handle != TC_H_ROOT) + return -EINVAL; + if (efx_nic_rev(efx) < EFX_REV_FALCON_B0 || num_tc > EFX_MAX_TX_TC) return -EINVAL; diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index c61d66d38634..40cde814608b 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -1835,13 +1835,16 @@ static u16 netcp_select_queue(struct net_device *dev, struct sk_buff *skb, return 0; } -static int netcp_setup_tc(struct net_device *dev, u8 num_tc) +static int netcp_setup_tc(struct net_device *dev, u32 handle, u8 num_tc) { int i; /* setup tc must be called under rtnl lock */ ASSERT_RTNL(); + if (handle != TC_H_ROOT) + return -EINVAL; + /* Sanity-check the number of traffic classes requested */ if ((dev->real_num_tx_queues <= 1) || (dev->real_num_tx_queues < num_tc)) diff --git a/include/linux/netdevice.h 
b/include/linux/netdevice.h index 0499569c256d..48928b6f9cb6 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -51,6 +51,7 @@ #include #include #include +#include struct netpoll_info; struct device; @@ -1150,7 +1151,7 @@ struct net_device_ops { int (*ndo_set_vf_rss_query_en)( struct net_device *dev, int vf, bool setting); - int (*ndo_setup_tc)(struct net_device *dev, u8 tc); + int (*ndo_setup_tc)(struct net_device *dev, u32 handle, u8 tc); #if IS_ENABLED(CONFIG_FCOE) int (*ndo_fcoe_enable)(struct net_device *dev); int (*ndo_fcoe_disable)(struct net_device *dev); diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index ad70ecf57ce7..f5a0e8a4dbd7 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -39,7 +39,7 @@ static void mqprio_destroy(struct Qdisc *sch) } if (priv->hw_owned && dev->netdev_ops->ndo_setup_tc) - dev->netdev_ops->ndo_setup_tc(dev, 0); + dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0); else netdev_set_num_tc(dev, 0); } @@ -141,7 +141,8 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) */ if (qopt->hw) { priv->hw_owned = 1; - err = dev->netdev_ops->ndo_setup_tc(dev, qopt->num_tc); + err = dev->netdev_ops->ndo_setup_tc(dev, sch->handle, + qopt->num_tc); if (err) goto err; } else { -- cgit v1.2.3-71-gd317 From 16e5cc647173a97e33b3e3ba81f73eb455561794 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 16 Feb 2016 21:16:43 -0800 Subject: net: rework setup_tc ndo op to consume general tc operand This patch updates setup_tc so we can pass additional parameters into the ndo op in a generic way. To do this we provide structured union and type flag. This lets each classifier and qdisc provide its own set of attributes without having to add new ndo ops or grow the signature of the callback. Signed-off-by: John Fastabend Acked-by: Jiri Pirko Acked-by: Jamal Hadi Salim Signed-off-by: David S. 
Miller --- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 9 ++++++--- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 7 ++++--- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h | 3 ++- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 8 ++++++-- drivers/net/ethernet/intel/fm10k/fm10k_netdev.c | 7 ++++--- drivers/net/ethernet/intel/i40e/i40e.h | 3 ++- drivers/net/ethernet/intel/i40e/i40e_main.c | 10 ++++++---- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 7 ++++--- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 7 ++++--- drivers/net/ethernet/sfc/efx.h | 3 ++- drivers/net/ethernet/sfc/tx.c | 9 ++++++--- drivers/net/ethernet/ti/netcp_core.c | 13 +++++++------ include/linux/netdevice.h | 20 +++++++++++++++++++- net/sched/sch_mqprio.c | 9 ++++++--- 14 files changed, 78 insertions(+), 37 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 9955cae3cabc..cfd3f7efda1c 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -1626,15 +1626,18 @@ static void xgbe_poll_controller(struct net_device *netdev) } #endif /* End CONFIG_NET_POLL_CONTROLLER */ -static int xgbe_setup_tc(struct net_device *netdev, u32 handle, u8 tc) +static int xgbe_setup_tc(struct net_device *netdev, u32 handle, __be16 proto, + struct tc_to_netdev *tc_to_netdev) { struct xgbe_prv_data *pdata = netdev_priv(netdev); unsigned int offset, queue; - u8 i; + u8 i, tc; - if (handle != TC_H_ROOT) + if (handle != TC_H_ROOT || tc_to_netdev->type != TC_SETUP_MQPRIO) return -EINVAL; + tc = tc_to_netdev->tc; + if (tc && (tc != pdata->hw_feat.tc_cnt)) return -EINVAL; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index b262cba34dfa..45843d150868 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -4272,11 +4272,12 @@ int bnx2x_setup_tc(struct 
net_device *dev, u8 num_tc) return 0; } -int __bnx2x_setup_tc(struct net_device *dev, u32 handle, u8 num_tc) +int __bnx2x_setup_tc(struct net_device *dev, u32 handle, __be16 proto, + struct tc_to_netdev *tc) { - if (handle != TC_H_ROOT) + if (handle != TC_H_ROOT || tc->type != TC_SETUP_MQPRIO) return -EINVAL; - return bnx2x_setup_tc(dev, num_tc); + return bnx2x_setup_tc(dev, tc->tc); } /* called with rtnl_lock */ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h index 60a4109dcdeb..0e68fadecfdb 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h @@ -486,7 +486,8 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev); /* setup_tc callback */ int bnx2x_setup_tc(struct net_device *dev, u8 num_tc); -int __bnx2x_setup_tc(struct net_device *dev, u32 handle, u8 num_tc); +int __bnx2x_setup_tc(struct net_device *dev, u32 handle, __be16 proto, + struct tc_to_netdev *tc); int bnx2x_get_vf_config(struct net_device *dev, int vf, struct ifla_vf_info *ivi); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index ff08faf44ee5..169920aa39f3 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -5370,13 +5370,17 @@ static int bnxt_change_mtu(struct net_device *dev, int new_mtu) return 0; } -static int bnxt_setup_tc(struct net_device *dev, u32 handle, u8 tc) +static int bnxt_setup_tc(struct net_device *dev, u32 handle, __be16 proto, + struct tc_to_netdev *ntc) { struct bnxt *bp = netdev_priv(dev); + u8 tc; - if (handle != TC_H_ROOT) + if (handle != TC_H_ROOT || ntc->type != TC_SETUP_MQPRIO) return -EINVAL; + tc = ntc->tc; + if (tc > bp->max_tc) { netdev_err(dev, "too many traffic classes requested: %d Max supported is %d\n", tc, bp->max_tc); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c 
b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c index 12701a492325..dc1a82148ff0 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c @@ -1204,12 +1204,13 @@ err_queueing_scheme: return err; } -static int __fm10k_setup_tc(struct net_device *dev, u32 handle, u8 tc) +static int __fm10k_setup_tc(struct net_device *dev, u32 handle, __be16 proto, + struct tc_to_netdev *tc) { - if (handle != TC_H_ROOT) + if (handle != TC_H_ROOT || tc->type != TC_SETUP_MQPRIO) return -EINVAL; - return fm10k_setup_tc(dev, tc); + return fm10k_setup_tc(dev, tc->tc); } static int fm10k_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index ef9ca075d5e5..933c4b3d92c8 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -788,7 +788,8 @@ struct i40e_mac_filter *i40e_find_mac(struct i40e_vsi *vsi, u8 *macaddr, bool is_vf, bool is_netdev); #ifdef I40E_FCOE int i40e_close(struct net_device *netdev); -int __i40e_setup_tc(struct net_device *netdev, u32 handle, u8 tc); +int __i40e_setup_tc(struct net_device *netdev, u32 handle, __be16 proto, + struct tc_to_netdev *tc); void i40e_netpoll(struct net_device *netdev); int i40e_fcoe_enable(struct net_device *netdev); int i40e_fcoe_disable(struct net_device *netdev); diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index abcb6c152186..257d16207976 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -5307,14 +5307,16 @@ exit: } #ifdef I40E_FCOE -int __i40e_setup_tc(struct net_device *netdev, u32 handle, u8 tc) +int __i40e_setup_tc(struct net_device *netdev, u32 handle, __be16 proto, + struct tc_to_netdev *tc) #else -static int __i40e_setup_tc(struct net_device *netdev, u32 handle, u8 tc) +static int __i40e_setup_tc(struct net_device 
*netdev, u32 handle, __be16 proto, + struct tc_to_netdev *tc) #endif { - if (handle != TC_H_ROOT) + if (handle != TC_H_ROOT || tc->type != TC_SETUP_MQPRIO) return -EINVAL; - return i40e_setup_tc(netdev, tc); + return i40e_setup_tc(netdev, tc->tc); } /** diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 1ba714efd78c..dca2298f4c36 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -8200,13 +8200,14 @@ int ixgbe_setup_tc(struct net_device *dev, u8 tc) return 0; } -int __ixgbe_setup_tc(struct net_device *dev, u32 handle, u8 tc) +int __ixgbe_setup_tc(struct net_device *dev, u32 handle, __be16 proto, + struct tc_to_netdev *tc) { /* Only support egress tc setup for now */ - if (handle != TC_H_ROOT) + if (handle != TC_H_ROOT || tc->type != TC_SETUP_MQPRIO) return -EINVAL; - return ixgbe_setup_tc(dev, tc); + return ixgbe_setup_tc(dev, tc->tc); } #ifdef CONFIG_PCI_IOV diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index d5c6c16b9457..01d6a9695586 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -69,12 +69,13 @@ int mlx4_en_setup_tc(struct net_device *dev, u8 up) return 0; } -static int __mlx4_en_setup_tc(struct net_device *dev, u32 handle, u8 up) +static int __mlx4_en_setup_tc(struct net_device *dev, u32 handle, __be16 proto, + struct tc_to_netdev *tc) { - if (handle != TC_H_ROOT) + if (handle != TC_H_ROOT || tc->type != TC_SETUP_MQPRIO) return -EINVAL; - return mlx4_en_setup_tc(dev, up); + return mlx4_en_setup_tc(dev, tc->tc); } #ifdef CONFIG_RFS_ACCEL diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h index 7815fa09b15d..5e3f93f04e62 100644 --- a/drivers/net/ethernet/sfc/efx.h +++ b/drivers/net/ethernet/sfc/efx.h @@ -32,7 +32,8 @@ netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb, struct 
net_device *net_dev); netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb); void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index); -int efx_setup_tc(struct net_device *net_dev, u32 handle, u8 num_tc); +int efx_setup_tc(struct net_device *net_dev, u32 handle, __be16 proto, + struct tc_to_netdev *tc); unsigned int efx_tx_max_skb_descs(struct efx_nic *efx); extern unsigned int efx_piobuf_size; extern bool efx_separate_tx_channels; diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index 8f1d53e2aca7..2cdb5718ed66 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -562,17 +562,20 @@ void efx_init_tx_queue_core_txq(struct efx_tx_queue *tx_queue) efx->n_tx_channels : 0)); } -int efx_setup_tc(struct net_device *net_dev, u32 handle, u8 num_tc) +int efx_setup_tc(struct net_device *net_dev, u32 handle, __be16 proto, + struct tc_to_netdev *ntc) { struct efx_nic *efx = netdev_priv(net_dev); struct efx_channel *channel; struct efx_tx_queue *tx_queue; - unsigned tc; + unsigned tc, num_tc; int rc; - if (handle != TC_H_ROOT) + if (handle != TC_H_ROOT || ntc->type != TC_SETUP_MQPRIO) return -EINVAL; + num_tc = ntc->tc; + if (efx_nic_rev(efx) < EFX_REV_FALCON_B0 || num_tc > EFX_MAX_TX_TC) return -EINVAL; diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index 40cde814608b..8586a2034019 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -1835,25 +1835,26 @@ static u16 netcp_select_queue(struct net_device *dev, struct sk_buff *skb, return 0; } -static int netcp_setup_tc(struct net_device *dev, u32 handle, u8 num_tc) +static int netcp_setup_tc(struct net_device *dev, u32 handle, __be16 proto, + struct tc_to_netdev tc) { int i; /* setup tc must be called under rtnl lock */ ASSERT_RTNL(); - if (handle != TC_H_ROOT) + if (handle != TC_H_ROOT || tc->type != TC_SETUP_MQPRIO) return -EINVAL; /* Sanity-check the 
number of traffic classes requested */ if ((dev->real_num_tx_queues <= 1) || - (dev->real_num_tx_queues < num_tc)) + (dev->real_num_tx_queues < tc->tc)) return -EINVAL; /* Configure traffic class to queue mappings */ - if (num_tc) { - netdev_set_num_tc(dev, num_tc); - for (i = 0; i < num_tc; i++) + if (tc->tc) { + netdev_set_num_tc(dev, tc->tc); + for (i = 0; i < tc->tc; i++) netdev_set_tc_queue(dev, i, 1, i); } else { netdev_reset_tc(dev); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 48928b6f9cb6..e396060f815f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -779,6 +779,21 @@ static inline bool netdev_phys_item_id_same(struct netdev_phys_item_id *a, typedef u16 (*select_queue_fallback_t)(struct net_device *dev, struct sk_buff *skb); +/* This structure holds attributes of qdisc and classifiers + * that are being passed to the netdevice through the setup_tc op. + */ +enum { + TC_SETUP_MQPRIO, +}; + +struct tc_to_netdev { + unsigned int type; + union { + u8 tc; + }; +}; + + /* * This structure defines the management hooks for network devices. 
* The following hooks can be defined; unless noted otherwise, they are @@ -1151,7 +1166,10 @@ struct net_device_ops { int (*ndo_set_vf_rss_query_en)( struct net_device *dev, int vf, bool setting); - int (*ndo_setup_tc)(struct net_device *dev, u32 handle, u8 tc); + int (*ndo_setup_tc)(struct net_device *dev, + u32 handle, + __be16 protocol, + struct tc_to_netdev *tc); #if IS_ENABLED(CONFIG_FCOE) int (*ndo_fcoe_enable)(struct net_device *dev); int (*ndo_fcoe_disable)(struct net_device *dev); diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index f5a0e8a4dbd7..f9947d1f4952 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -28,6 +28,7 @@ static void mqprio_destroy(struct Qdisc *sch) { struct net_device *dev = qdisc_dev(sch); struct mqprio_sched *priv = qdisc_priv(sch); + struct tc_to_netdev tc = {.type = TC_SETUP_MQPRIO}; unsigned int ntx; if (priv->qdiscs) { @@ -39,7 +40,7 @@ static void mqprio_destroy(struct Qdisc *sch) } if (priv->hw_owned && dev->netdev_ops->ndo_setup_tc) - dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0); + dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, &tc); else netdev_set_num_tc(dev, 0); } @@ -140,9 +141,11 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) * supplied and verified mapping */ if (qopt->hw) { + struct tc_to_netdev tc = {.type = TC_SETUP_MQPRIO, + .tc = qopt->num_tc}; + priv->hw_owned = 1; - err = dev->netdev_ops->ndo_setup_tc(dev, sch->handle, - qopt->num_tc); + err = dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, &tc); if (err) goto err; } else { -- cgit v1.2.3-71-gd317 From a1b7c5fd7fe98f51fbbc393ee1fc4c1cdb2f0119 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 16 Feb 2016 21:17:09 -0800 Subject: net: sched: add cls_u32 offload hooks for netdevs This patch allows netdev drivers to consume cls_u32 offloads via the ndo_setup_tc ndo op. This work aligns with how network drivers have been doing qdisc offloads for mqprio.
Signed-off-by: John Fastabend Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 ++- include/net/pkt_cls.h | 34 ++++++++++++++++ net/sched/cls_u32.c | 99 ++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 136 insertions(+), 3 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e396060f815f..47671ce04ac4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -779,17 +779,21 @@ static inline bool netdev_phys_item_id_same(struct netdev_phys_item_id *a, typedef u16 (*select_queue_fallback_t)(struct net_device *dev, struct sk_buff *skb); -/* This structure holds attributes of qdisc and classifiers +/* These structures hold the attributes of qdisc and classifiers * that are being passed to the netdevice through the setup_tc op. */ enum { TC_SETUP_MQPRIO, + TC_SETUP_CLSU32, }; +struct tc_cls_u32_offload; + struct tc_to_netdev { unsigned int type; union { u8 tc; + struct tc_cls_u32_offload *cls_u32; }; }; diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index bc49967e1a68..59789ca6e2c8 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -358,4 +358,38 @@ tcf_match_indev(struct sk_buff *skb, int ifindex) } #endif /* CONFIG_NET_CLS_IND */ +struct tc_cls_u32_knode { + struct tcf_exts *exts; + u8 fshift; + u32 handle; + u32 val; + u32 mask; + u32 link_handle; + struct tc_u32_sel *sel; +}; + +struct tc_cls_u32_hnode { + u32 handle; + u32 prio; + unsigned int divisor; +}; + +enum tc_clsu32_command { + TC_CLSU32_NEW_KNODE, + TC_CLSU32_REPLACE_KNODE, + TC_CLSU32_DELETE_KNODE, + TC_CLSU32_NEW_HNODE, + TC_CLSU32_REPLACE_HNODE, + TC_CLSU32_DELETE_HNODE, +}; + +struct tc_cls_u32_offload { + /* knode values */ + enum tc_clsu32_command command; + union { + struct tc_cls_u32_knode knode; + struct tc_cls_u32_hnode hnode; + }; +}; + #endif diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 4fbb67430ce4..d54bc942ea87 
100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -43,6 +43,7 @@ #include #include #include +#include struct tc_u_knode { struct tc_u_knode __rcu *next; @@ -424,6 +425,93 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key) return 0; } +static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle) +{ + struct net_device *dev = tp->q->dev_queue->dev; + struct tc_cls_u32_offload u32_offload = {0}; + struct tc_to_netdev offload; + + offload.type = TC_SETUP_CLSU32; + offload.cls_u32 = &u32_offload; + + if (dev->netdev_ops->ndo_setup_tc) { + offload.cls_u32->command = TC_CLSU32_DELETE_KNODE; + offload.cls_u32->knode.handle = handle; + dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, + tp->protocol, &offload); + } +} + +static void u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h) +{ + struct net_device *dev = tp->q->dev_queue->dev; + struct tc_cls_u32_offload u32_offload = {0}; + struct tc_to_netdev offload; + + offload.type = TC_SETUP_CLSU32; + offload.cls_u32 = &u32_offload; + + if (dev->netdev_ops->ndo_setup_tc) { + offload.cls_u32->command = TC_CLSU32_NEW_HNODE; + offload.cls_u32->hnode.divisor = h->divisor; + offload.cls_u32->hnode.handle = h->handle; + offload.cls_u32->hnode.prio = h->prio; + + dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, + tp->protocol, &offload); + } +} + +static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h) +{ + struct net_device *dev = tp->q->dev_queue->dev; + struct tc_cls_u32_offload u32_offload = {0}; + struct tc_to_netdev offload; + + offload.type = TC_SETUP_CLSU32; + offload.cls_u32 = &u32_offload; + + if (dev->netdev_ops->ndo_setup_tc) { + offload.cls_u32->command = TC_CLSU32_DELETE_HNODE; + offload.cls_u32->hnode.divisor = h->divisor; + offload.cls_u32->hnode.handle = h->handle; + offload.cls_u32->hnode.prio = h->prio; + + dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, + tp->protocol, &offload); + } +} + +static void u32_replace_hw_knode(struct tcf_proto 
*tp, struct tc_u_knode *n) +{ + struct net_device *dev = tp->q->dev_queue->dev; + struct tc_cls_u32_offload u32_offload = {0}; + struct tc_to_netdev offload; + + offload.type = TC_SETUP_CLSU32; + offload.cls_u32 = &u32_offload; + + if (dev->netdev_ops->ndo_setup_tc) { + offload.cls_u32->command = TC_CLSU32_REPLACE_KNODE; + offload.cls_u32->knode.handle = n->handle; + offload.cls_u32->knode.fshift = n->fshift; +#ifdef CONFIG_CLS_U32_MARK + offload.cls_u32->knode.val = n->val; + offload.cls_u32->knode.mask = n->mask; +#else + offload.cls_u32->knode.val = 0; + offload.cls_u32->knode.mask = 0; +#endif + offload.cls_u32->knode.sel = &n->sel; + offload.cls_u32->knode.exts = &n->exts; + if (n->ht_down) + offload.cls_u32->knode.link_handle = n->ht_down->handle; + + dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, + tp->protocol, &offload); + } +} + static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) { struct tc_u_knode *n; @@ -434,6 +522,7 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) RCU_INIT_POINTER(ht->ht[h], rtnl_dereference(n->next)); tcf_unbind_filter(tp, &n->res); + u32_remove_hw_knode(tp, n->handle); call_rcu(&n->rcu, u32_delete_key_freepf_rcu); } } @@ -454,6 +543,7 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) phn; hn = &phn->next, phn = rtnl_dereference(*hn)) { if (phn == ht) { + u32_clear_hw_hnode(tp, ht); RCU_INIT_POINTER(*hn, ht->next); kfree_rcu(ht, rcu); return 0; @@ -540,8 +630,10 @@ static int u32_delete(struct tcf_proto *tp, unsigned long arg) if (ht == NULL) return 0; - if (TC_U32_KEY(ht->handle)) + if (TC_U32_KEY(ht->handle)) { + u32_remove_hw_knode(tp, ht->handle); return u32_delete_key(tp, (struct tc_u_knode *)ht); + } if (root_ht == ht) return -EINVAL; @@ -769,6 +861,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, u32_replace_knode(tp, tp_c, new); tcf_unbind_filter(tp, &n->res); call_rcu(&n->rcu, u32_delete_key_rcu); + u32_replace_hw_knode(tp, new); 
return 0; } @@ -795,6 +888,8 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, RCU_INIT_POINTER(ht->next, tp_c->hlist); rcu_assign_pointer(tp_c->hlist, ht); *arg = (unsigned long)ht; + + u32_replace_hw_hnode(tp, ht); return 0; } @@ -877,7 +972,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, RCU_INIT_POINTER(n->next, pins); rcu_assign_pointer(*ins, n); - + u32_replace_hw_knode(tp, n); *arg = (unsigned long)n; return 0; } -- cgit v1.2.3-71-gd317 From 871b642adebe300be2e50aa5f65a418510f636ec Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 26 Feb 2016 10:45:37 +0100 Subject: netdev: introduce ndo_set_rx_headroom This method allows the controlling device (i.e. the bridge) to specify additional headroom to be allocated for skb head on frame reception. Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/linux/netdevice.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e52077ffe5ed..efe7cec111fa 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1093,6 +1093,12 @@ struct tc_to_netdev { * This function is used to get egress tunnel information for given skb. * This is useful for retrieving outer tunnel header parameters while * sampling packet. + * void (*ndo_set_rx_headroom)(struct net_device *dev, int needed_headroom); + * This function is used to specify the headroom that the skb must + * consider when allocation skb during packet reception. Setting + * appropriate rx headroom value allows avoiding skb head copy on + * forward. Setting a negative value reset the rx headroom to the + * default value. 
* */ struct net_device_ops { @@ -1278,6 +1284,8 @@ struct net_device_ops { bool proto_down); int (*ndo_fill_metadata_dst)(struct net_device *dev, struct sk_buff *skb); + void (*ndo_set_rx_headroom)(struct net_device *dev, + int needed_headroom); }; /** @@ -1315,6 +1323,8 @@ struct net_device_ops { * @IFF_L3MDEV_SLAVE: device is enslaved to an L3 master device * @IFF_TEAM: device is a team device * @IFF_RXFH_CONFIGURED: device has had Rx Flow indirection table configured + * @IFF_PHONY_HEADROOM: the headroom value is controlled by an external + * entity (i.e. the master device for bridged veth) */ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, @@ -1343,6 +1353,7 @@ enum netdev_priv_flags { IFF_L3MDEV_SLAVE = 1<<23, IFF_TEAM = 1<<24, IFF_RXFH_CONFIGURED = 1<<25, + IFF_PHONY_HEADROOM = 1<<26, }; #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN @@ -1937,6 +1948,26 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev, struct sk_buff *skb, void *accel_priv); +/* returns the headroom that the master device needs to take in account + * when forwarding to this dev + */ +static inline unsigned netdev_get_fwd_headroom(struct net_device *dev) +{ + return dev->priv_flags & IFF_PHONY_HEADROOM ? 0 : dev->needed_headroom; +} + +static inline void netdev_set_rx_headroom(struct net_device *dev, int new_hr) +{ + if (dev->netdev_ops->ndo_set_rx_headroom) + dev->netdev_ops->ndo_set_rx_headroom(dev, new_hr); +} + +/* set the device rx headroom to the dev's default */ +static inline void netdev_reset_rx_headroom(struct net_device *dev) +{ + netdev_set_rx_headroom(dev, -1); +} + /* * Net namespace inlines */ -- cgit v1.2.3-71-gd317 From 2793a23aacbd754dbbb5cb75093deb7e4103bace Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 9 Mar 2016 21:58:32 -0500 Subject: net: validate variable length ll headers Netdevice parameter hard_header_len is variously interpreted both as an upper and lower bound on link layer header length. 
The field is used as upper bound when reserving room at allocation, as lower bound when validating user input in PF_PACKET. Clarify the definition to be maximum header length. For validation of untrusted headers, add an optional validate member to header_ops. Allow bypassing of validation by passing CAP_SYS_RAWIO, for instance for deliberate testing of corrupt input. In this case, pad trailing bytes, as some device drivers expect completely initialized headers. See also http://comments.gmane.org/gmane.linux.network/401064 Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/netdevice.h | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index efe7cec111fa..fd30cb545c45 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -268,6 +268,7 @@ struct header_ops { void (*cache_update)(struct hh_cache *hh, const struct net_device *dev, const unsigned char *haddr); + bool (*validate)(const char *ll_header, unsigned int len); }; /* These flag bits are private to the generic network queueing @@ -1459,8 +1460,7 @@ enum netdev_priv_flags { * @dma: DMA channel * @mtu: Interface MTU value * @type: Interface hardware type - * @hard_header_len: Hardware header length, which means that this is the - * minimum size of a packet. + * @hard_header_len: Maximum hardware header length. 
* * @needed_headroom: Extra headroom the hardware may need, but not in all * cases can this be guaranteed @@ -2687,6 +2687,24 @@ static inline int dev_parse_header(const struct sk_buff *skb, return dev->header_ops->parse(skb, haddr); } +/* ll_header must have at least hard_header_len allocated */ +static inline bool dev_validate_header(const struct net_device *dev, + char *ll_header, int len) +{ + if (likely(len >= dev->hard_header_len)) + return true; + + if (capable(CAP_SYS_RAWIO)) { + memset(ll_header + len, 0, dev->hard_header_len - len); + return true; + } + + if (dev->header_ops && dev->header_ops->validate) + return dev->header_ops->validate(ll_header, len); + + return false; +} + typedef int gifconf_func_t(struct net_device * dev, char __user * bufptr, int len); int register_gifconf(unsigned int family, gifconf_func_t *gifconf); static inline int unregister_gifconf(unsigned int family) -- cgit v1.2.3-71-gd317 From 5b33f48842fa1e13e9c0ea8cc59c1d0df19042db Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Tue, 8 Mar 2016 12:42:29 +0200 Subject: net/flower: Introduce hardware offload support This patch is based on a patch made by John Fastabend. It adds support for offloading cls_flower. When NETIF_F_HW_TC is on: flags = 0 => Rule will be processed twice - by hardware, and if still relevant, by software. flags = SKIP_HW => Rule will be processed by software only. If hardware fails or is not capable of applying the rule, the operation will NOT fail; the filter will be processed by SW only. Acked-by: Jiri Pirko Suggested-by: John Fastabend Signed-off-by: Amir Vadai Signed-off-by: David S.
Miller --- include/linux/netdevice.h | 2 ++ include/net/pkt_cls.h | 14 ++++++++++ include/uapi/linux/pkt_cls.h | 2 ++ net/sched/cls_flower.c | 64 +++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 81 insertions(+), 1 deletion(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index fd30cb545c45..41df0b450757 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -786,6 +786,7 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, enum { TC_SETUP_MQPRIO, TC_SETUP_CLSU32, + TC_SETUP_CLSFLOWER, }; struct tc_cls_u32_offload; @@ -795,6 +796,7 @@ struct tc_to_netdev { union { u8 tc; struct tc_cls_u32_offload *cls_u32; + struct tc_cls_flower_offload *cls_flower; }; }; diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index bea14eee373e..5b4e8f08b8f0 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -409,4 +409,18 @@ static inline bool tc_should_offload(struct net_device *dev, u32 flags) return true; } +enum tc_fl_command { + TC_CLSFLOWER_REPLACE, + TC_CLSFLOWER_DESTROY, +}; + +struct tc_cls_flower_offload { + enum tc_fl_command command; + u64 cookie; + struct flow_dissector *dissector; + struct fl_flow_key *mask; + struct fl_flow_key *key; + struct tcf_exts *exts; +}; + #endif diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 9874f5680926..c43c5f78b9c4 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -417,6 +417,8 @@ enum { TCA_FLOWER_KEY_TCP_DST, /* be16 */ TCA_FLOWER_KEY_UDP_SRC, /* be16 */ TCA_FLOWER_KEY_UDP_DST, /* be16 */ + + TCA_FLOWER_FLAGS, __TCA_FLOWER_MAX, }; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 95b021243233..25d87666bf1e 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -165,6 +165,51 @@ static void fl_destroy_filter(struct rcu_head *head) kfree(f); } +static void fl_hw_destroy_filter(struct tcf_proto *tp, u64 cookie) +{ + struct 
net_device *dev = tp->q->dev_queue->dev; + struct tc_cls_flower_offload offload = {0}; + struct tc_to_netdev tc; + + if (!tc_should_offload(dev, 0)) + return; + + offload.command = TC_CLSFLOWER_DESTROY; + offload.cookie = cookie; + + tc.type = TC_SETUP_CLSFLOWER; + tc.cls_flower = &offload; + + dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc); +} + +static void fl_hw_replace_filter(struct tcf_proto *tp, + struct flow_dissector *dissector, + struct fl_flow_key *mask, + struct fl_flow_key *key, + struct tcf_exts *actions, + u64 cookie, u32 flags) +{ + struct net_device *dev = tp->q->dev_queue->dev; + struct tc_cls_flower_offload offload = {0}; + struct tc_to_netdev tc; + + if (!tc_should_offload(dev, flags)) + return; + + offload.command = TC_CLSFLOWER_REPLACE; + offload.cookie = cookie; + offload.dissector = dissector; + offload.mask = mask; + offload.key = key; + offload.exts = actions; + + tc.type = TC_SETUP_CLSFLOWER; + tc.cls_flower = &offload; + + dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc); +} + static bool fl_destroy(struct tcf_proto *tp, bool force) { struct cls_fl_head *head = rtnl_dereference(tp->root); @@ -174,6 +219,7 @@ static bool fl_destroy(struct tcf_proto *tp, bool force) return false; list_for_each_entry_safe(f, next, &head->filters, list) { + fl_hw_destroy_filter(tp, (u64)f); list_del_rcu(&f->list); call_rcu(&f->rcu, fl_destroy_filter); } @@ -459,6 +505,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, struct cls_fl_filter *fnew; struct nlattr *tb[TCA_FLOWER_MAX + 1]; struct fl_flow_mask mask = {}; + u32 flags = 0; int err; if (!tca[TCA_OPTIONS]) @@ -486,6 +533,9 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, } fnew->handle = handle; + if (tb[TCA_FLOWER_FLAGS]) + flags = nla_get_u32(tb[TCA_FLOWER_FLAGS]); + err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr); if (err) goto errout; @@ -498,9 +548,20 @@ static int fl_change(struct net *net, struct 
sk_buff *in_skb, head->ht_params); if (err) goto errout; - if (fold) + + fl_hw_replace_filter(tp, + &head->dissector, + &mask.key, + &fnew->key, + &fnew->exts, + (u64)fnew, + flags); + + if (fold) { rhashtable_remove_fast(&head->ht, &fold->ht_node, head->ht_params); + fl_hw_destroy_filter(tp, (u64)fold); + } *arg = (unsigned long) fnew; @@ -527,6 +588,7 @@ static int fl_delete(struct tcf_proto *tp, unsigned long arg) rhashtable_remove_fast(&head->ht, &f->ht_node, head->ht_params); list_del_rcu(&f->list); + fl_hw_destroy_filter(tp, (u64)f); tcf_unbind_filter(tp, &f->res); call_rcu(&f->rcu, fl_destroy_filter); return 0; -- cgit v1.2.3-71-gd317 From 3c17578473b9be5a6e7680a45ea97e1d56e13249 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 11 Mar 2016 18:07:32 +0100 Subject: net: add MACsec netdevice priv_flags and helper Signed-off-by: Sabrina Dubroca Reviewed-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/netdevice.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 41df0b450757..be693b34662f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1328,6 +1328,7 @@ struct net_device_ops { * @IFF_RXFH_CONFIGURED: device has had Rx Flow indirection table configured * @IFF_PHONY_HEADROOM: the headroom value is controlled by an external * entity (i.e. the master device for bridged veth) + * @IFF_MACSEC: device is a MACsec device */ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, @@ -1357,6 +1358,7 @@ enum netdev_priv_flags { IFF_TEAM = 1<<24, IFF_RXFH_CONFIGURED = 1<<25, IFF_PHONY_HEADROOM = 1<<26, + IFF_MACSEC = 1<<27, }; #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN @@ -1385,6 +1387,7 @@ enum netdev_priv_flags { #define IFF_L3MDEV_SLAVE IFF_L3MDEV_SLAVE #define IFF_TEAM IFF_TEAM #define IFF_RXFH_CONFIGURED IFF_RXFH_CONFIGURED +#define IFF_MACSEC IFF_MACSEC /** * struct net_device - The DEVICE structure. 
@@ -4045,6 +4048,11 @@ static inline void skb_gso_error_unwind(struct sk_buff *skb, __be16 protocol, skb->mac_len = mac_len; } +static inline bool netif_is_macsec(const struct net_device *dev) +{ + return dev->priv_flags & IFF_MACSEC; +} + static inline bool netif_is_macvlan(const struct net_device *dev) { return dev->priv_flags & IFF_MACVLAN; -- cgit v1.2.3-71-gd317