From 52bd2d62ce6758d811edcbd2256eb9ea7f6a56cb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Nov 2015 06:30:50 -0800 Subject: net: better skb->sender_cpu and skb->napi_id cohabitation skb->sender_cpu and skb->napi_id share a common storage, and we had various bugs about this. We had to call skb_sender_cpu_clear() in some places to not leave a prior skb->napi_id and fool netdev_pick_tx() As suggested by Alexei, we could split the space so that these errors can not happen. 0 value being reserved as the common (not initialized) value, let's reserve [1 .. NR_CPUS] range for valid sender_cpu, and [NR_CPUS+1 .. ~0U] for valid napi_id. This will allow proper busy polling support over tunnels. Signed-off-by: Eric Dumazet Suggested-by: Alexei Starovoitov Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/skbuff.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4355129fff91..c9c394bf0771 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1082,9 +1082,6 @@ static inline void skb_copy_hash(struct sk_buff *to, const struct sk_buff *from) static inline void skb_sender_cpu_clear(struct sk_buff *skb) { -#ifdef CONFIG_XPS - skb->sender_cpu = 0; -#endif } #ifdef NET_SKBUFF_DATA_USES_OFFSET -- cgit v1.2.3-71-gd317 From 02d62e86fe892c59a1259d089d4d16ac76977a37 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Nov 2015 06:30:52 -0800 Subject: net: un-inline sk_busy_loop() There is really little gain from inlining this big function. We'll soon make it even bigger in following patches. This means we no longer need to export napi_by_id() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 9 --------- include/net/busy_poll.h | 45 +----------------------------------------- net/core/dev.c | 50 +++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 49 insertions(+), 55 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 67bfac1abfc1..2020a89df12b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -460,15 +460,6 @@ static inline void napi_complete(struct napi_struct *n) return napi_complete_done(n, 0); } -/** - * napi_by_id - lookup a NAPI by napi_id - * @napi_id: hashed napi_id - * - * lookup @napi_id in napi_hash table - * must be called under rcu_read_lock() - */ -struct napi_struct *napi_by_id(unsigned int napi_id); - /** * napi_hash_add - add a NAPI to global hashtable * @napi: napi context diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index 1d67fb6b23a0..2fbeb1313c0f 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -72,50 +72,7 @@ static inline bool busy_loop_timeout(unsigned long end_time) return time_after(now, end_time); } -/* when used in sock_poll() nonblock is known at compile time to be true - * so the loop and end_time will be optimized out - */ -static inline bool sk_busy_loop(struct sock *sk, int nonblock) -{ - unsigned long end_time = !nonblock ? 
sk_busy_loop_end_time(sk) : 0; - const struct net_device_ops *ops; - struct napi_struct *napi; - int rc = false; - - /* - * rcu read lock for napi hash - * bh so we don't race with net_rx_action - */ - rcu_read_lock_bh(); - - napi = napi_by_id(sk->sk_napi_id); - if (!napi) - goto out; - - ops = napi->dev->netdev_ops; - if (!ops->ndo_busy_poll) - goto out; - - do { - rc = ops->ndo_busy_poll(napi); - - if (rc == LL_FLUSH_FAILED) - break; /* permanent failure */ - - if (rc > 0) - /* local bh are disabled so it is ok to use _BH */ - NET_ADD_STATS_BH(sock_net(sk), - LINUX_MIB_BUSYPOLLRXPACKETS, rc); - cpu_relax(); - - } while (!nonblock && skb_queue_empty(&sk->sk_receive_queue) && - !need_resched() && !busy_loop_timeout(end_time)); - - rc = !skb_queue_empty(&sk->sk_receive_queue); -out: - rcu_read_unlock_bh(); - return rc; -} +bool sk_busy_loop(struct sock *sk, int nonblock); /* used in the NIC receive handler to mark the skb */ static inline void skb_mark_napi_id(struct sk_buff *skb, diff --git a/net/core/dev.c b/net/core/dev.c index 2582c24a75c6..74a816b299df 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -96,6 +96,7 @@ #include #include #include +#include #include #include #include @@ -4663,7 +4664,7 @@ void napi_complete_done(struct napi_struct *n, int work_done) EXPORT_SYMBOL(napi_complete_done); /* must be called under rcu_read_lock(), as we dont take a reference */ -struct napi_struct *napi_by_id(unsigned int napi_id) +static struct napi_struct *napi_by_id(unsigned int napi_id) { unsigned int hash = napi_id % HASH_SIZE(napi_hash); struct napi_struct *napi; @@ -4674,7 +4675,52 @@ struct napi_struct *napi_by_id(unsigned int napi_id) return NULL; } -EXPORT_SYMBOL_GPL(napi_by_id); + +#if defined(CONFIG_NET_RX_BUSY_POLL) +bool sk_busy_loop(struct sock *sk, int nonblock) +{ + unsigned long end_time = !nonblock ? sk_busy_loop_end_time(sk) : 0; + const struct net_device_ops *ops; + struct napi_struct *napi; + int rc = false; + + /* + * rcu read lock for napi hash + * bh so we don't race with net_rx_action + */ + rcu_read_lock_bh(); + + napi = napi_by_id(sk->sk_napi_id); + if (!napi) + goto out; + + ops = napi->dev->netdev_ops; + if (!ops->ndo_busy_poll) + goto out; + + do { + rc = ops->ndo_busy_poll(napi); + + if (rc == LL_FLUSH_FAILED) + break; /* permanent failure */ + + if (rc > 0) + /* local bh are disabled so it is ok to use _BH */ + NET_ADD_STATS_BH(sock_net(sk), + LINUX_MIB_BUSYPOLLRXPACKETS, rc); + cpu_relax(); + + } while (!nonblock && skb_queue_empty(&sk->sk_receive_queue) && + !need_resched() && !busy_loop_timeout(end_time)); + + rc = !skb_queue_empty(&sk->sk_receive_queue); +out: + rcu_read_unlock_bh(); + return rc; +} +EXPORT_SYMBOL(sk_busy_loop); + +#endif /* CONFIG_NET_RX_BUSY_POLL */ void napi_hash_add(struct napi_struct *napi) { -- cgit v1.2.3-71-gd317 From d64b5e85bfe2fe4c790abcbd16d9ae32391ddd7e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Nov 2015 06:31:00 -0800 Subject: net: add netif_tx_napi_add() netif_tx_napi_add() is a variant of netif_napi_add() It should be used by drivers that use a napi structure to exclusively poll TX. We do not want to add this kind of napi in napi_hash[] in following patches, adding generic busy polling to all NAPI drivers. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bcmsysport.c | 2 +- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 4 ++-- .../net/ethernet/freescale/fs_enet/fs_enet-main.c | 2 +- drivers/net/ethernet/freescale/gianfar.c | 4 ++-- drivers/net/ethernet/mellanox/mlx4/en_cq.c | 4 ++-- drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c | 4 ++-- drivers/net/ethernet/rocker/rocker.c | 2 +- drivers/net/ethernet/ti/cpsw.c | 2 +- drivers/net/ethernet/ti/netcp_core.c | 2 +- drivers/net/wireless/ath/wil6210/netdev.c | 2 +- include/linux/netdevice.h | 23 +++++++++++++++++++++- net/core/dev.c | 3 ++- 12 files changed, 38 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 858106352ce9..993c780bdfab 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1216,7 +1216,7 @@ static int bcm_sysport_init_tx_ring(struct bcm_sysport_priv *priv, /* Initialize SW view of the ring */ spin_lock_init(&ring->lock); ring->priv = priv; - netif_napi_add(priv->netdev, &ring->napi, bcm_sysport_tx_poll, 64); + netif_tx_napi_add(priv->netdev, &ring->napi, bcm_sysport_tx_poll, 64); ring->index = index; ring->size = size; ring->alloc_size = ring->size; diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 17f017ab4dac..b15a60d787c7 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2041,11 +2041,11 @@ static void bcmgenet_init_tx_napi(struct bcmgenet_priv *priv) for (i = 0; i < priv->hw_params->tx_queues; ++i) { ring = &priv->tx_rings[i]; - netif_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, 64); + netif_tx_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, 64); } ring = &priv->tx_rings[DESC_INDEX]; - netif_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, 64); + netif_tx_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, 64); } static void bcmgenet_enable_tx_napi(struct bcmgenet_priv *priv) diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c index cf8e54652df9..48a9c176e0d1 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c @@ -1050,7 +1050,7 @@ static int fs_enet_probe(struct platform_device *ofdev) ndev->netdev_ops = &fs_enet_netdev_ops; ndev->watchdog_timeo = 2 * HZ; netif_napi_add(ndev, &fep->napi, fs_enet_rx_napi, fpi->napi_weight); - netif_napi_add(ndev, &fep->napi_tx, fs_enet_tx_napi, 2); + netif_tx_napi_add(ndev, &fep->napi_tx, fs_enet_tx_napi, 2); ndev->ethtool_ops = &fs_ethtool_ops; diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 3e6b9b437497..c8bc43e99a35 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -1347,12 +1347,12 @@ static int gfar_probe(struct platform_device *ofdev) if (priv->poll_mode == GFAR_SQ_POLLING) { netif_napi_add(dev, &priv->gfargrp[i].napi_rx, gfar_poll_rx_sq, GFAR_DEV_WEIGHT); - netif_napi_add(dev, &priv->gfargrp[i].napi_tx, + netif_tx_napi_add(dev, &priv->gfargrp[i].napi_tx, gfar_poll_tx_sq, 2); } else { netif_napi_add(dev, &priv->gfargrp[i].napi_rx, gfar_poll_rx, GFAR_DEV_WEIGHT); - netif_napi_add(dev, &priv->gfargrp[i].napi_tx, + netif_tx_napi_add(dev, &priv->gfargrp[i].napi_tx, gfar_poll_tx, 2); } } diff --git 
a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index eb8a4988de63..3a6176fea78d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -156,8 +156,8 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, cq->mcq.event = mlx4_en_cq_event; if (cq->is_tx) { - netif_napi_add(cq->dev, &cq->napi, mlx4_en_poll_tx_cq, - NAPI_POLL_WEIGHT); + netif_tx_napi_add(cq->dev, &cq->napi, mlx4_en_poll_tx_cq, + NAPI_POLL_WEIGHT); } else { netif_napi_add(cq->dev, &cq->napi, mlx4_en_poll_rx_cq, 64); napi_hash_add(&cq->napi); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c index d4b5085a21fa..7bd6f25b4625 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c @@ -1604,7 +1604,7 @@ int qlcnic_82xx_napi_add(struct qlcnic_adapter *adapter, if (qlcnic_check_multi_tx(adapter) && !adapter->ahw->diag_test) { for (ring = 0; ring < adapter->drv_tx_rings; ring++) { tx_ring = &adapter->tx_ring[ring]; - netif_napi_add(netdev, &tx_ring->napi, qlcnic_tx_poll, + netif_tx_napi_add(netdev, &tx_ring->napi, qlcnic_tx_poll, NAPI_POLL_WEIGHT); } } @@ -2135,7 +2135,7 @@ int qlcnic_83xx_napi_add(struct qlcnic_adapter *adapter, !(adapter->flags & QLCNIC_TX_INTR_SHARED)) { for (ring = 0; ring < adapter->drv_tx_rings; ring++) { tx_ring = &adapter->tx_ring[ring]; - netif_napi_add(netdev, &tx_ring->napi, + netif_tx_napi_add(netdev, &tx_ring->napi, qlcnic_83xx_msix_tx_poll, NAPI_POLL_WEIGHT); } diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index e9f2349e98bc..a4ab71d43e4e 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -4998,7 +4998,7 @@ static int rocker_probe_port(struct rocker *rocker, unsigned int port_number) dev->netdev_ops = &rocker_port_netdev_ops; dev->ethtool_ops = &rocker_port_ethtool_ops; dev->switchdev_ops = &rocker_port_switchdev_ops; - netif_napi_add(dev, &rocker_port->napi_tx, rocker_port_poll_tx, + netif_tx_napi_add(dev, &rocker_port->napi_tx, rocker_port_poll_tx, NAPI_POLL_WEIGHT); netif_napi_add(dev, &rocker_port->napi_rx, rocker_port_poll_rx, NAPI_POLL_WEIGHT); diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 48b92c9de12a..15322c08de80 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -2469,7 +2469,7 @@ static int cpsw_probe(struct platform_device *pdev) ndev->netdev_ops = &cpsw_netdev_ops; ndev->ethtool_ops = &cpsw_ethtool_ops; netif_napi_add(ndev, &priv->napi_rx, cpsw_rx_poll, CPSW_POLL_WEIGHT); - netif_napi_add(ndev, &priv->napi_tx, cpsw_tx_poll, CPSW_POLL_WEIGHT); + netif_tx_napi_add(ndev, &priv->napi_tx, cpsw_tx_poll, CPSW_POLL_WEIGHT); /* register the network device */ SET_NETDEV_DEV(ndev, &pdev->dev); diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index 37b9b39192ec..e5e20e734f21 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -1990,7 +1990,7 @@ static int netcp_create_interface(struct netcp_device *netcp_device, /* NAPI register */ netif_napi_add(ndev, &netcp->rx_napi, netcp_rx_poll, NETCP_NAPI_WEIGHT); - netif_napi_add(ndev, &netcp->tx_napi, netcp_tx_poll, NETCP_NAPI_WEIGHT); + netif_tx_napi_add(ndev, &netcp->tx_napi, netcp_tx_poll, NETCP_NAPI_WEIGHT); /* Register the network device */ ndev->dev_id = 0; diff --git 
a/drivers/net/wireless/ath/wil6210/netdev.c b/drivers/net/wireless/ath/wil6210/netdev.c index e3b3c8fb4605..56aaa2d4fb0e 100644 --- a/drivers/net/wireless/ath/wil6210/netdev.c +++ b/drivers/net/wireless/ath/wil6210/netdev.c @@ -183,7 +183,7 @@ void *wil_if_alloc(struct device *dev) netif_napi_add(ndev, &wil->napi_rx, wil6210_netdev_poll_rx, WIL6210_NAPI_BUDGET); - netif_napi_add(ndev, &wil->napi_tx, wil6210_netdev_poll_tx, + netif_tx_napi_add(ndev, &wil->napi_tx, wil6210_netdev_poll_tx, WIL6210_NAPI_BUDGET); netif_tx_stop_all_queues(ndev); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2020a89df12b..838935d1cdbb 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -326,7 +326,8 @@ enum { NAPI_STATE_SCHED, /* Poll is scheduled */ NAPI_STATE_DISABLE, /* Disable pending */ NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */ - NAPI_STATE_HASHED, /* In NAPI hash */ + NAPI_STATE_HASHED, /* In NAPI hash (busy polling possible) */ + NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */ }; enum gro_result { @@ -1938,6 +1939,26 @@ static inline void *netdev_priv(const struct net_device *dev) void netif_napi_add(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int weight); +/** + * netif_tx_napi_add - initialize a napi context + * @dev: network device + * @napi: napi context + * @poll: polling function + * @weight: default weight + * + * This variant of netif_napi_add() should be used from drivers using NAPI + * to exclusively poll a TX queue. + * This will avoid we add it into napi_hash[], thus polluting this hash table. + */ +static inline void netif_tx_napi_add(struct net_device *dev, + struct napi_struct *napi, + int (*poll)(struct napi_struct *, int), + int weight) +{ + set_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state); + netif_napi_add(dev, napi, poll, weight); +} + /** * netif_napi_del - remove a napi context * @napi: napi context diff --git a/net/core/dev.c b/net/core/dev.c index 83b48747928c..ff58a8bc5e3c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4737,7 +4737,8 @@ EXPORT_SYMBOL(sk_busy_loop); void napi_hash_add(struct napi_struct *napi) { - if (test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) + if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state) || + test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) return; spin_lock(&napi_hash_lock); -- cgit v1.2.3-71-gd317 From 6180d9de61a5c461f9e3efef5417a844701dbbb2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Nov 2015 06:31:01 -0800 Subject: net: move napi_hash[] into read mostly section We do not often add/delete a napi context. Moving napi_hash[] into read mostly section avoids potential false sharing. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/hashtable.h | 4 ++++ net/core/dev.c | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hashtable.h b/include/linux/hashtable.h index 519b6e2d769e..661e5c2a8e2a 100644 --- a/include/linux/hashtable.h +++ b/include/linux/hashtable.h @@ -16,6 +16,10 @@ struct hlist_head name[1 << (bits)] = \ { [0 ... ((1 << (bits)) - 1)] = HLIST_HEAD_INIT } +#define DEFINE_READ_MOSTLY_HASHTABLE(name, bits) \ + struct hlist_head name[1 << (bits)] __read_mostly = \ + { [0 ... 
((1 << (bits)) - 1)] = HLIST_HEAD_INIT } + #define DECLARE_HASHTABLE(name, bits) \ struct hlist_head name[1 << (bits)] diff --git a/net/core/dev.c b/net/core/dev.c index ff58a8bc5e3c..02dfbd91a8e4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -184,7 +184,7 @@ EXPORT_SYMBOL(dev_base_lock); static DEFINE_SPINLOCK(napi_hash_lock); static unsigned int napi_gen_id = NR_CPUS; -static DEFINE_HASHTABLE(napi_hash, 8); +static DEFINE_READ_MOSTLY_HASHTABLE(napi_hash, 8); static seqcount_t devnet_rename_seq; -- cgit v1.2.3-71-gd317 From 34cbe27e811c591c854a39c0dee1b461bb796953 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Nov 2015 06:31:02 -0800 Subject: net: napi_hash_del() returns a boolean status napi_hash_del() will soon be used from both drivers (if they want) or core networking stack. Callers are responsibles to ensure an RCU grace period is respected before freeing napi structure : napi_hash_del() can signal if this RCU grace period is needed or not. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 +++-- net/core/dev.c | 10 +++++++--- 2 files changed, 10 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 838935d1cdbb..e5c33b29471b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -474,9 +474,10 @@ void napi_hash_add(struct napi_struct *napi); * @napi: napi context * * Warning: caller must observe rcu grace period - * before freeing memory containing @napi + * before freeing memory containing @napi, if + * this function returns true. */ -void napi_hash_del(struct napi_struct *napi); +bool napi_hash_del(struct napi_struct *napi); /** * napi_disable - prevent NAPI from scheduling diff --git a/net/core/dev.c b/net/core/dev.c index 02dfbd91a8e4..59dddac1c2e7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4760,14 +4760,18 @@ EXPORT_SYMBOL_GPL(napi_hash_add); /* Warning : caller is responsible to make sure rcu grace period * is respected before freeing memory containing @napi */ -void napi_hash_del(struct napi_struct *napi) +bool napi_hash_del(struct napi_struct *napi) { + bool rcu_sync_needed = false; + spin_lock(&napi_hash_lock); - if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state)) + if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state)) { + rcu_sync_needed = true; hlist_del_rcu(&napi->napi_hash_node); - + } spin_unlock(&napi_hash_lock); + return rcu_sync_needed; } EXPORT_SYMBOL_GPL(napi_hash_del); -- cgit v1.2.3-71-gd317 From 93d05d4a320cb16712bb3d57a9658f395d8cecb9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Nov 2015 06:31:03 -0800 Subject: net: provide generic busy polling to all NAPI drivers NAPI drivers no longer need to observe a particular protocol to benefit from busy polling (CONFIG_NET_RX_BUSY_POLL=y) napi_hash_add() and napi_hash_del() are automatically called from core networking stack, respectively from netif_napi_add() and netif_napi_del() This patch depends on free_netdev() and netif_napi_del() being called from process context, which seems to be the norm. Drivers might still prefer to call napi_hash_del() on their own, since they might combine all the rcu grace periods into a single one, knowing their NAPI structures lifetime, while core networking stack has no idea of a possible combining. Once this patch proves to not bring serious regressions, we will cleanup drivers to either remove napi_hash_del() or provide appropriate rcu grace periods combining. 
Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 2 -- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 -- drivers/net/ethernet/chelsio/cxgb4/sge.c | 1 - drivers/net/ethernet/cisco/enic/enic_main.c | 2 -- drivers/net/ethernet/emulex/benet/be_main.c | 1 - drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c | 1 - drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 3 --- drivers/net/ethernet/mellanox/mlx4/en_cq.c | 6 ++---- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 1 - drivers/net/ethernet/myricom/myri10ge/myri10ge.c | 1 - drivers/net/ethernet/sfc/efx.c | 1 - drivers/net/virtio_net.c | 1 - include/linux/netdevice.h | 7 +++++++ net/core/dev.c | 7 +++++++ 14 files changed, 16 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index ab9222924bd9..d9add7c02e42 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -46,7 +46,6 @@ static void bnx2x_add_all_napi_cnic(struct bnx2x *bp) for_each_rx_queue_cnic(bp, i) { netif_napi_add(bp->dev, &bnx2x_fp(bp, i, napi), bnx2x_poll, NAPI_POLL_WEIGHT); - napi_hash_add(&bnx2x_fp(bp, i, napi)); } } @@ -58,7 +57,6 @@ static void bnx2x_add_all_napi(struct bnx2x *bp) for_each_eth_queue(bp, i) { netif_napi_add(bp->dev, &bnx2x_fp(bp, i, napi), bnx2x_poll, NAPI_POLL_WEIGHT); - napi_hash_add(&bnx2x_fp(bp, i, napi)); } } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index db15c5ee09c5..f2d0dc9b1c41 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4227,12 +4227,10 @@ static void bnxt_init_napi(struct bnxt *bp) bnapi = bp->bnapi[i]; netif_napi_add(bp->dev, &bnapi->napi, bnxt_poll, 64); - napi_hash_add(&bnapi->napi); } } else { bnapi = bp->bnapi[0]; netif_napi_add(bp->dev, &bnapi->napi, bnxt_poll, 64); - napi_hash_add(&bnapi->napi); } } diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index f650f295f264..48d8fbb1c220 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -2527,7 +2527,6 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, goto err; netif_napi_add(dev, &iq->napi, napi_rx_handler, 64); - napi_hash_add(&iq->napi); iq->cur_desc = iq->desc; iq->cidx = 0; iq->gen = 1; diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index b36643ef0593..b2182d3ba3cc 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -2458,13 +2458,11 @@ static int enic_dev_init(struct enic *enic) switch (vnic_dev_get_intr_mode(enic->vdev)) { default: netif_napi_add(netdev, &enic->napi[0], enic_poll, 64); - napi_hash_add(&enic->napi[0]); break; case VNIC_DEV_INTR_MODE_MSIX: for (i = 0; i < enic->rq_count; i++) { netif_napi_add(netdev, &enic->napi[i], enic_poll_msix_rq, NAPI_POLL_WEIGHT); - napi_hash_add(&enic->napi[i]); } for (i = 0; i < enic->wq_count; i++) netif_napi_add(netdev, &enic->napi[enic_cq_wq(enic, i)], diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index c29d62496ad9..4cab8879f5ae 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -2630,7 +2630,6 @@ static int be_evt_queues_create(struct 
be_adapter *adapter) eqo->affinity_mask); netif_napi_add(adapter->netdev, &eqo->napi, be_poll, BE_NAPI_WEIGHT); - napi_hash_add(&eqo->napi); } return 0; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c index f3168bcc7d87..e771e764daa3 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c @@ -844,7 +844,6 @@ static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter, /* initialize NAPI */ netif_napi_add(adapter->netdev, &q_vector->napi, ixgbe_poll, 64); - napi_hash_add(&q_vector->napi); #ifdef CONFIG_NET_RX_BUSY_POLL /* initialize busy poll */ diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 592ff237d692..2955186cd4f6 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -2483,9 +2483,6 @@ static int ixgbevf_alloc_q_vectors(struct ixgbevf_adapter *adapter) q_vector->v_idx = q_idx; netif_napi_add(adapter->netdev, &q_vector->napi, ixgbevf_poll, 64); -#ifdef CONFIG_NET_RX_BUSY_POLL - napi_hash_add(&q_vector->napi); -#endif adapter->q_vector[q_idx] = q_vector; } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index 3a6176fea78d..af975a2b74c6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -155,13 +155,11 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, cq->mcq.comp = cq->is_tx ? mlx4_en_tx_irq : mlx4_en_rx_irq; cq->mcq.event = mlx4_en_cq_event; - if (cq->is_tx) { + if (cq->is_tx) netif_tx_napi_add(cq->dev, &cq->napi, mlx4_en_poll_tx_cq, NAPI_POLL_WEIGHT); - } else { + else netif_napi_add(cq->dev, &cq->napi, mlx4_en_poll_rx_cq, 64); - napi_hash_add(&cq->napi); - } napi_enable(&cq->napi); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index ffb1f9c1b973..f6a8cc787603 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -982,7 +982,6 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, mlx5e_build_channeltc_to_txq_map(priv, ix); netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64); - napi_hash_add(&c->napi); err = mlx5e_open_tx_cqs(c, cparam); if (err) diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c index acf866147d65..270c9eeb7ab6 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -3814,7 +3814,6 @@ static int myri10ge_alloc_slices(struct myri10ge_priv *mgp) ss->dev = mgp->dev; netif_napi_add(ss->dev, &ss->napi, myri10ge_poll, myri10ge_napi_weight); - napi_hash_add(&ss->napi); } return 0; abort: diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index a3c42a376741..4e82bcfbe3e0 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -2059,7 +2059,6 @@ static void efx_init_napi_channel(struct efx_channel *channel) channel->napi_dev = efx->net_dev; netif_napi_add(channel->napi_dev, &channel->napi_str, efx_poll, napi_weight); - napi_hash_add(&channel->napi_str); efx_channel_busy_poll_init(channel); } diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index d1d14cecf450..b1ae4cbf2453 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1610,7 
+1610,6 @@ static int virtnet_alloc_queues(struct virtnet_info *vi) vi->rq[i].pages = NULL; netif_napi_add(vi->dev, &vi->rq[i].napi, virtnet_poll, napi_weight); - napi_hash_add(&vi->rq[i].napi); sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg)); ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e5c33b29471b..7d2d1d7aaec7 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -466,6 +466,9 @@ static inline void napi_complete(struct napi_struct *n) * @napi: napi context * * generate a new napi_id and store a @napi under it in napi_hash + * Used for busy polling (CONFIG_NET_RX_BUSY_POLL) + * Note: This is normally automatically done from netif_napi_add(), + * so might disappear in a future linux version. */ void napi_hash_add(struct napi_struct *napi); @@ -476,6 +479,10 @@ void napi_hash_add(struct napi_struct *napi); * Warning: caller must observe rcu grace period * before freeing memory containing @napi, if * this function returns true. + * Note: core networking stack automatically calls it + * from netif_napi_del() + * Drivers might want to call this helper to combine all + * the needed rcu grace periods into a single one. */ bool napi_hash_del(struct napi_struct *napi); diff --git a/net/core/dev.c b/net/core/dev.c index 59dddac1c2e7..41cef3e3f558 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4807,6 +4807,7 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi, napi->poll_owner = -1; #endif set_bit(NAPI_STATE_SCHED, &napi->state); + napi_hash_add(napi); } EXPORT_SYMBOL(netif_napi_add); @@ -4826,8 +4827,12 @@ void napi_disable(struct napi_struct *n) } EXPORT_SYMBOL(napi_disable); +/* Must be called in process context */ void netif_napi_del(struct napi_struct *napi) { + might_sleep(); + if (napi_hash_del(napi)) + synchronize_net(); list_del_init(&napi->dev_list); napi_free_frags(napi); @@ -7227,11 +7232,13 @@ EXPORT_SYMBOL(alloc_netdev_mqs); * This function does the last stage of destroying an allocated device * interface. The reference to the device object is released. * If this is the last reference then it will be freed. + * Must be called in process context. */ void free_netdev(struct net_device *dev) { struct napi_struct *p, *n; + might_sleep(); netif_free_tx_queues(dev); #ifdef CONFIG_SYSFS kvfree(dev->_rx); -- cgit v1.2.3-71-gd317 From a8acce6aa584aa731a2bed240bcd8dc955f01414 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Thu, 19 Nov 2015 12:53:21 +0100 Subject: ppp: remove PPPOX_ZOMBIE socket state PPPOX_ZOMBIE is never set anymore. Signed-off-by: Guillaume Nault Signed-off-by: David S. 
Miller --- drivers/net/ppp/pppoe.c | 4 ++-- drivers/net/ppp/pppox.c | 2 +- include/linux/if_pppox.h | 1 - 3 files changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index 1dedfbf1d423..277e6827d7cd 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -311,7 +311,7 @@ static void pppoe_flush_dev(struct net_device *dev) lock_sock(sk); if (po->pppoe_dev == dev && - sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND | PPPOX_ZOMBIE)) { + sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) { pppox_unbind_sock(sk); sk->sk_state_change(sk); po->pppoe_dev = NULL; @@ -775,7 +775,7 @@ static int pppoe_ioctl(struct socket *sock, unsigned int cmd, struct pppox_sock *relay_po; err = -EBUSY; - if (sk->sk_state & (PPPOX_BOUND | PPPOX_ZOMBIE | PPPOX_DEAD)) + if (sk->sk_state & (PPPOX_BOUND | PPPOX_DEAD)) break; err = -ENOTCONN; diff --git a/drivers/net/ppp/pppox.c b/drivers/net/ppp/pppox.c index 0e1b30622477..0200de74eebc 100644 --- a/drivers/net/ppp/pppox.c +++ b/drivers/net/ppp/pppox.c @@ -58,7 +58,7 @@ void pppox_unbind_sock(struct sock *sk) { /* Clear connection to ppp device, if attached. */ - if (sk->sk_state & (PPPOX_BOUND | PPPOX_CONNECTED | PPPOX_ZOMBIE)) { + if (sk->sk_state & (PPPOX_BOUND | PPPOX_CONNECTED)) { ppp_unregister_channel(&pppox_sk(sk)->chan); sk->sk_state = PPPOX_DEAD; } diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index b49cf923becc..ba7a9b0c7c57 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ -91,7 +91,6 @@ enum { PPPOX_CONNECTED = 1, /* connection established ==TCP_ESTABLISHED */ PPPOX_BOUND = 2, /* bound to ppp device */ PPPOX_RELAY = 4, /* forwarding is enabled */ - PPPOX_ZOMBIE = 8, /* dead, but still bound to ppp device */ PPPOX_DEAD = 16 /* dead, useless, please clean me up!*/ }; -- cgit v1.2.3-71-gd317 From b11cfb5807e30333b36c02701382b820b7dcf0d5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 20 Nov 2015 15:55:52 -0500 Subject: cgroup: record ancestor IDs and reimplement cgroup_is_descendant() using it cgroup_is_descendant() currently walks up the hierarchy and compares each ancestor to the cgroup in question. While enough for cgroup core usages, this can't be used in hot paths to test cgroup membership. This patch adds cgroup->ancestor_ids[] which records the IDs of all ancestors including self and cgroup->level for the nesting level. This allows testing whether a given cgroup is a descendant of another in three finite steps - testing whether the two belong to the same hierarchy, whether the descendant candidate is at the same or a higher level than the ancestor and comparing the recorded ancestor_id at the matching level. cgroup_is_descendant() is accordingly reimplmented and made inline. Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 14 ++++++++++++++ include/linux/cgroup.h | 18 +++++++++++++++++- kernel/cgroup.c | 32 ++++++++++---------------------- 3 files changed, 41 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 60d44b26276d..504d8591b6d3 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -234,6 +234,14 @@ struct cgroup { */ int id; + /* + * The depth this cgroup is at. The root is at depth zero and each + * step down the hierarchy increments the level. This along with + * ancestor_ids[] can determine whether a given cgroup is a + * descendant of another without traversing the hierarchy. 
+ */ + int level; + /* * Each non-empty css_set associated with this cgroup contributes * one to populated_cnt. All children with non-zero popuplated_cnt @@ -289,6 +297,9 @@ struct cgroup { /* used to schedule release agent */ struct work_struct release_agent_work; + + /* ids of the ancestors at each level including self */ + int ancestor_ids[]; }; /* @@ -308,6 +319,9 @@ struct cgroup_root { /* The root cgroup. Root is destroyed on its release. */ struct cgroup cgrp; + /* for cgrp->ancestor_ids[0] */ + int cgrp_ancestor_id_storage; + /* Number of cgroups in the hierarchy, used only for /proc/cgroups */ atomic_t nr_cgrps; diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 22e3754f89c5..b5ee2c4210f9 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -81,7 +81,6 @@ struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup, struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry, struct cgroup_subsys *ss); -bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor); int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from); @@ -459,6 +458,23 @@ static inline struct cgroup *task_cgroup(struct task_struct *task, return task_css(task, subsys_id)->cgroup; } +/** + * cgroup_is_descendant - test ancestry + * @cgrp: the cgroup to be tested + * @ancestor: possible ancestor of @cgrp + * + * Test whether @cgrp is a descendant of @ancestor. It also returns %true + * if @cgrp == @ancestor. This function is safe to call as long as @cgrp + * and @ancestor are accessible. + */ +static inline bool cgroup_is_descendant(struct cgroup *cgrp, + struct cgroup *ancestor) +{ + if (cgrp->root != ancestor->root || cgrp->level < ancestor->level) + return false; + return cgrp->ancestor_ids[ancestor->level] == ancestor->id; +} + /* no synchronization, the result can only be used as a hint */ static inline bool cgroup_is_populated(struct cgroup *cgrp) { diff --git a/kernel/cgroup.c b/kernel/cgroup.c index f1603c153890..3190040792c8 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -459,25 +459,6 @@ struct cgroup_subsys_state *of_css(struct kernfs_open_file *of) } EXPORT_SYMBOL_GPL(of_css); -/** - * cgroup_is_descendant - test ancestry - * @cgrp: the cgroup to be tested - * @ancestor: possible ancestor of @cgrp - * - * Test whether @cgrp is a descendant of @ancestor. It also returns %true - * if @cgrp == @ancestor. This function is safe to call as long as @cgrp - * and @ancestor are accessible. - */ -bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor) -{ - while (cgrp) { - if (cgrp == ancestor) - return true; - cgrp = cgroup_parent(cgrp); - } - return false; -} - static int notify_on_release(const struct cgroup *cgrp) { return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); @@ -1903,6 +1884,7 @@ static int cgroup_setup_root(struct cgroup_root *root, unsigned long ss_mask) if (ret < 0) goto out; root_cgrp->id = ret; + root_cgrp->ancestor_ids[0] = ret; ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release, 0, GFP_KERNEL); @@ -4846,11 +4828,11 @@ err_free_css: static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode) { - struct cgroup *parent, *cgrp; + struct cgroup *parent, *cgrp, *tcgrp; struct cgroup_root *root; struct cgroup_subsys *ss; struct kernfs_node *kn; - int ssid, ret; + int level, ssid, ret; /* Do not accept '\n' to prevent making /proc//cgroup unparsable. 
*/ @@ -4861,9 +4843,11 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, if (!parent) return -ENODEV; root = parent->root; + level = parent->level + 1; /* allocate the cgroup and its ID, 0 is reserved for the root */ - cgrp = kzalloc(sizeof(*cgrp), GFP_KERNEL); + cgrp = kzalloc(sizeof(*cgrp) + + sizeof(cgrp->ancestor_ids[0]) * (level + 1), GFP_KERNEL); if (!cgrp) { ret = -ENOMEM; goto out_unlock; @@ -4887,6 +4871,10 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, cgrp->self.parent = &parent->self; cgrp->root = root; + cgrp->level = level; + + for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp)) + cgrp->ancestor_ids[tcgrp->level] = tcgrp->id; if (notify_on_release(parent)) set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); -- cgit v1.2.3-71-gd317 From bd96f76a2454c6b97d70945902e30b4c31510678 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 20 Nov 2015 15:55:52 -0500 Subject: kernfs: implement kernfs_walk_and_get() Implement kernfs_walk_and_get() which is similar to kernfs_find_and_get() but can walk a path instead of just a name. v2: Use strlcpy() instead of strlen() + memcpy() as suggested by David. Signed-off-by: Tejun Heo Acked-by: Greg Kroah-Hartman Cc: David Miller --- fs/kernfs/dir.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/kernfs.h | 12 ++++++++++++ 2 files changed, 58 insertions(+) (limited to 'include/linux') diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 91e004518237..742bf4a230e8 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -694,6 +694,29 @@ static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent, return NULL; } +static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent, + const unsigned char *path, + const void *ns) +{ + static char path_buf[PATH_MAX]; /* protected by kernfs_mutex */ + size_t len = strlcpy(path_buf, path, PATH_MAX); + char *p = path_buf; + char *name; + + lockdep_assert_held(&kernfs_mutex); + + if (len >= PATH_MAX) + return NULL; + + while ((name = strsep(&p, "/")) && parent) { + if (*name == '\0') + continue; + parent = kernfs_find_ns(parent, name, ns); + } + + return parent; +} + /** * kernfs_find_and_get_ns - find and get kernfs_node with the given name * @parent: kernfs_node to search under @@ -718,6 +741,29 @@ struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent, } EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns); +/** + * kernfs_walk_and_get_ns - find and get kernfs_node with the given path + * @parent: kernfs_node to search under + * @path: path to look for + * @ns: the namespace tag to use + * + * Look for kernfs_node with path @path under @parent and get a reference + * if found. This function may sleep and returns pointer to the found + * kernfs_node on success, %NULL on failure. 
+ */ +struct kernfs_node *kernfs_walk_and_get_ns(struct kernfs_node *parent, + const char *path, const void *ns) +{ + struct kernfs_node *kn; + + mutex_lock(&kernfs_mutex); + kn = kernfs_walk_ns(parent, path, ns); + kernfs_get(kn); + mutex_unlock(&kernfs_mutex); + + return kn; +} + /** * kernfs_create_root - create a new kernfs hierarchy * @scops: optional syscall operations for the hierarchy diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 5d4e9c4b821d..af51df35d749 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -274,6 +274,8 @@ void pr_cont_kernfs_path(struct kernfs_node *kn); struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn); struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent, const char *name, const void *ns); +struct kernfs_node *kernfs_walk_and_get_ns(struct kernfs_node *parent, + const char *path, const void *ns); void kernfs_get(struct kernfs_node *kn); void kernfs_put(struct kernfs_node *kn); @@ -350,6 +352,10 @@ static inline struct kernfs_node * kernfs_find_and_get_ns(struct kernfs_node *parent, const char *name, const void *ns) { return NULL; } +static inline struct kernfs_node * +kernfs_walk_and_get_ns(struct kernfs_node *parent, const char *path, + const void *ns) +{ return NULL; } static inline void kernfs_get(struct kernfs_node *kn) { } static inline void kernfs_put(struct kernfs_node *kn) { } @@ -430,6 +436,12 @@ kernfs_find_and_get(struct kernfs_node *kn, const char *name) return kernfs_find_and_get_ns(kn, name, NULL); } +static inline struct kernfs_node * +kernfs_walk_and_get(struct kernfs_node *kn, const char *path) +{ + return kernfs_walk_and_get_ns(kn, path, NULL); +} + static inline struct kernfs_node * kernfs_create_dir(struct kernfs_node *parent, const char *name, umode_t mode, void *priv) -- cgit v1.2.3-71-gd317 From 16af439645455fbf36984ca5e72f31073ee19ab7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 20 Nov 2015 15:55:52 -0500 Subject: cgroup: implement cgroup_get_from_path() and expose cgroup_put() Implement cgroup_get_from_path() using kernfs_walk_and_get() which obtains a default hierarchy cgroup from its path. This will be used to allow cgroup path based matching from outside cgroup proper - e.g. networking and perf. v2: Add EXPORT_SYMBOL_GPL(cgroup_get_from_path). 
Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 7 +++++++ kernel/cgroup.c | 39 ++++++++++++++++++++++++++++++++++----- 2 files changed, 41 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index b5ee2c4210f9..4c3ffab81ba7 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -81,6 +81,8 @@ struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup, struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry, struct cgroup_subsys *ss); +struct cgroup *cgroup_get_from_path(const char *path); + int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from); @@ -351,6 +353,11 @@ static inline void css_put_many(struct cgroup_subsys_state *css, unsigned int n) percpu_ref_put_many(&css->refcnt, n); } +static inline void cgroup_put(struct cgroup *cgrp) +{ + css_put(&cgrp->self); +} + /** * task_css_set_check - obtain a task's css_set with extra access conditions * @task: the task to obtain css_set for diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 3190040792c8..3db5e8f5b702 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -434,11 +434,6 @@ static bool cgroup_tryget(struct cgroup *cgrp) return css_tryget(&cgrp->self); } -static void cgroup_put(struct cgroup *cgrp) -{ - css_put(&cgrp->self); -} - struct cgroup_subsys_state *of_css(struct kernfs_open_file *of) { struct cgroup *cgrp = of->kn->parent->priv; @@ -5753,6 +5748,40 @@ struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss) return id > 0 ? idr_find(&ss->css_idr, id) : NULL; } +/** + * cgroup_get_from_path - lookup and get a cgroup from its default hierarchy path + * @path: path on the default hierarchy + * + * Find the cgroup at @path on the default hierarchy, increment its + * reference count and return it. Returns pointer to the found cgroup on + * success, ERR_PTR(-ENOENT) if @path doens't exist and ERR_PTR(-ENOTDIR) + * if @path points to a non-directory. + */ +struct cgroup *cgroup_get_from_path(const char *path) +{ + struct kernfs_node *kn; + struct cgroup *cgrp; + + mutex_lock(&cgroup_mutex); + + kn = kernfs_walk_and_get(cgrp_dfl_root.cgrp.kn, path); + if (kn) { + if (kernfs_type(kn) == KERNFS_DIR) { + cgrp = kn->priv; + cgroup_get(cgrp); + } else { + cgrp = ERR_PTR(-ENOTDIR); + } + kernfs_put(kn); + } else { + cgrp = ERR_PTR(-ENOENT); + } + + mutex_unlock(&cgroup_mutex); + return cgrp; +} +EXPORT_SYMBOL_GPL(cgroup_get_from_path); + #ifdef CONFIG_CGROUP_DEBUG static struct cgroup_subsys_state * debug_css_alloc(struct cgroup_subsys_state *parent_css) -- cgit v1.2.3-71-gd317 From f7ccdb96fa31305d480678b1ba81225907dd81ef Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Wed, 11 Nov 2015 20:17:37 -0200 Subject: netfilter: nf_ct_sctp: move ip_ct_sctp away from UAPI ip_ct_sctp is an internal structure, embedded by the union nf_conntrack_proto to store sctp-specific information at conntrack entries. It has no business with UAPI. This patch moves it from UAPI to a saner place, together with similar structs for other protocols. 
Signed-off-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_sctp.h | 13 +++++++++++++ include/uapi/linux/netfilter/nf_conntrack_sctp.h | 12 +++--------- 2 files changed, 16 insertions(+), 9 deletions(-) create mode 100644 include/linux/netfilter/nf_conntrack_sctp.h (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_sctp.h b/include/linux/netfilter/nf_conntrack_sctp.h new file mode 100644 index 000000000000..22a16a23cd8a --- /dev/null +++ b/include/linux/netfilter/nf_conntrack_sctp.h @@ -0,0 +1,13 @@ +#ifndef _NF_CONNTRACK_SCTP_H +#define _NF_CONNTRACK_SCTP_H +/* SCTP tracking. */ + +#include + +struct ip_ct_sctp { + enum sctp_conntrack state; + + __be32 vtag[IP_CT_DIR_MAX]; +}; + +#endif /* _NF_CONNTRACK_SCTP_H */ diff --git a/include/uapi/linux/netfilter/nf_conntrack_sctp.h b/include/uapi/linux/netfilter/nf_conntrack_sctp.h index ed4e776e1242..2cbc366c3fb4 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_sctp.h +++ b/include/uapi/linux/netfilter/nf_conntrack_sctp.h @@ -1,5 +1,5 @@ -#ifndef _NF_CONNTRACK_SCTP_H -#define _NF_CONNTRACK_SCTP_H +#ifndef _UAPI_NF_CONNTRACK_SCTP_H +#define _UAPI_NF_CONNTRACK_SCTP_H /* SCTP tracking. */ #include @@ -18,10 +18,4 @@ enum sctp_conntrack { SCTP_CONNTRACK_MAX }; -struct ip_ct_sctp { - enum sctp_conntrack state; - - __be32 vtag[IP_CT_DIR_MAX]; -}; - -#endif /* _NF_CONNTRACK_SCTP_H */ +#endif /* _UAPI_NF_CONNTRACK_SCTP_H */ -- cgit v1.2.3-71-gd317 From 9458ceab49179b7fd2d5192fd9dcf316ca195dc0 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 24 Nov 2015 15:30:21 -0800 Subject: net: phy: bcm7xxx: Add entry for Broadcom BCM7435 Add a PHY entry for the Broadcom BCM7435 chips, this is a 40nm generation Ethernet PHY which is analogous to its 7425 and 7429 counter parts. Signed-off-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/phy/bcm7xxx.c | 14 ++++++++++++++ include/linux/brcmphy.h | 1 + 2 files changed, 15 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c index 03d4809a9126..d4083c381cd1 100644 --- a/drivers/net/phy/bcm7xxx.c +++ b/drivers/net/phy/bcm7xxx.c @@ -360,6 +360,19 @@ static struct phy_driver bcm7xxx_driver[] = { .suspend = bcm7xxx_suspend, .resume = bcm7xxx_config_init, .driver = { .owner = THIS_MODULE }, +}, { + .phy_id = PHY_ID_BCM7435, + .phy_id_mask = 0xfffffff0, + .name = "Broadcom BCM7435", + .features = PHY_GBIT_FEATURES | + SUPPORTED_Pause | SUPPORTED_Asym_Pause, + .flags = PHY_IS_INTERNAL, + .config_init = bcm7xxx_config_init, + .config_aneg = genphy_config_aneg, + .read_status = genphy_read_status, + .suspend = bcm7xxx_suspend, + .resume = bcm7xxx_config_init, + .driver = { .owner = THIS_MODULE }, }, { .phy_id = PHY_BCM_OUI_4, .phy_id_mask = 0xffff0000, @@ -395,6 +408,7 @@ static struct mdio_device_id __maybe_unused bcm7xxx_tbl[] = { { PHY_ID_BCM7425, 0xfffffff0, }, { PHY_ID_BCM7429, 0xfffffff0, }, { PHY_ID_BCM7439, 0xfffffff0, }, + { PHY_ID_BCM7435, 0xfffffff0, }, { PHY_ID_BCM7445, 0xfffffff0, }, { PHY_BCM_OUI_4, 0xffff0000 }, { PHY_BCM_OUI_5, 0xffffff00 }, diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 59f4a7304419..f0ba9c2ec639 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -26,6 +26,7 @@ #define PHY_ID_BCM7366 0x600d8490 #define PHY_ID_BCM7425 0x600d86b0 #define PHY_ID_BCM7429 0x600d8730 +#define PHY_ID_BCM7435 0x600d8750 #define PHY_ID_BCM7439 0x600d8480 #define PHY_ID_BCM7439_2 0xae025080 #define PHY_ID_BCM7445 0x600d8510 -- cgit v1.2.3-71-gd317 From 1ce0bf50ae2233c7115a18c0c623662d177b434c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 26 Nov 2015 13:55:39 +0800 Subject: net: Generalise wq_has_sleeper helper The memory barrier in the helper wq_has_sleeper is needed by just about every user of waitqueue_active. This patch generalises it by making it take a wait_queue_head_t directly. The existing helper is renamed to skwq_has_sleeper. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- crypto/algif_aead.c | 4 ++-- crypto/algif_skcipher.c | 4 ++-- include/linux/wait.h | 21 +++++++++++++++++++++ include/net/sock.h | 15 +++++---------- net/atm/common.c | 4 ++-- net/core/sock.c | 8 ++++---- net/core/stream.c | 2 +- net/dccp/output.c | 2 +- net/iucv/af_iucv.c | 2 +- net/rxrpc/af_rxrpc.c | 2 +- net/sctp/socket.c | 2 +- net/tipc/socket.c | 4 ++-- net/unix/af_unix.c | 2 +- 13 files changed, 44 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c index 0aa6fdfb448a..fb99f30849d2 100644 --- a/crypto/algif_aead.c +++ b/crypto/algif_aead.c @@ -106,7 +106,7 @@ static void aead_wmem_wakeup(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLRDNORM | POLLRDBAND); @@ -157,7 +157,7 @@ static void aead_data_wakeup(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | POLLRDNORM | POLLRDBAND); diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index af31a0ee4057..0e6702e41472 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -238,7 +238,7 @@ static void skcipher_wmem_wakeup(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLRDNORM | POLLRDBAND); @@ -288,7 +288,7 @@ static void skcipher_data_wakeup(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | POLLRDNORM | POLLRDBAND); diff --git a/include/linux/wait.h b/include/linux/wait.h index 1e1bf9f963a9..6aa09a875fbd 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -107,6 +107,27 @@ static inline int waitqueue_active(wait_queue_head_t *q) return !list_empty(&q->task_list); } +/** + * wq_has_sleeper - check if there are any waiting processes + * @wq: wait queue head + * + * Returns true if wq has waiting processes + * + * Please refer to the comment for waitqueue_active. + */ +static inline bool wq_has_sleeper(wait_queue_head_t *wq) +{ + /* + * We need to be sure we are in sync with the + * add_wait_queue modifications to the wait queue. + * + * This memory barrier should be paired with one on the + * waiting side. + */ + smp_mb(); + return waitqueue_active(wq); +} + extern void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait); extern void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait); extern void remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait); diff --git a/include/net/sock.h b/include/net/sock.h index 7f89e4ba18d1..62d35afcb3ac 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -58,6 +58,7 @@ #include #include #include +#include #include #include @@ -1879,12 +1880,12 @@ static inline bool sk_has_allocations(const struct sock *sk) } /** - * wq_has_sleeper - check if there are any waiting processes + * skwq_has_sleeper - check if there are any waiting processes * @wq: struct socket_wq * * Returns true if socket_wq has waiting processes * - * The purpose of the wq_has_sleeper and sock_poll_wait is to wrap the memory + * The purpose of the skwq_has_sleeper and sock_poll_wait is to wrap the memory * barrier call. They were added due to the race found within the tcp code. 
* * Consider following tcp code paths: @@ -1910,15 +1911,9 @@ static inline bool sk_has_allocations(const struct sock *sk) * data on the socket. * */ -static inline bool wq_has_sleeper(struct socket_wq *wq) +static inline bool skwq_has_sleeper(struct socket_wq *wq) { - /* We need to be sure we are in sync with the - * add_wait_queue modifications to the wait queue. - * - * This memory barrier is paired in the sock_poll_wait. - */ - smp_mb(); - return wq && waitqueue_active(&wq->wait); + return wq && wq_has_sleeper(&wq->wait); } /** diff --git a/net/atm/common.c b/net/atm/common.c index 49a872db7e42..6dc12305799e 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -96,7 +96,7 @@ static void vcc_def_wakeup(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up(&wq->wait); rcu_read_unlock(); } @@ -117,7 +117,7 @@ static void vcc_write_space(struct sock *sk) if (vcc_writable(sk)) { wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible(&wq->wait); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); diff --git a/net/core/sock.c b/net/core/sock.c index 1e4dd54bfb5a..2769bd3a4d7c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2283,7 +2283,7 @@ static void sock_def_wakeup(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_all(&wq->wait); rcu_read_unlock(); } @@ -2294,7 +2294,7 @@ static void sock_def_error_report(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_poll(&wq->wait, POLLERR); sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); rcu_read_unlock(); @@ -2306,7 +2306,7 @@ static void sock_def_readable(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND); sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); @@ -2324,7 +2324,7 @@ static void sock_def_write_space(struct sock *sk) */ if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | POLLWRNORM | POLLWRBAND); diff --git a/net/core/stream.c b/net/core/stream.c index d70f77a0c889..8ff9d63b4265 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -35,7 +35,7 @@ void sk_stream_write_space(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_poll(&wq->wait, POLLOUT | POLLWRNORM | POLLWRBAND); if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) diff --git a/net/dccp/output.c b/net/dccp/output.c index 4ce912e691d0..b66c84db0766 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -201,7 +201,7 @@ void dccp_write_space(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible(&wq->wait); /* Should agree with poll, otherwise some programs break */ if (sock_writeable(sk)) diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index fcb2752419c6..4f0aa91470c6 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -303,7 +303,7 @@ static void iucv_sock_wake_msglim(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if 
(skwq_has_sleeper(wq)) wake_up_interruptible_all(&wq->wait); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); rcu_read_unlock(); diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 1f8a144a5dc2..7e2d1057d8bc 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -67,7 +67,7 @@ static void rxrpc_write_space(struct sock *sk) if (rxrpc_writable(sk)) { struct socket_wq *wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible(&wq->wait); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 897c01c029ca..ec10b66354b8 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -6978,7 +6978,7 @@ void sctp_data_ready(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLRDNORM | POLLRDBAND); sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 552dbaba9cf3..525acf6dd1c6 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1492,7 +1492,7 @@ static void tipc_write_space(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | POLLWRNORM | POLLWRBAND); rcu_read_unlock(); @@ -1509,7 +1509,7 @@ static void tipc_data_ready(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLRDNORM | POLLRDBAND); rcu_read_unlock(); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 955ec152cb71..efb706e1d1c0 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -339,7 +339,7 @@ static void unix_write_space(struct sock *sk) rcu_read_lock(); if (unix_writable(sk)) { wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | POLLWRNORM | POLLWRBAND); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); -- cgit v1.2.3-71-gd317 From 06bd6c0370bb88a2256c6763a32bc4e4ade06521 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 26 Nov 2015 15:23:45 +0100 Subject: net: ipmr: remove unused MFC_NOTIFY flag and make the flags enum MFC_NOTIFY was introduced in kernel 2.1.68 but afaik it hasn't been used and I couldn't find any users currently so just remove it. Only MFC_STATIC is left, so move it into an enum, add a description and use BIT(). Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- include/linux/mroute.h | 10 +++++++--- net/ipv4/ipmr.c | 2 -- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mroute.h b/include/linux/mroute.h index 79aaa9fc1a15..fa66ebc1fed6 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -64,6 +64,13 @@ struct vif_device { #define VIFF_STATIC 0x8000 +/* mfc_flags: + * MFC_STATIC - the entry was added statically (not by a routing daemon) + */ +enum { + MFC_STATIC = BIT(0), +}; + struct mfc_cache { struct list_head list; __be32 mfc_mcastgrp; /* Group the entry belongs to */ @@ -89,9 +96,6 @@ struct mfc_cache { struct rcu_head rcu; }; -#define MFC_STATIC 1 -#define MFC_NOTIFY 2 - #define MFC_LINES 64 #ifdef __BIG_ENDIAN diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index a2d248d9c35c..a74e61883b8f 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2199,8 +2199,6 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb, } read_lock(&mrt_lock); - if (!nowait && (rtm->rtm_flags & RTM_F_NOTIFY)) - cache->mfc_flags |= MFC_NOTIFY; err = __ipmr_fill_mroute(mrt, skb, cache, rtm); read_unlock(&mrt_lock); rcu_read_unlock(); -- cgit v1.2.3-71-gd317 From 520191bb404c4b7b4cdb70a5480ada974b0c2d60 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 26 Nov 2015 15:23:46 +0100 Subject: net: ipmr: adjust mroute.h style and drop extern Remove extra spaces and tabs, adjust function definitions, remove an unnecessary ifdef (already used below, just move code) and drop extern from the functions. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/mroute.h | 47 +++++++++++++++++++++-------------------------- 1 file changed, 21 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mroute.h b/include/linux/mroute.h index fa66ebc1fed6..7c567a2679ce 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -9,38 +9,28 @@ #ifdef CONFIG_IP_MROUTE static inline int ip_mroute_opt(int opt) { - return (opt >= MRT_BASE) && (opt <= MRT_MAX); + return opt >= MRT_BASE && opt <= MRT_MAX; } -#else -static inline int ip_mroute_opt(int opt) -{ - return 0; -} -#endif -#ifdef CONFIG_IP_MROUTE -extern int ip_mroute_setsockopt(struct sock *, int, char __user *, unsigned int); -extern int ip_mroute_getsockopt(struct sock *, int, char __user *, int __user *); -extern int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg); -extern int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg); -extern int ip_mr_init(void); +int ip_mroute_setsockopt(struct sock *, int, char __user *, unsigned int); +int ip_mroute_getsockopt(struct sock *, int, char __user *, int __user *); +int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg); +int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg); +int ip_mr_init(void); #else -static inline -int ip_mroute_setsockopt(struct sock *sock, - int optname, char __user *optval, unsigned int optlen) +static inline int ip_mroute_setsockopt(struct sock *sock, int optname, + char __user *optval, unsigned int optlen) { return -ENOPROTOOPT; } -static inline -int ip_mroute_getsockopt(struct sock *sock, - int optname, char __user *optval, int __user *optlen) +static inline int ip_mroute_getsockopt(struct sock *sock, int optname, + char __user *optval, int __user *optlen) { return -ENOPROTOOPT; } -static inline -int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) +static inline int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) { return -ENOIOCTLCMD; } 
@@ -49,6 +39,11 @@ static inline int ip_mr_init(void) { return 0; } + +static inline int ip_mroute_opt(int opt) +{ + return 0; +} #endif struct vif_device { @@ -96,16 +91,16 @@ struct mfc_cache { struct rcu_head rcu; }; -#define MFC_LINES 64 +#define MFC_LINES 64 #ifdef __BIG_ENDIAN #define MFC_HASH(a,b) (((((__force u32)(__be32)a)>>24)^(((__force u32)(__be32)b)>>26))&(MFC_LINES-1)) #else #define MFC_HASH(a,b) ((((__force u32)(__be32)a)^(((__force u32)(__be32)b)>>2))&(MFC_LINES-1)) -#endif +#endif struct rtmsg; -extern int ipmr_get_route(struct net *net, struct sk_buff *skb, - __be32 saddr, __be32 daddr, - struct rtmsg *rtm, int nowait); +int ipmr_get_route(struct net *net, struct sk_buff *skb, + __be32 saddr, __be32 daddr, + struct rtmsg *rtm, int nowait); #endif -- cgit v1.2.3-71-gd317 From 5ea1f13299d8b8edcb2969eda4c81f8e3264b706 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 26 Nov 2015 15:23:47 +0100 Subject: net: ipmr: move struct mr_table and VIF_EXISTS to mroute.h Move the definitions of VIF_EXISTS() and struct mr_table to mroute.h Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/mroute.h | 21 +++++++++++++++++++-- net/ipv4/ipmr.c | 18 ------------------ 2 files changed, 19 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mroute.h b/include/linux/mroute.h index 7c567a2679ce..bf9b322cb0b0 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -59,6 +59,25 @@ struct vif_device { #define VIFF_STATIC 0x8000 +#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL) +#define MFC_LINES 64 + +struct mr_table { + struct list_head list; + possible_net_t net; + u32 id; + struct sock __rcu *mroute_sk; + struct timer_list ipmr_expire_timer; + struct list_head mfc_unres_queue; + struct list_head mfc_cache_array[MFC_LINES]; + struct vif_device vif_table[MAXVIFS]; + int maxvif; + atomic_t cache_resolve_queue_len; + bool mroute_do_assert; + bool mroute_do_pim; + int mroute_reg_vif_num; +}; + /* mfc_flags: * MFC_STATIC - the entry was added statically (not by a routing daemon) */ @@ -91,8 +110,6 @@ struct mfc_cache { struct rcu_head rcu; }; -#define MFC_LINES 64 - #ifdef __BIG_ENDIAN #define MFC_HASH(a,b) (((((__force u32)(__be32)a)>>24)^(((__force u32)(__be32)b)>>26))&(MFC_LINES-1)) #else diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index a74e61883b8f..ff3dbbb9f11c 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -67,22 +67,6 @@ #include #include -struct mr_table { - struct list_head list; - possible_net_t net; - u32 id; - struct sock __rcu *mroute_sk; - struct timer_list ipmr_expire_timer; - struct list_head mfc_unres_queue; - struct list_head mfc_cache_array[MFC_LINES]; - struct vif_device vif_table[MAXVIFS]; - int maxvif; - atomic_t cache_resolve_queue_len; - bool mroute_do_assert; - bool mroute_do_pim; - int mroute_reg_vif_num; -}; - struct ipmr_rule { struct fib_rule common; }; @@ -104,8 +88,6 @@ static DEFINE_RWLOCK(mrt_lock); /* Multicast router control variables */ -#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL) - /* Special spinlock for queue of unresolved entries */ static DEFINE_SPINLOCK(mfc_unres_lock); -- cgit v1.2.3-71-gd317 From 1973a4ea6ceaa47671227c3077f90508ea30897b Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 26 Nov 2015 15:23:48 +0100 Subject: net: ipmr: move pimsm_enabled to pim.h and rename Move the inline pimsm_enabled() to pim.h and rename it to ipmr_pimsm_enabled to show it's for the ipv4 ipmr code since pim.h is used 
by IPv6 too. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/pim.h | 5 +++++ net/ipv4/ipmr.c | 11 +++-------- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pim.h b/include/linux/pim.h index 252bf6644c51..e1d756f81348 100644 --- a/include/linux/pim.h +++ b/include/linux/pim.h @@ -13,6 +13,11 @@ #define PIM_NULL_REGISTER cpu_to_be32(0x40000000) +static inline bool ipmr_pimsm_enabled(void) +{ + return IS_BUILTIN(CONFIG_IP_PIMSM_V1) || IS_BUILTIN(CONFIG_IP_PIMSM_V2); +} + /* PIMv2 register message header layout (ietf-draft-idmr-pimvsm-v2-00.ps */ struct pimreghdr { diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index ff3dbbb9f11c..322fdc6ac75b 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -75,11 +75,6 @@ struct ipmr_result { struct mr_table *mrt; }; -static inline bool pimsm_enabled(void) -{ - return IS_BUILTIN(CONFIG_IP_PIMSM_V1) || IS_BUILTIN(CONFIG_IP_PIMSM_V2); -} - /* Big lock, protecting vif table, mrt cache and mroute socket state. * Note that the changes are semaphored via rtnl_lock. */ @@ -751,7 +746,7 @@ static int vif_add(struct net *net, struct mr_table *mrt, switch (vifc->vifc_flags) { case VIFF_REGISTER: - if (!pimsm_enabled()) + if (!ipmr_pimsm_enabled()) return -EINVAL; /* Special Purpose VIF in PIM * All the packets will be sent to the daemon @@ -1377,7 +1372,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, mrt->mroute_do_assert = val; break; case MRT_PIM: - if (!pimsm_enabled()) { + if (!ipmr_pimsm_enabled()) { ret = -ENOPROTOOPT; break; } @@ -1451,7 +1446,7 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int val = 0x0305; break; case MRT_PIM: - if (!pimsm_enabled()) + if (!ipmr_pimsm_enabled()) return -ENOPROTOOPT; val = mrt->mroute_do_pim; break; -- cgit v1.2.3-71-gd317 From 91420b83baa046ada1a899c97f3b2c52a9045705 Mon Sep 17 00:00:00 2001 From: Sudarsana Kalluru Date: Mon, 30 Nov 2015 12:25:03 +0200 Subject: qed: Add support for changing LED state Physical LEDs are being controlled by the management FW. This adds the qed functionality required to request management FW to change the LED configuration, as well as the necessary APIs for this functionality to later be used by the protocol drivers. Signed-off-by: Sudarsana Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. 
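The diff that follows adds enum qed_led_mode and a set_led() hook to struct qed_common_ops. A hedged sketch of how a protocol driver's ethtool set_phys_id handler might drive it; the my_edev glue, its ->cdev and ->common_ops pointers, and the handler name are assumptions for the sketch, only set_led() and the QED_LED_MODE_* values come from the patch below:

#include <linux/ethtool.h>
#include <linux/netdevice.h>
#include <linux/qed/qed_if.h>

/* Hypothetical protocol-driver glue around the qed common ops. */
struct my_edev {
        struct qed_dev *cdev;
        const struct qed_common_ops *common_ops;
};

static int my_set_phys_id(struct net_device *ndev,
                          enum ethtool_phys_id_state state)
{
        struct my_edev *edev = netdev_priv(ndev);
        enum qed_led_mode mode;

        switch (state) {
        case ETHTOOL_ID_ACTIVE:
                return 1;               /* let the core drive ON/OFF cycles */
        case ETHTOOL_ID_ON:
                mode = QED_LED_MODE_ON;
                break;
        case ETHTOOL_ID_OFF:
                mode = QED_LED_MODE_OFF;
                break;
        default:                        /* ETHTOOL_ID_INACTIVE */
                mode = QED_LED_MODE_RESTORE;
                break;
        }

        return edev->common_ops->set_led(edev->cdev, mode);
}

The actual toggling is done by the management FW via DRV_MSG_CODE_SET_LED_MODE, as qed_mcp_set_led() in the diff shows.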
Miller --- drivers/net/ethernet/qlogic/qed/qed_hsi.h | 6 ++++++ drivers/net/ethernet/qlogic/qed/qed_main.c | 18 ++++++++++++++++++ drivers/net/ethernet/qlogic/qed/qed_mcp.c | 27 +++++++++++++++++++++++++++ drivers/net/ethernet/qlogic/qed/qed_mcp.h | 13 +++++++++++++ include/linux/qed/qed_if.h | 17 +++++++++++++++++ 5 files changed, 81 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index b2f8e854dfd1..264e954675d1 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -3993,6 +3993,8 @@ struct public_drv_mb { #define DRV_MSG_CODE_PHY_CORE_WRITE 0x000e0000 #define DRV_MSG_CODE_SET_VERSION 0x000f0000 +#define DRV_MSG_CODE_SET_LED_MODE 0x00200000 + #define DRV_MSG_SEQ_NUMBER_MASK 0x0000ffff u32 drv_mb_param; @@ -4044,6 +4046,10 @@ struct public_drv_mb { #define DRV_MB_PARAM_CFG_VF_MSIX_SB_NUM_SHIFT 8 #define DRV_MB_PARAM_CFG_VF_MSIX_SB_NUM_MASK 0x0000FF00 +#define DRV_MB_PARAM_SET_LED_MODE_OPER 0x0 +#define DRV_MB_PARAM_SET_LED_MODE_ON 0x1 +#define DRV_MB_PARAM_SET_LED_MODE_OFF 0x2 + u32 fw_mb_header; #define FW_MSG_CODE_MASK 0xffff0000 #define FW_MSG_CODE_DRV_LOAD_ENGINE 0x10100000 diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 947c7af72b25..6b02e1134360 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -1135,6 +1135,23 @@ static int qed_drain(struct qed_dev *cdev) return 0; } +static int qed_set_led(struct qed_dev *cdev, enum qed_led_mode mode) +{ + struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); + struct qed_ptt *ptt; + int status = 0; + + ptt = qed_ptt_acquire(hwfn); + if (!ptt) + return -EAGAIN; + + status = qed_mcp_set_led(hwfn, ptt, mode); + + qed_ptt_release(hwfn, ptt); + + return status; +} + const struct qed_common_ops qed_common_ops_pass = { .probe = &qed_probe, .remove = &qed_remove, @@ -1155,6 +1172,7 @@ const struct qed_common_ops qed_common_ops_pass = { .update_msglvl = &qed_init_dp, .chain_alloc = &qed_chain_alloc, .chain_free = &qed_chain_free, + .set_led = &qed_set_led, }; u32 qed_get_protocol_version(enum qed_protocol protocol) diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 20d048cdcb88..ba1b1f1ef789 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -858,3 +858,30 @@ qed_mcp_send_drv_version(struct qed_hwfn *p_hwfn, return 0; } + +int qed_mcp_set_led(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, + enum qed_led_mode mode) +{ + u32 resp = 0, param = 0, drv_mb_param; + int rc; + + switch (mode) { + case QED_LED_MODE_ON: + drv_mb_param = DRV_MB_PARAM_SET_LED_MODE_ON; + break; + case QED_LED_MODE_OFF: + drv_mb_param = DRV_MB_PARAM_SET_LED_MODE_OFF; + break; + case QED_LED_MODE_RESTORE: + drv_mb_param = DRV_MB_PARAM_SET_LED_MODE_OPER; + break; + default: + DP_NOTICE(p_hwfn, "Invalid LED mode %d\n", mode); + return -EINVAL; + } + + rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_SET_LED_MODE, + drv_mb_param, &resp, ¶m); + + return rc; +} diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h index dbaae586b4a7..506197d5c3dd 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h @@ -224,6 +224,19 @@ qed_mcp_send_drv_version(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, struct qed_mcp_drv_version *p_ver); +/** + * @brief 
Set LED status + * + * @param p_hwfn + * @param p_ptt + * @param mode - LED mode + * + * @return int - 0 - operation was successful. + */ +int qed_mcp_set_led(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + enum qed_led_mode mode); + /* Using hwfn number (and not pf_num) is required since in CMT mode, * same pf_num may be used by two different hwfn * TODO - this shouldn't really be in .h file, but until all fields diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index dc9a1353f971..d4a32e878180 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -25,6 +25,12 @@ #include #include +enum qed_led_mode { + QED_LED_MODE_OFF, + QED_LED_MODE_ON, + QED_LED_MODE_RESTORE +}; + #define DIRECT_REG_WR(reg_addr, val) writel((u32)val, \ (void __iomem *)(reg_addr)) @@ -252,6 +258,17 @@ struct qed_common_ops { void (*chain_free)(struct qed_dev *cdev, struct qed_chain *p_chain); + +/** + * @brief set_led - Configure LED mode + * + * @param cdev + * @param mode - LED mode + * + * @return 0 on success, error otherwise. + */ + int (*set_led)(struct qed_dev *cdev, + enum qed_led_mode mode); }; /** -- cgit v1.2.3-71-gd317 From c0eb454034aab783dc602739237a63b30867f5bd Mon Sep 17 00:00:00 2001 From: KY Srinivasan Date: Tue, 1 Dec 2015 16:43:10 -0800 Subject: hv_netvsc: Don't ask for additional head room in the skb The rndis header is 116 bytes big and can be placed in the default head room that will be available in the skb. Since the netvsc packet is less than 48 bytes, we can use the skb control buffer for the netvsc packet. With these changes we don't need to ask for additional head room. Signed-off-by: K. Y. Srinivasan Reviewed-by: Haiyang Zhang Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 3 +++ drivers/net/hyperv/netvsc_drv.c | 30 +++++++++++------------------- include/linux/netdevice.h | 4 +++- 3 files changed, 17 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index fc6d0c6de741..731054ef6da5 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -124,6 +124,9 @@ struct ndis_tcp_ip_checksum_info; /* * Represent netvsc packet which contains 1 RNDIS and 1 ethernet frame * within the RNDIS + * + * The size of this structure is less than 48 bytes and we can now + * place this structure in the skb->cb field. */ struct hv_netvsc_packet { /* Bookkeeping stuff */ diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 7e356a11c1d7..b820888409bc 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -433,7 +433,6 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) u32 net_trans_info; u32 hash; u32 skb_length; - u32 pkt_sz; struct hv_page_buffer page_buf[MAX_PAGE_BUFFER_COUNT]; struct netvsc_stats *tx_stats = this_cpu_ptr(net_device_ctx->tx_stats); @@ -461,16 +460,21 @@ check_size: goto check_size; } - pkt_sz = sizeof(struct hv_netvsc_packet) + RNDIS_AND_PPI_SIZE; - - ret = skb_cow_head(skb, pkt_sz); + /* + * Place the rndis header in the skb head room and + * the skb->cb will be used for hv_netvsc_packet + * structure. 
+ */ + ret = skb_cow_head(skb, RNDIS_AND_PPI_SIZE); if (ret) { netdev_err(net, "unable to alloc hv_netvsc_packet\n"); ret = -ENOMEM; goto drop; } - /* Use the headroom for building up the packet */ - packet = (struct hv_netvsc_packet *)skb->head; + /* Use the skb control buffer for building up the packet */ + BUILD_BUG_ON(sizeof(struct hv_netvsc_packet) > + FIELD_SIZEOF(struct sk_buff, cb)); + packet = (struct hv_netvsc_packet *)skb->cb; packet->status = 0; packet->xmit_more = skb->xmit_more; @@ -483,8 +487,7 @@ check_size: packet->is_data_pkt = true; packet->total_data_buflen = skb->len; - rndis_msg = (struct rndis_message *)((unsigned long)packet + - sizeof(struct hv_netvsc_packet)); + rndis_msg = (struct rndis_message *)skb->head; memset(rndis_msg, 0, RNDIS_AND_PPI_SIZE); @@ -1118,16 +1121,12 @@ static int netvsc_probe(struct hv_device *dev, struct netvsc_device_info device_info; struct netvsc_device *nvdev; int ret; - u32 max_needed_headroom; net = alloc_etherdev_mq(sizeof(struct net_device_context), num_online_cpus()); if (!net) return -ENOMEM; - max_needed_headroom = sizeof(struct hv_netvsc_packet) + - RNDIS_AND_PPI_SIZE; - netif_carrier_off(net); net_device_ctx = netdev_priv(net); @@ -1166,13 +1165,6 @@ static int netvsc_probe(struct hv_device *dev, net->ethtool_ops = ðtool_ops; SET_NETDEV_DEV(net, &dev->device); - /* - * Request additional head room in the skb. - * We will use this space to build the rndis - * heaser and other state we need to maintain. - */ - net->needed_headroom = max_needed_headroom; - /* Notify the netvsc driver of the new device */ memset(&device_info, 0, sizeof(device_info)); device_info.ring_size = ring_size; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7d2d1d7aaec7..fcbc5259c630 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -132,7 +132,9 @@ static inline bool dev_xmit_complete(int rc) * used. */ -#if defined(CONFIG_WLAN) || IS_ENABLED(CONFIG_AX25) +#if defined(CONFIG_HYPERV_NET) +# define LL_MAX_HEADER 128 +#elif defined(CONFIG_WLAN) || IS_ENABLED(CONFIG_AX25) # if defined(CONFIG_MAC80211_MESH) # define LL_MAX_HEADER 128 # else -- cgit v1.2.3-71-gd317 From c981e4213e9d2d4ec79501bd607722ec712742a2 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Dec 2015 12:12:06 +0100 Subject: net: add netif_is_team_master helper Similar to other helpers, caller can use this to find out if device is team master. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/team/team.c | 1 + include/linux/netdevice.h | 8 ++++++++ 2 files changed, 9 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 651d35ea22c5..d2f3ee832c47 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -2054,6 +2054,7 @@ static void team_setup(struct net_device *dev) dev->flags |= IFF_MULTICAST; dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING); dev->priv_flags |= IFF_NO_QUEUE; + dev->priv_flags |= IFF_TEAM; /* * Indicate we support unicast address filtering. 
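The hv_netvsc change above leans on two generic facts: skb->cb is a 48-byte scratch area owned by whichever layer currently holds the skb, and BUILD_BUG_ON() with FIELD_SIZEOF() turns an overflow of it into a compile-time error. A minimal sketch of the same pattern for a hypothetical driver-private per-packet context (my_ names are placeholders):

#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>

/* Hypothetical per-packet bookkeeping; must fit in skb->cb (48 bytes). */
struct my_xmit_ctx {
        u32 total_len;
        u16 q_idx;
        u8  is_data;
};

static inline struct my_xmit_ctx *my_xmit_ctx(struct sk_buff *skb)
{
        BUILD_BUG_ON(sizeof(struct my_xmit_ctx) >
                     FIELD_SIZEOF(struct sk_buff, cb));
        return (struct my_xmit_ctx *)skb->cb;
}

static netdev_tx_t my_start_xmit(struct sk_buff *skb, struct net_device *net)
{
        struct my_xmit_ctx *ctx = my_xmit_ctx(skb);

        ctx->total_len = skb->len;
        ctx->q_idx = skb_get_queue_mapping(skb);
        ctx->is_data = 1;

        /* ctx rides along in skb->cb for as long as this driver owns the
         * skb, which is how netvsc_start_xmit() now carries its
         * hv_netvsc_packet.
         */
        return NETDEV_TX_OK;
}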
That way core won't diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index fcbc5259c630..2b889be65d88 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1273,6 +1273,7 @@ struct net_device_ops { * @IFF_NO_QUEUE: device can run without qdisc attached * @IFF_OPENVSWITCH: device is a Open vSwitch master * @IFF_L3MDEV_SLAVE: device is enslaved to an L3 master device + * @IFF_TEAM: device is a team device */ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, @@ -1299,6 +1300,7 @@ enum netdev_priv_flags { IFF_NO_QUEUE = 1<<21, IFF_OPENVSWITCH = 1<<22, IFF_L3MDEV_SLAVE = 1<<23, + IFF_TEAM = 1<<24, }; #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN @@ -1325,6 +1327,7 @@ enum netdev_priv_flags { #define IFF_NO_QUEUE IFF_NO_QUEUE #define IFF_OPENVSWITCH IFF_OPENVSWITCH #define IFF_L3MDEV_SLAVE IFF_L3MDEV_SLAVE +#define IFF_TEAM IFF_TEAM /** * struct net_device - The DEVICE structure. @@ -3889,6 +3892,11 @@ static inline bool netif_is_ovs_master(const struct net_device *dev) return dev->priv_flags & IFF_OPENVSWITCH; } +static inline bool netif_is_team_master(struct net_device *dev) +{ + return dev->priv_flags & IFF_TEAM; +} + /* This device needs to keep skb dst for qdisc enqueue or ndo_start_xmit() */ static inline void netif_keep_dst(struct net_device *dev) { -- cgit v1.2.3-71-gd317 From f7f019ee6d117de5007d0b10e7960696bbf111eb Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Dec 2015 12:12:07 +0100 Subject: net: add netif_is_team_port helper Similar to other helpers, caller can use this to find out if device is team port. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2b889be65d88..b3601f8a9b42 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3897,6 +3897,11 @@ static inline bool netif_is_team_master(struct net_device *dev) return dev->priv_flags & IFF_TEAM; } +static inline bool netif_is_team_port(struct net_device *dev) +{ + return dev->priv_flags & IFF_TEAM_PORT; +} + /* This device needs to keep skb dst for qdisc enqueue or ndo_start_xmit() */ static inline void netif_keep_dst(struct net_device *dev) { -- cgit v1.2.3-71-gd317 From 7be61833042e7757745345eedc7b0efee240c189 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Dec 2015 12:12:08 +0100 Subject: net: add netif_is_lag_master helper Some code does not mind if the master is bond or team and treats them the same, as generic LAG. Signed-off-by: Jiri Pirko Signed-off-by: David S. 
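With IFF_TEAM in place, team masters and team ports become recognizable through priv_flags just like bonds and bridges, and the following patches fold these pairs into netif_is_lag_master()/netif_is_lag_port(). A small illustrative classifier using the two new predicates alongside the existing bond ones (the function itself is hypothetical):

#include <linux/netdevice.h>

static void my_classify_netdev(struct net_device *dev)
{
        if (netif_is_team_master(dev))
                netdev_info(dev, "team master\n");
        else if (netif_is_team_port(dev))
                netdev_info(dev, "team port\n");
        else if (netif_is_bond_master(dev))
                netdev_info(dev, "bond master\n");
        else if (netif_is_bond_slave(dev))
                netdev_info(dev, "bond slave\n");
}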
Miller --- include/linux/netdevice.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b3601f8a9b42..3ca083efa560 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3902,6 +3902,11 @@ static inline bool netif_is_team_port(struct net_device *dev) return dev->priv_flags & IFF_TEAM_PORT; } +static inline bool netif_is_lag_master(struct net_device *dev) +{ + return netif_is_bond_master(dev) || netif_is_team_master(dev); +} + /* This device needs to keep skb dst for qdisc enqueue or ndo_start_xmit() */ static inline void netif_keep_dst(struct net_device *dev) { -- cgit v1.2.3-71-gd317 From e0ba1414f310c83bf425fe26fa2cd5f1befcd6dc Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Dec 2015 12:12:09 +0100 Subject: net: add netif_is_lag_port helper Some code does not mind if a device is bond slave or team port and treats them the same, as generic LAG ports. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3ca083efa560..1506be58c59a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3907,6 +3907,11 @@ static inline bool netif_is_lag_master(struct net_device *dev) return netif_is_bond_master(dev) || netif_is_team_master(dev); } +static inline bool netif_is_lag_port(struct net_device *dev) +{ + return netif_is_bond_slave(dev) || netif_is_team_port(dev); +} + /* This device needs to keep skb dst for qdisc enqueue or ndo_start_xmit() */ static inline void netif_keep_dst(struct net_device *dev) { -- cgit v1.2.3-71-gd317 From 6dffb0447c25476f499d205dfceb1972e8dae919 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Dec 2015 12:12:10 +0100 Subject: net: propagate upper priv via netdev_master_upper_dev_link Eliminate netdev_master_upper_dev_link_private and pass priv directly as a parameter of netdev_master_upper_dev_link. Signed-off-by: Jiri Pirko Signed-off-by: David S. 
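netif_is_lag_master() and netif_is_lag_port() let listeners stop caring which LAG implementation sits on top. A sketch of a netdevice notifier, registered with register_netdevice_notifier(), that reacts only to ports joining or leaving a LAG; the function name is hypothetical, the changeupper info fields are the existing ones visible in the diffs that follow:

#include <linux/netdevice.h>
#include <linux/notifier.h>

static int my_netdev_event(struct notifier_block *nb,
                           unsigned long event, void *ptr)
{
        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
        struct netdev_notifier_changeupper_info *info = ptr;

        if (event != NETDEV_CHANGEUPPER)
                return NOTIFY_DONE;
        if (!netif_is_lag_master(info->upper_dev))
                return NOTIFY_DONE;

        if (info->linking)
                netdev_info(dev, "joined LAG %s\n", info->upper_dev->name);
        else
                netdev_info(dev, "left LAG %s\n", info->upper_dev->name);

        return NOTIFY_DONE;
}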
Miller --- drivers/net/bonding/bond_main.c | 2 +- drivers/net/team/team.c | 2 +- drivers/net/vrf.c | 2 +- include/linux/netdevice.h | 6 ++---- net/batman-adv/hard-interface.c | 3 ++- net/bridge/br_if.c | 2 +- net/core/dev.c | 18 ++++++------------ net/openvswitch/vport-netdev.c | 2 +- 8 files changed, 15 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 9e0f8a7ef8b1..924015729b2d 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1204,7 +1204,7 @@ static int bond_master_upper_dev_link(struct net_device *bond_dev, { int err; - err = netdev_master_upper_dev_link_private(slave_dev, bond_dev, slave); + err = netdev_master_upper_dev_link(slave_dev, bond_dev, slave); if (err) return err; slave_dev->flags |= IFF_SLAVE; diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index d2f3ee832c47..b37f8d14dca0 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1083,7 +1083,7 @@ static int team_upper_dev_link(struct net_device *dev, { int err; - err = netdev_master_upper_dev_link(port_dev, dev); + err = netdev_master_upper_dev_link(port_dev, dev, NULL); if (err) return err; port_dev->priv_flags |= IFF_TEAM_PORT; diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index c2d54c4ed556..59c5bddeaedd 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -624,7 +624,7 @@ static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev) goto out_fail; } - ret = netdev_master_upper_dev_link(port_dev, dev); + ret = netdev_master_upper_dev_link(port_dev, dev, NULL); if (ret < 0) goto out_unregister; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1506be58c59a..939b8f3de810 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3619,10 +3619,8 @@ struct net_device *netdev_master_upper_dev_get(struct net_device *dev); struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev); int netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev); int netdev_master_upper_dev_link(struct net_device *dev, - struct net_device *upper_dev); -int netdev_master_upper_dev_link_private(struct net_device *dev, - struct net_device *upper_dev, - void *private); + struct net_device *upper_dev, + void *upper_priv); void netdev_upper_dev_unlink(struct net_device *dev, struct net_device *upper_dev); void netdev_adjacent_rename_links(struct net_device *dev, char *oldname); diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index f11345e163d7..a7f4f1085dbb 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -464,7 +464,8 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, hard_iface->soft_iface = soft_iface; bat_priv = netdev_priv(hard_iface->soft_iface); - ret = netdev_master_upper_dev_link(hard_iface->net_dev, soft_iface); + ret = netdev_master_upper_dev_link(hard_iface->net_dev, + soft_iface, NULL); if (ret) goto err_dev; diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index ec02f5869a78..781abc34667a 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -493,7 +493,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) dev->priv_flags |= IFF_BRIDGE_PORT; - err = netdev_master_upper_dev_link(dev, br->dev); + err = netdev_master_upper_dev_link(dev, br->dev, NULL); if (err) goto err5; diff --git a/net/core/dev.c b/net/core/dev.c index 939cd1b1da15..27d052bb78bc 100644 --- 
a/net/core/dev.c +++ b/net/core/dev.c @@ -5421,7 +5421,7 @@ static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev, static int __netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, bool master, - void *private) + void *upper_priv) { struct netdev_notifier_changeupper_info changeupper_info; struct netdev_adjacent *i, *j, *to_i, *to_j; @@ -5452,7 +5452,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, if (ret) return ret; - ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, private, + ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, upper_priv, master); if (ret) return ret; @@ -5557,6 +5557,7 @@ EXPORT_SYMBOL(netdev_upper_dev_link); * netdev_master_upper_dev_link - Add a master link to the upper device * @dev: device * @upper_dev: new upper device + * @upper_priv: upper device private * * Adds a link to device which is upper to this one. In this case, only * one master upper device can be linked, although other non-master devices @@ -5565,20 +5566,13 @@ EXPORT_SYMBOL(netdev_upper_dev_link); * counts are adjusted and the function returns zero. */ int netdev_master_upper_dev_link(struct net_device *dev, - struct net_device *upper_dev) + struct net_device *upper_dev, + void *upper_priv) { - return __netdev_upper_dev_link(dev, upper_dev, true, NULL); + return __netdev_upper_dev_link(dev, upper_dev, true, upper_priv); } EXPORT_SYMBOL(netdev_master_upper_dev_link); -int netdev_master_upper_dev_link_private(struct net_device *dev, - struct net_device *upper_dev, - void *private) -{ - return __netdev_upper_dev_link(dev, upper_dev, true, private); -} -EXPORT_SYMBOL(netdev_master_upper_dev_link_private); - /** * netdev_upper_dev_unlink - Removes a link to upper device * @dev: device diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index b327368a3848..3ee3df1edeae 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -105,7 +105,7 @@ struct vport *ovs_netdev_link(struct vport *vport, const char *name) rtnl_lock(); err = netdev_master_upper_dev_link(vport->dev, - get_dpdev(vport->dp)); + get_dpdev(vport->dp), NULL); if (err) goto error_unlock; -- cgit v1.2.3-71-gd317 From 29bf24afb29042f568fa67b1b0eee46796725ed2 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Dec 2015 12:12:11 +0100 Subject: net: add possibility to pass information about upper device via notifier Sometimes the drivers and other code would find it handy to know some internal information about upper device being changed. So allow upper-code to pass information down to notifier listeners during linking. Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/bonding/bond_main.c | 2 +- drivers/net/team/team.c | 2 +- drivers/net/vrf.c | 2 +- include/linux/netdevice.h | 3 ++- net/batman-adv/hard-interface.c | 2 +- net/bridge/br_if.c | 2 +- net/core/dev.c | 11 +++++++---- net/openvswitch/vport-netdev.c | 2 +- 8 files changed, 15 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 924015729b2d..fa3ed1d8a12d 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1204,7 +1204,7 @@ static int bond_master_upper_dev_link(struct net_device *bond_dev, { int err; - err = netdev_master_upper_dev_link(slave_dev, bond_dev, slave); + err = netdev_master_upper_dev_link(slave_dev, bond_dev, slave, NULL); if (err) return err; slave_dev->flags |= IFF_SLAVE; diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index b37f8d14dca0..f7b6ff7948b8 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1083,7 +1083,7 @@ static int team_upper_dev_link(struct net_device *dev, { int err; - err = netdev_master_upper_dev_link(port_dev, dev, NULL); + err = netdev_master_upper_dev_link(port_dev, dev, NULL, NULL); if (err) return err; port_dev->priv_flags |= IFF_TEAM_PORT; diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 59c5bddeaedd..8944a49cda15 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -624,7 +624,7 @@ static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev) goto out_fail; } - ret = netdev_master_upper_dev_link(port_dev, dev, NULL); + ret = netdev_master_upper_dev_link(port_dev, dev, NULL, NULL); if (ret < 0) goto out_unregister; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 939b8f3de810..aea556c64f2c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2163,6 +2163,7 @@ struct netdev_notifier_changeupper_info { struct net_device *upper_dev; /* new upper dev */ bool master; /* is upper dev master */ bool linking; /* is the nofication for link or unlink */ + void *upper_info; /* upper dev info */ }; static inline void netdev_notifier_info_init(struct netdev_notifier_info *info, @@ -3620,7 +3621,7 @@ struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev); int netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev); int netdev_master_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, - void *upper_priv); + void *upper_priv, void *upper_info); void netdev_upper_dev_unlink(struct net_device *dev, struct net_device *upper_dev); void netdev_adjacent_rename_links(struct net_device *dev, char *oldname); diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index a7f4f1085dbb..aa8867e1d983 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -465,7 +465,7 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, bat_priv = netdev_priv(hard_iface->soft_iface); ret = netdev_master_upper_dev_link(hard_iface->net_dev, - soft_iface, NULL); + soft_iface, NULL, NULL); if (ret) goto err_dev; diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 781abc34667a..8d1d4a22c50d 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -493,7 +493,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) dev->priv_flags |= IFF_BRIDGE_PORT; - err = netdev_master_upper_dev_link(dev, br->dev, NULL); + err = netdev_master_upper_dev_link(dev, br->dev, NULL, NULL); if (err) goto err5; diff --git 
a/net/core/dev.c b/net/core/dev.c index 27d052bb78bc..8ed886663c6d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5421,7 +5421,7 @@ static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev, static int __netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, bool master, - void *upper_priv) + void *upper_priv, void *upper_info) { struct netdev_notifier_changeupper_info changeupper_info; struct netdev_adjacent *i, *j, *to_i, *to_j; @@ -5445,6 +5445,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, changeupper_info.upper_dev = upper_dev; changeupper_info.master = master; changeupper_info.linking = true; + changeupper_info.upper_info = upper_info; ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev, &changeupper_info.info); @@ -5549,7 +5550,7 @@ rollback_mesh: int netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev) { - return __netdev_upper_dev_link(dev, upper_dev, false, NULL); + return __netdev_upper_dev_link(dev, upper_dev, false, NULL, NULL); } EXPORT_SYMBOL(netdev_upper_dev_link); @@ -5558,6 +5559,7 @@ EXPORT_SYMBOL(netdev_upper_dev_link); * @dev: device * @upper_dev: new upper device * @upper_priv: upper device private + * @upper_info: upper info to be passed down via notifier * * Adds a link to device which is upper to this one. In this case, only * one master upper device can be linked, although other non-master devices @@ -5567,9 +5569,10 @@ EXPORT_SYMBOL(netdev_upper_dev_link); */ int netdev_master_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, - void *upper_priv) + void *upper_priv, void *upper_info) { - return __netdev_upper_dev_link(dev, upper_dev, true, upper_priv); + return __netdev_upper_dev_link(dev, upper_dev, true, + upper_priv, upper_info); } EXPORT_SYMBOL(netdev_master_upper_dev_link); diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 3ee3df1edeae..8f4dd4c39bfe 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -105,7 +105,7 @@ struct vport *ovs_netdev_link(struct vport *vport, const char *name) rtnl_lock(); err = netdev_master_upper_dev_link(vport->dev, - get_dpdev(vport->dp), NULL); + get_dpdev(vport->dp), NULL, NULL); if (err) goto error_unlock; -- cgit v1.2.3-71-gd317 From 764f5e544118508add420724789f46e04dba91eb Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Dec 2015 12:12:12 +0100 Subject: net: add info struct for LAG changeupper This struct will be shared by bonding and team to pass internal information to notifier listeners. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index aea556c64f2c..3ab90ea0ed03 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2110,6 +2110,19 @@ struct pcpu_sw_netstats { #define netdev_alloc_pcpu_stats(type) \ __netdev_alloc_pcpu_stats(type, GFP_KERNEL); +enum netdev_lag_tx_type { + NETDEV_LAG_TX_TYPE_UNKNOWN, + NETDEV_LAG_TX_TYPE_RANDOM, + NETDEV_LAG_TX_TYPE_BROADCAST, + NETDEV_LAG_TX_TYPE_ROUNDROBIN, + NETDEV_LAG_TX_TYPE_ACTIVEBACKUP, + NETDEV_LAG_TX_TYPE_HASH, +}; + +struct netdev_lag_upper_info { + enum netdev_lag_tx_type tx_type; +}; + #include /* netdevice notifier chain. 
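struct netdev_lag_upper_info is meant to travel in the upper_info argument introduced by the previous patch, and the team patch that follows fills it in for real. A generic sketch of the producer side (my_ names are placeholders; the tx_type value would come from the LAG mode in use):

#include <linux/netdevice.h>

static int my_lag_link_port(struct net_device *lag_dev,
                            struct net_device *port_dev,
                            void *port_priv)
{
        struct netdev_lag_upper_info lag_upper_info = {
                .tx_type = NETDEV_LAG_TX_TYPE_HASH,     /* per LAG mode */
        };

        /* upper_priv is stored on the adjacency itself; upper_info only
         * travels inside the PRECHANGEUPPER/CHANGEUPPER notifications.
         */
        return netdev_master_upper_dev_link(port_dev, lag_dev,
                                            port_priv, &lag_upper_info);
}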
Please remember to update the rtnetlink -- cgit v1.2.3-71-gd317 From 8fd728566a354f7bc9cb6e781f185b8c39cf505b Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Dec 2015 12:12:13 +0100 Subject: team: fill-up LAG changeupper info struct and pass it along Initialize netdev_lag_upper_info structure by TX type according to current team mode and pass it along via netdev_master_upper_dev_link. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/team/team.c | 23 ++++++++++++----------- drivers/net/team/team_mode_activebackup.c | 1 + drivers/net/team/team_mode_broadcast.c | 1 + drivers/net/team/team_mode_loadbalance.c | 1 + drivers/net/team/team_mode_random.c | 1 + drivers/net/team/team_mode_roundrobin.c | 1 + include/linux/if_team.h | 1 + 7 files changed, 18 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index f7b6ff7948b8..dd1504bbb4a7 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1078,23 +1078,24 @@ static void team_port_disable_netpoll(struct team_port *port) } #endif -static int team_upper_dev_link(struct net_device *dev, - struct net_device *port_dev) +static int team_upper_dev_link(struct team *team, struct team_port *port) { + struct netdev_lag_upper_info lag_upper_info; int err; - err = netdev_master_upper_dev_link(port_dev, dev, NULL, NULL); + lag_upper_info.tx_type = team->mode->lag_tx_type; + err = netdev_master_upper_dev_link(port->dev, team->dev, NULL, + &lag_upper_info); if (err) return err; - port_dev->priv_flags |= IFF_TEAM_PORT; + port->dev->priv_flags |= IFF_TEAM_PORT; return 0; } -static void team_upper_dev_unlink(struct net_device *dev, - struct net_device *port_dev) +static void team_upper_dev_unlink(struct team *team, struct team_port *port) { - netdev_upper_dev_unlink(port_dev, dev); - port_dev->priv_flags &= ~IFF_TEAM_PORT; + netdev_upper_dev_unlink(port->dev, team->dev); + port->dev->priv_flags &= ~IFF_TEAM_PORT; } static void __team_port_change_port_added(struct team_port *port, bool linkup); @@ -1194,7 +1195,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev) goto err_handler_register; } - err = team_upper_dev_link(dev, port_dev); + err = team_upper_dev_link(team, port); if (err) { netdev_err(dev, "Device %s failed to set upper link\n", portname); @@ -1220,7 +1221,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev) return 0; err_option_port_add: - team_upper_dev_unlink(dev, port_dev); + team_upper_dev_unlink(team, port); err_set_upper_link: netdev_rx_handler_unregister(port_dev); @@ -1264,7 +1265,7 @@ static int team_port_del(struct team *team, struct net_device *port_dev) team_port_disable(team, port); list_del_rcu(&port->list); - team_upper_dev_unlink(dev, port_dev); + team_upper_dev_unlink(team, port); netdev_rx_handler_unregister(port_dev); team_port_disable_netpoll(port); vlan_vids_del_by_dev(port_dev, dev); diff --git a/drivers/net/team/team_mode_activebackup.c b/drivers/net/team/team_mode_activebackup.c index 40fd3381b693..3f189823ba3b 100644 --- a/drivers/net/team/team_mode_activebackup.c +++ b/drivers/net/team/team_mode_activebackup.c @@ -127,6 +127,7 @@ static const struct team_mode ab_mode = { .owner = THIS_MODULE, .priv_size = sizeof(struct ab_priv), .ops = &ab_mode_ops, + .lag_tx_type = NETDEV_LAG_TX_TYPE_ACTIVEBACKUP, }; static int __init ab_init_module(void) diff --git a/drivers/net/team/team_mode_broadcast.c b/drivers/net/team/team_mode_broadcast.c index 
c366cd299c06..302ff35b0cbc 100644 --- a/drivers/net/team/team_mode_broadcast.c +++ b/drivers/net/team/team_mode_broadcast.c @@ -56,6 +56,7 @@ static const struct team_mode bc_mode = { .kind = "broadcast", .owner = THIS_MODULE, .ops = &bc_mode_ops, + .lag_tx_type = NETDEV_LAG_TX_TYPE_BROADCAST, }; static int __init bc_init_module(void) diff --git a/drivers/net/team/team_mode_loadbalance.c b/drivers/net/team/team_mode_loadbalance.c index a1536d0d83a9..cdb19b385d42 100644 --- a/drivers/net/team/team_mode_loadbalance.c +++ b/drivers/net/team/team_mode_loadbalance.c @@ -661,6 +661,7 @@ static const struct team_mode lb_mode = { .priv_size = sizeof(struct lb_priv), .port_priv_size = sizeof(struct lb_port_priv), .ops = &lb_mode_ops, + .lag_tx_type = NETDEV_LAG_TX_TYPE_HASH, }; static int __init lb_init_module(void) diff --git a/drivers/net/team/team_mode_random.c b/drivers/net/team/team_mode_random.c index cd2f692b8074..215f845782db 100644 --- a/drivers/net/team/team_mode_random.c +++ b/drivers/net/team/team_mode_random.c @@ -46,6 +46,7 @@ static const struct team_mode rnd_mode = { .kind = "random", .owner = THIS_MODULE, .ops = &rnd_mode_ops, + .lag_tx_type = NETDEV_LAG_TX_TYPE_RANDOM, }; static int __init rnd_init_module(void) diff --git a/drivers/net/team/team_mode_roundrobin.c b/drivers/net/team/team_mode_roundrobin.c index 53665850b59e..0aa234118c03 100644 --- a/drivers/net/team/team_mode_roundrobin.c +++ b/drivers/net/team/team_mode_roundrobin.c @@ -58,6 +58,7 @@ static const struct team_mode rr_mode = { .owner = THIS_MODULE, .priv_size = sizeof(struct rr_priv), .ops = &rr_mode_ops, + .lag_tx_type = NETDEV_LAG_TX_TYPE_ROUNDROBIN, }; static int __init rr_init_module(void) diff --git a/include/linux/if_team.h b/include/linux/if_team.h index a6aa970758a2..b84e49c3a738 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -164,6 +164,7 @@ struct team_mode { size_t priv_size; size_t port_priv_size; const struct team_mode_ops *ops; + enum netdev_lag_tx_type lag_tx_type; }; #define TEAM_PORT_HASHBITS 4 -- cgit v1.2.3-71-gd317 From 04d482660a07039fc4e9a42bb3517db236d98f96 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Dec 2015 12:12:15 +0100 Subject: net: introduce change lower state notifier When lower device like bonding slave, team/bridge port, etc changes its state, it is useful for others to notice this change. Currently this is implemented specificly for bonding as NETDEV_BONDING_INFO notifier. This patch aims to replace this specific usage and make this more generic to be used for all upper-lower devices. Introduce NETDEV_CHANGELOWERSTATE netdev notifier type and netdev_lower_state_changed() helper. Signed-off-by: Jiri Pirko Signed-off-by: David S. 
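On the consumer side, a switch driver that offloads LAGs can inspect the same structure at NETDEV_PRECHANGEUPPER time and veto configurations its hardware cannot express, before the link is actually established. A hedged sketch; the notifier name and the hash-only hardware restriction are hypothetical, the info fields and notifier_from_errno() are standard:

#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/notifier.h>

static int my_switch_event(struct notifier_block *nb,
                           unsigned long event, void *ptr)
{
        struct netdev_notifier_changeupper_info *info = ptr;
        struct netdev_lag_upper_info *lag_info;

        if (event != NETDEV_PRECHANGEUPPER)
                return NOTIFY_DONE;
        if (!info->master || !info->linking)
                return NOTIFY_DONE;
        if (!netif_is_lag_master(info->upper_dev))
                return NOTIFY_DONE;

        lag_info = info->upper_info;
        if (lag_info && lag_info->tx_type != NETDEV_LAG_TX_TYPE_HASH)
                return notifier_from_errno(-EOPNOTSUPP);

        return NOTIFY_DONE;
}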
Miller --- include/linux/netdevice.h | 8 ++++++++ net/core/dev.c | 20 ++++++++++++++++++++ 2 files changed, 28 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3ab90ea0ed03..ad69f237aa78 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2158,6 +2158,7 @@ struct netdev_lag_upper_info { #define NETDEV_CHANGEINFODATA 0x0018 #define NETDEV_BONDING_INFO 0x0019 #define NETDEV_PRECHANGEUPPER 0x001A +#define NETDEV_CHANGELOWERSTATE 0x001B int register_netdevice_notifier(struct notifier_block *nb); int unregister_netdevice_notifier(struct notifier_block *nb); @@ -2179,6 +2180,11 @@ struct netdev_notifier_changeupper_info { void *upper_info; /* upper dev info */ }; +struct netdev_notifier_changelowerstate_info { + struct netdev_notifier_info info; /* must be first */ + void *lower_state_info; /* is lower dev state */ +}; + static inline void netdev_notifier_info_init(struct netdev_notifier_info *info, struct net_device *dev) { @@ -3640,6 +3646,8 @@ void netdev_upper_dev_unlink(struct net_device *dev, void netdev_adjacent_rename_links(struct net_device *dev, char *oldname); void *netdev_lower_dev_get_private(struct net_device *dev, struct net_device *lower_dev); +void netdev_lower_state_changed(struct net_device *lower_dev, + void *lower_state_info); /* RSS keys are 40 or 52 bytes long */ #define NETDEV_RSS_KEY_LEN 52 diff --git a/net/core/dev.c b/net/core/dev.c index 8ed886663c6d..d1706e88fbeb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5756,6 +5756,26 @@ int dev_get_nest_level(struct net_device *dev, } EXPORT_SYMBOL(dev_get_nest_level); +/** + * netdev_lower_change - Dispatch event about lower device state change + * @lower_dev: device + * @lower_state_info: state to dispatch + * + * Send NETDEV_CHANGELOWERSTATE to netdev notifiers with info. + * The caller must hold the RTNL lock. + */ +void netdev_lower_state_changed(struct net_device *lower_dev, + void *lower_state_info) +{ + struct netdev_notifier_changelowerstate_info changelowerstate_info; + + ASSERT_RTNL(); + changelowerstate_info.lower_state_info = lower_state_info; + call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE, lower_dev, + &changelowerstate_info.info); +} +EXPORT_SYMBOL(netdev_lower_state_changed); + static void dev_change_rx_flags(struct net_device *dev, int flags) { const struct net_device_ops *ops = dev->netdev_ops; -- cgit v1.2.3-71-gd317 From fb1b2e3ce53aef80b3cef71f3885d584cdbdc6b8 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Dec 2015 12:12:16 +0100 Subject: net: introduce lower state changed info structure for LAG lowers This is shared info structure for bonding and team. Serves to pass down info about link state and port activity to notification listeners. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ad69f237aa78..fa84b59eb197 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2123,6 +2123,11 @@ struct netdev_lag_upper_info { enum netdev_lag_tx_type tx_type; }; +struct netdev_lag_lower_state_info { + u8 link_up : 1, + tx_enabled : 1; +}; + #include /* netdevice notifier chain. 
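Putting the last two patches together: the LAG master reports per-port state via netdev_lower_state_changed(), and listeners receive it as NETDEV_CHANGELOWERSTATE. A sketch of both halves; my_ names are hypothetical, the RTNL requirement and the field names come from the diffs above:

#include <linux/netdevice.h>
#include <linux/notifier.h>

/* Producer: the LAG master reports per-port state, under RTNL
 * (netdev_lower_state_changed() asserts it).
 */
static void my_lag_port_state_change(struct net_device *port_dev,
                                     bool link_up, bool tx_enabled)
{
        struct netdev_lag_lower_state_info info = {
                .link_up    = link_up,
                .tx_enabled = tx_enabled,
        };

        netdev_lower_state_changed(port_dev, &info);
}

/* Consumer: a listener picks the state up from the notification. */
static int my_lowerstate_event(struct notifier_block *nb,
                               unsigned long event, void *ptr)
{
        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
        struct netdev_notifier_changelowerstate_info *info = ptr;
        struct netdev_lag_lower_state_info *lag_state;

        if (event != NETDEV_CHANGELOWERSTATE || !netif_is_lag_port(dev))
                return NOTIFY_DONE;

        lag_state = info->lower_state_info;
        if (lag_state && lag_state->link_up && lag_state->tx_enabled)
                netdev_info(dev, "port active in its LAG\n");

        return NOTIFY_DONE;
}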
Please remember to update the rtnetlink -- cgit v1.2.3-71-gd317 From fc50db98ff872372f266695858f87a12eb1b4f05 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Tue, 1 Dec 2015 18:03:09 +0200 Subject: net/mlx5_core: Add base sriov support This patch adds SRIOV base support for mlx5 supported devices. The same driver is used for both PFs and VFs; VFs are identified by the driver through the flag MLX5_PCI_DEV_IS_VF added to the pci table entries. Virtual functions are created as usual through writing a value to the sriov_numvs sysfs file of the PF device. Upon instantiating VFs, they will all be probed by the driver on the hypervisor. One can gracefully unbind them through /sys/bus/pci/drivers/mlx5_core/unbind. mlx5_wait_for_vf_pages() was added to ensure that when a VF dies without executing proper teardown, the hypervisor driver waits till all of the pages that were allocated at the hypervisor to maintain its operation are returned. In order for the VF to be operational, the PF needs to call enable_hca for it. This can be done before the VFs are created through a call to pci_enable_sriov. If the there are VFs assigned to a VMs when the driver of the PF is unloaded, all the VF will experience system error and PF driver unloads cleanly; in this case pci_disable_sriov is not called and the devices will show when running lspci. Once the PF driver is reloaded, it will sync its data structures which maintain state on its VFs. Signed-off-by: Eli Cohen Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/Makefile | 2 +- drivers/net/ethernet/mellanox/mlx5/core/main.c | 36 +++- .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 2 + .../net/ethernet/mellanox/mlx5/core/pagealloc.c | 38 ++++ drivers/net/ethernet/mellanox/mlx5/core/sriov.c | 221 +++++++++++++++++++++ include/linux/mlx5/driver.h | 24 +++ include/linux/mlx5/mlx5_ifc.h | 4 +- 7 files changed, 318 insertions(+), 9 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/sriov.c (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 26a68b8af2c5..4d5103911527 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -2,7 +2,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ - mad.o transobj.o vport.o + mad.o transobj.o vport.o sriov.o mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o flow_table.o \ en_main.o en_flow_table.o en_ethtool.o en_tx.o en_rx.o \ en_txrx.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index f2e64dc1a443..66e2b37cfbbf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -454,6 +454,9 @@ static int set_hca_ctrl(struct mlx5_core_dev *dev) struct mlx5_reg_host_endianess he_out; int err; + if (!mlx5_core_is_pf(dev)) + return 0; + memset(&he_in, 0, sizeof(he_in)); he_in.he = MLX5_SET_HOST_ENDIANNESS; err = mlx5_core_access_reg(dev, &he_in, sizeof(he_in), @@ -1049,6 +1052,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) mlx5_init_srq_table(dev); mlx5_init_mr_table(dev); + err = mlx5_sriov_init(dev); + if (err) { + dev_err(&pdev->dev, "sriov init failed %d\n", err); + goto err_sriov; + } + err = mlx5_register_device(dev); if (err) { dev_err(&pdev->dev, 
"mlx5_register_device failed %d\n", err); @@ -1065,6 +1074,10 @@ out: return 0; +err_sriov: + if (mlx5_sriov_cleanup(dev)) + dev_err(&dev->pdev->dev, "sriov cleanup failed\n"); + err_reg_dev: mlx5_cleanup_mr_table(dev); mlx5_cleanup_srq_table(dev); @@ -1120,6 +1133,13 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) { int err = 0; + err = mlx5_sriov_cleanup(dev); + if (err) { + dev_warn(&dev->pdev->dev, "%s: sriov cleanup failed - abort\n", + __func__); + return err; + } + mutex_lock(&dev->intf_state_mutex); if (dev->interface_state == MLX5_INTERFACE_STATE_DOWN) { dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n", @@ -1192,6 +1212,7 @@ static int init_one(struct pci_dev *pdev, return -ENOMEM; } priv = &dev->priv; + priv->pci_dev_data = id->driver_data; pci_set_drvdata(pdev, dev); @@ -1362,12 +1383,12 @@ static const struct pci_error_handlers mlx5_err_handler = { }; static const struct pci_device_id mlx5_core_pci_table[] = { - { PCI_VDEVICE(MELLANOX, 0x1011) }, /* Connect-IB */ - { PCI_VDEVICE(MELLANOX, 0x1012) }, /* Connect-IB VF */ - { PCI_VDEVICE(MELLANOX, 0x1013) }, /* ConnectX-4 */ - { PCI_VDEVICE(MELLANOX, 0x1014) }, /* ConnectX-4 VF */ - { PCI_VDEVICE(MELLANOX, 0x1015) }, /* ConnectX-4LX */ - { PCI_VDEVICE(MELLANOX, 0x1016) }, /* ConnectX-4LX VF */ + { PCI_VDEVICE(MELLANOX, 0x1011) }, /* Connect-IB */ + { PCI_VDEVICE(MELLANOX, 0x1012), MLX5_PCI_DEV_IS_VF}, /* Connect-IB VF */ + { PCI_VDEVICE(MELLANOX, 0x1013) }, /* ConnectX-4 */ + { PCI_VDEVICE(MELLANOX, 0x1014), MLX5_PCI_DEV_IS_VF}, /* ConnectX-4 VF */ + { PCI_VDEVICE(MELLANOX, 0x1015) }, /* ConnectX-4LX */ + { PCI_VDEVICE(MELLANOX, 0x1016), MLX5_PCI_DEV_IS_VF}, /* ConnectX-4LX VF */ { 0, } }; @@ -1378,7 +1399,8 @@ static struct pci_driver mlx5_core_driver = { .id_table = mlx5_core_pci_table, .probe = init_one, .remove = remove_one, - .err_handler = &mlx5_err_handler + .err_handler = &mlx5_err_handler, + .sriov_configure = mlx5_core_sriov_configure, }; static int __init init(void) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 1ed2239a6a6d..1649d5cf9e29 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -90,8 +90,10 @@ void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, unsigned long param); void mlx5_enter_error_state(struct mlx5_core_dev *dev); void mlx5_disable_device(struct mlx5_core_dev *dev); +int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs); int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id); int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id); +int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev); void mlx5e_init(void); void mlx5e_cleanup(void); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index 4d3377b12657..9eeee0545f1c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include "mlx5_core.h" @@ -95,6 +96,7 @@ struct mlx5_manage_pages_outbox { enum { MAX_RECLAIM_TIME_MSECS = 5000, + MAX_RECLAIM_VFS_PAGES_TIME_MSECS = 2 * 1000 * 60, }; enum { @@ -352,6 +354,10 @@ retry: goto out_4k; } + dev->priv.fw_pages += npages; + if (func_id) + dev->priv.vfs_pages += npages; + mlx5_core_dbg(dev, "err %d\n", err); kvfree(in); @@ -405,6 +411,12 @@ static int 
reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, } num_claimed = be32_to_cpu(out->num_entries); + if (num_claimed > npages) { + mlx5_core_warn(dev, "fw returned %d, driver asked %d => corruption\n", + num_claimed, npages); + err = -EINVAL; + goto out_free; + } if (nclaimed) *nclaimed = num_claimed; @@ -412,6 +424,9 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, addr = be64_to_cpu(out->pas[i]); free_4k(dev, addr); } + dev->priv.fw_pages -= num_claimed; + if (func_id) + dev->priv.vfs_pages -= num_claimed; out_free: kvfree(out); @@ -548,3 +563,26 @@ void mlx5_pagealloc_stop(struct mlx5_core_dev *dev) { destroy_workqueue(dev->priv.pg_wq); } + +int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev) +{ + unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS); + int prev_vfs_pages = dev->priv.vfs_pages; + + mlx5_core_dbg(dev, "Waiting for %d pages from %s\n", prev_vfs_pages, + dev->priv.name); + while (dev->priv.vfs_pages) { + if (time_after(jiffies, end)) { + mlx5_core_warn(dev, "aborting while there are %d pending pages\n", dev->priv.vfs_pages); + return -ETIMEDOUT; + } + if (dev->priv.vfs_pages < prev_vfs_pages) { + end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS); + prev_vfs_pages = dev->priv.vfs_pages; + } + msleep(50); + } + + mlx5_core_dbg(dev, "All pages received from %s\n", dev->priv.name); + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c new file mode 100644 index 000000000000..19a43240e359 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -0,0 +1,221 @@ +/* + * Copyright (c) 2014, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include +#include +#include "mlx5_core.h" + +static void enable_vfs(struct mlx5_core_dev *dev, int num_vfs) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + int err; + int vf; + + for (vf = 1; vf <= num_vfs; vf++) { + err = mlx5_core_enable_hca(dev, vf); + if (err) { + mlx5_core_warn(dev, "failed to enable VF %d\n", vf - 1); + } else { + sriov->vfs_ctx[vf - 1].enabled = 1; + mlx5_core_dbg(dev, "successfully enabled VF %d\n", vf - 1); + } + } +} + +static void disable_vfs(struct mlx5_core_dev *dev, int num_vfs) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + int vf; + + for (vf = 1; vf <= num_vfs; vf++) { + if (sriov->vfs_ctx[vf - 1].enabled) { + if (mlx5_core_disable_hca(dev, vf)) + mlx5_core_warn(dev, "failed to disable VF %d\n", vf - 1); + else + sriov->vfs_ctx[vf - 1].enabled = 0; + } + } +} + +static int mlx5_core_create_vfs(struct pci_dev *pdev, int num_vfs) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + int err; + + if (pci_num_vf(pdev)) + pci_disable_sriov(pdev); + + enable_vfs(dev, num_vfs); + + err = pci_enable_sriov(pdev, num_vfs); + if (err) { + dev_warn(&pdev->dev, "enable sriov failed %d\n", err); + goto ex; + } + + return 0; + +ex: + disable_vfs(dev, num_vfs); + return err; +} + +static int mlx5_core_sriov_enable(struct pci_dev *pdev, int num_vfs) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + int err; + + kfree(sriov->vfs_ctx); + sriov->vfs_ctx = kcalloc(num_vfs, sizeof(*sriov->vfs_ctx), GFP_ATOMIC); + if (!sriov->vfs_ctx) + return -ENOMEM; + + sriov->enabled_vfs = num_vfs; + err = mlx5_core_create_vfs(pdev, num_vfs); + if (err) { + kfree(sriov->vfs_ctx); + sriov->vfs_ctx = NULL; + return err; + } + + return 0; +} + +static void mlx5_core_init_vfs(struct mlx5_core_dev *dev, int num_vfs) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + + sriov->num_vfs = num_vfs; +} + +static void mlx5_core_cleanup_vfs(struct mlx5_core_dev *dev) +{ + struct mlx5_core_sriov *sriov; + + sriov = &dev->priv.sriov; + disable_vfs(dev, sriov->num_vfs); + + if (mlx5_wait_for_vf_pages(dev)) + mlx5_core_warn(dev, "timeout claiming VFs pages\n"); + + sriov->num_vfs = 0; +} + +int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + int err; + + mlx5_core_dbg(dev, "requsted num_vfs %d\n", num_vfs); + if (!mlx5_core_is_pf(dev)) + return -EPERM; + + mlx5_core_cleanup_vfs(dev); + + if (!num_vfs) { + kfree(sriov->vfs_ctx); + sriov->vfs_ctx = NULL; + if (!pci_vfs_assigned(pdev)) + pci_disable_sriov(pdev); + else + pr_info("unloading PF driver while leaving orphan VFs\n"); + + return 0; + } + + err = mlx5_core_sriov_enable(pdev, num_vfs); + if (err) { + dev_warn(&pdev->dev, "mlx5_core_sriov_enable failed %d\n", err); + return err; + } + + mlx5_core_init_vfs(dev, num_vfs); + + return num_vfs; +} + +static int sync_required(struct pci_dev *pdev) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + int cur_vfs = pci_num_vf(pdev); + + if (cur_vfs != sriov->num_vfs) { + pr_info("current VFs %d, registered %d - sync needed\n", cur_vfs, sriov->num_vfs); + return 1; + } + + return 0; +} + +int mlx5_sriov_init(struct mlx5_core_dev *dev) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + struct pci_dev *pdev = dev->pdev; + int cur_vfs; + + if (!mlx5_core_is_pf(dev)) + return 0; + + if (!sync_required(dev->pdev)) + return 0; + + cur_vfs 
= pci_num_vf(pdev); + sriov->vfs_ctx = kcalloc(cur_vfs, sizeof(*sriov->vfs_ctx), GFP_KERNEL); + if (!sriov->vfs_ctx) + return -ENOMEM; + + sriov->enabled_vfs = cur_vfs; + + mlx5_core_init_vfs(dev, cur_vfs); + + enable_vfs(dev, cur_vfs); + + return 0; +} + +int mlx5_sriov_cleanup(struct mlx5_core_dev *dev) +{ + struct pci_dev *pdev = dev->pdev; + int err; + + if (!mlx5_core_is_pf(dev)) + return 0; + + err = mlx5_core_sriov_configure(pdev, 0); + if (err) + return err; + + return 0; +} diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 5c857f2a20d7..efebb87163c8 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -426,6 +426,16 @@ struct mlx5_mr_table { struct radix_tree_root tree; }; +struct mlx5_vf_context { + int enabled; +}; + +struct mlx5_core_sriov { + struct mlx5_vf_context *vfs_ctx; + int num_vfs; + int enabled_vfs; +}; + struct mlx5_irq_info { cpumask_var_t mask; char name[MLX5_MAX_IRQ_NAME]; @@ -447,6 +457,7 @@ struct mlx5_priv { int fw_pages; atomic_t reg_pages; struct list_head free_list; + int vfs_pages; struct mlx5_core_health health; @@ -485,6 +496,8 @@ struct mlx5_priv { struct list_head dev_list; struct list_head ctx_list; spinlock_t ctx_lock; + struct mlx5_core_sriov sriov; + unsigned long pci_dev_data; }; enum mlx5_device_state { @@ -739,6 +752,8 @@ void mlx5_pagealloc_init(struct mlx5_core_dev *dev); void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev); int mlx5_pagealloc_start(struct mlx5_core_dev *dev); void mlx5_pagealloc_stop(struct mlx5_core_dev *dev); +int mlx5_sriov_init(struct mlx5_core_dev *dev); +int mlx5_sriov_cleanup(struct mlx5_core_dev *dev); void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id, s32 npages); int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot); @@ -884,6 +899,15 @@ struct mlx5_profile { } mr_cache[MAX_MR_CACHE_ENTRIES]; }; +enum { + MLX5_PCI_DEV_IS_VF = 1 << 0, +}; + +static inline int mlx5_core_is_pf(struct mlx5_core_dev *dev) +{ + return !(dev->priv.pci_dev_data & MLX5_PCI_DEV_IS_VF); +} + static inline int mlx5_get_gid_table_len(u16 param) { if (param > 4) { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 1565324eb620..9b76fddd696b 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -665,7 +665,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_17[0x1]; u8 ets[0x1]; u8 nic_flow_table[0x1]; - u8 reserved_18[0x4]; + u8 reserved_18_0; + u8 early_vf_enable; + u8 reserved_18[0x2]; u8 local_ca_ack_delay[0x5]; u8 reserved_19[0x6]; u8 port_type[0x2]; -- cgit v1.2.3-71-gd317 From 54f0a411ec72cb437d57d0c9654dcbd0f198ff3a Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 1 Dec 2015 18:03:10 +0200 Subject: net/mlx5: Add HW capabilities and structs for SR-IOV E-Switch Update HCA capabilities and HW struct to include needed capabilities for upcoming Ethernet Switch (SR-IOV E-Switch). Signed-off-by: Saeed Mahameed Signed-off-by: Or Gerlitz Signed-off-by: David S. 
Miller --- include/linux/mlx5/mlx5_ifc.h | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 9b76fddd696b..836cf0e43174 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -665,7 +665,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_17[0x1]; u8 ets[0x1]; u8 nic_flow_table[0x1]; - u8 reserved_18_0; + u8 eswitch_flow_table[0x1]; u8 early_vf_enable; u8 reserved_18[0x2]; u8 local_ca_ack_delay[0x5]; @@ -789,22 +789,30 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_60[0x1b]; u8 log_max_wq_sz[0x5]; - u8 reserved_61[0xa0]; - + u8 nic_vport_change_event[0x1]; + u8 reserved_61[0xa]; + u8 log_max_vlan_list[0x5]; u8 reserved_62[0x3]; + u8 log_max_current_mc_list[0x5]; + u8 reserved_63[0x3]; + u8 log_max_current_uc_list[0x5]; + + u8 reserved_64[0x80]; + + u8 reserved_65[0x3]; u8 log_max_l2_table[0x5]; - u8 reserved_63[0x8]; + u8 reserved_66[0x8]; u8 log_uar_page_sz[0x10]; - u8 reserved_64[0x100]; + u8 reserved_67[0xe0]; - u8 reserved_65[0x1f]; + u8 reserved_68[0x1f]; u8 cqe_zip[0x1]; u8 cqe_zip_timeout[0x10]; u8 cqe_zip_max_num[0x10]; - u8 reserved_66[0x220]; + u8 reserved_69[0x220]; }; enum { @@ -2135,10 +2143,6 @@ struct mlx5_ifc_rmpc_bits { struct mlx5_ifc_wq_bits wq; }; -enum { - MLX5_NIC_VPORT_CONTEXT_ALLOWED_LIST_TYPE_CURRENT_UC_MAC_ADDRESS = 0x0, -}; - struct mlx5_ifc_nic_vport_context_bits { u8 reserved_0[0x1f]; u8 roce_en[0x1]; -- cgit v1.2.3-71-gd317 From e1d7d349c69d12721c420f1fe673ce9aa462aadd Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 1 Dec 2015 18:03:11 +0200 Subject: net/mlx5: Update access functions to Query/Modify vport MAC address In preparation for SR-IOV we add here an API to enable each e-switch client (PF/VF) to configure its L2 MAC addresses and for the e-switch manager (usually the PF) to access them in order to be able to configure them into the e-switch. Therefore we now pass vport num parameter to mlx5_query_nic_vport_context, so PF can access other vports contexts. preperation for ethernet sriov and l2 table management. Signed-off-by: Saeed Mahameed Signed-off-by: Or Gerlitz Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/vport.c | 87 ++++++++++++++++++++--- include/linux/mlx5/vport.h | 5 +- 3 files changed, 81 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index f6a8cc787603..2ef717f98b0d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2028,7 +2028,7 @@ static void mlx5e_set_netdev_dev_addr(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); - mlx5_query_nic_vport_mac_address(priv->mdev, netdev->dev_addr); + mlx5_query_nic_vport_mac_address(priv->mdev, 0, netdev->dev_addr); } static void mlx5e_build_netdev(struct net_device *netdev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index b94177ebcf3a..442916e98724 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -57,33 +57,98 @@ u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod) } EXPORT_SYMBOL(mlx5_query_vport_state); -void mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u8 *addr) +static int mlx5_query_nic_vport_context(struct mlx5_core_dev *mdev, u16 vport, + u32 *out, int outlen) +{ + u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(query_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); + + MLX5_SET(query_nic_vport_context_in, in, vport_number, vport); + if (vport) + MLX5_SET(query_nic_vport_context_in, in, other_vport, 1); + + return mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen); +} + +static int mlx5_modify_nic_vport_context(struct mlx5_core_dev *mdev, void *in, + int inlen) +{ + u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)]; + + MLX5_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + + memset(out, 0, sizeof(out)); + return mlx5_cmd_exec_check_status(mdev, in, inlen, out, sizeof(out)); +} + +int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, + u16 vport, u8 *addr) { - u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)]; u32 *out; int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); u8 *out_addr; + int err; out = mlx5_vzalloc(outlen); if (!out) - return; + return -ENOMEM; out_addr = MLX5_ADDR_OF(query_nic_vport_context_out, out, nic_vport_context.permanent_address); - memset(in, 0, sizeof(in)); - - MLX5_SET(query_nic_vport_context_in, in, opcode, - MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); - - memset(out, 0, outlen); - mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen); + err = mlx5_query_nic_vport_context(mdev, vport, out, outlen); + if (err) + goto out; ether_addr_copy(addr, &out_addr[2]); +out: kvfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mac_address); + +int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *mdev, + u16 vport, u8 *addr) +{ + void *in; + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + int err; + void *nic_vport_ctx; + u8 *perm_mac; + + in = mlx5_vzalloc(inlen); + if (!in) { + mlx5_core_warn(mdev, "failed to allocate inbox\n"); + return -ENOMEM; + } + + MLX5_SET(modify_nic_vport_context_in, in, + field_select.permanent_address, 1); + MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); + + if (vport) + MLX5_SET(modify_nic_vport_context_in, in, 
other_vport, 1); + + nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, + in, nic_vport_context); + perm_mac = MLX5_ADDR_OF(nic_vport_context, nic_vport_ctx, + permanent_address); + + ether_addr_copy(&perm_mac[2], addr); + + err = mlx5_modify_nic_vport_context(mdev, in, inlen); + + kvfree(in); + + return err; } -EXPORT_SYMBOL(mlx5_query_nic_vport_mac_address); +EXPORT_SYMBOL(mlx5_modify_nic_vport_mac_address); int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport, u8 port_num, u16 vf_num, u16 gid_index, diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 967e0fd06e89..43e82d9f5463 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -36,7 +36,10 @@ #include u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod); -void mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u8 *addr); +int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, + u16 vport, u8 *addr); +int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *dev, + u16 vport, u8 *addr); int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport, u8 port_num, u16 vf_num, u16 gid_index, union ib_gid *gid); -- cgit v1.2.3-71-gd317 From e16aea2744abea612c27ee0eef606c6a6a8204de Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 1 Dec 2015 18:03:12 +0200 Subject: net/mlx5: Introduce access functions to modify/query vport mac lists Those functions are needed to notify the upcoming L2 table and SR-IOV E-Switch(FDB) manager(PF), of the NIC vport (vf) UC/MC mac lists changes. preperation for ethernet sriov and l2 table management. Signed-off-by: Saeed Mahameed Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/vport.c | 119 ++++++++++++++++++++++++ include/linux/mlx5/device.h | 6 ++ include/linux/mlx5/vport.h | 10 ++ 3 files changed, 135 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 442916e98724..986d0d364df7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -150,6 +150,125 @@ int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *mdev, } EXPORT_SYMBOL(mlx5_modify_nic_vport_mac_address); +int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev, + u32 vport, + enum mlx5_list_type list_type, + u8 addr_list[][ETH_ALEN], + int *list_size) +{ + u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)]; + void *nic_vport_ctx; + int max_list_size; + int req_list_size; + int out_sz; + void *out; + int err; + int i; + + req_list_size = *list_size; + + max_list_size = list_type == MLX5_NVPRT_LIST_TYPE_UC ? 
+ 1 << MLX5_CAP_GEN(dev, log_max_current_uc_list) : + 1 << MLX5_CAP_GEN(dev, log_max_current_mc_list); + + if (req_list_size > max_list_size) { + mlx5_core_warn(dev, "Requested list size (%d) > (%d) max_list_size\n", + req_list_size, max_list_size); + req_list_size = max_list_size; + } + + out_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) + + req_list_size * MLX5_ST_SZ_BYTES(mac_address_layout); + + memset(in, 0, sizeof(in)); + out = kzalloc(out_sz, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(query_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); + MLX5_SET(query_nic_vport_context_in, in, allowed_list_type, list_type); + MLX5_SET(query_nic_vport_context_in, in, vport_number, vport); + + if (vport) + MLX5_SET(query_nic_vport_context_in, in, other_vport, 1); + + err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, out_sz); + if (err) + goto out; + + nic_vport_ctx = MLX5_ADDR_OF(query_nic_vport_context_out, out, + nic_vport_context); + req_list_size = MLX5_GET(nic_vport_context, nic_vport_ctx, + allowed_list_size); + + *list_size = req_list_size; + for (i = 0; i < req_list_size; i++) { + u8 *mac_addr = MLX5_ADDR_OF(nic_vport_context, + nic_vport_ctx, + current_uc_mac_address[i]) + 2; + ether_addr_copy(addr_list[i], mac_addr); + } +out: + kfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mac_list); + +int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev, + enum mlx5_list_type list_type, + u8 addr_list[][ETH_ALEN], + int list_size) +{ + u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)]; + void *nic_vport_ctx; + int max_list_size; + int in_sz; + void *in; + int err; + int i; + + max_list_size = list_type == MLX5_NVPRT_LIST_TYPE_UC ? + 1 << MLX5_CAP_GEN(dev, log_max_current_uc_list) : + 1 << MLX5_CAP_GEN(dev, log_max_current_mc_list); + + if (list_size > max_list_size) + return -ENOSPC; + + in_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) + + list_size * MLX5_ST_SZ_BYTES(mac_address_layout); + + memset(out, 0, sizeof(out)); + in = kzalloc(in_sz, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + MLX5_SET(modify_nic_vport_context_in, in, + field_select.addresses_list, 1); + + nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, in, + nic_vport_context); + + MLX5_SET(nic_vport_context, nic_vport_ctx, + allowed_list_type, list_type); + MLX5_SET(nic_vport_context, nic_vport_ctx, + allowed_list_size, list_size); + + for (i = 0; i < list_size; i++) { + u8 *curr_mac = MLX5_ADDR_OF(nic_vport_context, + nic_vport_ctx, + current_uc_mac_address[i]) + 2; + ether_addr_copy(curr_mac, addr_list[i]); + } + + err = mlx5_cmd_exec_check_status(dev, in, in_sz, out, sizeof(out)); + kfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mac_list); + int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport, u8 port_num, u16 vf_num, u16 gid_index, union ib_gid *gid) diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 0b473cbfa7ef..0d2f0435a9f0 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1102,6 +1102,12 @@ enum { MLX5_FLOW_CONTEXT_DEST_TYPE_TIR = 2, }; +enum mlx5_list_type { + MLX5_NVPRT_LIST_TYPE_UC = 0x0, + MLX5_NVPRT_LIST_TYPE_MC = 0x1, + MLX5_NVPRT_LIST_TYPE_VLAN = 0x2, +}; + enum { MLX5_RQC_RQ_TYPE_MEMORY_RQ_INLINE = 0x0, MLX5_RQC_RQ_TYPE_MEMORY_RQ_RPM = 0x1, diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 
43e82d9f5463..00bbec8d9527 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -34,6 +34,7 @@ #define __MLX5_VPORT_H__ #include +#include u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod); int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, @@ -54,5 +55,14 @@ int mlx5_query_hca_vport_system_image_guid(struct mlx5_core_dev *dev, u64 *sys_image_guid); int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev, u64 *node_guid); +int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev, + u32 vport, + enum mlx5_list_type list_type, + u8 addr_list[][ETH_ALEN], + int *list_size); +int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev, + enum mlx5_list_type list_type, + u8 addr_list[][ETH_ALEN], + int list_size); #endif /* __MLX5_VPORT_H__ */ -- cgit v1.2.3-71-gd317 From e75465148b7df7f2796c75bf98bf33f171edeb2b Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 1 Dec 2015 18:03:13 +0200 Subject: net/mlx5: Introduce access functions to modify/query vport state In preparation for SR-IOV we add here an API to enable each e-switch manager (PF) to configure its VFs link states in e-switch preparation for ethernet sriov. Signed-off-by: Saeed Mahameed Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/vport.c | 61 ++++++++++++++++++++--- include/linux/mlx5/mlx5_ifc.h | 1 + include/linux/mlx5/vport.h | 6 ++- 4 files changed, 62 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 2ef717f98b0d..007e464b3e58 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -63,7 +63,7 @@ static void mlx5e_update_carrier(struct mlx5e_priv *priv) u8 port_state; port_state = mlx5_query_vport_state(mdev, - MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT); + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT, 0); if (port_state == VPORT_STATE_UP) netif_carrier_on(priv->netdev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 986d0d364df7..b017a7e68b28 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -36,26 +36,75 @@ #include #include "mlx5_core.h" -u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod) +static int _mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, + u16 vport, u32 *out, int outlen) { - u32 in[MLX5_ST_SZ_DW(query_vport_state_in)]; - u32 out[MLX5_ST_SZ_DW(query_vport_state_out)]; int err; + u32 in[MLX5_ST_SZ_DW(query_vport_state_in)]; memset(in, 0, sizeof(in)); MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE); MLX5_SET(query_vport_state_in, in, op_mod, opmod); + MLX5_SET(query_vport_state_in, in, vport_number, vport); + if (vport) + MLX5_SET(query_vport_state_in, in, other_vport, 1); - err = mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, - sizeof(out)); + err = mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen); if (err) mlx5_core_warn(mdev, "MLX5_CMD_OP_QUERY_VPORT_STATE failed\n"); + return err; +} + +u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport) +{ + u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {0}; + + _mlx5_query_vport_state(mdev, opmod, vport, out, sizeof(out)); + return MLX5_GET(query_vport_state_out, out, state); } 
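/*
 * Editor's illustrative sketch, not part of this patch: one way the
 * E-Switch manager (PF) might drive a VF link with the vport-state helpers
 * this patch introduces (mlx5_query_vport_state above,
 * mlx5_modify_vport_admin_state below).  It assumes the vport numbering
 * used in sriov.c (VF n maps to vport n + 1, vport 0 being the PF) and
 * reuses the existing VPORT_STATE_* values for the admin-state argument;
 * both are assumptions, not something this patch states.
 */
static int example_vf_link_down(struct mlx5_core_dev *mdev, int vf)
{
	u16 vport = vf + 1;	/* assumed mapping: VF n is e-switch vport n + 1 */
	int err;

	err = mlx5_modify_vport_admin_state(mdev,
					    MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
					    vport, VPORT_STATE_DOWN);
	if (err)
		return err;

	/* the query helper returns the operational state directly, not an errno */
	return mlx5_query_vport_state(mdev,
				      MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
				      vport);
}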
-EXPORT_SYMBOL(mlx5_query_vport_state); +EXPORT_SYMBOL_GPL(mlx5_query_vport_state); + +u8 mlx5_query_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport) +{ + u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {0}; + + _mlx5_query_vport_state(mdev, opmod, vport, out, sizeof(out)); + + return MLX5_GET(query_vport_state_out, out, admin_state); +} +EXPORT_SYMBOL(mlx5_query_vport_admin_state); + +int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, + u16 vport, u8 state) +{ + u32 in[MLX5_ST_SZ_DW(modify_vport_state_in)]; + u32 out[MLX5_ST_SZ_DW(modify_vport_state_out)]; + int err; + + memset(in, 0, sizeof(in)); + + MLX5_SET(modify_vport_state_in, in, opcode, + MLX5_CMD_OP_MODIFY_VPORT_STATE); + MLX5_SET(modify_vport_state_in, in, op_mod, opmod); + MLX5_SET(modify_vport_state_in, in, vport_number, vport); + + if (vport) + MLX5_SET(modify_vport_state_in, in, other_vport, 1); + + MLX5_SET(modify_vport_state_in, in, admin_state, state); + + err = mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, + sizeof(out)); + if (err) + mlx5_core_warn(mdev, "MLX5_CMD_OP_MODIFY_VPORT_STATE failed\n"); + + return err; +} +EXPORT_SYMBOL(mlx5_modify_vport_admin_state); static int mlx5_query_nic_vport_context(struct mlx5_core_dev *mdev, u16 vport, u32 *out, int outlen) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 836cf0e43174..655184702ea2 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -2946,6 +2946,7 @@ struct mlx5_ifc_query_vport_state_out_bits { enum { MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT = 0x0, + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT = 0x1, }; struct mlx5_ifc_query_vport_state_in_bits { diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 00bbec8d9527..c1bba5948851 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -36,7 +36,11 @@ #include #include -u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod); +u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport); +u8 mlx5_query_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, + u16 vport); +int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, + u16 vport, u8 state); int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u16 vport, u8 *addr); int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *dev, -- cgit v1.2.3-71-gd317 From d82b73186dab70d6d332dd2afdb48608be2e5230 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 1 Dec 2015 18:03:14 +0200 Subject: net/mlx5: Introduce access functions to modify/query vport promisc mode Those functions are needed to notify the upcoming SR-IOV E-Switch(FDB) manager(PF), of the NIC vport (vf) promisc mode changes. Preperation for ethernet sriov and l2 table management. Signed-off-by: Saeed Mahameed Signed-off-by: Or Gerlitz Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/vport.c | 62 +++++++++++++++++++++++++ include/linux/mlx5/mlx5_ifc.h | 28 +++++++++-- include/linux/mlx5/vport.h | 9 ++++ 3 files changed, 94 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index b017a7e68b28..68aa51df29c1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -576,3 +576,65 @@ int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev, return err; } EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_node_guid); + +int mlx5_query_nic_vport_promisc(struct mlx5_core_dev *mdev, + u32 vport, + int *promisc_uc, + int *promisc_mc, + int *promisc_all) +{ + u32 *out; + int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + int err; + + out = kzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + err = mlx5_query_nic_vport_context(mdev, vport, out, outlen); + if (err) + goto out; + + *promisc_uc = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.promisc_uc); + *promisc_mc = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.promisc_mc); + *promisc_all = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.promisc_all); + +out: + kfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_promisc); + +int mlx5_modify_nic_vport_promisc(struct mlx5_core_dev *mdev, + int promisc_uc, + int promisc_mc, + int promisc_all) +{ + void *in; + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + int err; + + in = mlx5_vzalloc(inlen); + if (!in) { + mlx5_core_err(mdev, "failed to allocate inbox\n"); + return -ENOMEM; + } + + MLX5_SET(modify_nic_vport_context_in, in, field_select.promisc, 1); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.promisc_uc, promisc_uc); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.promisc_mc, promisc_mc); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.promisc_all, promisc_all); + + err = mlx5_modify_nic_vport_context(mdev, in, inlen); + + kvfree(in); + + return err; +} +EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_promisc); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 655184702ea2..2728b5f6c017 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -2147,16 +2147,31 @@ struct mlx5_ifc_nic_vport_context_bits { u8 reserved_0[0x1f]; u8 roce_en[0x1]; - u8 reserved_1[0x760]; + u8 arm_change_event[0x1]; + u8 reserved_1[0x1a]; + u8 event_on_mtu[0x1]; + u8 event_on_promisc_change[0x1]; + u8 event_on_vlan_change[0x1]; + u8 event_on_mc_address_change[0x1]; + u8 event_on_uc_address_change[0x1]; - u8 reserved_2[0x5]; + u8 reserved_2[0xf0]; + + u8 mtu[0x10]; + + u8 reserved_3[0x640]; + + u8 promisc_uc[0x1]; + u8 promisc_mc[0x1]; + u8 promisc_all[0x1]; + u8 reserved_4[0x2]; u8 allowed_list_type[0x3]; - u8 reserved_3[0xc]; + u8 reserved_5[0xc]; u8 allowed_list_size[0xc]; struct mlx5_ifc_mac_address_layout_bits permanent_address; - u8 reserved_4[0x20]; + u8 reserved_6[0x20]; u8 current_uc_mac_address[0][0x40]; }; @@ -4235,7 +4250,10 @@ struct mlx5_ifc_modify_nic_vport_context_out_bits { }; struct mlx5_ifc_modify_nic_vport_field_select_bits { - u8 reserved_0[0x1c]; + u8 reserved_0[0x19]; + u8 mtu[0x1]; + u8 change_event[0x1]; + u8 promisc[0x1]; u8 permanent_address[0x1]; u8 addresses_list[0x1]; u8 roce_en[0x1]; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 
c1bba5948851..dbbaed9f975a 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -68,5 +68,14 @@ int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev, enum mlx5_list_type list_type, u8 addr_list[][ETH_ALEN], int list_size); +int mlx5_query_nic_vport_promisc(struct mlx5_core_dev *mdev, + u32 vport, + int *promisc_uc, + int *promisc_mc, + int *promisc_all); +int mlx5_modify_nic_vport_promisc(struct mlx5_core_dev *mdev, + int promisc_uc, + int promisc_mc, + int promisc_all); #endif /* __MLX5_VPORT_H__ */ -- cgit v1.2.3-71-gd317 From c0046cf7b81ac55b8bf056c71918ec04edd99379 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 1 Dec 2015 18:03:15 +0200 Subject: net/mlx5: Introduce access functions to modify/query vport vlans Those functions are needed to notify the upcoming L2 table and SR-IOV E-Switch(FDB) manager(PF), of the NIC vport (vf) vlan table changes. preperation for ethernet sriov and l2 table management. Signed-off-by: Saeed Mahameed Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/vport.c | 112 ++++++++++++++++++++++++ include/linux/mlx5/mlx5_ifc.h | 7 ++ include/linux/mlx5/vport.h | 7 ++ 3 files changed, 126 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 68aa51df29c1..076197efea9b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -318,6 +318,118 @@ int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev, } EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mac_list); +int mlx5_query_nic_vport_vlans(struct mlx5_core_dev *dev, + u32 vport, + u16 vlans[], + int *size) +{ + u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)]; + void *nic_vport_ctx; + int req_list_size; + int max_list_size; + int out_sz; + void *out; + int err; + int i; + + req_list_size = *size; + max_list_size = 1 << MLX5_CAP_GEN(dev, log_max_vlan_list); + if (req_list_size > max_list_size) { + mlx5_core_warn(dev, "Requested list size (%d) > (%d) max list size\n", + req_list_size, max_list_size); + req_list_size = max_list_size; + } + + out_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) + + req_list_size * MLX5_ST_SZ_BYTES(vlan_layout); + + memset(in, 0, sizeof(in)); + out = kzalloc(out_sz, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(query_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); + MLX5_SET(query_nic_vport_context_in, in, allowed_list_type, + MLX5_NVPRT_LIST_TYPE_VLAN); + MLX5_SET(query_nic_vport_context_in, in, vport_number, vport); + + if (vport) + MLX5_SET(query_nic_vport_context_in, in, other_vport, 1); + + err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, out_sz); + if (err) + goto out; + + nic_vport_ctx = MLX5_ADDR_OF(query_nic_vport_context_out, out, + nic_vport_context); + req_list_size = MLX5_GET(nic_vport_context, nic_vport_ctx, + allowed_list_size); + + *size = req_list_size; + for (i = 0; i < req_list_size; i++) { + void *vlan_addr = MLX5_ADDR_OF(nic_vport_context, + nic_vport_ctx, + current_uc_mac_address[i]); + vlans[i] = MLX5_GET(vlan_layout, vlan_addr, vlan); + } +out: + kfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_vlans); + +int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev, + u16 vlans[], + int list_size) +{ + u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)]; + void *nic_vport_ctx; + int max_list_size; + int in_sz; + void *in; + int err; + int i; + + 
max_list_size = 1 << MLX5_CAP_GEN(dev, log_max_vlan_list); + + if (list_size > max_list_size) + return -ENOSPC; + + in_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) + + list_size * MLX5_ST_SZ_BYTES(vlan_layout); + + memset(out, 0, sizeof(out)); + in = kzalloc(in_sz, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + MLX5_SET(modify_nic_vport_context_in, in, + field_select.addresses_list, 1); + + nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, in, + nic_vport_context); + + MLX5_SET(nic_vport_context, nic_vport_ctx, + allowed_list_type, MLX5_NVPRT_LIST_TYPE_VLAN); + MLX5_SET(nic_vport_context, nic_vport_ctx, + allowed_list_size, list_size); + + for (i = 0; i < list_size; i++) { + void *vlan_addr = MLX5_ADDR_OF(nic_vport_context, + nic_vport_ctx, + current_uc_mac_address[i]); + MLX5_SET(vlan_layout, vlan_addr, vlan, vlans[i]); + } + + err = mlx5_cmd_exec_check_status(dev, in, in_sz, out, sizeof(out)); + kfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_vlans); + int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport, u8 port_num, u16 vf_num, u16 gid_index, union ib_gid *gid) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 2728b5f6c017..39487d0c305d 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -910,6 +910,13 @@ struct mlx5_ifc_mac_address_layout_bits { u8 mac_addr_31_0[0x20]; }; +struct mlx5_ifc_vlan_layout_bits { + u8 reserved_0[0x14]; + u8 vlan[0x0c]; + + u8 reserved_1[0x20]; +}; + struct mlx5_ifc_cong_control_r_roce_ecn_np_bits { u8 reserved_0[0xa0]; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index dbbaed9f975a..638f2ca7a527 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -77,5 +77,12 @@ int mlx5_modify_nic_vport_promisc(struct mlx5_core_dev *mdev, int promisc_uc, int promisc_mc, int promisc_all); +int mlx5_query_nic_vport_vlans(struct mlx5_core_dev *dev, + u32 vport, + u16 vlans[], + int *size); +int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev, + u16 vlans[], + int list_size); #endif /* __MLX5_VPORT_H__ */ -- cgit v1.2.3-71-gd317 From 073bb189a41d7bbad509b576a690611c46c4858f Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 1 Dec 2015 18:03:18 +0200 Subject: net/mlx5: Introducing E-Switch and l2 table E-Switch is the software entity that represents and manages ConnectX4 inter-HCA ethernet l2 switching. E-Switch has its own Virtual Ports, each Vport/vNIC/VF can be connected to the device through a vport of an e-switch. Each e-switch is managed by one vNIC identified by HCA_CAP.vport_group_manager (usually it is the PF/vport[0]), and its main responsibility is to forward each packet to the right vport. e-Switch needs to manage its own l2-table and FDB tables. L2 table is a flow table that is managed by FW, it is needed for Multi-host (Multi PF) configuration for inter HCA switching between PFs. FDB table is a flow table that is totally managed by e-Switch driver, its main responsibility is to switch packets between e-Swtich internal vports and uplink vport that belong to the same. This patch introduces only e-Swtich l2 table management, FDB managemnt will come later when ethernet SRIOV/VFs will be enabled. preperation for ethernet sriov and l2 table management. Signed-off-by: Saeed Mahameed Signed-off-by: Or Gerlitz Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/Makefile | 2 +- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 13 + drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 500 ++++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 122 ++++++ drivers/net/ethernet/mellanox/mlx5/core/main.c | 18 + include/linux/mlx5/device.h | 8 + include/linux/mlx5/driver.h | 4 + 7 files changed, 666 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 4d5103911527..a0755919ccaf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -3,6 +3,6 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ mad.o transobj.o vport.o sriov.o -mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o flow_table.o \ +mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o flow_table.o eswitch.o \ en_main.o en_flow_table.o en_ethtool.o en_tx.o en_rx.o \ en_txrx.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 713ead583347..23c244a7e5d7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -35,6 +35,9 @@ #include #include #include "mlx5_core.h" +#ifdef CONFIG_MLX5_CORE_EN +#include "eswitch.h" +#endif enum { MLX5_EQE_SIZE = sizeof(struct mlx5_eqe), @@ -287,6 +290,11 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq) break; #endif +#ifdef CONFIG_MLX5_CORE_EN + case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE: + mlx5_eswitch_vport_event(dev->priv.eswitch, eqe); + break; +#endif default: mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n", eqe->type, eq->eqn); @@ -459,6 +467,11 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev) if (MLX5_CAP_GEN(dev, pg)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_PAGE_FAULT); + if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH && + MLX5_CAP_GEN(dev, vport_group_manager) && + mlx5_core_is_pf(dev)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE); + err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD, MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD, "mlx5_cmd_eq", &dev->priv.uuari.uars[0]); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c new file mode 100644 index 000000000000..1f2f804bde3e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -0,0 +1,500 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include "mlx5_core.h" +#include "eswitch.h" + +#define MLX5_DEBUG_ESWITCH_MASK BIT(3) + +#define esw_info(dev, format, ...) \ + pr_info("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__) + +#define esw_warn(dev, format, ...) \ + pr_warn("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__) + +#define esw_debug(dev, format, ...) \ + mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__) + +enum { + MLX5_ACTION_NONE = 0, + MLX5_ACTION_ADD = 1, + MLX5_ACTION_DEL = 2, +}; + +/* HW UC L2 table hash node */ +struct mlx5_uc_l2addr { + struct l2addr_node node; + u8 action; + u32 table_index; + u32 vport; +}; + +/* Vport UC L2 table hash node */ +struct mlx5_vport_addr { + struct l2addr_node node; + u8 action; +}; + +enum { + UC_ADDR_CHANGE = BIT(0), + MC_ADDR_CHANGE = BIT(1), +}; + +static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, int vport, + u32 events_mask) +{ + int in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)]; + int out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)]; + void *nic_vport_ctx; + int err; + + memset(out, 0, sizeof(out)); + memset(in, 0, sizeof(in)); + + MLX5_SET(modify_nic_vport_context_in, in, + opcode, MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + MLX5_SET(modify_nic_vport_context_in, in, field_select.change_event, 1); + MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); + if (vport) + MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); + nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, + in, nic_vport_context); + + MLX5_SET(nic_vport_context, nic_vport_ctx, arm_change_event, 1); + + if (events_mask & UC_ADDR_CHANGE) + MLX5_SET(nic_vport_context, nic_vport_ctx, + event_on_uc_address_change, 1); + if (events_mask & MC_ADDR_CHANGE) + MLX5_SET(nic_vport_context, nic_vport_ctx, + event_on_mc_address_change, 1); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (err) + goto ex; + err = mlx5_cmd_status_to_err_v2(out); + if (err) + goto ex; + return 0; +ex: + return err; +} + +/* HW L2 Table (MPFS) management */ +static int set_l2_table_entry_cmd(struct mlx5_core_dev *dev, u32 index, + u8 *mac, u8 vlan_valid, u16 vlan) +{ + u32 in[MLX5_ST_SZ_DW(set_l2_table_entry_in)]; + u32 out[MLX5_ST_SZ_DW(set_l2_table_entry_out)]; + u8 *in_mac_addr; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(set_l2_table_entry_in, in, opcode, + MLX5_CMD_OP_SET_L2_TABLE_ENTRY); + MLX5_SET(set_l2_table_entry_in, in, table_index, index); + MLX5_SET(set_l2_table_entry_in, in, vlan_valid, vlan_valid); + MLX5_SET(set_l2_table_entry_in, in, vlan, vlan); + + in_mac_addr = MLX5_ADDR_OF(set_l2_table_entry_in, in, mac_address); + ether_addr_copy(&in_mac_addr[2], mac); + + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), + out, sizeof(out)); 
+} + +static int del_l2_table_entry_cmd(struct mlx5_core_dev *dev, u32 index) +{ + u32 in[MLX5_ST_SZ_DW(delete_l2_table_entry_in)]; + u32 out[MLX5_ST_SZ_DW(delete_l2_table_entry_out)]; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(delete_l2_table_entry_in, in, opcode, + MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY); + MLX5_SET(delete_l2_table_entry_in, in, table_index, index); + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), + out, sizeof(out)); +} + +static int alloc_l2_table_index(struct mlx5_l2_table *l2_table, u32 *ix) +{ + int err = 0; + + *ix = find_first_zero_bit(l2_table->bitmap, l2_table->size); + if (*ix >= l2_table->size) + err = -ENOSPC; + else + __set_bit(*ix, l2_table->bitmap); + + return err; +} + +static void free_l2_table_index(struct mlx5_l2_table *l2_table, u32 ix) +{ + __clear_bit(ix, l2_table->bitmap); +} + +static int set_l2_table_entry(struct mlx5_core_dev *dev, u8 *mac, + u8 vlan_valid, u16 vlan, + u32 *index) +{ + struct mlx5_l2_table *l2_table = &dev->priv.eswitch->l2_table; + int err; + + err = alloc_l2_table_index(l2_table, index); + if (err) + return err; + + err = set_l2_table_entry_cmd(dev, *index, mac, vlan_valid, vlan); + if (err) + free_l2_table_index(l2_table, *index); + + return err; +} + +static void del_l2_table_entry(struct mlx5_core_dev *dev, u32 index) +{ + struct mlx5_l2_table *l2_table = &dev->priv.eswitch->l2_table; + + del_l2_table_entry_cmd(dev, index); + free_l2_table_index(l2_table, index); +} + +/* SW E-Switch L2 Table management */ +static int l2_table_addr_add(struct mlx5_eswitch *esw, + u8 mac[ETH_ALEN], u32 vport) +{ + struct hlist_head *hash; + struct mlx5_uc_l2addr *addr; + int err; + + hash = esw->l2_table.l2_hash; + addr = l2addr_hash_find(hash, mac, struct mlx5_uc_l2addr); + if (addr) { + esw_warn(esw->dev, + "Failed to set L2 mac(%pM) for vport(%d), mac is already in use by vport(%d)\n", + mac, vport, addr->vport); + return -EEXIST; + } + + addr = l2addr_hash_add(hash, mac, struct mlx5_uc_l2addr, + GFP_KERNEL); + if (!addr) + return -ENOMEM; + + addr->vport = vport; + addr->action = MLX5_ACTION_NONE; + err = set_l2_table_entry(esw->dev, mac, 0, 0, &addr->table_index); + if (err) + l2addr_hash_del(addr); + else + esw_debug(esw->dev, "\tADDED L2 MAC: vport[%d] %pM index:%d\n", + vport, addr->node.addr, addr->table_index); + return err; +} + +static int l2_table_addr_del(struct mlx5_eswitch *esw, + u8 mac[ETH_ALEN], u32 vport) +{ + struct hlist_head *hash; + struct mlx5_uc_l2addr *addr; + + hash = esw->l2_table.l2_hash; + addr = l2addr_hash_find(hash, mac, struct mlx5_uc_l2addr); + if (!addr || addr->vport != vport) { + esw_warn(esw->dev, "MAC(%pM) doesn't belong to vport (%d)\n", + mac, vport); + return -EINVAL; + } + + esw_debug(esw->dev, "\tDELETE L2 MAC: vport[%d] %pM index:%d\n", + vport, addr->node.addr, addr->table_index); + del_l2_table_entry(esw->dev, addr->table_index); + l2addr_hash_del(addr); + return 0; +} + +/* E-Switch vport uc list management */ + +/* Apply vport uc list to HW l2 table */ +static void esw_apply_vport_uc_list(struct mlx5_eswitch *esw, + u32 vport_num) +{ + struct mlx5_vport *vport = &esw->vports[vport_num]; + struct mlx5_vport_addr *addr; + struct l2addr_node *node; + struct hlist_head *hash; + struct hlist_node *tmp; + int hi; + + hash = vport->uc_list; + for_each_l2hash_node(node, tmp, hash, hi) { + addr = container_of(node, struct mlx5_vport_addr, node); + switch (addr->action) { + case MLX5_ACTION_ADD: + l2_table_addr_add(esw, addr->node.addr, vport_num); + addr->action = 
MLX5_ACTION_NONE; + break; + case MLX5_ACTION_DEL: + l2_table_addr_del(esw, addr->node.addr, vport_num); + l2addr_hash_del(addr); + break; + } + } +} + +/* Sync vport uc list from vport context */ +static void esw_update_vport_uc_list(struct mlx5_eswitch *esw, + u32 vport_num) +{ + struct mlx5_vport *vport = &esw->vports[vport_num]; + struct mlx5_vport_addr *addr; + struct l2addr_node *node; + u8 (*mac_list)[ETH_ALEN]; + struct hlist_head *hash; + struct hlist_node *tmp; + int size; + int err; + int hi; + int i; + + size = MLX5_MAX_UC_PER_VPORT(esw->dev); + + mac_list = kcalloc(size, ETH_ALEN, GFP_KERNEL); + if (!mac_list) + return; + + hash = vport->uc_list; + + for_each_l2hash_node(node, tmp, hash, hi) { + addr = container_of(node, struct mlx5_vport_addr, node); + addr->action = MLX5_ACTION_DEL; + } + + err = mlx5_query_nic_vport_mac_list(esw->dev, vport_num, + MLX5_NVPRT_LIST_TYPE_UC, + mac_list, &size); + if (err) + return; + esw_debug(esw->dev, "vport[%d] context update UC list size (%d)\n", + vport_num, size); + + for (i = 0; i < size; i++) { + if (!is_valid_ether_addr(mac_list[i])) + continue; + + addr = l2addr_hash_find(hash, mac_list[i], + struct mlx5_vport_addr); + if (addr) { + addr->action = MLX5_ACTION_NONE; + continue; + } + + addr = l2addr_hash_add(hash, mac_list[i], + struct mlx5_vport_addr, + GFP_KERNEL); + if (!addr) { + esw_warn(esw->dev, + "Failed to add MAC(%pM) to vport[%d] DB\n", + mac_list[i], vport_num); + continue; + } + addr->action = MLX5_ACTION_ADD; + } + kfree(mac_list); +} + +static void esw_vport_change_handler(struct work_struct *work) +{ + struct mlx5_vport *vport = + container_of(work, struct mlx5_vport, vport_change_handler); + struct mlx5_core_dev *dev = vport->dev; + u8 mac[ETH_ALEN]; + + mlx5_query_nic_vport_mac_address(dev, vport->vport, mac); + + if (!is_valid_ether_addr(mac)) + goto out; + + esw_update_vport_uc_list(dev->priv.eswitch, vport->vport); + esw_apply_vport_uc_list(dev->priv.eswitch, vport->vport); + +out: + if (vport->enabled) + arm_vport_context_events_cmd(dev, vport->vport, + UC_ADDR_CHANGE); +} + +static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num) +{ + struct mlx5_vport *vport = &esw->vports[vport_num]; + unsigned long flags; + + spin_lock_irqsave(&vport->lock, flags); + vport->enabled = true; + spin_unlock_irqrestore(&vport->lock, flags); + + arm_vport_context_events_cmd(esw->dev, vport_num, UC_ADDR_CHANGE); +} + +static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) +{ + struct mlx5_vport *vport = &esw->vports[vport_num]; + unsigned long flags; + + if (!vport->enabled) + return; + + /* Mark this vport as disabled to discard new events */ + spin_lock_irqsave(&vport->lock, flags); + vport->enabled = false; + spin_unlock_irqrestore(&vport->lock, flags); + + /* Wait for current already scheduled events to complete */ + flush_workqueue(esw->work_queue); + + /* Disable events from this vport */ + arm_vport_context_events_cmd(esw->dev, vport->vport, 0); +} + +/* Public E-Switch API */ +int mlx5_eswitch_init(struct mlx5_core_dev *dev) +{ + int l2_table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table); + int total_vports = 1 + pci_sriov_get_totalvfs(dev->pdev); + struct mlx5_eswitch *esw; + int vport_num; + int err; + + if (!MLX5_CAP_GEN(dev, vport_group_manager) || + MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return 0; + + esw_info(dev, + "Total vports %d, l2 table size(%d), per vport: max uc(%d) max mc(%d)\n", + total_vports, l2_table_size, + MLX5_MAX_UC_PER_VPORT(dev), + 
MLX5_MAX_MC_PER_VPORT(dev)); + + esw = kzalloc(sizeof(*esw), GFP_KERNEL); + if (!esw) + return -ENOMEM; + + esw->dev = dev; + + esw->l2_table.bitmap = kcalloc(BITS_TO_LONGS(l2_table_size), + sizeof(uintptr_t), GFP_KERNEL); + if (!esw->l2_table.bitmap) { + err = -ENOMEM; + goto abort; + } + esw->l2_table.size = l2_table_size; + + esw->work_queue = create_singlethread_workqueue("mlx5_esw_wq"); + if (!esw->work_queue) { + err = -ENOMEM; + goto abort; + } + + esw->vports = kcalloc(total_vports, sizeof(struct mlx5_vport), + GFP_KERNEL); + if (!esw->vports) { + err = -ENOMEM; + goto abort; + } + + esw->total_vports = total_vports; + for (vport_num = 0; vport_num < total_vports; vport_num++) { + struct mlx5_vport *vport = &esw->vports[vport_num]; + + vport->vport = vport_num; + vport->dev = dev; + INIT_WORK(&vport->vport_change_handler, + esw_vport_change_handler); + spin_lock_init(&vport->lock); + } + + dev->priv.eswitch = esw; + + esw_enable_vport(esw, 0); + /* VF Vports will be enabled when SRIOV is enabled */ + return 0; +abort: + if (esw->work_queue) + destroy_workqueue(esw->work_queue); + kfree(esw->l2_table.bitmap); + kfree(esw->vports); + kfree(esw); + return err; +} + +void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) +{ + if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || + MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return; + + esw_info(esw->dev, "cleanup\n"); + esw_disable_vport(esw, 0); + + esw->dev->priv.eswitch = NULL; + destroy_workqueue(esw->work_queue); + kfree(esw->l2_table.bitmap); + kfree(esw->vports); + kfree(esw); +} + +void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) +{ + struct mlx5_eqe_vport_change *vc_eqe = &eqe->data.vport_change; + u16 vport_num = be16_to_cpu(vc_eqe->vport_num); + struct mlx5_vport *vport; + + if (!esw) { + pr_warn("MLX5 E-Switch: vport %d got an event while eswitch is not initialized\n", + vport_num); + return; + } + + vport = &esw->vports[vport_num]; + spin_lock(&vport->lock); + if (vport->enabled) + queue_work(esw->work_queue, &vport->vport_change_handler); + spin_unlock(&vport->lock); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h new file mode 100644 index 000000000000..0c41f2657824 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2015, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5_ESWITCH_H__ +#define __MLX5_ESWITCH_H__ + +#include + +#define MLX5_MAX_UC_PER_VPORT(dev) \ + (1 << MLX5_CAP_GEN(dev, log_max_current_uc_list)) + +#define MLX5_MAX_MC_PER_VPORT(dev) \ + (1 << MLX5_CAP_GEN(dev, log_max_current_mc_list)) + +#define MLX5_L2_ADDR_HASH_SIZE (BIT(BITS_PER_BYTE)) +#define MLX5_L2_ADDR_HASH(addr) (addr[5]) + +/* L2 -mac address based- hash helpers */ +struct l2addr_node { + struct hlist_node hlist; + u8 addr[ETH_ALEN]; +}; + +#define for_each_l2hash_node(hn, tmp, hash, i) \ + for (i = 0; i < MLX5_L2_ADDR_HASH_SIZE; i++) \ + hlist_for_each_entry_safe(hn, tmp, &hash[i], hlist) + +#define l2addr_hash_find(hash, mac, type) ({ \ + int ix = MLX5_L2_ADDR_HASH(mac); \ + bool found = false; \ + type *ptr = NULL; \ + \ + hlist_for_each_entry(ptr, &hash[ix], node.hlist) \ + if (ether_addr_equal(ptr->node.addr, mac)) {\ + found = true; \ + break; \ + } \ + if (!found) \ + ptr = NULL; \ + ptr; \ +}) + +#define l2addr_hash_add(hash, mac, type, gfp) ({ \ + int ix = MLX5_L2_ADDR_HASH(mac); \ + type *ptr = NULL; \ + \ + ptr = kzalloc(sizeof(type), gfp); \ + if (ptr) { \ + ether_addr_copy(ptr->node.addr, mac); \ + hlist_add_head(&ptr->node.hlist, &hash[ix]);\ + } \ + ptr; \ +}) + +#define l2addr_hash_del(ptr) ({ \ + hlist_del(&ptr->node.hlist); \ + kfree(ptr); \ +}) + +struct mlx5_vport { + struct mlx5_core_dev *dev; + int vport; + struct hlist_head uc_list[MLX5_L2_ADDR_HASH_SIZE]; + struct work_struct vport_change_handler; + + /* This spinlock protects access to vport data, between + * "esw_vport_disable" and ongoing interrupt "mlx5_eswitch_vport_event" + * once vport marked as disabled new interrupts are discarded. 
+ */ + spinlock_t lock; /* vport events sync */ + bool enabled; +}; + +struct mlx5_l2_table { + struct hlist_head l2_hash[MLX5_L2_ADDR_HASH_SIZE]; + u32 size; + unsigned long *bitmap; +}; + +struct mlx5_eswitch { + struct mlx5_core_dev *dev; + struct mlx5_l2_table l2_table; + struct workqueue_struct *work_queue; + struct mlx5_vport *vports; + int total_vports; +}; + +/* E-Switch API */ +int mlx5_eswitch_init(struct mlx5_core_dev *dev); +void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); +void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe); + +#endif /* __MLX5_ESWITCH_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 66e2b37cfbbf..c6de3240f76f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -49,6 +49,9 @@ #include #include #include "mlx5_core.h" +#ifdef CONFIG_MLX5_CORE_EN +#include "eswitch.h" +#endif MODULE_AUTHOR("Eli Cohen "); MODULE_DESCRIPTION("Mellanox Connect-IB, ConnectX-4 core driver"); @@ -1052,6 +1055,14 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) mlx5_init_srq_table(dev); mlx5_init_mr_table(dev); +#ifdef CONFIG_MLX5_CORE_EN + err = mlx5_eswitch_init(dev); + if (err) { + dev_err(&pdev->dev, "eswitch init failed %d\n", err); + goto err_reg_dev; + } +#endif + err = mlx5_sriov_init(dev); if (err) { dev_err(&pdev->dev, "sriov init failed %d\n", err); @@ -1078,6 +1089,9 @@ err_sriov: if (mlx5_sriov_cleanup(dev)) dev_err(&dev->pdev->dev, "sriov cleanup failed\n"); +#ifdef CONFIG_MLX5_CORE_EN + mlx5_eswitch_cleanup(dev->priv.eswitch); +#endif err_reg_dev: mlx5_cleanup_mr_table(dev); mlx5_cleanup_srq_table(dev); @@ -1147,6 +1161,10 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) goto out; } mlx5_unregister_device(dev); +#ifdef CONFIG_MLX5_CORE_EN + mlx5_eswitch_cleanup(dev->priv.eswitch); +#endif + mlx5_cleanup_mr_table(dev); mlx5_cleanup_srq_table(dev); mlx5_cleanup_qp_table(dev); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 0d2f0435a9f0..90a4cb6dc4cb 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -251,6 +251,7 @@ enum mlx5_event { MLX5_EVENT_TYPE_PAGE_REQUEST = 0xb, MLX5_EVENT_TYPE_PAGE_FAULT = 0xc, + MLX5_EVENT_TYPE_NIC_VPORT_CHANGE = 0xd, }; enum { @@ -520,6 +521,12 @@ struct mlx5_eqe_page_fault { __be32 flags_qpn; } __packed; +struct mlx5_eqe_vport_change { + u8 rsvd0[2]; + __be16 vport_num; + __be32 rsvd1[6]; +} __packed; + union ev_data { __be32 raw[7]; struct mlx5_eqe_cmd cmd; @@ -532,6 +539,7 @@ union ev_data { struct mlx5_eqe_stall_vl stall_vl; struct mlx5_eqe_page_req req_pages; struct mlx5_eqe_page_fault page_fault; + struct mlx5_eqe_vport_change vport_change; } __packed; struct mlx5_eqe { diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index efebb87163c8..ac098b6b97bf 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -441,6 +441,8 @@ struct mlx5_irq_info { char name[MLX5_MAX_IRQ_NAME]; }; +struct mlx5_eswitch; + struct mlx5_priv { char name[MLX5_MAX_NAME_LEN]; struct mlx5_eq_table eq_table; @@ -496,6 +498,8 @@ struct mlx5_priv { struct list_head dev_list; struct list_head ctx_list; spinlock_t ctx_lock; + + struct mlx5_eswitch *eswitch; struct mlx5_core_sriov sriov; unsigned long pci_dev_data; }; -- cgit v1.2.3-71-gd317 From 495716b191f607b2cb2175f7499966daef79f663 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 1 Dec 2015 
18:03:19 +0200 Subject: net/mlx5: E-Switch, Introduce FDB hardware capabilities Define needed hardware structures and capabilities needed for E-Switch FDB flow tables and read them on driver load. Signed-off-by: Saeed Mahameed Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/fw.c | 13 +++++++++++++ include/linux/mlx5/device.h | 15 +++++++++++++++ include/linux/mlx5/mlx5_ifc.h | 13 +++++++++++++ 3 files changed, 41 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index 9335e5ae18cc..bf6e3dfcef51 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -160,6 +160,19 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) if (err) return err; } + + if (MLX5_CAP_GEN(dev, vport_group_manager) && + MLX5_CAP_GEN(dev, eswitch_flow_table)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH_FLOW_TABLE, + HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH_FLOW_TABLE, + HCA_CAP_OPMOD_GET_MAX); + if (err) + return err; + } + return 0; } diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 90a4cb6dc4cb..bce9caed1eed 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1138,6 +1138,7 @@ enum mlx5_cap_type { MLX5_CAP_IPOIB_OFFLOADS, MLX5_CAP_EOIB_OFFLOADS, MLX5_CAP_FLOW_TABLE, + MLX5_CAP_ESWITCH_FLOW_TABLE, /* NUM OF CAP Types */ MLX5_CAP_NUM }; @@ -1175,6 +1176,20 @@ enum mlx5_cap_type { #define MLX5_CAP_FLOWTABLE_MAX(mdev, cap) \ MLX5_GET(flow_table_nic_cap, mdev->hca_caps_max[MLX5_CAP_FLOW_TABLE], cap) +#define MLX5_CAP_ESW_FLOWTABLE(mdev, cap) \ + MLX5_GET(flow_table_eswitch_cap, \ + mdev->hca_caps_cur[MLX5_CAP_ESWITCH_FLOW_TABLE], cap) + +#define MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, cap) \ + MLX5_GET(flow_table_eswitch_cap, \ + mdev->hca_caps_max[MLX5_CAP_ESWITCH_FLOW_TABLE], cap) + +#define MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, cap) \ + MLX5_CAP_ESW_FLOWTABLE(mdev, flow_table_properties_nic_esw_fdb.cap) + +#define MLX5_CAP_ESW_FLOWTABLE_FDB_MAX(mdev, cap) \ + MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, flow_table_properties_nic_esw_fdb.cap) + #define MLX5_CAP_ODP(mdev, cap)\ MLX5_GET(odp_cap, mdev->hca_caps_cur[MLX5_CAP_ODP], cap) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 39487d0c305d..ae7c08adba4a 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -447,6 +447,18 @@ struct mlx5_ifc_flow_table_nic_cap_bits { u8 reserved_3[0x7200]; }; +struct mlx5_ifc_flow_table_eswitch_cap_bits { + u8 reserved_0[0x200]; + + struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_esw_fdb; + + struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_esw_acl_ingress; + + struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_esw_acl_egress; + + u8 reserved_1[0x7800]; +}; + struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 csum_cap[0x1]; u8 vlan_cap[0x1]; @@ -1846,6 +1858,7 @@ union mlx5_ifc_hca_cap_union_bits { struct mlx5_ifc_roce_cap_bits roce_cap; struct mlx5_ifc_per_protocol_networking_offload_caps_bits per_protocol_networking_offload_caps; struct mlx5_ifc_flow_table_nic_cap_bits flow_table_nic_cap; + struct mlx5_ifc_flow_table_eswitch_cap_bits flow_table_eswitch_cap; u8 reserved_0[0x8000]; }; -- cgit v1.2.3-71-gd317 From 81848731ff4070a3e4136efa6a99d507177a53fe Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 1 Dec 
2015 18:03:20 +0200 Subject: net/mlx5: E-Switch, Add SR-IOV (FDB) support Enabling E-Switch SRIOV for nvfs+1 vports. Create E-Switch FDB for L2 UC/MC mac steering between VFs/PF and external vport (Uplink). FDB contains forwarding rules such as: UC MAC0 -> vport0(PF). UC MAC1 -> vport1. UC MAC2 -> vport2. MC MACX -> vport0, vport2, Uplink. MC MACY -> vport1, Uplink. For unmatched traffic FDB has the following default rules: Unmached Traffic (src vport != Uplink) -> Uplink. Unmached Traffic (src vport == Uplink) -> vport0(PF). FDB rules population: Each NIC vport (VF) will notify E-Switch manager of its UC/MC vport context changes via modify vport context command, which will be translated to an event that will be handled by E-Switch manager (PF) which will update FDB table accordingly. Signed-off-by: Saeed Mahameed Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 682 ++++++++++++++++++--- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 25 + .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 1 + drivers/net/ethernet/mellanox/mlx5/core/sriov.c | 14 +- include/linux/mlx5/device.h | 6 + include/linux/mlx5/flow_table.h | 9 + include/linux/mlx5/mlx5_ifc.h | 7 +- 7 files changed, 661 insertions(+), 83 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 1f2f804bde3e..6fa6f013d2c5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -34,9 +34,12 @@ #include #include #include +#include #include "mlx5_core.h" #include "eswitch.h" +#define UPLINK_VPORT 0xFFFF + #define MLX5_DEBUG_ESWITCH_MASK BIT(3) #define esw_info(dev, format, ...) 
\ @@ -54,18 +57,26 @@ enum { MLX5_ACTION_DEL = 2, }; -/* HW UC L2 table hash node */ -struct mlx5_uc_l2addr { +/* E-Switch UC L2 table hash node */ +struct esw_uc_addr { struct l2addr_node node; - u8 action; u32 table_index; u32 vport; }; -/* Vport UC L2 table hash node */ -struct mlx5_vport_addr { - struct l2addr_node node; - u8 action; +/* E-Switch MC FDB table hash node */ +struct esw_mc_addr { /* SRIOV only */ + struct l2addr_node node; + struct mlx5_flow_rule *uplink_rule; /* Forward to uplink rule */ + u32 refcnt; +}; + +/* Vport UC/MC hash node */ +struct vport_addr { + struct l2addr_node node; + u8 action; + u32 vport; + struct mlx5_flow_rule *flow_rule; /* SRIOV only */ }; enum { @@ -73,7 +84,11 @@ enum { MC_ADDR_CHANGE = BIT(1), }; -static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, int vport, +/* Vport context events */ +#define SRIOV_VPORT_EVENTS (UC_ADDR_CHANGE | \ + MC_ADDR_CHANGE) + +static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, u32 events_mask) { int in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)]; @@ -196,97 +211,492 @@ static void del_l2_table_entry(struct mlx5_core_dev *dev, u32 index) free_l2_table_index(l2_table, index); } -/* SW E-Switch L2 Table management */ -static int l2_table_addr_add(struct mlx5_eswitch *esw, - u8 mac[ETH_ALEN], u32 vport) +/* E-Switch FDB flow steering */ +struct dest_node { + struct list_head list; + struct mlx5_flow_destination dest; +}; + +static int _mlx5_flow_rule_apply(struct mlx5_flow_rule *fr) { - struct hlist_head *hash; - struct mlx5_uc_l2addr *addr; + bool was_valid = fr->valid; + struct dest_node *dest_n; + u32 dest_list_size = 0; + void *in_match_value; + u32 *flow_context; + u32 flow_index; int err; + int i; + + if (list_empty(&fr->dest_list)) { + if (fr->valid) + mlx5_del_flow_table_entry(fr->ft, fr->fi); + fr->valid = false; + return 0; + } + + list_for_each_entry(dest_n, &fr->dest_list, list) + dest_list_size++; - hash = esw->l2_table.l2_hash; - addr = l2addr_hash_find(hash, mac, struct mlx5_uc_l2addr); - if (addr) { + flow_context = mlx5_vzalloc(MLX5_ST_SZ_BYTES(flow_context) + + MLX5_ST_SZ_BYTES(dest_format_struct) * + dest_list_size); + if (!flow_context) + return -ENOMEM; + + MLX5_SET(flow_context, flow_context, flow_tag, fr->flow_tag); + MLX5_SET(flow_context, flow_context, action, fr->action); + MLX5_SET(flow_context, flow_context, destination_list_size, + dest_list_size); + + i = 0; + list_for_each_entry(dest_n, &fr->dest_list, list) { + void *dest_addr = MLX5_ADDR_OF(flow_context, flow_context, + destination[i++]); + + MLX5_SET(dest_format_struct, dest_addr, destination_type, + dest_n->dest.type); + MLX5_SET(dest_format_struct, dest_addr, destination_id, + dest_n->dest.vport_num); + } + + in_match_value = MLX5_ADDR_OF(flow_context, flow_context, match_value); + memcpy(in_match_value, fr->match_value, MLX5_ST_SZ_BYTES(fte_match_param)); + + err = mlx5_add_flow_table_entry(fr->ft, fr->match_criteria_enable, + fr->match_criteria, flow_context, + &flow_index); + if (!err) { + if (was_valid) + mlx5_del_flow_table_entry(fr->ft, fr->fi); + fr->fi = flow_index; + fr->valid = true; + } + kfree(flow_context); + return err; +} + +static int mlx5_flow_rule_add_dest(struct mlx5_flow_rule *fr, + struct mlx5_flow_destination *new_dest) +{ + struct dest_node *dest_n; + int err; + + dest_n = kzalloc(sizeof(*dest_n), GFP_KERNEL); + if (!dest_n) + return -ENOMEM; + + memcpy(&dest_n->dest, new_dest, sizeof(dest_n->dest)); + mutex_lock(&fr->mutex); + list_add(&dest_n->list, &fr->dest_list); + 
err = _mlx5_flow_rule_apply(fr); + if (err) { + list_del(&dest_n->list); + kfree(dest_n); + } + mutex_unlock(&fr->mutex); + return err; +} + +static int mlx5_flow_rule_del_dest(struct mlx5_flow_rule *fr, + struct mlx5_flow_destination *dest) +{ + struct dest_node *dest_n; + struct dest_node *n; + int err; + + mutex_lock(&fr->mutex); + list_for_each_entry_safe(dest_n, n, &fr->dest_list, list) { + if (dest->vport_num == dest_n->dest.vport_num) + goto found; + } + mutex_unlock(&fr->mutex); + return -ENOENT; + +found: + list_del(&dest_n->list); + err = _mlx5_flow_rule_apply(fr); + mutex_unlock(&fr->mutex); + kfree(dest_n); + + return err; +} + +static struct mlx5_flow_rule *find_fr(struct mlx5_eswitch *esw, + u8 match_criteria_enable, + u32 *match_value) +{ + struct hlist_head *hash = esw->mc_table; + struct esw_mc_addr *esw_mc; + u8 *dmac_v; + + dmac_v = MLX5_ADDR_OF(fte_match_param, match_value, + outer_headers.dmac_47_16); + + /* UNICAST FULL MATCH */ + if (!is_multicast_ether_addr(dmac_v)) + return NULL; + + /* MULTICAST FULL MATCH */ + esw_mc = l2addr_hash_find(hash, dmac_v, struct esw_mc_addr); + + return esw_mc ? esw_mc->uplink_rule : NULL; +} + +static struct mlx5_flow_rule *alloc_fr(void *ft, + u8 match_criteria_enable, + u32 *match_criteria, + u32 *match_value, + u32 action, + u32 flow_tag) +{ + struct mlx5_flow_rule *fr = kzalloc(sizeof(*fr), GFP_KERNEL); + + if (!fr) + return NULL; + + fr->match_criteria = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + fr->match_value = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + if (!fr->match_criteria || !fr->match_value) { + kfree(fr->match_criteria); + kfree(fr->match_value); + kfree(fr); + return NULL; + } + + memcpy(fr->match_criteria, match_criteria, MLX5_ST_SZ_BYTES(fte_match_param)); + memcpy(fr->match_value, match_value, MLX5_ST_SZ_BYTES(fte_match_param)); + fr->match_criteria_enable = match_criteria_enable; + fr->flow_tag = flow_tag; + fr->action = action; + + mutex_init(&fr->mutex); + INIT_LIST_HEAD(&fr->dest_list); + atomic_set(&fr->refcount, 0); + fr->ft = ft; + return fr; +} + +static void deref_fr(struct mlx5_flow_rule *fr) +{ + if (!atomic_dec_and_test(&fr->refcount)) + return; + + kfree(fr->match_criteria); + kfree(fr->match_value); + kfree(fr); +} + +static struct mlx5_flow_rule * +mlx5_add_flow_rule(struct mlx5_eswitch *esw, + u8 match_criteria_enable, + u32 *match_criteria, + u32 *match_value, + u32 action, + u32 flow_tag, + struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_rule *fr; + int err; + + fr = find_fr(esw, match_criteria_enable, match_value); + fr = fr ? 
fr : alloc_fr(esw->fdb_table.fdb, match_criteria_enable, match_criteria, + match_value, action, flow_tag); + if (!fr) + return NULL; + + atomic_inc(&fr->refcount); + + err = mlx5_flow_rule_add_dest(fr, dest); + if (err) { + deref_fr(fr); + return NULL; + } + + return fr; +} + +static void mlx5_del_flow_rule(struct mlx5_flow_rule *fr, u32 vport) +{ + struct mlx5_flow_destination dest; + + dest.vport_num = vport; + mlx5_flow_rule_del_dest(fr, &dest); + deref_fr(fr); +} + +/* E-Switch FDB */ +static struct mlx5_flow_rule * +esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport) +{ + int match_header = MLX5_MATCH_OUTER_HEADERS; + struct mlx5_flow_destination dest; + struct mlx5_flow_rule *flow_rule = NULL; + u32 *match_v; + u32 *match_c; + u8 *dmac_v; + u8 *dmac_c; + + match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + if (!match_v || !match_c) { + pr_warn("FDB: Failed to alloc match parameters\n"); + goto out; + } + dmac_v = MLX5_ADDR_OF(fte_match_param, match_v, + outer_headers.dmac_47_16); + dmac_c = MLX5_ADDR_OF(fte_match_param, match_c, + outer_headers.dmac_47_16); + + ether_addr_copy(dmac_v, mac); + /* Match criteria mask */ + memset(dmac_c, 0xff, 6); + + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport_num = vport; + + esw_debug(esw->dev, + "\tFDB add rule dmac_v(%pM) dmac_c(%pM) -> vport(%d)\n", + dmac_v, dmac_c, vport); + flow_rule = + mlx5_add_flow_rule(esw, + match_header, + match_c, + match_v, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + 0, &dest); + if (IS_ERR_OR_NULL(flow_rule)) { + pr_warn( + "FDB: Failed to add flow rule: dmac_v(%pM) dmac_c(%pM) -> vport(%d), err(%ld)\n", + dmac_v, dmac_c, vport, PTR_ERR(flow_rule)); + flow_rule = NULL; + } +out: + kfree(match_v); + kfree(match_c); + return flow_rule; +} + +static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports) +{ + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_table_group g; + struct mlx5_flow_table *fdb; + u8 *dmac; + + esw_debug(dev, "Create FDB log_max_size(%d)\n", + MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); + + memset(&g, 0, sizeof(g)); + /* UC MC Full match rules*/ + g.log_sz = MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size); + g.match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + dmac = MLX5_ADDR_OF(fte_match_param, g.match_criteria, + outer_headers.dmac_47_16); + /* Match criteria mask */ + memset(dmac, 0xff, 6); + + fdb = mlx5_create_flow_table(dev, 0, + MLX5_FLOW_TABLE_TYPE_ESWITCH, + 1, &g); + if (fdb) + esw_debug(dev, "ESW: FDB Table created fdb->id %d\n", mlx5_get_flow_table_id(fdb)); + else + esw_warn(dev, "ESW: Failed to create FDB Table\n"); + + esw->fdb_table.fdb = fdb; + return fdb ? 
0 : -ENOMEM; +} + +static void esw_destroy_fdb_table(struct mlx5_eswitch *esw) +{ + if (!esw->fdb_table.fdb) + return; + + esw_debug(esw->dev, "Destroy FDB Table fdb(%d)\n", + mlx5_get_flow_table_id(esw->fdb_table.fdb)); + mlx5_destroy_flow_table(esw->fdb_table.fdb); + esw->fdb_table.fdb = NULL; +} + +/* E-Switch vport UC/MC lists management */ +typedef int (*vport_addr_action)(struct mlx5_eswitch *esw, + struct vport_addr *vaddr); + +static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) +{ + struct hlist_head *hash = esw->l2_table.l2_hash; + struct esw_uc_addr *esw_uc; + u8 *mac = vaddr->node.addr; + u32 vport = vaddr->vport; + int err; + + esw_uc = l2addr_hash_find(hash, mac, struct esw_uc_addr); + if (esw_uc) { esw_warn(esw->dev, "Failed to set L2 mac(%pM) for vport(%d), mac is already in use by vport(%d)\n", - mac, vport, addr->vport); + mac, vport, esw_uc->vport); return -EEXIST; } - addr = l2addr_hash_add(hash, mac, struct mlx5_uc_l2addr, - GFP_KERNEL); - if (!addr) + esw_uc = l2addr_hash_add(hash, mac, struct esw_uc_addr, GFP_KERNEL); + if (!esw_uc) return -ENOMEM; + esw_uc->vport = vport; - addr->vport = vport; - addr->action = MLX5_ACTION_NONE; - err = set_l2_table_entry(esw->dev, mac, 0, 0, &addr->table_index); + err = set_l2_table_entry(esw->dev, mac, 0, 0, &esw_uc->table_index); if (err) - l2addr_hash_del(addr); - else - esw_debug(esw->dev, "\tADDED L2 MAC: vport[%d] %pM index:%d\n", - vport, addr->node.addr, addr->table_index); + goto abort; + + if (esw->fdb_table.fdb) /* SRIOV is enabled: Forward UC MAC to vport */ + vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport); + + esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM index:%d fr(%p)\n", + vport, mac, esw_uc->table_index, vaddr->flow_rule); + return err; +abort: + l2addr_hash_del(esw_uc); return err; } -static int l2_table_addr_del(struct mlx5_eswitch *esw, - u8 mac[ETH_ALEN], u32 vport) +static int esw_del_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) { - struct hlist_head *hash; - struct mlx5_uc_l2addr *addr; + struct hlist_head *hash = esw->l2_table.l2_hash; + struct esw_uc_addr *esw_uc; + u8 *mac = vaddr->node.addr; + u32 vport = vaddr->vport; + + esw_uc = l2addr_hash_find(hash, mac, struct esw_uc_addr); + if (!esw_uc || esw_uc->vport != vport) { + esw_debug(esw->dev, + "MAC(%pM) doesn't belong to vport (%d)\n", + mac, vport); + return -EINVAL; + } + esw_debug(esw->dev, "\tDELETE UC MAC: vport[%d] %pM index:%d fr(%p)\n", + vport, mac, esw_uc->table_index, vaddr->flow_rule); + + del_l2_table_entry(esw->dev, esw_uc->table_index); + + if (vaddr->flow_rule) + mlx5_del_flow_rule(vaddr->flow_rule, vport); + vaddr->flow_rule = NULL; + + l2addr_hash_del(esw_uc); + return 0; +} + +static int esw_add_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) +{ + struct hlist_head *hash = esw->mc_table; + struct esw_mc_addr *esw_mc; + u8 *mac = vaddr->node.addr; + u32 vport = vaddr->vport; + + if (!esw->fdb_table.fdb) + return 0; + + esw_mc = l2addr_hash_find(hash, mac, struct esw_mc_addr); + if (esw_mc) + goto add; + + esw_mc = l2addr_hash_add(hash, mac, struct esw_mc_addr, GFP_KERNEL); + if (!esw_mc) + return -ENOMEM; + + esw_mc->uplink_rule = /* Forward MC MAC to Uplink */ + esw_fdb_set_vport_rule(esw, mac, UPLINK_VPORT); +add: + esw_mc->refcnt++; + /* Forward MC MAC to vport */ + vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport); + esw_debug(esw->dev, + "\tADDED MC MAC: vport[%d] %pM fr(%p) refcnt(%d) uplinkfr(%p)\n", + vport, mac, vaddr->flow_rule, + esw_mc->refcnt, 
esw_mc->uplink_rule); + return 0; +} + +static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) +{ + struct hlist_head *hash = esw->mc_table; + struct esw_mc_addr *esw_mc; + u8 *mac = vaddr->node.addr; + u32 vport = vaddr->vport; - hash = esw->l2_table.l2_hash; - addr = l2addr_hash_find(hash, mac, struct mlx5_uc_l2addr); - if (!addr || addr->vport != vport) { - esw_warn(esw->dev, "MAC(%pM) doesn't belong to vport (%d)\n", + if (!esw->fdb_table.fdb) + return 0; + + esw_mc = l2addr_hash_find(hash, mac, struct esw_mc_addr); + if (!esw_mc) { + esw_warn(esw->dev, + "Failed to find eswitch MC addr for MAC(%pM) vport(%d)", mac, vport); return -EINVAL; } + esw_debug(esw->dev, + "\tDELETE MC MAC: vport[%d] %pM fr(%p) refcnt(%d) uplinkfr(%p)\n", + vport, mac, vaddr->flow_rule, esw_mc->refcnt, + esw_mc->uplink_rule); - esw_debug(esw->dev, "\tDELETE L2 MAC: vport[%d] %pM index:%d\n", - vport, addr->node.addr, addr->table_index); - del_l2_table_entry(esw->dev, addr->table_index); - l2addr_hash_del(addr); + if (vaddr->flow_rule) + mlx5_del_flow_rule(vaddr->flow_rule, vport); + vaddr->flow_rule = NULL; + + if (--esw_mc->refcnt) + return 0; + + if (esw_mc->uplink_rule) + mlx5_del_flow_rule(esw_mc->uplink_rule, UPLINK_VPORT); + + l2addr_hash_del(esw_mc); return 0; } -/* E-Switch vport uc list management */ - -/* Apply vport uc list to HW l2 table */ -static void esw_apply_vport_uc_list(struct mlx5_eswitch *esw, - u32 vport_num) +/* Apply vport UC/MC list to HW l2 table and FDB table */ +static void esw_apply_vport_addr_list(struct mlx5_eswitch *esw, + u32 vport_num, int list_type) { struct mlx5_vport *vport = &esw->vports[vport_num]; - struct mlx5_vport_addr *addr; + bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC; + vport_addr_action vport_addr_add; + vport_addr_action vport_addr_del; + struct vport_addr *addr; struct l2addr_node *node; struct hlist_head *hash; struct hlist_node *tmp; int hi; - hash = vport->uc_list; + vport_addr_add = is_uc ? esw_add_uc_addr : + esw_add_mc_addr; + vport_addr_del = is_uc ? esw_del_uc_addr : + esw_del_mc_addr; + + hash = is_uc ? vport->uc_list : vport->mc_list; for_each_l2hash_node(node, tmp, hash, hi) { - addr = container_of(node, struct mlx5_vport_addr, node); + addr = container_of(node, struct vport_addr, node); switch (addr->action) { case MLX5_ACTION_ADD: - l2_table_addr_add(esw, addr->node.addr, vport_num); + vport_addr_add(esw, addr); addr->action = MLX5_ACTION_NONE; break; case MLX5_ACTION_DEL: - l2_table_addr_del(esw, addr->node.addr, vport_num); + vport_addr_del(esw, addr); l2addr_hash_del(addr); break; } } } -/* Sync vport uc list from vport context */ -static void esw_update_vport_uc_list(struct mlx5_eswitch *esw, - u32 vport_num) +/* Sync vport UC/MC list from vport context */ +static void esw_update_vport_addr_list(struct mlx5_eswitch *esw, + u32 vport_num, int list_type) { struct mlx5_vport *vport = &esw->vports[vport_num]; - struct mlx5_vport_addr *addr; - struct l2addr_node *node; + bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC; u8 (*mac_list)[ETH_ALEN]; + struct l2addr_node *node; + struct vport_addr *addr; struct hlist_head *hash; struct hlist_node *tmp; int size; @@ -294,40 +704,41 @@ static void esw_update_vport_uc_list(struct mlx5_eswitch *esw, int hi; int i; - size = MLX5_MAX_UC_PER_VPORT(esw->dev); + size = is_uc ? MLX5_MAX_UC_PER_VPORT(esw->dev) : + MLX5_MAX_MC_PER_VPORT(esw->dev); mac_list = kcalloc(size, ETH_ALEN, GFP_KERNEL); if (!mac_list) return; - hash = vport->uc_list; + hash = is_uc ? 
vport->uc_list : vport->mc_list; for_each_l2hash_node(node, tmp, hash, hi) { - addr = container_of(node, struct mlx5_vport_addr, node); + addr = container_of(node, struct vport_addr, node); addr->action = MLX5_ACTION_DEL; } - err = mlx5_query_nic_vport_mac_list(esw->dev, vport_num, - MLX5_NVPRT_LIST_TYPE_UC, + err = mlx5_query_nic_vport_mac_list(esw->dev, vport_num, list_type, mac_list, &size); if (err) return; - esw_debug(esw->dev, "vport[%d] context update UC list size (%d)\n", - vport_num, size); + esw_debug(esw->dev, "vport[%d] context update %s list size (%d)\n", + vport_num, is_uc ? "UC" : "MC", size); for (i = 0; i < size; i++) { - if (!is_valid_ether_addr(mac_list[i])) + if (is_uc && !is_valid_ether_addr(mac_list[i])) + continue; + + if (!is_uc && !is_multicast_ether_addr(mac_list[i])) continue; - addr = l2addr_hash_find(hash, mac_list[i], - struct mlx5_vport_addr); + addr = l2addr_hash_find(hash, mac_list[i], struct vport_addr); if (addr) { addr->action = MLX5_ACTION_NONE; continue; } - addr = l2addr_hash_add(hash, mac_list[i], - struct mlx5_vport_addr, + addr = l2addr_hash_add(hash, mac_list[i], struct vport_addr, GFP_KERNEL); if (!addr) { esw_warn(esw->dev, @@ -335,6 +746,7 @@ static void esw_update_vport_uc_list(struct mlx5_eswitch *esw, mac_list[i], vport_num); continue; } + addr->vport = vport_num; addr->action = MLX5_ACTION_ADD; } kfree(mac_list); @@ -345,32 +757,80 @@ static void esw_vport_change_handler(struct work_struct *work) struct mlx5_vport *vport = container_of(work, struct mlx5_vport, vport_change_handler); struct mlx5_core_dev *dev = vport->dev; + struct mlx5_eswitch *esw = dev->priv.eswitch; u8 mac[ETH_ALEN]; mlx5_query_nic_vport_mac_address(dev, vport->vport, mac); + esw_debug(dev, "vport[%d] Context Changed: perm mac: %pM\n", + vport->vport, mac); + + if (vport->enabled_events & UC_ADDR_CHANGE) { + esw_update_vport_addr_list(esw, vport->vport, + MLX5_NVPRT_LIST_TYPE_UC); + esw_apply_vport_addr_list(esw, vport->vport, + MLX5_NVPRT_LIST_TYPE_UC); + } - if (!is_valid_ether_addr(mac)) - goto out; - - esw_update_vport_uc_list(dev->priv.eswitch, vport->vport); - esw_apply_vport_uc_list(dev->priv.eswitch, vport->vport); + if (vport->enabled_events & MC_ADDR_CHANGE) { + esw_update_vport_addr_list(esw, vport->vport, + MLX5_NVPRT_LIST_TYPE_MC); + esw_apply_vport_addr_list(esw, vport->vport, + MLX5_NVPRT_LIST_TYPE_MC); + } -out: + esw_debug(esw->dev, "vport[%d] Context Changed: Done\n", vport->vport); if (vport->enabled) arm_vport_context_events_cmd(dev, vport->vport, - UC_ADDR_CHANGE); + vport->enabled_events); } -static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num) +static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, + int enable_events) { struct mlx5_vport *vport = &esw->vports[vport_num]; unsigned long flags; + WARN_ON(vport->enabled); + + esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num); + mlx5_modify_vport_admin_state(esw->dev, + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, + vport_num, + MLX5_ESW_VPORT_ADMIN_STATE_AUTO); + + /* Sync with current vport context */ + vport->enabled_events = enable_events; + esw_vport_change_handler(&vport->vport_change_handler); + spin_lock_irqsave(&vport->lock, flags); vport->enabled = true; spin_unlock_irqrestore(&vport->lock, flags); - arm_vport_context_events_cmd(esw->dev, vport_num, UC_ADDR_CHANGE); + arm_vport_context_events_cmd(esw->dev, vport_num, enable_events); + + esw->enabled_vports++; + esw_debug(esw->dev, "Enabled VPORT(%d)\n", vport_num); +} + +static void esw_cleanup_vport(struct 
mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_vport *vport = &esw->vports[vport_num]; + struct l2addr_node *node; + struct vport_addr *addr; + struct hlist_node *tmp; + int hi; + + for_each_l2hash_node(node, tmp, vport->uc_list, hi) { + addr = container_of(node, struct vport_addr, node); + addr->action = MLX5_ACTION_DEL; + } + esw_apply_vport_addr_list(esw, vport_num, MLX5_NVPRT_LIST_TYPE_UC); + + for_each_l2hash_node(node, tmp, vport->mc_list, hi) { + addr = container_of(node, struct vport_addr, node); + addr->action = MLX5_ACTION_DEL; + } + esw_apply_vport_addr_list(esw, vport_num, MLX5_NVPRT_LIST_TYPE_MC); } static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) @@ -381,19 +841,82 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) if (!vport->enabled) return; + esw_debug(esw->dev, "Disabling vport(%d)\n", vport_num); /* Mark this vport as disabled to discard new events */ spin_lock_irqsave(&vport->lock, flags); vport->enabled = false; + vport->enabled_events = 0; spin_unlock_irqrestore(&vport->lock, flags); + mlx5_modify_vport_admin_state(esw->dev, + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, + vport_num, + MLX5_ESW_VPORT_ADMIN_STATE_DOWN); /* Wait for current already scheduled events to complete */ flush_workqueue(esw->work_queue); - /* Disable events from this vport */ arm_vport_context_events_cmd(esw->dev, vport->vport, 0); + /* We don't assume VFs will cleanup after themselves */ + esw_cleanup_vport(esw, vport_num); + esw->enabled_vports--; } /* Public E-Switch API */ +int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs) +{ + int err; + int i; + + if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || + MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return 0; + + if (!MLX5_CAP_GEN(esw->dev, eswitch_flow_table) || + !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) { + esw_warn(esw->dev, "E-Switch FDB is not supported, aborting ...\n"); + return -ENOTSUPP; + } + + esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d)\n", nvfs); + + esw_disable_vport(esw, 0); + + err = esw_create_fdb_table(esw, nvfs + 1); + if (err) + goto abort; + + for (i = 0; i <= nvfs; i++) + esw_enable_vport(esw, i, SRIOV_VPORT_EVENTS); + + esw_info(esw->dev, "SRIOV enabled: active vports(%d)\n", + esw->enabled_vports); + return 0; + +abort: + esw_enable_vport(esw, 0, UC_ADDR_CHANGE); + return err; +} + +void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) +{ + int i; + + if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || + MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return; + + esw_info(esw->dev, "disable SRIOV: active vports(%d)\n", + esw->enabled_vports); + + for (i = 0; i < esw->total_vports; i++) + esw_disable_vport(esw, i); + + esw_destroy_fdb_table(esw); + + /* VPORT 0 (PF) must be enabled back with non-sriov configuration */ + esw_enable_vport(esw, 0, UC_ADDR_CHANGE); +} + int mlx5_eswitch_init(struct mlx5_core_dev *dev) { int l2_table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table); @@ -439,7 +962,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) goto abort; } - esw->total_vports = total_vports; for (vport_num = 0; vport_num < total_vports; vport_num++) { struct mlx5_vport *vport = &esw->vports[vport_num]; @@ -450,9 +972,11 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) spin_lock_init(&vport->lock); } - dev->priv.eswitch = esw; + esw->total_vports = total_vports; + esw->enabled_vports = 0; - esw_enable_vport(esw, 0); + dev->priv.eswitch = esw; + esw_enable_vport(esw, 0, UC_ADDR_CHANGE); 
/* VF Vports will be enabled when SRIOV is enabled */ return 0; abort: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 0c41f2657824..f222e336f34f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -86,10 +86,25 @@ struct l2addr_node { kfree(ptr); \ }) +struct mlx5_flow_rule { + void *ft; + u32 fi; + u8 match_criteria_enable; + u32 *match_criteria; + u32 *match_value; + u32 action; + u32 flow_tag; + bool valid; + atomic_t refcount; + struct mutex mutex; /* protect flow rule updates */ + struct list_head dest_list; +}; + struct mlx5_vport { struct mlx5_core_dev *dev; int vport; struct hlist_head uc_list[MLX5_L2_ADDR_HASH_SIZE]; + struct hlist_head mc_list[MLX5_L2_ADDR_HASH_SIZE]; struct work_struct vport_change_handler; /* This spinlock protects access to vport data, between @@ -98,6 +113,7 @@ struct mlx5_vport { */ spinlock_t lock; /* vport events sync */ bool enabled; + u16 enabled_events; }; struct mlx5_l2_table { @@ -106,17 +122,26 @@ struct mlx5_l2_table { unsigned long *bitmap; }; +struct mlx5_eswitch_fdb { + void *fdb; +}; + struct mlx5_eswitch { struct mlx5_core_dev *dev; struct mlx5_l2_table l2_table; + struct mlx5_eswitch_fdb fdb_table; + struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE]; struct workqueue_struct *work_queue; struct mlx5_vport *vports; int total_vports; + int enabled_vports; }; /* E-Switch API */ int mlx5_eswitch_init(struct mlx5_core_dev *dev); void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe); +int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs); +void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw); #endif /* __MLX5_ESWITCH_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 1649d5cf9e29..bee7da822dfe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -36,6 +36,7 @@ #include #include #include +#include #define DRIVER_NAME "mlx5_core" #define DRIVER_VERSION "3.0-1" diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index 19a43240e359..7b24386794f9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -33,6 +33,9 @@ #include #include #include "mlx5_core.h" +#ifdef CONFIG_MLX5_CORE_EN +#include "eswitch.h" +#endif static void enable_vfs(struct mlx5_core_dev *dev, int num_vfs) { @@ -144,13 +147,15 @@ int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) mlx5_core_cleanup_vfs(dev); if (!num_vfs) { +#ifdef CONFIG_MLX5_CORE_EN + mlx5_eswitch_disable_sriov(dev->priv.eswitch); +#endif kfree(sriov->vfs_ctx); sriov->vfs_ctx = NULL; if (!pci_vfs_assigned(pdev)) pci_disable_sriov(pdev); else pr_info("unloading PF driver while leaving orphan VFs\n"); - return 0; } @@ -161,6 +166,9 @@ int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) } mlx5_core_init_vfs(dev, num_vfs); +#ifdef CONFIG_MLX5_CORE_EN + mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs); +#endif return num_vfs; } @@ -199,6 +207,10 @@ int mlx5_sriov_init(struct mlx5_core_dev *dev) sriov->enabled_vfs = cur_vfs; mlx5_core_init_vfs(dev, cur_vfs); +#ifdef CONFIG_MLX5_CORE_EN + if (cur_vfs) + mlx5_eswitch_enable_sriov(dev->priv.eswitch, cur_vfs); +#endif enable_vfs(dev, cur_vfs); 
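The FDB behaviour described in the changelog above can be summarized apart from the driver code: unicast DMACs are matched exactly and forwarded to the owning vport, multicast DMACs fan out to their member vports plus the uplink, and anything that misses falls back to the default rules (traffic from a VF goes to the uplink, traffic arriving from the uplink goes to the PF). The small user-space model below is only an illustration of that lookup logic, with vport numbers taken from the changelog example and a table layout invented for the sketch; the real rules live in firmware flow tables and are programmed through mlx5_add_flow_table_entry() as in the eswitch.c hunk above.

/*
 * Stand-alone sketch of the E-Switch FDB forwarding semantics.
 * Illustration only: vport numbers and the table layout are assumptions
 * made for this example, not part of the patch.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define EX_UPLINK_VPORT 0xFFFF            /* mirrors UPLINK_VPORT above */
#define EX_PF_VPORT     0

struct ex_fdb_rule {
	uint8_t  dmac[6];                 /* exact-match destination MAC */
	uint16_t dest[4];                 /* destination vport list */
	int      ndest;
};

/* Exact-match lookup; NULL means the default (miss) rules apply. */
static const struct ex_fdb_rule *ex_fdb_lookup(const struct ex_fdb_rule *tbl,
					       int n, const uint8_t *dmac)
{
	for (int i = 0; i < n; i++)
		if (!memcmp(tbl[i].dmac, dmac, 6))
			return &tbl[i];
	return NULL;
}

int main(void)
{
	/* "UC MAC1 -> vport1" and "MC MACX -> vport0, vport2, Uplink" */
	const struct ex_fdb_rule tbl[] = {
		{ { 0x00, 0x11, 0x22, 0x33, 0x44, 0x01 }, { 1 }, 1 },
		{ { 0x01, 0x00, 0x5e, 0x01, 0x02, 0x03 },
		  { 0, 2, EX_UPLINK_VPORT }, 3 },
	};
	const uint8_t unknown[6] = { 0x00, 0xaa, 0xbb, 0xcc, 0xdd, 0xee };
	uint16_t src_vport = 1;           /* pretend a VF sent the frame */

	const struct ex_fdb_rule *r = ex_fdb_lookup(tbl, 2, unknown);
	if (!r) {
		/* Unmatched traffic: VF -> uplink, uplink -> PF */
		uint16_t dst = (src_vport == EX_UPLINK_VPORT) ?
			       EX_PF_VPORT : EX_UPLINK_VPORT;
		printf("FDB miss: forward to vport 0x%x\n", dst);
	} else {
		printf("FDB hit: %d destination(s)\n", r->ndest);
	}
	return 0;
}

In the kernel patch itself the equivalent decisions are made when esw_add_uc_addr()/esw_add_mc_addr() install rules via esw_fdb_set_vport_rule(), driven by the vport context-change events armed in esw_enable_vport().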
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index bce9caed1eed..88eb4490a8b3 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1074,6 +1074,12 @@ enum { VPORT_STATE_UP = 0x1, }; +enum { + MLX5_ESW_VPORT_ADMIN_STATE_DOWN = 0x0, + MLX5_ESW_VPORT_ADMIN_STATE_UP = 0x1, + MLX5_ESW_VPORT_ADMIN_STATE_AUTO = 0x2, +}; + enum { MLX5_L3_PROT_TYPE_IPV4 = 0, MLX5_L3_PROT_TYPE_IPV6 = 1, diff --git a/include/linux/mlx5/flow_table.h b/include/linux/mlx5/flow_table.h index 5f922c6d4fc2..0f2a15cf3317 100644 --- a/include/linux/mlx5/flow_table.h +++ b/include/linux/mlx5/flow_table.h @@ -41,6 +41,15 @@ struct mlx5_flow_table_group { u32 match_criteria[MLX5_ST_SZ_DW(fte_match_param)]; }; +struct mlx5_flow_destination { + enum mlx5_flow_destination_type type; + union { + u32 tir_num; + void *ft; + u32 vport_num; + }; +}; + void *mlx5_create_flow_table(struct mlx5_core_dev *dev, u8 level, u8 table_type, u16 num_groups, struct mlx5_flow_table_group *group); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index ae7c08adba4a..a81b008738fd 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -827,9 +827,10 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_69[0x220]; }; -enum { - MLX5_DEST_FORMAT_STRUCT_DESTINATION_TYPE_FLOW_TABLE_ = 0x1, - MLX5_DEST_FORMAT_STRUCT_DESTINATION_TYPE_TIR = 0x2, +enum mlx5_flow_destination_type { + MLX5_FLOW_DESTINATION_TYPE_VPORT = 0x0, + MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE = 0x1, + MLX5_FLOW_DESTINATION_TYPE_TIR = 0x2, }; struct mlx5_ifc_dest_format_struct_bits { -- cgit v1.2.3-71-gd317 From d6666753c6e85834f1669c7b831cc2b7fc9e4390 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 1 Dec 2015 18:03:22 +0200 Subject: net/mlx5: E-Switch, Introduce HCA cap and E-Switch vport context E-Switch vport context is unlike NIC vport context, managed by the E-Switch manager or vport_group_manager and not by the NIC(VF) driver. The E-Switch manager can access (read/modify) any of its vports E-Switch context. Currently E-Switch vport context includes only clietnt and server vlan insertion and striping data (for later support of VST mode). Signed-off-by: Saeed Mahameed Signed-off-by: Or Gerlitz Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/fw.c | 11 ++++ include/linux/mlx5/device.h | 9 +++ include/linux/mlx5/mlx5_ifc.h | 90 ++++++++++++++++++++++++++++ 3 files changed, 110 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index bf6e3dfcef51..1c9f9a54a873 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -173,6 +173,17 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) return err; } + if (MLX5_CAP_GEN(dev, vport_group_manager)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH, + HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH, + HCA_CAP_OPMOD_GET_MAX); + if (err) + return err; + } + return 0; } diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 88eb4490a8b3..7d3a85faefb7 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1145,6 +1145,7 @@ enum mlx5_cap_type { MLX5_CAP_EOIB_OFFLOADS, MLX5_CAP_FLOW_TABLE, MLX5_CAP_ESWITCH_FLOW_TABLE, + MLX5_CAP_ESWITCH, /* NUM OF CAP Types */ MLX5_CAP_NUM }; @@ -1196,6 +1197,14 @@ enum mlx5_cap_type { #define MLX5_CAP_ESW_FLOWTABLE_FDB_MAX(mdev, cap) \ MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, flow_table_properties_nic_esw_fdb.cap) +#define MLX5_CAP_ESW(mdev, cap) \ + MLX5_GET(e_switch_cap, \ + mdev->hca_caps_cur[MLX5_CAP_ESWITCH], cap) + +#define MLX5_CAP_ESW_MAX(mdev, cap) \ + MLX5_GET(e_switch_cap, \ + mdev->hca_caps_max[MLX5_CAP_ESWITCH], cap) + #define MLX5_CAP_ODP(mdev, cap)\ MLX5_GET(odp_cap, mdev->hca_caps_cur[MLX5_CAP_ODP], cap) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index a81b008738fd..f5d94495758a 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -459,6 +459,17 @@ struct mlx5_ifc_flow_table_eswitch_cap_bits { u8 reserved_1[0x7800]; }; +struct mlx5_ifc_e_switch_cap_bits { + u8 vport_svlan_strip[0x1]; + u8 vport_cvlan_strip[0x1]; + u8 vport_svlan_insert[0x1]; + u8 vport_cvlan_insert_if_not_exist[0x1]; + u8 vport_cvlan_insert_overwrite[0x1]; + u8 reserved_0[0x1b]; + + u8 reserved_1[0x7e0]; +}; + struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 csum_cap[0x1]; u8 vlan_cap[0x1]; @@ -1860,6 +1871,7 @@ union mlx5_ifc_hca_cap_union_bits { struct mlx5_ifc_per_protocol_networking_offload_caps_bits per_protocol_networking_offload_caps; struct mlx5_ifc_flow_table_nic_cap_bits flow_table_nic_cap; struct mlx5_ifc_flow_table_eswitch_cap_bits flow_table_eswitch_cap; + struct mlx5_ifc_e_switch_cap_bits e_switch_cap; u8 reserved_0[0x8000]; }; @@ -2305,6 +2317,26 @@ struct mlx5_ifc_hca_vport_context_bits { u8 reserved_6[0xca0]; }; +struct mlx5_ifc_esw_vport_context_bits { + u8 reserved_0[0x3]; + u8 vport_svlan_strip[0x1]; + u8 vport_cvlan_strip[0x1]; + u8 vport_svlan_insert[0x1]; + u8 vport_cvlan_insert[0x2]; + u8 reserved_1[0x18]; + + u8 reserved_2[0x20]; + + u8 svlan_cfi[0x1]; + u8 svlan_pcp[0x3]; + u8 svlan_id[0xc]; + u8 cvlan_cfi[0x1]; + u8 cvlan_pcp[0x3]; + u8 cvlan_id[0xc]; + + u8 reserved_3[0x7a0]; +}; + enum { MLX5_EQC_STATUS_OK = 0x0, MLX5_EQC_STATUS_EQ_WRITE_FAILURE = 0xa, @@ -3743,6 +3775,64 @@ struct mlx5_ifc_query_flow_group_in_bits { u8 reserved_5[0x120]; }; +struct mlx5_ifc_query_esw_vport_context_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + struct mlx5_ifc_esw_vport_context_bits esw_vport_context; +}; + +struct 
mlx5_ifc_query_esw_vport_context_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 other_vport[0x1]; + u8 reserved_2[0xf]; + u8 vport_number[0x10]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_modify_esw_vport_context_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_esw_vport_context_fields_select_bits { + u8 reserved[0x1c]; + u8 vport_cvlan_insert[0x1]; + u8 vport_svlan_insert[0x1]; + u8 vport_cvlan_strip[0x1]; + u8 vport_svlan_strip[0x1]; +}; + +struct mlx5_ifc_modify_esw_vport_context_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 other_vport[0x1]; + u8 reserved_2[0xf]; + u8 vport_number[0x10]; + + struct mlx5_ifc_esw_vport_context_fields_select_bits field_select; + + struct mlx5_ifc_esw_vport_context_bits esw_vport_context; +}; + struct mlx5_ifc_query_eq_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; -- cgit v1.2.3-71-gd317 From 2d1e0254ef8310e4f0756130a7ffc007ad1d58df Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 1 Dec 2015 14:55:21 +0000 Subject: pci_ids: add Netronome Systems vendor Add PCI vendor id for Netronome Systems. Signed-off-by: Jakub Kicinski Signed-off-by: Rolf Neugebauer Signed-off-by: David S. Miller --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index d9ba49cedc5d..1acbefc4bbda 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2495,6 +2495,8 @@ #define PCI_DEVICE_ID_KORENIX_JETCARDF2 0x1700 #define PCI_DEVICE_ID_KORENIX_JETCARDF3 0x17ff +#define PCI_VENDOR_ID_NETRONOME 0x19ee + #define PCI_VENDOR_ID_QMI 0x1a32 #define PCI_VENDOR_ID_AZWAVE 0x1a3b -- cgit v1.2.3-71-gd317 From 80a19e338d458abb5a700df3fd00795c51361f06 Mon Sep 17 00:00:00 2001 From: Asias He Date: Wed, 2 Dec 2015 14:44:00 +0800 Subject: VSOCK: Introduce virtio-vsock-common.ko This module contains the common code and header files for the following virtio-vsock and virtio-vhost kernel modules. Signed-off-by: Asias He Signed-off-by: Stefan Hajnoczi Signed-off-by: David S. Miller --- include/linux/virtio_vsock.h | 209 +++++ include/uapi/linux/virtio_ids.h | 1 + include/uapi/linux/virtio_vsock.h | 89 +++ net/vmw_vsock/virtio_transport_common.c | 1272 +++++++++++++++++++++++++++++++ 4 files changed, 1571 insertions(+) create mode 100644 include/linux/virtio_vsock.h create mode 100644 include/uapi/linux/virtio_vsock.h create mode 100644 net/vmw_vsock/virtio_transport_common.c (limited to 'include/linux') diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h new file mode 100644 index 000000000000..a5f3ecc038f7 --- /dev/null +++ b/include/linux/virtio_vsock.h @@ -0,0 +1,209 @@ +/* + * This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so + * anyone can use the definitions to implement compatible drivers/servers: + * + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Copyright (C) Red Hat, Inc., 2013-2015 + * Copyright (C) Asias He , 2013 + * Copyright (C) Stefan Hajnoczi , 2015 + */ + +#ifndef _LINUX_VIRTIO_VSOCK_H +#define _LINUX_VIRTIO_VSOCK_H + +#include +#include +#include + +#define VIRTIO_VSOCK_DEFAULT_MIN_BUF_SIZE 128 +#define VIRTIO_VSOCK_DEFAULT_BUF_SIZE (1024 * 256) +#define VIRTIO_VSOCK_DEFAULT_MAX_BUF_SIZE (1024 * 256) +#define VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE (1024 * 4) +#define VIRTIO_VSOCK_MAX_BUF_SIZE 0xFFFFFFFFUL +#define VIRTIO_VSOCK_MAX_PKT_BUF_SIZE (1024 * 64) +#define VIRTIO_VSOCK_MAX_TX_BUF_SIZE (1024 * 1024 * 16) +#define VIRTIO_VSOCK_MAX_DGRAM_SIZE (1024 * 64) + +struct vsock_transport_recv_notify_data; +struct vsock_transport_send_notify_data; +struct sockaddr_vm; +struct vsock_sock; + +enum { + VSOCK_VQ_CTRL = 0, + VSOCK_VQ_RX = 1, /* for host to guest data */ + VSOCK_VQ_TX = 2, /* for guest to host data */ + VSOCK_VQ_MAX = 3, +}; + +/* virtio transport socket state */ +struct virtio_transport { + struct virtio_transport_pkt_ops *ops; + struct vsock_sock *vsk; + + u32 buf_size; + u32 buf_size_min; + u32 buf_size_max; + + struct mutex tx_lock; + struct mutex rx_lock; + + struct list_head rx_queue; + u32 rx_bytes; + + /* Protected by trans->tx_lock */ + u32 tx_cnt; + u32 buf_alloc; + u32 peer_fwd_cnt; + u32 peer_buf_alloc; + /* Protected by trans->rx_lock */ + u32 fwd_cnt; + + /* Protected by sk_lock */ + u16 dgram_id; + struct list_head incomplete_dgrams; /* dgram fragments */ +}; + +struct virtio_vsock_pkt { + struct virtio_vsock_hdr hdr; + struct virtio_transport *trans; + struct work_struct work; + struct list_head list; + void *buf; + u32 len; + u32 off; +}; + +struct virtio_vsock_pkt_info { + u32 remote_cid, remote_port; + struct msghdr *msg; + u32 pkt_len; + u16 type; + u16 op; + u32 flags; + u16 dgram_id; + u16 dgram_len; +}; + +struct virtio_transport_pkt_ops { + int (*send_pkt)(struct vsock_sock *vsk, + struct virtio_vsock_pkt_info *info); +}; + +void virtio_vsock_dumppkt(const char *func, + const struct virtio_vsock_pkt *pkt); + +struct sock * +virtio_transport_get_pending(struct sock *listener, + struct virtio_vsock_pkt *pkt); +struct virtio_vsock_pkt * +virtio_transport_alloc_pkt(struct vsock_sock *vsk, + struct virtio_vsock_pkt_info *info, + size_t len, + u32 src_cid, + u32 src_port, + u32 dst_cid, + u32 dst_port); +ssize_t +virtio_transport_stream_dequeue(struct vsock_sock *vsk, + struct msghdr *msg, + size_t len, + int type); +int +virtio_transport_dgram_dequeue(struct vsock_sock *vsk, + struct msghdr *msg, + size_t len, int flags); + +s64 
virtio_transport_stream_has_data(struct vsock_sock *vsk); +s64 virtio_transport_stream_has_space(struct vsock_sock *vsk); + +int virtio_transport_do_socket_init(struct vsock_sock *vsk, + struct vsock_sock *psk); +u64 virtio_transport_get_buffer_size(struct vsock_sock *vsk); +u64 virtio_transport_get_min_buffer_size(struct vsock_sock *vsk); +u64 virtio_transport_get_max_buffer_size(struct vsock_sock *vsk); +void virtio_transport_set_buffer_size(struct vsock_sock *vsk, u64 val); +void virtio_transport_set_min_buffer_size(struct vsock_sock *vsk, u64 val); +void virtio_transport_set_max_buffer_size(struct vsock_sock *vs, u64 val); +int +virtio_transport_notify_poll_in(struct vsock_sock *vsk, + size_t target, + bool *data_ready_now); +int +virtio_transport_notify_poll_out(struct vsock_sock *vsk, + size_t target, + bool *space_available_now); + +int virtio_transport_notify_recv_init(struct vsock_sock *vsk, + size_t target, struct vsock_transport_recv_notify_data *data); +int virtio_transport_notify_recv_pre_block(struct vsock_sock *vsk, + size_t target, struct vsock_transport_recv_notify_data *data); +int virtio_transport_notify_recv_pre_dequeue(struct vsock_sock *vsk, + size_t target, struct vsock_transport_recv_notify_data *data); +int virtio_transport_notify_recv_post_dequeue(struct vsock_sock *vsk, + size_t target, ssize_t copied, bool data_read, + struct vsock_transport_recv_notify_data *data); +int virtio_transport_notify_send_init(struct vsock_sock *vsk, + struct vsock_transport_send_notify_data *data); +int virtio_transport_notify_send_pre_block(struct vsock_sock *vsk, + struct vsock_transport_send_notify_data *data); +int virtio_transport_notify_send_pre_enqueue(struct vsock_sock *vsk, + struct vsock_transport_send_notify_data *data); +int virtio_transport_notify_send_post_enqueue(struct vsock_sock *vsk, + ssize_t written, struct vsock_transport_send_notify_data *data); + +u64 virtio_transport_stream_rcvhiwat(struct vsock_sock *vsk); +bool virtio_transport_stream_is_active(struct vsock_sock *vsk); +bool virtio_transport_stream_allow(u32 cid, u32 port); +int virtio_transport_dgram_bind(struct vsock_sock *vsk, + struct sockaddr_vm *addr); +bool virtio_transport_dgram_allow(u32 cid, u32 port); + +int virtio_transport_connect(struct vsock_sock *vsk); + +int virtio_transport_shutdown(struct vsock_sock *vsk, int mode); + +void virtio_transport_release(struct vsock_sock *vsk); + +ssize_t +virtio_transport_stream_enqueue(struct vsock_sock *vsk, + struct msghdr *msg, + size_t len); +int +virtio_transport_dgram_enqueue(struct vsock_sock *vsk, + struct sockaddr_vm *remote_addr, + struct msghdr *msg, + size_t len); + +void virtio_transport_destruct(struct vsock_sock *vsk); + +void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt); +void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt); +void virtio_transport_inc_tx_pkt(struct virtio_vsock_pkt *pkt); +void virtio_transport_dec_tx_pkt(struct virtio_vsock_pkt *pkt); +u32 virtio_transport_get_credit(struct virtio_transport *trans, u32 wanted); +void virtio_transport_put_credit(struct virtio_transport *trans, u32 credit); +#endif /* _LINUX_VIRTIO_VSOCK_H */ diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h index 77925f587b15..16dcf5d06cd7 100644 --- a/include/uapi/linux/virtio_ids.h +++ b/include/uapi/linux/virtio_ids.h @@ -39,6 +39,7 @@ #define VIRTIO_ID_9P 9 /* 9p virtio console */ #define VIRTIO_ID_RPROC_SERIAL 11 /* virtio remoteproc serial link */ #define VIRTIO_ID_CAIF 12 /* Virtio caif */ +#define 
VIRTIO_ID_VSOCK 13 /* virtio vsock transport */ #define VIRTIO_ID_GPU 16 /* virtio GPU */ #define VIRTIO_ID_INPUT 18 /* virtio input */ diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h new file mode 100644 index 000000000000..8cf9b5682628 --- /dev/null +++ b/include/uapi/linux/virtio_vsock.h @@ -0,0 +1,89 @@ +/* + * This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so + * anyone can use the definitions to implement compatible drivers/servers: + * + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * Copyright (C) Red Hat, Inc., 2013-2015 + * Copyright (C) Asias He , 2013 + * Copyright (C) Stefan Hajnoczi , 2015 + */ + +#ifndef _UAPI_LINUX_VIRTIO_VSOCK_H +#define _UAPI_LINUX_VIRTIO_VOSCK_H + +#include +#include +#include + +struct virtio_vsock_config { + __le32 guest_cid; + __le32 max_virtqueue_pairs; +}; + +struct virtio_vsock_hdr { + __le32 src_cid; + __le32 src_port; + __le32 dst_cid; + __le32 dst_port; + __le32 len; + __le16 type; /* enum virtio_vsock_type */ + __le16 op; /* enum virtio_vsock_op */ + __le32 flags; + __le32 buf_alloc; + __le32 fwd_cnt; +}; + +enum virtio_vsock_type { + VIRTIO_VSOCK_TYPE_STREAM = 1, + VIRTIO_VSOCK_TYPE_DGRAM = 2, +}; + +enum virtio_vsock_op { + VIRTIO_VSOCK_OP_INVALID = 0, + + /* Connect operations */ + VIRTIO_VSOCK_OP_REQUEST = 1, + VIRTIO_VSOCK_OP_RESPONSE = 2, + VIRTIO_VSOCK_OP_ACK = 3, + VIRTIO_VSOCK_OP_RST = 4, + VIRTIO_VSOCK_OP_SHUTDOWN = 5, + + /* To send payload */ + VIRTIO_VSOCK_OP_RW = 6, + + /* Tell the peer our credit info */ + VIRTIO_VSOCK_OP_CREDIT_UPDATE = 7, + /* Request the peer to send the credit info to us */ + VIRTIO_VSOCK_OP_CREDIT_REQUEST = 8, +}; + +/* VIRTIO_VSOCK_OP_SHUTDOWN flags values */ +enum virtio_vsock_shutdown { + VIRTIO_VSOCK_SHUTDOWN_RCV = 1, + VIRTIO_VSOCK_SHUTDOWN_SEND = 2, +}; + +#endif /* _UAPI_LINUX_VIRTIO_VSOCK_H */ diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c new file mode 100644 index 000000000000..28f790da6f15 --- /dev/null +++ b/net/vmw_vsock/virtio_transport_common.c @@ -0,0 +1,1272 @@ +/* + * common code for virtio vsock + * + * Copyright (C) 2013-2015 Red Hat, Inc. + * Author: Asias He + * Stefan Hajnoczi + * + * This work is licensed under the terms of the GNU GPL, version 2. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define COOKIEBITS 24 +#define COOKIEMASK (((u32)1 << COOKIEBITS) - 1) +#define VSOCK_TIMEOUT_INIT 4 + +#define SHA_MESSAGE_WORDS 16 +#define SHA_VSOCK_WORDS 5 + +static u32 vsockcookie_secret[2][SHA_MESSAGE_WORDS - SHA_VSOCK_WORDS + + SHA_DIGEST_WORDS]; + +static DEFINE_PER_CPU(__u32[SHA_MESSAGE_WORDS + SHA_DIGEST_WORDS + + SHA_WORKSPACE_WORDS], vsock_cookie_scratch); + +static u32 cookie_hash(u32 saddr, u32 daddr, u16 sport, u16 dport, + u32 count, int c) +{ + __u32 *tmp = this_cpu_ptr(vsock_cookie_scratch); + + memcpy(tmp + SHA_VSOCK_WORDS, vsockcookie_secret[c], + sizeof(vsockcookie_secret[c])); + tmp[0] = saddr; + tmp[1] = daddr; + tmp[2] = sport; + tmp[3] = dport; + tmp[4] = count; + sha_transform(tmp + SHA_MESSAGE_WORDS, (__u8 *)tmp, + tmp + SHA_MESSAGE_WORDS + SHA_DIGEST_WORDS); + + return tmp[17]; +} + +static u32 +virtio_vsock_secure_cookie(u32 saddr, u32 daddr, u32 sport, u32 dport, + u32 count) +{ + u32 h1, h2; + + h1 = cookie_hash(saddr, daddr, sport, dport, 0, 0); + h2 = cookie_hash(saddr, daddr, sport, dport, count, 1); + + return h1 + (count << COOKIEBITS) + (h2 & COOKIEMASK); +} + +static u32 +virtio_vsock_check_cookie(u32 saddr, u32 daddr, u32 sport, u32 dport, + u32 count, u32 cookie, u32 maxdiff) +{ + u32 diff; + u32 ret; + + cookie -= cookie_hash(saddr, daddr, sport, dport, 0, 0); + + diff = (count - (cookie >> COOKIEBITS)) & ((u32)-1 >> COOKIEBITS); + pr_debug("%s: diff=%x\n", __func__, diff); + if (diff >= maxdiff) + return (u32)-1; + + ret = (cookie - + cookie_hash(saddr, daddr, sport, dport, count - diff, 1)) + & COOKIEMASK; + pr_debug("%s: ret=%x\n", __func__, diff); + + return ret; +} + +void virtio_vsock_dumppkt(const char 
*func, const struct virtio_vsock_pkt *pkt) +{ + pr_debug("%s: pkt=%p, op=%d, len=%d, %d:%d---%d:%d, len=%d\n", + func, pkt, + le16_to_cpu(pkt->hdr.op), + le32_to_cpu(pkt->hdr.len), + le32_to_cpu(pkt->hdr.src_cid), + le32_to_cpu(pkt->hdr.src_port), + le32_to_cpu(pkt->hdr.dst_cid), + le32_to_cpu(pkt->hdr.dst_port), + pkt->len); +} +EXPORT_SYMBOL_GPL(virtio_vsock_dumppkt); + +struct virtio_vsock_pkt * +virtio_transport_alloc_pkt(struct vsock_sock *vsk, + struct virtio_vsock_pkt_info *info, + size_t len, + u32 src_cid, + u32 src_port, + u32 dst_cid, + u32 dst_port) +{ + struct virtio_transport *trans = vsk->trans; + struct virtio_vsock_pkt *pkt; + int err; + + BUG_ON(!trans); + + pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); + if (!pkt) + return NULL; + + pkt->hdr.type = cpu_to_le16(info->type); + pkt->hdr.op = cpu_to_le16(info->op); + pkt->hdr.src_cid = cpu_to_le32(src_cid); + pkt->hdr.src_port = cpu_to_le32(src_port); + pkt->hdr.dst_cid = cpu_to_le32(dst_cid); + pkt->hdr.dst_port = cpu_to_le32(dst_port); + pkt->hdr.flags = cpu_to_le32(info->flags); + pkt->len = len; + pkt->trans = trans; + if (info->type == VIRTIO_VSOCK_TYPE_DGRAM) + pkt->hdr.len = cpu_to_le32(len + (info->dgram_len << 16)); + else if (info->type == VIRTIO_VSOCK_TYPE_STREAM) + pkt->hdr.len = cpu_to_le32(len); + + if (info->msg && len > 0) { + pkt->buf = kmalloc(len, GFP_KERNEL); + if (!pkt->buf) + goto out_pkt; + err = memcpy_from_msg(pkt->buf, info->msg, len); + if (err) + goto out; + } + + return pkt; + +out: + kfree(pkt->buf); +out_pkt: + kfree(pkt); + return NULL; +} +EXPORT_SYMBOL_GPL(virtio_transport_alloc_pkt); + +struct sock * +virtio_transport_get_pending(struct sock *listener, + struct virtio_vsock_pkt *pkt) +{ + struct vsock_sock *vlistener; + struct vsock_sock *vpending; + struct sockaddr_vm src; + struct sockaddr_vm dst; + struct sock *pending; + + vsock_addr_init(&src, le32_to_cpu(pkt->hdr.src_cid), le32_to_cpu(pkt->hdr.src_port)); + vsock_addr_init(&dst, le32_to_cpu(pkt->hdr.dst_cid), le32_to_cpu(pkt->hdr.dst_port)); + + vlistener = vsock_sk(listener); + list_for_each_entry(vpending, &vlistener->pending_links, + pending_links) { + if (vsock_addr_equals_addr(&src, &vpending->remote_addr) && + vsock_addr_equals_addr(&dst, &vpending->local_addr)) { + pending = sk_vsock(vpending); + sock_hold(pending); + return pending; + } + } + + return NULL; +} +EXPORT_SYMBOL_GPL(virtio_transport_get_pending); + +static void virtio_transport_inc_rx_pkt(struct virtio_vsock_pkt *pkt) +{ + pkt->trans->rx_bytes += pkt->len; +} + +static void virtio_transport_dec_rx_pkt(struct virtio_vsock_pkt *pkt) +{ + pkt->trans->rx_bytes -= pkt->len; + pkt->trans->fwd_cnt += pkt->len; +} + +void virtio_transport_inc_tx_pkt(struct virtio_vsock_pkt *pkt) +{ + mutex_lock(&pkt->trans->tx_lock); + pkt->hdr.fwd_cnt = cpu_to_le32(pkt->trans->fwd_cnt); + pkt->hdr.buf_alloc = cpu_to_le32(pkt->trans->buf_alloc); + mutex_unlock(&pkt->trans->tx_lock); +} +EXPORT_SYMBOL_GPL(virtio_transport_inc_tx_pkt); + +void virtio_transport_dec_tx_pkt(struct virtio_vsock_pkt *pkt) +{ +} +EXPORT_SYMBOL_GPL(virtio_transport_dec_tx_pkt); + +u32 virtio_transport_get_credit(struct virtio_transport *trans, u32 credit) +{ + u32 ret; + + mutex_lock(&trans->tx_lock); + ret = trans->peer_buf_alloc - (trans->tx_cnt - trans->peer_fwd_cnt); + if (ret > credit) + ret = credit; + trans->tx_cnt += ret; + mutex_unlock(&trans->tx_lock); + + pr_debug("%s: ret=%d, buf_alloc=%d, peer_buf_alloc=%d," + "tx_cnt=%d, fwd_cnt=%d, peer_fwd_cnt=%d\n", __func__, + ret, trans->buf_alloc, 
trans->peer_buf_alloc, + trans->tx_cnt, trans->fwd_cnt, trans->peer_fwd_cnt); + + return ret; +} +EXPORT_SYMBOL_GPL(virtio_transport_get_credit); + +void virtio_transport_put_credit(struct virtio_transport *trans, u32 credit) +{ + mutex_lock(&trans->tx_lock); + trans->tx_cnt -= credit; + mutex_unlock(&trans->tx_lock); +} +EXPORT_SYMBOL_GPL(virtio_transport_put_credit); + +static int virtio_transport_send_credit_update(struct vsock_sock *vsk, int type, struct virtio_vsock_hdr *hdr) +{ + struct virtio_transport *trans = vsk->trans; + struct virtio_vsock_pkt_info info = { + .op = VIRTIO_VSOCK_OP_CREDIT_UPDATE, + .type = type, + }; + + if (hdr && type == VIRTIO_VSOCK_TYPE_DGRAM) { + info.remote_cid = le32_to_cpu(hdr->src_cid); + info.remote_port = le32_to_cpu(hdr->src_port); + } + + pr_debug("%s: sk=%p send_credit_update\n", __func__, vsk); + return trans->ops->send_pkt(vsk, &info); +} + +static int virtio_transport_send_credit_request(struct vsock_sock *vsk, int type) +{ + struct virtio_transport *trans = vsk->trans; + struct virtio_vsock_pkt_info info = { + .op = VIRTIO_VSOCK_OP_CREDIT_REQUEST, + .type = type, + }; + + pr_debug("%s: sk=%p send_credit_request\n", __func__, vsk); + return trans->ops->send_pkt(vsk, &info); +} + +static ssize_t +virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, + struct msghdr *msg, + size_t len) +{ + struct virtio_transport *trans = vsk->trans; + struct virtio_vsock_pkt *pkt; + size_t bytes, total = 0; + int err = -EFAULT; + + mutex_lock(&trans->rx_lock); + while (total < len && trans->rx_bytes > 0 && + !list_empty(&trans->rx_queue)) { + pkt = list_first_entry(&trans->rx_queue, + struct virtio_vsock_pkt, list); + + bytes = len - total; + if (bytes > pkt->len - pkt->off) + bytes = pkt->len - pkt->off; + + err = memcpy_to_msg(msg, pkt->buf + pkt->off, bytes); + if (err) + goto out; + total += bytes; + pkt->off += bytes; + if (pkt->off == pkt->len) { + virtio_transport_dec_rx_pkt(pkt); + list_del(&pkt->list); + virtio_transport_free_pkt(pkt); + } + } + mutex_unlock(&trans->rx_lock); + + /* Send a credit pkt to peer */ + virtio_transport_send_credit_update(vsk, VIRTIO_VSOCK_TYPE_STREAM, + NULL); + + return total; + +out: + mutex_unlock(&trans->rx_lock); + if (total) + err = total; + return err; +} + +ssize_t +virtio_transport_stream_dequeue(struct vsock_sock *vsk, + struct msghdr *msg, + size_t len, int flags) +{ + if (flags & MSG_PEEK) + return -EOPNOTSUPP; + + return virtio_transport_stream_do_dequeue(vsk, msg, len); +} +EXPORT_SYMBOL_GPL(virtio_transport_stream_dequeue); + +struct dgram_skb { + struct list_head list; + struct sk_buff *skb; + u16 id; +}; + +static struct dgram_skb *dgram_id_to_skb(struct virtio_transport *trans, + u16 id) +{ + struct dgram_skb *dgram_skb; + + list_for_each_entry(dgram_skb, &trans->incomplete_dgrams, list) { + if (dgram_skb->id == id) + return dgram_skb; + } + + return NULL; +} + +static void +virtio_transport_recv_dgram(struct sock *sk, + struct virtio_vsock_pkt *pkt) +{ + struct sk_buff *skb = NULL; + struct vsock_sock *vsk; + struct virtio_transport *trans; + size_t size; + u16 dgram_id, pkt_off, dgram_len, pkt_len; + u32 flags, len; + struct dgram_skb *dgram_skb; + + vsk = vsock_sk(sk); + trans = vsk->trans; + + /* len: dgram_len | pkt_len */ + len = le32_to_cpu(pkt->hdr.len); + dgram_len = len >> 16; + pkt_len = len & 0xFFFF; + + /* flags: dgram_id | pkt_off */ + flags = le32_to_cpu(pkt->hdr.flags); + dgram_id = flags >> 16; + pkt_off = flags & 0xFFFF; + + pr_debug("%s: dgram_len=%d, pkt_len=%d, id=%d, off=%d\n", 
__func__, + dgram_len, pkt_len, dgram_id, pkt_off); + + dgram_skb = dgram_id_to_skb(trans, dgram_id); + if (dgram_skb) { + /* This pkt is for a existing dgram */ + skb = dgram_skb->skb; + pr_debug("%s:found skb\n", __func__); + } + + /* Packet payload must be within datagram bounds */ + if (pkt_len > VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE) + goto drop; + if (pkt_len > dgram_len) + goto drop; + if (pkt_off > dgram_len) + goto drop; + if (dgram_len - pkt_off < pkt_len) + goto drop; + + if (!skb) { + /* This pkt is for a new dgram */ + pr_debug("%s:create skb\n", __func__); + + size = sizeof(pkt->hdr) + dgram_len; + /* Attach the packet to the socket's receive queue as an sk_buff. */ + dgram_skb = kzalloc(sizeof(struct dgram_skb), GFP_ATOMIC); + if (!dgram_skb) + goto drop; + + skb = alloc_skb(size, GFP_ATOMIC); + if (!skb) { + kfree(dgram_skb); + dgram_skb = NULL; + goto drop; + } + dgram_skb->id = dgram_id; + dgram_skb->skb = skb; + list_add_tail(&dgram_skb->list, &trans->incomplete_dgrams); + + /* sk_receive_skb() will do a sock_put(), so hold here. */ + sock_hold(sk); + skb_put(skb, size); + memcpy(skb->data, &pkt->hdr, sizeof(pkt->hdr)); + } + + memcpy(skb->data + sizeof(pkt->hdr) + pkt_off, pkt->buf, pkt_len); + + pr_debug("%s:C, off=%d, pkt_len=%d, dgram_len=%d\n", __func__, + pkt_off, pkt_len, dgram_len); + + /* We are done with this dgram */ + if (pkt_off + pkt_len == dgram_len) { + pr_debug("%s:dgram_id=%d is done\n", __func__, dgram_id); + list_del(&dgram_skb->list); + kfree(dgram_skb); + sk_receive_skb(sk, skb, 0); + } + virtio_transport_free_pkt(pkt); + return; + +drop: + if (dgram_skb) { + list_del(&dgram_skb->list); + kfree(dgram_skb); + kfree_skb(skb); + sock_put(sk); + } + virtio_transport_free_pkt(pkt); +} + +int +virtio_transport_dgram_dequeue(struct vsock_sock *vsk, + struct msghdr *msg, + size_t len, int flags) +{ + struct virtio_vsock_hdr *hdr; + struct sk_buff *skb; + int noblock; + int err; + int dgram_len; + + noblock = flags & MSG_DONTWAIT; + + if (flags & MSG_OOB || flags & MSG_ERRQUEUE) + return -EOPNOTSUPP; + + /* Retrieve the head sk_buff from the socket's receive queue. */ + err = 0; + skb = skb_recv_datagram(&vsk->sk, flags, noblock, &err); + if (err) + return err; + if (!skb) + return -EAGAIN; + + hdr = (struct virtio_vsock_hdr *)skb->data; + if (!hdr) + goto out; + + dgram_len = le32_to_cpu(hdr->len) >> 16; + /* Place the datagram payload in the user's iovec. */ + err = skb_copy_datagram_msg(skb, sizeof(*hdr), msg, dgram_len); + if (err) + goto out; + + if (msg->msg_name) { + /* Provide the address of the sender. 
*/ + DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr, msg->msg_name); + vsock_addr_init(vm_addr, le32_to_cpu(hdr->src_cid), le32_to_cpu(hdr->src_port)); + msg->msg_namelen = sizeof(*vm_addr); + } + err = dgram_len; + + /* Send a credit pkt to peer */ + virtio_transport_send_credit_update(vsk, VIRTIO_VSOCK_TYPE_DGRAM, hdr); + + pr_debug("%s:done, recved =%d\n", __func__, dgram_len); +out: + skb_free_datagram(&vsk->sk, skb); + return err; +} +EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue); + +s64 virtio_transport_stream_has_data(struct vsock_sock *vsk) +{ + struct virtio_transport *trans = vsk->trans; + s64 bytes; + + mutex_lock(&trans->rx_lock); + bytes = trans->rx_bytes; + mutex_unlock(&trans->rx_lock); + + return bytes; +} +EXPORT_SYMBOL_GPL(virtio_transport_stream_has_data); + +static s64 virtio_transport_has_space(struct vsock_sock *vsk) +{ + struct virtio_transport *trans = vsk->trans; + s64 bytes; + + bytes = trans->peer_buf_alloc - (trans->tx_cnt - trans->peer_fwd_cnt); + if (bytes < 0) + bytes = 0; + + return bytes; +} + +s64 virtio_transport_stream_has_space(struct vsock_sock *vsk) +{ + struct virtio_transport *trans = vsk->trans; + s64 bytes; + + mutex_lock(&trans->tx_lock); + bytes = virtio_transport_has_space(vsk); + mutex_unlock(&trans->tx_lock); + + pr_debug("%s: bytes=%lld\n", __func__, bytes); + + return bytes; +} +EXPORT_SYMBOL_GPL(virtio_transport_stream_has_space); + +int virtio_transport_do_socket_init(struct vsock_sock *vsk, + struct vsock_sock *psk) +{ + struct virtio_transport *trans; + + trans = kzalloc(sizeof(*trans), GFP_KERNEL); + if (!trans) + return -ENOMEM; + + vsk->trans = trans; + trans->vsk = vsk; + if (psk) { + struct virtio_transport *ptrans = psk->trans; + trans->buf_size = ptrans->buf_size; + trans->buf_size_min = ptrans->buf_size_min; + trans->buf_size_max = ptrans->buf_size_max; + trans->peer_buf_alloc = ptrans->peer_buf_alloc; + } else { + trans->buf_size = VIRTIO_VSOCK_DEFAULT_BUF_SIZE; + trans->buf_size_min = VIRTIO_VSOCK_DEFAULT_MIN_BUF_SIZE; + trans->buf_size_max = VIRTIO_VSOCK_DEFAULT_MAX_BUF_SIZE; + } + + trans->buf_alloc = trans->buf_size; + + pr_debug("%s: trans->buf_alloc=%d\n", __func__, trans->buf_alloc); + + mutex_init(&trans->rx_lock); + mutex_init(&trans->tx_lock); + INIT_LIST_HEAD(&trans->rx_queue); + INIT_LIST_HEAD(&trans->incomplete_dgrams); + + return 0; +} +EXPORT_SYMBOL_GPL(virtio_transport_do_socket_init); + +u64 virtio_transport_get_buffer_size(struct vsock_sock *vsk) +{ + struct virtio_transport *trans = vsk->trans; + + return trans->buf_size; +} +EXPORT_SYMBOL_GPL(virtio_transport_get_buffer_size); + +u64 virtio_transport_get_min_buffer_size(struct vsock_sock *vsk) +{ + struct virtio_transport *trans = vsk->trans; + + return trans->buf_size_min; +} +EXPORT_SYMBOL_GPL(virtio_transport_get_min_buffer_size); + +u64 virtio_transport_get_max_buffer_size(struct vsock_sock *vsk) +{ + struct virtio_transport *trans = vsk->trans; + + return trans->buf_size_max; +} +EXPORT_SYMBOL_GPL(virtio_transport_get_max_buffer_size); + +void virtio_transport_set_buffer_size(struct vsock_sock *vsk, u64 val) +{ + struct virtio_transport *trans = vsk->trans; + + if (val > VIRTIO_VSOCK_MAX_BUF_SIZE) + val = VIRTIO_VSOCK_MAX_BUF_SIZE; + if (val < trans->buf_size_min) + trans->buf_size_min = val; + if (val > trans->buf_size_max) + trans->buf_size_max = val; + trans->buf_size = val; + trans->buf_alloc = val; +} +EXPORT_SYMBOL_GPL(virtio_transport_set_buffer_size); + +void virtio_transport_set_min_buffer_size(struct vsock_sock *vsk, u64 val) +{ + struct 
virtio_transport *trans = vsk->trans; + + if (val > VIRTIO_VSOCK_MAX_BUF_SIZE) + val = VIRTIO_VSOCK_MAX_BUF_SIZE; + if (val > trans->buf_size) + trans->buf_size = val; + trans->buf_size_min = val; +} +EXPORT_SYMBOL_GPL(virtio_transport_set_min_buffer_size); + +void virtio_transport_set_max_buffer_size(struct vsock_sock *vsk, u64 val) +{ + struct virtio_transport *trans = vsk->trans; + + if (val > VIRTIO_VSOCK_MAX_BUF_SIZE) + val = VIRTIO_VSOCK_MAX_BUF_SIZE; + if (val < trans->buf_size) + trans->buf_size = val; + trans->buf_size_max = val; +} +EXPORT_SYMBOL_GPL(virtio_transport_set_max_buffer_size); + +int +virtio_transport_notify_poll_in(struct vsock_sock *vsk, + size_t target, + bool *data_ready_now) +{ + if (vsock_stream_has_data(vsk)) + *data_ready_now = true; + else + *data_ready_now = false; + + return 0; +} +EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_in); + +int +virtio_transport_notify_poll_out(struct vsock_sock *vsk, + size_t target, + bool *space_avail_now) +{ + s64 free_space; + + free_space = vsock_stream_has_space(vsk); + if (free_space > 0) + *space_avail_now = true; + else if (free_space == 0) + *space_avail_now = false; + + return 0; +} +EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_out); + +int virtio_transport_notify_recv_init(struct vsock_sock *vsk, + size_t target, struct vsock_transport_recv_notify_data *data) +{ + return 0; +} +EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_init); + +int virtio_transport_notify_recv_pre_block(struct vsock_sock *vsk, + size_t target, struct vsock_transport_recv_notify_data *data) +{ + return 0; +} +EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_block); + +int virtio_transport_notify_recv_pre_dequeue(struct vsock_sock *vsk, + size_t target, struct vsock_transport_recv_notify_data *data) +{ + return 0; +} +EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_dequeue); + +int virtio_transport_notify_recv_post_dequeue(struct vsock_sock *vsk, + size_t target, ssize_t copied, bool data_read, + struct vsock_transport_recv_notify_data *data) +{ + return 0; +} +EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_post_dequeue); + +int virtio_transport_notify_send_init(struct vsock_sock *vsk, + struct vsock_transport_send_notify_data *data) +{ + return 0; +} +EXPORT_SYMBOL_GPL(virtio_transport_notify_send_init); + +int virtio_transport_notify_send_pre_block(struct vsock_sock *vsk, + struct vsock_transport_send_notify_data *data) +{ + return 0; +} +EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_block); + +int virtio_transport_notify_send_pre_enqueue(struct vsock_sock *vsk, + struct vsock_transport_send_notify_data *data) +{ + return 0; +} +EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_enqueue); + +int virtio_transport_notify_send_post_enqueue(struct vsock_sock *vsk, + ssize_t written, struct vsock_transport_send_notify_data *data) +{ + return 0; +} +EXPORT_SYMBOL_GPL(virtio_transport_notify_send_post_enqueue); + +u64 virtio_transport_stream_rcvhiwat(struct vsock_sock *vsk) +{ + struct virtio_transport *trans = vsk->trans; + + return trans->buf_size; +} +EXPORT_SYMBOL_GPL(virtio_transport_stream_rcvhiwat); + +bool virtio_transport_stream_is_active(struct vsock_sock *vsk) +{ + return true; +} +EXPORT_SYMBOL_GPL(virtio_transport_stream_is_active); + +bool virtio_transport_stream_allow(u32 cid, u32 port) +{ + return true; +} +EXPORT_SYMBOL_GPL(virtio_transport_stream_allow); + +int virtio_transport_dgram_bind(struct vsock_sock *vsk, + struct sockaddr_vm *addr) +{ + return vsock_bind_dgram_generic(vsk, addr); +} 
+EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind); + +bool virtio_transport_dgram_allow(u32 cid, u32 port) +{ + return true; +} +EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow); + +int virtio_transport_connect(struct vsock_sock *vsk) +{ + struct virtio_transport *trans = vsk->trans; + struct virtio_vsock_pkt_info info = { + .op = VIRTIO_VSOCK_OP_REQUEST, + .type = VIRTIO_VSOCK_TYPE_STREAM, + }; + + pr_debug("%s: vsk=%p send_request\n", __func__, vsk); + return trans->ops->send_pkt(vsk, &info); +} +EXPORT_SYMBOL_GPL(virtio_transport_connect); + +int virtio_transport_shutdown(struct vsock_sock *vsk, int mode) +{ + struct virtio_transport *trans = vsk->trans; + struct virtio_vsock_pkt_info info = { + .op = VIRTIO_VSOCK_OP_SHUTDOWN, + .type = VIRTIO_VSOCK_TYPE_STREAM, + .flags = (mode & RCV_SHUTDOWN ? + VIRTIO_VSOCK_SHUTDOWN_RCV : 0) | + (mode & SEND_SHUTDOWN ? + VIRTIO_VSOCK_SHUTDOWN_SEND : 0), + }; + + pr_debug("%s: vsk=%p: send_shutdown\n", __func__, vsk); + return trans->ops->send_pkt(vsk, &info); +} +EXPORT_SYMBOL_GPL(virtio_transport_shutdown); + +void virtio_transport_release(struct vsock_sock *vsk) +{ + struct virtio_transport *trans = vsk->trans; + struct sock *sk = &vsk->sk; + struct dgram_skb *dgram_skb; + struct dgram_skb *dgram_skb_tmp; + + pr_debug("%s: vsk=%p\n", __func__, vsk); + + /* Tell other side to terminate connection */ + if (sk->sk_type == SOCK_STREAM && sk->sk_state == SS_CONNECTED) { + virtio_transport_shutdown(vsk, SHUTDOWN_MASK); + } + + /* Free incomplete dgrams */ + lock_sock(sk); + list_for_each_entry_safe(dgram_skb, dgram_skb_tmp, + &trans->incomplete_dgrams, list) { + list_del(&dgram_skb->list); + kfree_skb(dgram_skb->skb); + kfree(dgram_skb); + sock_put(sk); /* held in virtio_transport_recv_dgram() */ + } + release_sock(sk); +} +EXPORT_SYMBOL_GPL(virtio_transport_release); + +int +virtio_transport_dgram_enqueue(struct vsock_sock *vsk, + struct sockaddr_vm *remote_addr, + struct msghdr *msg, + size_t dgram_len) +{ + struct virtio_transport *trans = vsk->trans; + struct virtio_vsock_pkt_info info = { + .op = VIRTIO_VSOCK_OP_RW, + .type = VIRTIO_VSOCK_TYPE_DGRAM, + .msg = msg, + }; + size_t total_written = 0, pkt_off = 0, written; + u16 dgram_id; + + /* The max size of a single dgram we support is 64KB */ + if (dgram_len > VIRTIO_VSOCK_MAX_DGRAM_SIZE) + return -EMSGSIZE; + + info.dgram_len = dgram_len; + vsk->remote_addr = *remote_addr; + + dgram_id = trans->dgram_id++; + + /* TODO: To optimize, if we have enough credit to send the pkt already, + * do not ask the peer to send credit to use */ + virtio_transport_send_credit_request(vsk, VIRTIO_VSOCK_TYPE_DGRAM); + + while (total_written < dgram_len) { + info.pkt_len = dgram_len - total_written; + info.flags = dgram_id << 16 | pkt_off; + written = trans->ops->send_pkt(vsk, &info); + if (written < 0) + return -ENOMEM; + if (written == 0) { + /* TODO: if written = 0, we need a sleep & wakeup + * instead of sleep */ + pr_debug("%s: SHOULD WAIT written==0", __func__); + msleep(10); + } + total_written += written; + pkt_off += written; + pr_debug("%s:id=%d, dgram_len=%zu, off=%zu, total_written=%zu, written=%zu\n", + __func__, dgram_id, dgram_len, pkt_off, total_written, written); + } + + return dgram_len; +} +EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue); + +ssize_t +virtio_transport_stream_enqueue(struct vsock_sock *vsk, + struct msghdr *msg, + size_t len) +{ + struct virtio_transport *trans = vsk->trans; + struct virtio_vsock_pkt_info info = { + .op = VIRTIO_VSOCK_OP_RW, + .type = VIRTIO_VSOCK_TYPE_STREAM, + .msg = 
msg, + .pkt_len = len, + }; + + return trans->ops->send_pkt(vsk, &info); +} +EXPORT_SYMBOL_GPL(virtio_transport_stream_enqueue); + +void virtio_transport_destruct(struct vsock_sock *vsk) +{ + struct virtio_transport *trans = vsk->trans; + + pr_debug("%s: vsk=%p\n", __func__, vsk); + kfree(trans); +} +EXPORT_SYMBOL_GPL(virtio_transport_destruct); + +static int virtio_transport_send_ack(struct vsock_sock *vsk, u32 cookie) +{ + struct virtio_transport *trans = vsk->trans; + struct virtio_vsock_pkt_info info = { + .op = VIRTIO_VSOCK_OP_ACK, + .type = VIRTIO_VSOCK_TYPE_STREAM, + .flags = cpu_to_le32(cookie), + }; + + pr_debug("%s: sk=%p send_offer\n", __func__, vsk); + return trans->ops->send_pkt(vsk, &info); +} + +static int virtio_transport_send_reset(struct vsock_sock *vsk, + struct virtio_vsock_pkt *pkt) +{ + struct virtio_transport *trans = vsk->trans; + struct virtio_vsock_pkt_info info = { + .op = VIRTIO_VSOCK_OP_RST, + .type = VIRTIO_VSOCK_TYPE_STREAM, + }; + + pr_debug("%s\n", __func__); + + /* Send RST only if the original pkt is not a RST pkt */ + if (le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST) + return 0; + + return trans->ops->send_pkt(vsk, &info); +} + +static int +virtio_transport_recv_connecting(struct sock *sk, + struct virtio_vsock_pkt *pkt) +{ + struct vsock_sock *vsk = vsock_sk(sk); + int err; + int skerr; + u32 cookie; + + pr_debug("%s: vsk=%p\n", __func__, vsk); + switch (le16_to_cpu(pkt->hdr.op)) { + case VIRTIO_VSOCK_OP_RESPONSE: + cookie = le32_to_cpu(pkt->hdr.flags); + pr_debug("%s: got RESPONSE and send ACK, cookie=%x\n", __func__, cookie); + err = virtio_transport_send_ack(vsk, cookie); + if (err < 0) { + skerr = -err; + goto destroy; + } + sk->sk_state = SS_CONNECTED; + sk->sk_socket->state = SS_CONNECTED; + vsock_insert_connected(vsk); + sk->sk_state_change(sk); + break; + case VIRTIO_VSOCK_OP_INVALID: + pr_debug("%s: got invalid\n", __func__); + break; + case VIRTIO_VSOCK_OP_RST: + pr_debug("%s: got rst\n", __func__); + skerr = ECONNRESET; + err = 0; + goto destroy; + default: + pr_debug("%s: got def\n", __func__); + skerr = EPROTO; + err = -EINVAL; + goto destroy; + } + return 0; + +destroy: + virtio_transport_send_reset(vsk, pkt); + sk->sk_state = SS_UNCONNECTED; + sk->sk_err = skerr; + sk->sk_error_report(sk); + return err; +} + +static int +virtio_transport_recv_connected(struct sock *sk, + struct virtio_vsock_pkt *pkt) +{ + struct vsock_sock *vsk = vsock_sk(sk); + struct virtio_transport *trans = vsk->trans; + int err = 0; + + switch (le16_to_cpu(pkt->hdr.op)) { + case VIRTIO_VSOCK_OP_RW: + pkt->len = le32_to_cpu(pkt->hdr.len); + pkt->off = 0; + pkt->trans = trans; + + mutex_lock(&trans->rx_lock); + virtio_transport_inc_rx_pkt(pkt); + list_add_tail(&pkt->list, &trans->rx_queue); + mutex_unlock(&trans->rx_lock); + + sk->sk_data_ready(sk); + return err; + case VIRTIO_VSOCK_OP_CREDIT_UPDATE: + sk->sk_write_space(sk); + break; + case VIRTIO_VSOCK_OP_SHUTDOWN: + pr_debug("%s: got shutdown\n", __func__); + if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_RCV) + vsk->peer_shutdown |= RCV_SHUTDOWN; + if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_SEND) + vsk->peer_shutdown |= SEND_SHUTDOWN; + if (le32_to_cpu(pkt->hdr.flags)) + sk->sk_state_change(sk); + break; + case VIRTIO_VSOCK_OP_RST: + pr_debug("%s: got rst\n", __func__); + sock_set_flag(sk, SOCK_DONE); + vsk->peer_shutdown = SHUTDOWN_MASK; + if (vsock_stream_has_data(vsk) <= 0) + sk->sk_state = SS_DISCONNECTING; + sk->sk_state_change(sk); + break; + default: + err = -EINVAL; + break; + } + + 
virtio_transport_free_pkt(pkt); + return err; +} + +static int +virtio_transport_send_response(struct vsock_sock *vsk, + struct virtio_vsock_pkt *pkt) +{ + struct virtio_transport *trans = vsk->trans; + struct virtio_vsock_pkt_info info = { + .op = VIRTIO_VSOCK_OP_RESPONSE, + .type = VIRTIO_VSOCK_TYPE_STREAM, + .remote_cid = le32_to_cpu(pkt->hdr.src_cid), + .remote_port = le32_to_cpu(pkt->hdr.src_port), + }; + u32 cookie; + + cookie = virtio_vsock_secure_cookie(le32_to_cpu(pkt->hdr.src_cid), + le32_to_cpu(pkt->hdr.dst_cid), + le32_to_cpu(pkt->hdr.src_port), + le32_to_cpu(pkt->hdr.dst_port), + jiffies / (HZ * 60)); + info.flags = cpu_to_le32(cookie); + + pr_debug("%s: send_response, cookie=%x\n", __func__, le32_to_cpu(cookie)); + + return trans->ops->send_pkt(vsk, &info); +} + +/* Handle server socket */ +static int +virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt) +{ + struct vsock_sock *vsk = vsock_sk(sk); + struct vsock_sock *vpending; + struct sock *pending; + int err; + u32 cookie; + + switch (le16_to_cpu(pkt->hdr.op)) { + case VIRTIO_VSOCK_OP_REQUEST: + err = virtio_transport_send_response(vsk, pkt); + if (err < 0) { + // FIXME vsk should be vpending + virtio_transport_send_reset(vsk, pkt); + return err; + } + break; + case VIRTIO_VSOCK_OP_ACK: + cookie = le32_to_cpu(pkt->hdr.flags); + err = virtio_vsock_check_cookie(le32_to_cpu(pkt->hdr.src_cid), + le32_to_cpu(pkt->hdr.dst_cid), + le32_to_cpu(pkt->hdr.src_port), + le32_to_cpu(pkt->hdr.dst_port), + jiffies / (HZ * 60), + le32_to_cpu(pkt->hdr.flags), + VSOCK_TIMEOUT_INIT); + pr_debug("%s: cookie=%x, err=%d\n", __func__, cookie, err); + if (err) + return err; + + /* So no pending socket are responsible for this pkt, create one */ + pr_debug("%s: create pending\n", __func__); + pending = __vsock_create(sock_net(sk), NULL, sk, GFP_KERNEL, + sk->sk_type, 0); + if (!pending) { + virtio_transport_send_reset(vsk, pkt); + return -ENOMEM; + } + sk->sk_ack_backlog++; + pending->sk_state = SS_CONNECTING; + + vpending = vsock_sk(pending); + vsock_addr_init(&vpending->local_addr, le32_to_cpu(pkt->hdr.dst_cid), + le32_to_cpu(pkt->hdr.dst_port)); + vsock_addr_init(&vpending->remote_addr, le32_to_cpu(pkt->hdr.src_cid), + le32_to_cpu(pkt->hdr.src_port)); + vsock_add_pending(sk, pending); + + pr_debug("%s: get pending\n", __func__); + pending = virtio_transport_get_pending(sk, pkt); + vpending = vsock_sk(pending); + lock_sock(pending); + switch (pending->sk_state) { + case SS_CONNECTING: + if (le16_to_cpu(pkt->hdr.op) != VIRTIO_VSOCK_OP_ACK) { + pr_debug("%s: op=%d != OP_ACK\n", __func__, + le16_to_cpu(pkt->hdr.op)); + virtio_transport_send_reset(vpending, pkt); + pending->sk_err = EPROTO; + pending->sk_state = SS_UNCONNECTED; + sock_put(pending); + } else { + pending->sk_state = SS_CONNECTED; + vsock_insert_connected(vpending); + + vsock_remove_pending(sk, pending); + vsock_enqueue_accept(sk, pending); + + sk->sk_data_ready(sk); + } + err = 0; + break; + default: + pr_debug("%s: sk->sk_ack_backlog=%d\n", __func__, + sk->sk_ack_backlog); + virtio_transport_send_reset(vpending, pkt); + err = -EINVAL; + break; + } + if (err < 0) + vsock_remove_pending(sk, pending); + release_sock(pending); + + /* Release refcnt obtained in virtio_transport_get_pending */ + sock_put(pending); + break; + default: + break; + } + + return 0; +} + +static void virtio_transport_space_update(struct sock *sk, + struct virtio_vsock_pkt *pkt) +{ + struct vsock_sock *vsk = vsock_sk(sk); + struct virtio_transport *trans = vsk->trans; + bool space_available; 
+ + /* buf_alloc and fwd_cnt is always included in the hdr */ + mutex_lock(&trans->tx_lock); + trans->peer_buf_alloc = le32_to_cpu(pkt->hdr.buf_alloc); + trans->peer_fwd_cnt = le32_to_cpu(pkt->hdr.fwd_cnt); + space_available = virtio_transport_has_space(vsk); + mutex_unlock(&trans->tx_lock); + + if (space_available) + sk->sk_write_space(sk); +} + +/* We are under the virtio-vsock's vsock->rx_lock or + * vhost-vsock's vq->mutex lock */ +void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt) +{ + struct virtio_transport *trans; + struct sockaddr_vm src, dst; + struct vsock_sock *vsk; + struct sock *sk; + + vsock_addr_init(&src, le32_to_cpu(pkt->hdr.src_cid), le32_to_cpu(pkt->hdr.src_port)); + vsock_addr_init(&dst, le32_to_cpu(pkt->hdr.dst_cid), le32_to_cpu(pkt->hdr.dst_port)); + + virtio_vsock_dumppkt(__func__, pkt); + + if (le16_to_cpu(pkt->hdr.type) == VIRTIO_VSOCK_TYPE_DGRAM) { + sk = vsock_find_unbound_socket(&dst); + if (!sk) + goto free_pkt; + + vsk = vsock_sk(sk); + trans = vsk->trans; + BUG_ON(!trans); + + virtio_transport_space_update(sk, pkt); + + lock_sock(sk); + switch (le16_to_cpu(pkt->hdr.op)) { + case VIRTIO_VSOCK_OP_CREDIT_UPDATE: + virtio_transport_free_pkt(pkt); + break; + case VIRTIO_VSOCK_OP_CREDIT_REQUEST: + virtio_transport_send_credit_update(vsk, VIRTIO_VSOCK_TYPE_DGRAM, + &pkt->hdr); + virtio_transport_free_pkt(pkt); + break; + case VIRTIO_VSOCK_OP_RW: + virtio_transport_recv_dgram(sk, pkt); + break; + default: + virtio_transport_free_pkt(pkt); + break; + } + release_sock(sk); + + /* Release refcnt obtained when we fetched this socket out of + * the unbound list. + */ + sock_put(sk); + return; + } else if (le16_to_cpu(pkt->hdr.type) == VIRTIO_VSOCK_TYPE_STREAM) { + /* The socket must be in connected or bound table + * otherwise send reset back + */ + sk = vsock_find_connected_socket(&src, &dst); + if (!sk) { + sk = vsock_find_bound_socket(&dst); + if (!sk) { + pr_debug("%s: can not find bound_socket\n", __func__); + virtio_vsock_dumppkt(__func__, pkt); + /* Ignore this pkt instead of sending reset back */ + /* TODO send a RST unless this packet is a RST (to avoid infinite loops) */ + goto free_pkt; + } + } + + vsk = vsock_sk(sk); + trans = vsk->trans; + BUG_ON(!trans); + + virtio_transport_space_update(sk, pkt); + + lock_sock(sk); + switch (sk->sk_state) { + case VSOCK_SS_LISTEN: + virtio_transport_recv_listen(sk, pkt); + virtio_transport_free_pkt(pkt); + break; + case SS_CONNECTING: + virtio_transport_recv_connecting(sk, pkt); + virtio_transport_free_pkt(pkt); + break; + case SS_CONNECTED: + virtio_transport_recv_connected(sk, pkt); + break; + default: + virtio_transport_free_pkt(pkt); + break; + } + release_sock(sk); + + /* Release refcnt obtained when we fetched this socket out of the + * bound or connected list. 
+ */ + sock_put(sk); + } + return; + +free_pkt: + virtio_transport_free_pkt(pkt); +} +EXPORT_SYMBOL_GPL(virtio_transport_recv_pkt); + +void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt) +{ + kfree(pkt->buf); + kfree(pkt); +} +EXPORT_SYMBOL_GPL(virtio_transport_free_pkt); + +static int __init virtio_vsock_common_init(void) +{ + get_random_bytes(vsockcookie_secret, sizeof(vsockcookie_secret)); + return 0; +} + +static void __exit virtio_vsock_common_exit(void) +{ +} + +module_init(virtio_vsock_common_init); +module_exit(virtio_vsock_common_exit); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Asias He"); +MODULE_DESCRIPTION("common code for virtio vsock"); -- cgit v1.2.3-71-gd317 From 2f8364a291e8adde25c93f97a76abbcaf4b1ed3f Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Thu, 3 Dec 2015 21:12:31 +0100 Subject: WAN: HDLC: Call notifiers before and after changing device type An HDLC device can change type when the protocol driver is changed. Calling the notifier change allows potential users of the interface know about this planned change, and even block it. After the change has occurred, send a second notification to users can evaluate the new device type etc. Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/wan/hdlc.c | 19 +++++++++++++++++-- drivers/net/wan/hdlc_cisco.c | 1 + drivers/net/wan/hdlc_fr.c | 1 + drivers/net/wan/hdlc_ppp.c | 1 + drivers/net/wan/hdlc_raw.c | 1 + drivers/net/wan/hdlc_raw_eth.c | 1 + drivers/net/wan/hdlc_x25.c | 1 + include/linux/hdlc.h | 2 +- 8 files changed, 24 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/wan/hdlc.c b/drivers/net/wan/hdlc.c index 2a6595b4ae15..9bd4aa8083ce 100644 --- a/drivers/net/wan/hdlc.c +++ b/drivers/net/wan/hdlc.c @@ -276,7 +276,11 @@ void unregister_hdlc_device(struct net_device *dev) int attach_hdlc_protocol(struct net_device *dev, struct hdlc_proto *proto, size_t size) { - detach_hdlc_protocol(dev); + int err; + + err = detach_hdlc_protocol(dev); + if (err) + return err; if (!try_module_get(proto->module)) return -ENOSYS; @@ -289,15 +293,24 @@ int attach_hdlc_protocol(struct net_device *dev, struct hdlc_proto *proto, } } dev_to_hdlc(dev)->proto = proto; + return 0; } -void detach_hdlc_protocol(struct net_device *dev) +int detach_hdlc_protocol(struct net_device *dev) { hdlc_device *hdlc = dev_to_hdlc(dev); + int err; if (hdlc->proto) { + err = call_netdevice_notifiers(NETDEV_PRE_TYPE_CHANGE, dev); + err = notifier_to_errno(err); + if (err) { + netdev_err(dev, "Refused to change device type\n"); + return err; + } + if (hdlc->proto->detach) hdlc->proto->detach(dev); module_put(hdlc->proto->module); @@ -306,6 +319,8 @@ void detach_hdlc_protocol(struct net_device *dev) kfree(hdlc->state); hdlc->state = NULL; hdlc_setup_dev(dev); + + return 0; } diff --git a/drivers/net/wan/hdlc_cisco.c b/drivers/net/wan/hdlc_cisco.c index 3f20808b5ff8..a408abc25512 100644 --- a/drivers/net/wan/hdlc_cisco.c +++ b/drivers/net/wan/hdlc_cisco.c @@ -378,6 +378,7 @@ static int cisco_ioctl(struct net_device *dev, struct ifreq *ifr) spin_lock_init(&state(hdlc)->lock); dev->header_ops = &cisco_header_ops; dev->type = ARPHRD_CISCO; + call_netdevice_notifiers(NETDEV_POST_TYPE_CHANGE, dev); netif_dormant_on(dev); return 0; } diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c index 89541cc90e87..b6e0cfb095d3 100644 --- a/drivers/net/wan/hdlc_fr.c +++ b/drivers/net/wan/hdlc_fr.c @@ -1240,6 +1240,7 @@ static int fr_ioctl(struct net_device *dev, struct ifreq *ifr) } 
memcpy(&state(hdlc)->settings, &new_settings, size); dev->type = ARPHRD_FRAD; + call_netdevice_notifiers(NETDEV_POST_TYPE_CHANGE, dev); return 0; case IF_PROTO_FR_ADD_PVC: diff --git a/drivers/net/wan/hdlc_ppp.c b/drivers/net/wan/hdlc_ppp.c index 0d7645581f91..47fdb87d3567 100644 --- a/drivers/net/wan/hdlc_ppp.c +++ b/drivers/net/wan/hdlc_ppp.c @@ -687,6 +687,7 @@ static int ppp_ioctl(struct net_device *dev, struct ifreq *ifr) dev->hard_header_len = sizeof(struct hdlc_header); dev->header_ops = &ppp_header_ops; dev->type = ARPHRD_PPP; + call_netdevice_notifiers(NETDEV_POST_TYPE_CHANGE, dev); netif_dormant_on(dev); return 0; } diff --git a/drivers/net/wan/hdlc_raw.c b/drivers/net/wan/hdlc_raw.c index 5dc153e8a29d..4feb45001aac 100644 --- a/drivers/net/wan/hdlc_raw.c +++ b/drivers/net/wan/hdlc_raw.c @@ -84,6 +84,7 @@ static int raw_ioctl(struct net_device *dev, struct ifreq *ifr) return result; memcpy(hdlc->state, &new_settings, size); dev->type = ARPHRD_RAWHDLC; + call_netdevice_notifiers(NETDEV_POST_TYPE_CHANGE, dev); netif_dormant_off(dev); return 0; } diff --git a/drivers/net/wan/hdlc_raw_eth.c b/drivers/net/wan/hdlc_raw_eth.c index 3ab72b3082de..2f11836078ab 100644 --- a/drivers/net/wan/hdlc_raw_eth.c +++ b/drivers/net/wan/hdlc_raw_eth.c @@ -102,6 +102,7 @@ static int raw_eth_ioctl(struct net_device *dev, struct ifreq *ifr) ether_setup(dev); dev->tx_queue_len = old_qlen; eth_hw_addr_random(dev); + call_netdevice_notifiers(NETDEV_POST_TYPE_CHANGE, dev); netif_dormant_off(dev); return 0; } diff --git a/drivers/net/wan/hdlc_x25.c b/drivers/net/wan/hdlc_x25.c index a49aec5efd20..e867638067a6 100644 --- a/drivers/net/wan/hdlc_x25.c +++ b/drivers/net/wan/hdlc_x25.c @@ -213,6 +213,7 @@ static int x25_ioctl(struct net_device *dev, struct ifreq *ifr) if ((result = attach_hdlc_protocol(dev, &proto, 0))) return result; dev->type = ARPHRD_X25; + call_netdevice_notifiers(NETDEV_POST_TYPE_CHANGE, dev); netif_dormant_off(dev); return 0; } diff --git a/include/linux/hdlc.h b/include/linux/hdlc.h index 1acb1445e05f..e31bcd4c7859 100644 --- a/include/linux/hdlc.h +++ b/include/linux/hdlc.h @@ -101,7 +101,7 @@ netdev_tx_t hdlc_start_xmit(struct sk_buff *skb, struct net_device *dev); int attach_hdlc_protocol(struct net_device *dev, struct hdlc_proto *proto, size_t size); /* May be used by hardware driver to gain control over HDLC device */ -void detach_hdlc_protocol(struct net_device *dev); +int detach_hdlc_protocol(struct net_device *dev); static __inline__ __be16 hdlc_type_trans(struct sk_buff *skb, struct net_device *dev) -- cgit v1.2.3-71-gd317 From b618aaa91b5870e7bd139987ac4b7bf0851142d0 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 4 Dec 2015 15:01:31 +0100 Subject: net: constify netif_is_* helpers net_device param As suggested by Eric, these helpers should have const dev param. Suggested-by: Eric Dumazet Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/linux/if_vlan.h | 2 +- include/linux/netdevice.h | 22 +++++++++++----------- net/core/dev.c | 2 +- 3 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 67ce5bd3b56a..05f5879821b8 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -73,7 +73,7 @@ static inline struct vlan_ethhdr *vlan_eth_hdr(const struct sk_buff *skb) /* found in socket.c */ extern void vlan_ioctl_set(int (*hook)(struct net *, void __user *)); -static inline bool is_vlan_dev(struct net_device *dev) +static inline bool is_vlan_dev(const struct net_device *dev) { return dev->priv_flags & IFF_802_1Q_VLAN; } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3efe017fe419..1bb21ff0fa64 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3661,7 +3661,7 @@ extern u8 netdev_rss_key[NETDEV_RSS_KEY_LEN]; void netdev_rss_key_fill(void *buffer, size_t len); int dev_get_nest_level(struct net_device *dev, - bool (*type_check)(struct net_device *dev)); + bool (*type_check)(const struct net_device *dev)); int skb_checksum_help(struct sk_buff *skb); struct sk_buff *__skb_gso_segment(struct sk_buff *skb, netdev_features_t features, bool tx_path); @@ -3858,32 +3858,32 @@ static inline void skb_gso_error_unwind(struct sk_buff *skb, __be16 protocol, skb->mac_len = mac_len; } -static inline bool netif_is_macvlan(struct net_device *dev) +static inline bool netif_is_macvlan(const struct net_device *dev) { return dev->priv_flags & IFF_MACVLAN; } -static inline bool netif_is_macvlan_port(struct net_device *dev) +static inline bool netif_is_macvlan_port(const struct net_device *dev) { return dev->priv_flags & IFF_MACVLAN_PORT; } -static inline bool netif_is_ipvlan(struct net_device *dev) +static inline bool netif_is_ipvlan(const struct net_device *dev) { return dev->priv_flags & IFF_IPVLAN_SLAVE; } -static inline bool netif_is_ipvlan_port(struct net_device *dev) +static inline bool netif_is_ipvlan_port(const struct net_device *dev) { return dev->priv_flags & IFF_IPVLAN_MASTER; } -static inline bool netif_is_bond_master(struct net_device *dev) +static inline bool netif_is_bond_master(const struct net_device *dev) { return dev->flags & IFF_MASTER && dev->priv_flags & IFF_BONDING; } -static inline bool netif_is_bond_slave(struct net_device *dev) +static inline bool netif_is_bond_slave(const struct net_device *dev) { return dev->flags & IFF_SLAVE && dev->priv_flags & IFF_BONDING; } @@ -3918,22 +3918,22 @@ static inline bool netif_is_ovs_master(const struct net_device *dev) return dev->priv_flags & IFF_OPENVSWITCH; } -static inline bool netif_is_team_master(struct net_device *dev) +static inline bool netif_is_team_master(const struct net_device *dev) { return dev->priv_flags & IFF_TEAM; } -static inline bool netif_is_team_port(struct net_device *dev) +static inline bool netif_is_team_port(const struct net_device *dev) { return dev->priv_flags & IFF_TEAM_PORT; } -static inline bool netif_is_lag_master(struct net_device *dev) +static inline bool netif_is_lag_master(const struct net_device *dev) { return netif_is_bond_master(dev) || netif_is_team_master(dev); } -static inline bool netif_is_lag_port(struct net_device *dev) +static inline bool netif_is_lag_port(const struct net_device *dev) { return netif_is_bond_slave(dev) || netif_is_team_port(dev); } diff --git a/net/core/dev.c b/net/core/dev.c index d1706e88fbeb..e5c395473eba 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ 
-5734,7 +5734,7 @@ EXPORT_SYMBOL(netdev_lower_dev_get_private); int dev_get_nest_level(struct net_device *dev, - bool (*type_check)(struct net_device *dev)) + bool (*type_check)(const struct net_device *dev)) { struct net_device *lower = NULL; struct list_head *iter; -- cgit v1.2.3-71-gd317 From 5f61385d2ebc2bd62bc389c7da0d8d2f263be1eb Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Sun, 6 Dec 2015 18:07:41 +0200 Subject: net/mlx4_core: Keep VLAN/MAC tables mirrored in multifunc HA mode Due to HW limitations, indexes to MAC and VLAN tables are always taken from the table of the actual port. So, if a resource holds an index to a table, it may refer to different values during the lifetime of the resource, unless the tables are mirrored. Also, even when driver is not in HA mode the policy of allocating an index to these tables is such to make sure, as much as possible, that when the time comes the mirroring will be successful. This means that in multifunction mode the allocation of a free index in a port's table tries to make sure that the same index in the other's port table is also free. Signed-off-by: Moni Shoua Reviewed-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 6 + drivers/net/ethernet/mellanox/mlx4/port.c | 598 ++++++++++++++++++++++++++++-- include/linux/mlx4/driver.h | 5 + 3 files changed, 586 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 33c4c6f2c4bb..2404c22ad2b2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -736,6 +736,7 @@ struct mlx4_catas_err { struct mlx4_mac_table { __be64 entries[MLX4_MAX_MAC_NUM]; int refs[MLX4_MAX_MAC_NUM]; + bool is_dup[MLX4_MAX_MAC_NUM]; struct mutex mutex; int total; int max; @@ -758,6 +759,7 @@ struct mlx4_roce_gid_table { struct mlx4_vlan_table { __be32 entries[MLX4_MAX_VLAN_NUM]; int refs[MLX4_MAX_VLAN_NUM]; + int is_dup[MLX4_MAX_VLAN_NUM]; struct mutex mutex; int total; int max; @@ -1225,6 +1227,10 @@ void mlx4_init_roce_gid_table(struct mlx4_dev *dev, struct mlx4_roce_gid_table *table); void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan); int __mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index); +int mlx4_bond_vlan_table(struct mlx4_dev *dev); +int mlx4_unbond_vlan_table(struct mlx4_dev *dev); +int mlx4_bond_mac_table(struct mlx4_dev *dev); +int mlx4_unbond_mac_table(struct mlx4_dev *dev); int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port, int pkey_tbl_sz); /* resource tracker functions*/ diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index c2b21313dba7..f2550425c251 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -61,6 +61,7 @@ void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table) for (i = 0; i < MLX4_MAX_MAC_NUM; i++) { table->entries[i] = 0; table->refs[i] = 0; + table->is_dup[i] = false; } table->max = 1 << dev->caps.log_num_macs; table->total = 0; @@ -74,6 +75,7 @@ void mlx4_init_vlan_table(struct mlx4_dev *dev, struct mlx4_vlan_table *table) for (i = 0; i < MLX4_MAX_VLAN_NUM; i++) { table->entries[i] = 0; table->refs[i] = 0; + table->is_dup[i] = false; } table->max = (1 << dev->caps.log_num_vlans) - MLX4_VLAN_REGULAR; table->total = 0; @@ -159,21 +161,94 @@ int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, 
int *idx) } EXPORT_SYMBOL_GPL(mlx4_find_cached_mac); +static bool mlx4_need_mf_bond(struct mlx4_dev *dev) +{ + int i, num_eth_ports = 0; + + if (!mlx4_is_mfunc(dev)) + return false; + mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) + ++num_eth_ports; + + return (num_eth_ports == 2) ? true : false; +} + int __mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac) { struct mlx4_port_info *info = &mlx4_priv(dev)->port[port]; struct mlx4_mac_table *table = &info->mac_table; int i, err = 0; int free = -1; + int free_for_dup = -1; + bool dup = mlx4_is_mf_bonded(dev); + u8 dup_port = (port == 1) ? 2 : 1; + struct mlx4_mac_table *dup_table = &mlx4_priv(dev)->port[dup_port].mac_table; + bool need_mf_bond = mlx4_need_mf_bond(dev); + bool can_mf_bond = true; + + mlx4_dbg(dev, "Registering MAC: 0x%llx for port %d %s duplicate\n", + (unsigned long long)mac, port, + dup ? "with" : "without"); + + if (need_mf_bond) { + if (port == 1) { + mutex_lock(&table->mutex); + mutex_lock(&dup_table->mutex); + } else { + mutex_lock(&dup_table->mutex); + mutex_lock(&table->mutex); + } + } else { + mutex_lock(&table->mutex); + } + + if (need_mf_bond) { + int index_at_port = -1; + int index_at_dup_port = -1; - mlx4_dbg(dev, "Registering MAC: 0x%llx for port %d\n", - (unsigned long long) mac, port); + for (i = 0; i < MLX4_MAX_MAC_NUM; i++) { + if (((MLX4_MAC_MASK & mac) == (MLX4_MAC_MASK & be64_to_cpu(table->entries[i])))) + index_at_port = i; + if (((MLX4_MAC_MASK & mac) == (MLX4_MAC_MASK & be64_to_cpu(dup_table->entries[i])))) + index_at_dup_port = i; + } + + /* check that same mac is not in the tables at different indices */ + if ((index_at_port != index_at_dup_port) && + (index_at_port >= 0) && + (index_at_dup_port >= 0)) + can_mf_bond = false; + + /* If the mac is already in the primary table, the slot must be + * available in the duplicate table as well. + */ + if (index_at_port >= 0 && index_at_dup_port < 0 && + dup_table->refs[index_at_port]) { + can_mf_bond = false; + } + /* If the mac is already in the duplicate table, check that the + * corresponding index is not occupied in the primary table, or + * the primary table already contains the mac at the same index. + * Otherwise, you cannot bond (primary contains a different mac + * at that index). 
+ */ + if (index_at_dup_port >= 0) { + if (!table->refs[index_at_dup_port] || + ((MLX4_MAC_MASK & mac) == (MLX4_MAC_MASK & be64_to_cpu(table->entries[index_at_dup_port])))) + free_for_dup = index_at_dup_port; + else + can_mf_bond = false; + } + } - mutex_lock(&table->mutex); for (i = 0; i < MLX4_MAX_MAC_NUM; i++) { if (!table->refs[i]) { if (free < 0) free = i; + if (free_for_dup < 0 && need_mf_bond && can_mf_bond) { + if (!dup_table->refs[i]) + free_for_dup = i; + } continue; } @@ -182,10 +257,30 @@ int __mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac) /* MAC already registered, increment ref count */ err = i; ++table->refs[i]; + if (dup) { + u64 dup_mac = MLX4_MAC_MASK & be64_to_cpu(dup_table->entries[i]); + + if (dup_mac != mac || !dup_table->is_dup[i]) { + mlx4_warn(dev, "register mac: expect duplicate mac 0x%llx on port %d index %d\n", + mac, dup_port, i); + } + } goto out; } } + if (need_mf_bond && (free_for_dup < 0)) { + if (dup) { + mlx4_warn(dev, "Fail to allocate duplicate MAC table entry\n"); + mlx4_warn(dev, "High Availability for virtual functions may not work as expected\n"); + dup = false; + } + can_mf_bond = false; + } + + if (need_mf_bond && can_mf_bond) + free = free_for_dup; + mlx4_dbg(dev, "Free MAC index is %d\n", free); if (table->total == table->max) { @@ -205,10 +300,35 @@ int __mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac) goto out; } table->refs[free] = 1; - err = free; + table->is_dup[free] = false; ++table->total; + if (dup) { + dup_table->refs[free] = 0; + dup_table->is_dup[free] = true; + dup_table->entries[free] = cpu_to_be64(mac | MLX4_MAC_VALID); + + err = mlx4_set_port_mac_table(dev, dup_port, dup_table->entries); + if (unlikely(err)) { + mlx4_warn(dev, "Failed adding duplicate mac: 0x%llx\n", mac); + dup_table->is_dup[free] = false; + dup_table->entries[free] = 0; + goto out; + } + ++dup_table->total; + } + err = free; out: - mutex_unlock(&table->mutex); + if (need_mf_bond) { + if (port == 2) { + mutex_unlock(&table->mutex); + mutex_unlock(&dup_table->mutex); + } else { + mutex_unlock(&dup_table->mutex); + mutex_unlock(&table->mutex); + } + } else { + mutex_unlock(&table->mutex); + } return err; } EXPORT_SYMBOL_GPL(__mlx4_register_mac); @@ -255,6 +375,9 @@ void __mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac) struct mlx4_port_info *info; struct mlx4_mac_table *table; int index; + bool dup = mlx4_is_mf_bonded(dev); + u8 dup_port = (port == 1) ? 
2 : 1; + struct mlx4_mac_table *dup_table = &mlx4_priv(dev)->port[dup_port].mac_table; if (port < 1 || port > dev->caps.num_ports) { mlx4_warn(dev, "invalid port number (%d), aborting...\n", port); @@ -262,22 +385,59 @@ void __mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac) } info = &mlx4_priv(dev)->port[port]; table = &info->mac_table; - mutex_lock(&table->mutex); + + if (dup) { + if (port == 1) { + mutex_lock(&table->mutex); + mutex_lock(&dup_table->mutex); + } else { + mutex_lock(&dup_table->mutex); + mutex_lock(&table->mutex); + } + } else { + mutex_lock(&table->mutex); + } + index = find_index(dev, table, mac); if (validate_index(dev, table, index)) goto out; - if (--table->refs[index]) { + + if (--table->refs[index] || table->is_dup[index]) { mlx4_dbg(dev, "Have more references for index %d, no need to modify mac table\n", index); + if (!table->refs[index]) + dup_table->is_dup[index] = false; goto out; } table->entries[index] = 0; - mlx4_set_port_mac_table(dev, port, table->entries); + if (mlx4_set_port_mac_table(dev, port, table->entries)) + mlx4_warn(dev, "Fail to set mac in port %d during unregister\n", port); --table->total; + + if (dup) { + dup_table->is_dup[index] = false; + if (dup_table->refs[index]) + goto out; + dup_table->entries[index] = 0; + if (mlx4_set_port_mac_table(dev, dup_port, dup_table->entries)) + mlx4_warn(dev, "Fail to set mac in duplicate port %d during unregister\n", dup_port); + + --table->total; + } out: - mutex_unlock(&table->mutex); + if (dup) { + if (port == 2) { + mutex_unlock(&table->mutex); + mutex_unlock(&dup_table->mutex); + } else { + mutex_unlock(&dup_table->mutex); + mutex_unlock(&table->mutex); + } + } else { + mutex_unlock(&table->mutex); + } } EXPORT_SYMBOL_GPL(__mlx4_unregister_mac); @@ -311,9 +471,22 @@ int __mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac) struct mlx4_mac_table *table = &info->mac_table; int index = qpn - info->base_qpn; int err = 0; + bool dup = mlx4_is_mf_bonded(dev); + u8 dup_port = (port == 1) ? 
2 : 1; + struct mlx4_mac_table *dup_table = &mlx4_priv(dev)->port[dup_port].mac_table; /* CX1 doesn't support multi-functions */ - mutex_lock(&table->mutex); + if (dup) { + if (port == 1) { + mutex_lock(&table->mutex); + mutex_lock(&dup_table->mutex); + } else { + mutex_lock(&dup_table->mutex); + mutex_lock(&table->mutex); + } + } else { + mutex_lock(&table->mutex); + } err = validate_index(dev, table, index); if (err) @@ -326,9 +499,30 @@ int __mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac) mlx4_err(dev, "Failed adding MAC: 0x%llx\n", (unsigned long long) new_mac); table->entries[index] = 0; + } else { + if (dup) { + dup_table->entries[index] = cpu_to_be64(new_mac | MLX4_MAC_VALID); + + err = mlx4_set_port_mac_table(dev, dup_port, dup_table->entries); + if (unlikely(err)) { + mlx4_err(dev, "Failed adding duplicate MAC: 0x%llx\n", + (unsigned long long)new_mac); + dup_table->entries[index] = 0; + } + } } out: - mutex_unlock(&table->mutex); + if (dup) { + if (port == 2) { + mutex_unlock(&table->mutex); + mutex_unlock(&dup_table->mutex); + } else { + mutex_unlock(&dup_table->mutex); + mutex_unlock(&table->mutex); + } + } else { + mutex_unlock(&table->mutex); + } return err; } EXPORT_SYMBOL_GPL(__mlx4_replace_mac); @@ -380,8 +574,28 @@ int __mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, struct mlx4_vlan_table *table = &mlx4_priv(dev)->port[port].vlan_table; int i, err = 0; int free = -1; - - mutex_lock(&table->mutex); + int free_for_dup = -1; + bool dup = mlx4_is_mf_bonded(dev); + u8 dup_port = (port == 1) ? 2 : 1; + struct mlx4_vlan_table *dup_table = &mlx4_priv(dev)->port[dup_port].vlan_table; + bool need_mf_bond = mlx4_need_mf_bond(dev); + bool can_mf_bond = true; + + mlx4_dbg(dev, "Registering VLAN: %d for port %d %s duplicate\n", + vlan, port, + dup ? "with" : "without"); + + if (need_mf_bond) { + if (port == 1) { + mutex_lock(&table->mutex); + mutex_lock(&dup_table->mutex); + } else { + mutex_lock(&dup_table->mutex); + mutex_lock(&table->mutex); + } + } else { + mutex_lock(&table->mutex); + } if (table->total == table->max) { /* No free vlan entries */ @@ -389,22 +603,85 @@ int __mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, goto out; } + if (need_mf_bond) { + int index_at_port = -1; + int index_at_dup_port = -1; + + for (i = MLX4_VLAN_REGULAR; i < MLX4_MAX_VLAN_NUM; i++) { + if ((vlan == (MLX4_VLAN_MASK & be32_to_cpu(table->entries[i])))) + index_at_port = i; + if ((vlan == (MLX4_VLAN_MASK & be32_to_cpu(dup_table->entries[i])))) + index_at_dup_port = i; + } + /* check that same vlan is not in the tables at different indices */ + if ((index_at_port != index_at_dup_port) && + (index_at_port >= 0) && + (index_at_dup_port >= 0)) + can_mf_bond = false; + + /* If the vlan is already in the primary table, the slot must be + * available in the duplicate table as well. + */ + if (index_at_port >= 0 && index_at_dup_port < 0 && + dup_table->refs[index_at_port]) { + can_mf_bond = false; + } + /* If the vlan is already in the duplicate table, check that the + * corresponding index is not occupied in the primary table, or + * the primary table already contains the vlan at the same index. + * Otherwise, you cannot bond (primary contains a different vlan + * at that index). 
+ */ + if (index_at_dup_port >= 0) { + if (!table->refs[index_at_dup_port] || + (vlan == (MLX4_VLAN_MASK & be32_to_cpu(dup_table->entries[index_at_dup_port])))) + free_for_dup = index_at_dup_port; + else + can_mf_bond = false; + } + } + for (i = MLX4_VLAN_REGULAR; i < MLX4_MAX_VLAN_NUM; i++) { - if (free < 0 && (table->refs[i] == 0)) { - free = i; - continue; + if (!table->refs[i]) { + if (free < 0) + free = i; + if (free_for_dup < 0 && need_mf_bond && can_mf_bond) { + if (!dup_table->refs[i]) + free_for_dup = i; + } } - if (table->refs[i] && + if ((table->refs[i] || table->is_dup[i]) && (vlan == (MLX4_VLAN_MASK & be32_to_cpu(table->entries[i])))) { /* Vlan already registered, increase references count */ + mlx4_dbg(dev, "vlan %u is already registered.\n", vlan); *index = i; ++table->refs[i]; + if (dup) { + u16 dup_vlan = MLX4_VLAN_MASK & be32_to_cpu(dup_table->entries[i]); + + if (dup_vlan != vlan || !dup_table->is_dup[i]) { + mlx4_warn(dev, "register vlan: expected duplicate vlan %u on port %d index %d\n", + vlan, dup_port, i); + } + } goto out; } } + if (need_mf_bond && (free_for_dup < 0)) { + if (dup) { + mlx4_warn(dev, "Fail to allocate duplicate VLAN table entry\n"); + mlx4_warn(dev, "High Availability for virtual functions may not work as expected\n"); + dup = false; + } + can_mf_bond = false; + } + + if (need_mf_bond && can_mf_bond) + free = free_for_dup; + if (free < 0) { err = -ENOMEM; goto out; @@ -412,6 +689,7 @@ int __mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, /* Register new VLAN */ table->refs[free] = 1; + table->is_dup[free] = false; table->entries[free] = cpu_to_be32(vlan | MLX4_VLAN_VALID); err = mlx4_set_port_vlan_table(dev, port, table->entries); @@ -421,11 +699,35 @@ int __mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, table->entries[free] = 0; goto out; } + ++table->total; + if (dup) { + dup_table->refs[free] = 0; + dup_table->is_dup[free] = true; + dup_table->entries[free] = cpu_to_be32(vlan | MLX4_VLAN_VALID); + + err = mlx4_set_port_vlan_table(dev, dup_port, dup_table->entries); + if (unlikely(err)) { + mlx4_warn(dev, "Failed adding duplicate vlan: %u\n", vlan); + dup_table->is_dup[free] = false; + dup_table->entries[free] = 0; + goto out; + } + ++dup_table->total; + } *index = free; - ++table->total; out: - mutex_unlock(&table->mutex); + if (need_mf_bond) { + if (port == 2) { + mutex_unlock(&table->mutex); + mutex_unlock(&dup_table->mutex); + } else { + mutex_unlock(&dup_table->mutex); + mutex_unlock(&table->mutex); + } + } else { + mutex_unlock(&table->mutex); + } return err; } @@ -455,8 +757,22 @@ void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan) { struct mlx4_vlan_table *table = &mlx4_priv(dev)->port[port].vlan_table; int index; + bool dup = mlx4_is_mf_bonded(dev); + u8 dup_port = (port == 1) ? 
2 : 1; + struct mlx4_vlan_table *dup_table = &mlx4_priv(dev)->port[dup_port].vlan_table; + + if (dup) { + if (port == 1) { + mutex_lock(&table->mutex); + mutex_lock(&dup_table->mutex); + } else { + mutex_lock(&dup_table->mutex); + mutex_lock(&table->mutex); + } + } else { + mutex_lock(&table->mutex); + } - mutex_lock(&table->mutex); if (mlx4_find_cached_vlan(dev, port, vlan, &index)) { mlx4_warn(dev, "vlan 0x%x is not in the vlan table\n", vlan); goto out; @@ -467,16 +783,38 @@ void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan) goto out; } - if (--table->refs[index]) { + if (--table->refs[index] || table->is_dup[index]) { mlx4_dbg(dev, "Have %d more references for index %d, no need to modify vlan table\n", table->refs[index], index); + if (!table->refs[index]) + dup_table->is_dup[index] = false; goto out; } table->entries[index] = 0; - mlx4_set_port_vlan_table(dev, port, table->entries); + if (mlx4_set_port_vlan_table(dev, port, table->entries)) + mlx4_warn(dev, "Fail to set vlan in port %d during unregister\n", port); --table->total; + if (dup) { + dup_table->is_dup[index] = false; + if (dup_table->refs[index]) + goto out; + dup_table->entries[index] = 0; + if (mlx4_set_port_vlan_table(dev, dup_port, dup_table->entries)) + mlx4_warn(dev, "Fail to set vlan in duplicate port %d during unregister\n", dup_port); + --dup_table->total; + } out: - mutex_unlock(&table->mutex); + if (dup) { + if (port == 2) { + mutex_unlock(&table->mutex); + mutex_unlock(&dup_table->mutex); + } else { + mutex_unlock(&dup_table->mutex); + mutex_unlock(&table->mutex); + } + } else { + mutex_unlock(&table->mutex); + } } void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan) @@ -495,6 +833,220 @@ void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan) } EXPORT_SYMBOL_GPL(mlx4_unregister_vlan); +int mlx4_bond_mac_table(struct mlx4_dev *dev) +{ + struct mlx4_mac_table *t1 = &mlx4_priv(dev)->port[1].mac_table; + struct mlx4_mac_table *t2 = &mlx4_priv(dev)->port[2].mac_table; + int ret = 0; + int i; + bool update1 = false; + bool update2 = false; + + mutex_lock(&t1->mutex); + mutex_lock(&t2->mutex); + for (i = 0; i < MLX4_MAX_MAC_NUM; i++) { + if ((t1->entries[i] != t2->entries[i]) && + t1->entries[i] && t2->entries[i]) { + mlx4_warn(dev, "can't duplicate entry %d in mac table\n", i); + ret = -EINVAL; + goto unlock; + } + } + + for (i = 0; i < MLX4_MAX_MAC_NUM; i++) { + if (t1->entries[i] && !t2->entries[i]) { + t2->entries[i] = t1->entries[i]; + t2->is_dup[i] = true; + update2 = true; + } else if (!t1->entries[i] && t2->entries[i]) { + t1->entries[i] = t2->entries[i]; + t1->is_dup[i] = true; + update1 = true; + } else if (t1->entries[i] && t2->entries[i]) { + t1->is_dup[i] = true; + t2->is_dup[i] = true; + } + } + + if (update1) { + ret = mlx4_set_port_mac_table(dev, 1, t1->entries); + if (ret) + mlx4_warn(dev, "failed to set MAC table for port 1 (%d)\n", ret); + } + if (!ret && update2) { + ret = mlx4_set_port_mac_table(dev, 2, t2->entries); + if (ret) + mlx4_warn(dev, "failed to set MAC table for port 2 (%d)\n", ret); + } + + if (ret) + mlx4_warn(dev, "failed to create mirror MAC tables\n"); +unlock: + mutex_unlock(&t2->mutex); + mutex_unlock(&t1->mutex); + return ret; +} + +int mlx4_unbond_mac_table(struct mlx4_dev *dev) +{ + struct mlx4_mac_table *t1 = &mlx4_priv(dev)->port[1].mac_table; + struct mlx4_mac_table *t2 = &mlx4_priv(dev)->port[2].mac_table; + int ret = 0; + int ret1; + int i; + bool update1 = false; + bool update2 = false; + + mutex_lock(&t1->mutex); + 
mutex_lock(&t2->mutex); + for (i = 0; i < MLX4_MAX_MAC_NUM; i++) { + if (t1->entries[i] != t2->entries[i]) { + mlx4_warn(dev, "mac table is in an unexpected state when trying to unbond\n"); + ret = -EINVAL; + goto unlock; + } + } + + for (i = 0; i < MLX4_MAX_MAC_NUM; i++) { + if (!t1->entries[i]) + continue; + t1->is_dup[i] = false; + if (!t1->refs[i]) { + t1->entries[i] = 0; + update1 = true; + } + t2->is_dup[i] = false; + if (!t2->refs[i]) { + t2->entries[i] = 0; + update2 = true; + } + } + + if (update1) { + ret = mlx4_set_port_mac_table(dev, 1, t1->entries); + if (ret) + mlx4_warn(dev, "failed to unmirror MAC tables for port 1(%d)\n", ret); + } + if (update2) { + ret1 = mlx4_set_port_mac_table(dev, 2, t2->entries); + if (ret1) { + mlx4_warn(dev, "failed to unmirror MAC tables for port 2(%d)\n", ret1); + ret = ret1; + } + } +unlock: + mutex_unlock(&t2->mutex); + mutex_unlock(&t1->mutex); + return ret; +} + +int mlx4_bond_vlan_table(struct mlx4_dev *dev) +{ + struct mlx4_vlan_table *t1 = &mlx4_priv(dev)->port[1].vlan_table; + struct mlx4_vlan_table *t2 = &mlx4_priv(dev)->port[2].vlan_table; + int ret = 0; + int i; + bool update1 = false; + bool update2 = false; + + mutex_lock(&t1->mutex); + mutex_lock(&t2->mutex); + for (i = 0; i < MLX4_MAX_VLAN_NUM; i++) { + if ((t1->entries[i] != t2->entries[i]) && + t1->entries[i] && t2->entries[i]) { + mlx4_warn(dev, "can't duplicate entry %d in vlan table\n", i); + ret = -EINVAL; + goto unlock; + } + } + + for (i = 0; i < MLX4_MAX_VLAN_NUM; i++) { + if (t1->entries[i] && !t2->entries[i]) { + t2->entries[i] = t1->entries[i]; + t2->is_dup[i] = true; + update2 = true; + } else if (!t1->entries[i] && t2->entries[i]) { + t1->entries[i] = t2->entries[i]; + t1->is_dup[i] = true; + update1 = true; + } else if (t1->entries[i] && t2->entries[i]) { + t1->is_dup[i] = true; + t2->is_dup[i] = true; + } + } + + if (update1) { + ret = mlx4_set_port_vlan_table(dev, 1, t1->entries); + if (ret) + mlx4_warn(dev, "failed to set VLAN table for port 1 (%d)\n", ret); + } + if (!ret && update2) { + ret = mlx4_set_port_vlan_table(dev, 2, t2->entries); + if (ret) + mlx4_warn(dev, "failed to set VLAN table for port 2 (%d)\n", ret); + } + + if (ret) + mlx4_warn(dev, "failed to create mirror VLAN tables\n"); +unlock: + mutex_unlock(&t2->mutex); + mutex_unlock(&t1->mutex); + return ret; +} + +int mlx4_unbond_vlan_table(struct mlx4_dev *dev) +{ + struct mlx4_vlan_table *t1 = &mlx4_priv(dev)->port[1].vlan_table; + struct mlx4_vlan_table *t2 = &mlx4_priv(dev)->port[2].vlan_table; + int ret = 0; + int ret1; + int i; + bool update1 = false; + bool update2 = false; + + mutex_lock(&t1->mutex); + mutex_lock(&t2->mutex); + for (i = 0; i < MLX4_MAX_VLAN_NUM; i++) { + if (t1->entries[i] != t2->entries[i]) { + mlx4_warn(dev, "vlan table is in an unexpected state when trying to unbond\n"); + ret = -EINVAL; + goto unlock; + } + } + + for (i = 0; i < MLX4_MAX_VLAN_NUM; i++) { + if (!t1->entries[i]) + continue; + t1->is_dup[i] = false; + if (!t1->refs[i]) { + t1->entries[i] = 0; + update1 = true; + } + t2->is_dup[i] = false; + if (!t2->refs[i]) { + t2->entries[i] = 0; + update2 = true; + } + } + + if (update1) { + ret = mlx4_set_port_vlan_table(dev, 1, t1->entries); + if (ret) + mlx4_warn(dev, "failed to unmirror VLAN tables for port 1(%d)\n", ret); + } + if (update2) { + ret1 = mlx4_set_port_vlan_table(dev, 2, t2->entries); + if (ret1) { + mlx4_warn(dev, "failed to unmirror VLAN tables for port 2(%d)\n", ret1); + ret = ret1; + } + } +unlock: + mutex_unlock(&t2->mutex); + 
mutex_unlock(&t1->mutex); + return ret; +} + int mlx4_get_port_ib_caps(struct mlx4_dev *dev, u8 port, __be32 *caps) { struct mlx4_cmd_mailbox *inmailbox, *outmailbox; diff --git a/include/linux/mlx4/driver.h b/include/linux/mlx4/driver.h index 5a06d969338e..2e8af001c5da 100644 --- a/include/linux/mlx4/driver.h +++ b/include/linux/mlx4/driver.h @@ -75,6 +75,11 @@ static inline int mlx4_is_bonded(struct mlx4_dev *dev) return !!(dev->flags & MLX4_FLAG_BONDED); } +static inline int mlx4_is_mf_bonded(struct mlx4_dev *dev) +{ + return (mlx4_is_bonded(dev) && mlx4_is_mfunc(dev)); +} + struct mlx4_port_map { u8 port1; u8 port2; -- cgit v1.2.3-71-gd317 From ea3793ee29d3621faf857fa8ef5425e9ff9a756d Mon Sep 17 00:00:00 2001 From: Rainer Weikusat Date: Sun, 6 Dec 2015 21:11:34 +0000 Subject: core: enable more fine-grained datagram reception control The __skb_recv_datagram routine in core/ datagram.c provides a general skb reception factility supposed to be utilized by protocol modules providing datagram sockets. It encompasses both the actual recvmsg code and a surrounding 'sleep until data is available' loop. This is inconvenient if a protocol module has to use additional locking in order to maintain some per-socket state the generic datagram socket code is unaware of (as the af_unix code does). The patch below moves the recvmsg proper code into a new __skb_try_recv_datagram routine which doesn't sleep and renames wait_for_more_packets to __skb_wait_for_more_packets, both routines being exported interfaces. The original __skb_recv_datagram routine is reimplemented on top of these two functions such that its user-visible behaviour remains unchanged. Signed-off-by: Rainer Weikusat Signed-off-by: David S. Miller --- include/linux/skbuff.h | 6 ++++ net/core/datagram.c | 77 +++++++++++++++++++++++++++++++------------------- 2 files changed, 54 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c9c394bf0771..9b9b9ead7bb3 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2785,6 +2785,12 @@ static inline void skb_frag_list_init(struct sk_buff *skb) #define skb_walk_frags(skb, iter) \ for (iter = skb_shinfo(skb)->frag_list; iter; iter = iter->next) + +int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p, + const struct sk_buff *skb); +struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned flags, + int *peeked, int *off, int *err, + struct sk_buff **last); struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags, int *peeked, int *off, int *err); struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock, diff --git a/net/core/datagram.c b/net/core/datagram.c index d62af69ad844..7daff66d3d0b 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -83,8 +83,8 @@ static int receiver_wake_function(wait_queue_t *wait, unsigned int mode, int syn /* * Wait for the last received packet to be different from skb */ -static int wait_for_more_packets(struct sock *sk, int *err, long *timeo_p, - const struct sk_buff *skb) +int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p, + const struct sk_buff *skb) { int error; DEFINE_WAIT_FUNC(wait, receiver_wake_function); @@ -130,6 +130,7 @@ out_noerr: error = 1; goto out; } +EXPORT_SYMBOL(__skb_wait_for_more_packets); static struct sk_buff *skb_set_peeked(struct sk_buff *skb) { @@ -161,13 +162,15 @@ done: } /** - * __skb_recv_datagram - Receive a datagram skbuff + * __skb_try_recv_datagram - Receive 
a datagram skbuff * @sk: socket * @flags: MSG_ flags * @peeked: returns non-zero if this packet has been seen before * @off: an offset in bytes to peek skb from. Returns an offset * within an skb where data actually starts * @err: error code returned + * @last: set to last peeked message to inform the wait function + * what to look for when peeking * * Get a datagram skbuff, understands the peeking, nonblocking wakeups * and possible races. This replaces identical code in packet, raw and @@ -175,9 +178,11 @@ done: * the long standing peek and read race for datagram sockets. If you * alter this routine remember it must be re-entrant. * - * This function will lock the socket if a skb is returned, so the caller - * needs to unlock the socket in that case (usually by calling - * skb_free_datagram) + * This function will lock the socket if a skb is returned, so + * the caller needs to unlock the socket in that case (usually by + * calling skb_free_datagram). Returns NULL with *err set to + * -EAGAIN if no data was available or to some other value if an + * error was detected. * * * It does not lock socket since today. This function is * * free of race conditions. This measure should/can improve @@ -191,13 +196,13 @@ done: * quite explicitly by POSIX 1003.1g, don't change them without having * the standard around please. */ -struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, - int *peeked, int *off, int *err) +struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags, + int *peeked, int *off, int *err, + struct sk_buff **last) { struct sk_buff_head *queue = &sk->sk_receive_queue; - struct sk_buff *skb, *last; + struct sk_buff *skb; unsigned long cpu_flags; - long timeo; /* * Caller is allowed not to check sk->sk_err before skb_recv_datagram() */ @@ -206,8 +211,6 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, if (error) goto no_packet; - timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); - do { /* Again only user level code calls this function, so nothing * interrupt level will suddenly eat the receive_queue. 
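/*
 * Usage sketch only (hypothetical caller, not taken from this patch): with
 * the split interface, a protocol module that keeps private per-socket state
 * under its own lock can retake that lock around the non-blocking receive
 * attempt and drop it before sleeping.  my_proto_lock()/my_proto_unlock()
 * are assumed placeholders, the usual linux/skbuff.h and net/sock.h
 * declarations are assumed, and MSG_PEEK offset handling is omitted.
 */
static struct sk_buff *my_proto_recv_skb(struct sock *sk, unsigned int flags,
					 int *err)
{
	long timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
	struct sk_buff *skb, *last;
	int peeked, off = 0;

	for (;;) {
		my_proto_lock(sk);	/* module-private state lock */
		skb = __skb_try_recv_datagram(sk, flags, &peeked, &off,
					      err, &last);
		my_proto_unlock(sk);

		if (skb)		/* got a datagram */
			return skb;
		if (*err != -EAGAIN)	/* hard error reported by the core */
			return NULL;
		if (!timeo)		/* non-blocking caller */
			return NULL;
		/* sleep until "last" is no longer the newest queued skb */
		if (__skb_wait_for_more_packets(sk, err, &timeo, last))
			return NULL;
	}
}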
@@ -217,10 +220,10 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, */ int _off = *off; - last = (struct sk_buff *)queue; + *last = (struct sk_buff *)queue; spin_lock_irqsave(&queue->lock, cpu_flags); skb_queue_walk(queue, skb) { - last = skb; + *last = skb; *peeked = skb->peeked; if (flags & MSG_PEEK) { if (_off >= skb->len && (skb->len || _off || @@ -231,8 +234,11 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, skb = skb_set_peeked(skb); error = PTR_ERR(skb); - if (IS_ERR(skb)) - goto unlock_err; + if (IS_ERR(skb)) { + spin_unlock_irqrestore(&queue->lock, + cpu_flags); + goto no_packet; + } atomic_inc(&skb->users); } else @@ -242,25 +248,38 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, *off = _off; return skb; } + spin_unlock_irqrestore(&queue->lock, cpu_flags); + } while (sk_can_busy_loop(sk) && + sk_busy_loop(sk, flags & MSG_DONTWAIT)); - if (sk_can_busy_loop(sk) && - sk_busy_loop(sk, flags & MSG_DONTWAIT)) - continue; + error = -EAGAIN; - /* User doesn't want to wait */ - error = -EAGAIN; - if (!timeo) - goto no_packet; +no_packet: + *err = error; + return NULL; +} +EXPORT_SYMBOL(__skb_try_recv_datagram); - } while (!wait_for_more_packets(sk, err, &timeo, last)); +struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, + int *peeked, int *off, int *err) +{ + struct sk_buff *skb, *last; + long timeo; - return NULL; + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + + do { + skb = __skb_try_recv_datagram(sk, flags, peeked, off, err, + &last); + if (skb) + return skb; + + if (*err != EAGAIN) + break; + } while (timeo && + !__skb_wait_for_more_packets(sk, err, &timeo, last)); -unlock_err: - spin_unlock_irqrestore(&queue->lock, cpu_flags); -no_packet: - *err = error; return NULL; } EXPORT_SYMBOL(__skb_recv_datagram); -- cgit v1.2.3-71-gd317 From 8ac2837c89c8c0fcad557e4380aeef80580390f9 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 9 Dec 2015 10:51:12 +0800 Subject: Revert "Merge branch 'vsock-virtio'" This reverts commit 0d76d6e8b2507983a2cae4c09880798079007421 and merge commit c402293bd76fbc93e52ef8c0947ab81eea3ae019, reversing changes made to c89359a42e2a49656451569c382eed63e781153c. The virtio-vsock device specification is not finalized yet. Michael Tsirkin voiced concerned about merging this code when the hardware interface (and possibly the userspace interface) could still change. Signed-off-by: Stefan Hajnoczi Signed-off-by: David S. 
Miller --- drivers/vhost/Kconfig | 4 - drivers/vhost/Kconfig.vsock | 7 - drivers/vhost/Makefile | 4 - drivers/vhost/vsock.c | 630 --------------- drivers/vhost/vsock.h | 4 - include/linux/virtio_vsock.h | 209 ----- include/net/af_vsock.h | 2 - include/uapi/linux/virtio_ids.h | 1 - include/uapi/linux/virtio_vsock.h | 89 --- net/vmw_vsock/Kconfig | 18 - net/vmw_vsock/Makefile | 2 - net/vmw_vsock/af_vsock.c | 70 -- net/vmw_vsock/virtio_transport.c | 466 ----------- net/vmw_vsock/virtio_transport_common.c | 1272 ------------------------------- 14 files changed, 2778 deletions(-) delete mode 100644 drivers/vhost/Kconfig.vsock delete mode 100644 drivers/vhost/vsock.c delete mode 100644 drivers/vhost/vsock.h delete mode 100644 include/linux/virtio_vsock.h delete mode 100644 include/uapi/linux/virtio_vsock.h delete mode 100644 net/vmw_vsock/virtio_transport.c delete mode 100644 net/vmw_vsock/virtio_transport_common.c (limited to 'include/linux') diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig index 81449bfc8d3b..533eaf04f12f 100644 --- a/drivers/vhost/Kconfig +++ b/drivers/vhost/Kconfig @@ -47,7 +47,3 @@ config VHOST_CROSS_ENDIAN_LEGACY adds some overhead, it is disabled by default. If unsure, say "N". - -if STAGING -source "drivers/vhost/Kconfig.vsock" -endif diff --git a/drivers/vhost/Kconfig.vsock b/drivers/vhost/Kconfig.vsock deleted file mode 100644 index 3491865d3eb9..000000000000 --- a/drivers/vhost/Kconfig.vsock +++ /dev/null @@ -1,7 +0,0 @@ -config VHOST_VSOCK - tristate "vhost virtio-vsock driver" - depends on VSOCKETS && EVENTFD - select VIRTIO_VSOCKETS_COMMON - default n - ---help--- - Say M here to enable the vhost-vsock for virtio-vsock guests diff --git a/drivers/vhost/Makefile b/drivers/vhost/Makefile index 6b012b986b57..e0441c34db1c 100644 --- a/drivers/vhost/Makefile +++ b/drivers/vhost/Makefile @@ -4,9 +4,5 @@ vhost_net-y := net.o obj-$(CONFIG_VHOST_SCSI) += vhost_scsi.o vhost_scsi-y := scsi.o -obj-$(CONFIG_VHOST_VSOCK) += vhost_vsock.o -vhost_vsock-y := vsock.o - obj-$(CONFIG_VHOST_RING) += vringh.o - obj-$(CONFIG_VHOST) += vhost.o diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c deleted file mode 100644 index 64bcb10bb901..000000000000 --- a/drivers/vhost/vsock.c +++ /dev/null @@ -1,630 +0,0 @@ -/* - * vhost transport for vsock - * - * Copyright (C) 2013-2015 Red Hat, Inc. - * Author: Asias He - * Stefan Hajnoczi - * - * This work is licensed under the terms of the GNU GPL, version 2. - */ -#include -#include -#include -#include -#include -#include - -#include -#include "vhost.h" -#include "vsock.h" - -#define VHOST_VSOCK_DEFAULT_HOST_CID 2 - -static int vhost_transport_socket_init(struct vsock_sock *vsk, - struct vsock_sock *psk); - -enum { - VHOST_VSOCK_FEATURES = VHOST_FEATURES, -}; - -/* Used to track all the vhost_vsock instances on the system. 
*/ -static LIST_HEAD(vhost_vsock_list); -static DEFINE_MUTEX(vhost_vsock_mutex); - -struct vhost_vsock_virtqueue { - struct vhost_virtqueue vq; -}; - -struct vhost_vsock { - /* Vhost device */ - struct vhost_dev dev; - /* Vhost vsock virtqueue*/ - struct vhost_vsock_virtqueue vqs[VSOCK_VQ_MAX]; - /* Link to global vhost_vsock_list*/ - struct list_head list; - /* Head for pkt from host to guest */ - struct list_head send_pkt_list; - /* Work item to send pkt */ - struct vhost_work send_pkt_work; - /* Wait queue for send pkt */ - wait_queue_head_t queue_wait; - /* Used for global tx buf limitation */ - u32 total_tx_buf; - /* Guest contex id this vhost_vsock instance handles */ - u32 guest_cid; -}; - -static u32 vhost_transport_get_local_cid(void) -{ - return VHOST_VSOCK_DEFAULT_HOST_CID; -} - -static struct vhost_vsock *vhost_vsock_get(u32 guest_cid) -{ - struct vhost_vsock *vsock; - - mutex_lock(&vhost_vsock_mutex); - list_for_each_entry(vsock, &vhost_vsock_list, list) { - if (vsock->guest_cid == guest_cid) { - mutex_unlock(&vhost_vsock_mutex); - return vsock; - } - } - mutex_unlock(&vhost_vsock_mutex); - - return NULL; -} - -static void -vhost_transport_do_send_pkt(struct vhost_vsock *vsock, - struct vhost_virtqueue *vq) -{ - bool added = false; - - mutex_lock(&vq->mutex); - vhost_disable_notify(&vsock->dev, vq); - for (;;) { - struct virtio_vsock_pkt *pkt; - struct iov_iter iov_iter; - unsigned out, in; - struct sock *sk; - size_t nbytes; - size_t len; - int head; - - if (list_empty(&vsock->send_pkt_list)) { - vhost_enable_notify(&vsock->dev, vq); - break; - } - - head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), - &out, &in, NULL, NULL); - pr_debug("%s: head = %d\n", __func__, head); - if (head < 0) - break; - - if (head == vq->num) { - if (unlikely(vhost_enable_notify(&vsock->dev, vq))) { - vhost_disable_notify(&vsock->dev, vq); - continue; - } - break; - } - - pkt = list_first_entry(&vsock->send_pkt_list, - struct virtio_vsock_pkt, list); - list_del_init(&pkt->list); - - if (out) { - virtio_transport_free_pkt(pkt); - vq_err(vq, "Expected 0 output buffers, got %u\n", out); - break; - } - - len = iov_length(&vq->iov[out], in); - iov_iter_init(&iov_iter, READ, &vq->iov[out], in, len); - - nbytes = copy_to_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter); - if (nbytes != sizeof(pkt->hdr)) { - virtio_transport_free_pkt(pkt); - vq_err(vq, "Faulted on copying pkt hdr\n"); - break; - } - - nbytes = copy_to_iter(pkt->buf, pkt->len, &iov_iter); - if (nbytes != pkt->len) { - virtio_transport_free_pkt(pkt); - vq_err(vq, "Faulted on copying pkt buf\n"); - break; - } - - vhost_add_used(vq, head, pkt->len); /* TODO should this be sizeof(pkt->hdr) + pkt->len? 
*/ - added = true; - - virtio_transport_dec_tx_pkt(pkt); - vsock->total_tx_buf -= pkt->len; - - sk = sk_vsock(pkt->trans->vsk); - /* Release refcnt taken in vhost_transport_send_pkt */ - sock_put(sk); - - virtio_transport_free_pkt(pkt); - } - if (added) - vhost_signal(&vsock->dev, vq); - mutex_unlock(&vq->mutex); - - if (added) - wake_up(&vsock->queue_wait); -} - -static void vhost_transport_send_pkt_work(struct vhost_work *work) -{ - struct vhost_virtqueue *vq; - struct vhost_vsock *vsock; - - vsock = container_of(work, struct vhost_vsock, send_pkt_work); - vq = &vsock->vqs[VSOCK_VQ_RX].vq; - - vhost_transport_do_send_pkt(vsock, vq); -} - -static int -vhost_transport_send_pkt(struct vsock_sock *vsk, - struct virtio_vsock_pkt_info *info) -{ - u32 src_cid, src_port, dst_cid, dst_port; - struct virtio_transport *trans; - struct virtio_vsock_pkt *pkt; - struct vhost_virtqueue *vq; - struct vhost_vsock *vsock; - u32 pkt_len = info->pkt_len; - DEFINE_WAIT(wait); - - src_cid = vhost_transport_get_local_cid(); - src_port = vsk->local_addr.svm_port; - if (!info->remote_cid) { - dst_cid = vsk->remote_addr.svm_cid; - dst_port = vsk->remote_addr.svm_port; - } else { - dst_cid = info->remote_cid; - dst_port = info->remote_port; - } - - /* Find the vhost_vsock according to guest context id */ - vsock = vhost_vsock_get(dst_cid); - if (!vsock) - return -ENODEV; - - trans = vsk->trans; - vq = &vsock->vqs[VSOCK_VQ_RX].vq; - - /* we can send less than pkt_len bytes */ - if (pkt_len > VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE) - pkt_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE; - - /* virtio_transport_get_credit might return less than pkt_len credit */ - pkt_len = virtio_transport_get_credit(trans, pkt_len); - - /* Do not send zero length OP_RW pkt*/ - if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW) - return pkt_len; - - /* Respect global tx buf limitation */ - mutex_lock(&vq->mutex); - while (pkt_len + vsock->total_tx_buf > VIRTIO_VSOCK_MAX_TX_BUF_SIZE) { - prepare_to_wait_exclusive(&vsock->queue_wait, &wait, - TASK_UNINTERRUPTIBLE); - mutex_unlock(&vq->mutex); - schedule(); - mutex_lock(&vq->mutex); - finish_wait(&vsock->queue_wait, &wait); - } - vsock->total_tx_buf += pkt_len; - mutex_unlock(&vq->mutex); - - pkt = virtio_transport_alloc_pkt(vsk, info, pkt_len, - src_cid, src_port, - dst_cid, dst_port); - if (!pkt) { - mutex_lock(&vq->mutex); - vsock->total_tx_buf -= pkt_len; - mutex_unlock(&vq->mutex); - virtio_transport_put_credit(trans, pkt_len); - return -ENOMEM; - } - - pr_debug("%s:info->pkt_len= %d\n", __func__, pkt_len); - /* Released in vhost_transport_do_send_pkt */ - sock_hold(&trans->vsk->sk); - virtio_transport_inc_tx_pkt(pkt); - - /* Queue it up in vhost work */ - mutex_lock(&vq->mutex); - list_add_tail(&pkt->list, &vsock->send_pkt_list); - vhost_work_queue(&vsock->dev, &vsock->send_pkt_work); - mutex_unlock(&vq->mutex); - - return pkt_len; -} - -static struct virtio_transport_pkt_ops vhost_ops = { - .send_pkt = vhost_transport_send_pkt, -}; - -static struct virtio_vsock_pkt * -vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq, - unsigned int out, unsigned int in) -{ - struct virtio_vsock_pkt *pkt; - struct iov_iter iov_iter; - size_t nbytes; - size_t len; - - if (in != 0) { - vq_err(vq, "Expected 0 input buffers, got %u\n", in); - return NULL; - } - - pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); - if (!pkt) - return NULL; - - len = iov_length(vq->iov, out); - iov_iter_init(&iov_iter, WRITE, vq->iov, out, len); - - nbytes = copy_from_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter); - if (nbytes != 
sizeof(pkt->hdr)) { - vq_err(vq, "Expected %zu bytes for pkt->hdr, got %zu bytes\n", - sizeof(pkt->hdr), nbytes); - kfree(pkt); - return NULL; - } - - if (le16_to_cpu(pkt->hdr.type) == VIRTIO_VSOCK_TYPE_DGRAM) - pkt->len = le32_to_cpu(pkt->hdr.len) & 0XFFFF; - else if (le16_to_cpu(pkt->hdr.type) == VIRTIO_VSOCK_TYPE_STREAM) - pkt->len = le32_to_cpu(pkt->hdr.len); - - /* No payload */ - if (!pkt->len) - return pkt; - - /* The pkt is too big */ - if (pkt->len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) { - kfree(pkt); - return NULL; - } - - pkt->buf = kmalloc(pkt->len, GFP_KERNEL); - if (!pkt->buf) { - kfree(pkt); - return NULL; - } - - nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter); - if (nbytes != pkt->len) { - vq_err(vq, "Expected %u byte payload, got %zu bytes\n", - pkt->len, nbytes); - virtio_transport_free_pkt(pkt); - return NULL; - } - - return pkt; -} - -static void vhost_vsock_handle_ctl_kick(struct vhost_work *work) -{ - struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, - poll.work); - struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock, - dev); - - pr_debug("%s vq=%p, vsock=%p\n", __func__, vq, vsock); -} - -static void vhost_vsock_handle_tx_kick(struct vhost_work *work) -{ - struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, - poll.work); - struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock, - dev); - struct virtio_vsock_pkt *pkt; - int head; - unsigned int out, in; - bool added = false; - u32 len; - - mutex_lock(&vq->mutex); - vhost_disable_notify(&vsock->dev, vq); - for (;;) { - head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), - &out, &in, NULL, NULL); - if (head < 0) - break; - - if (head == vq->num) { - if (unlikely(vhost_enable_notify(&vsock->dev, vq))) { - vhost_disable_notify(&vsock->dev, vq); - continue; - } - break; - } - - pkt = vhost_vsock_alloc_pkt(vq, out, in); - if (!pkt) { - vq_err(vq, "Faulted on pkt\n"); - continue; - } - - len = pkt->len; - - /* Only accept correctly addressed packets */ - if (le32_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid && - le32_to_cpu(pkt->hdr.dst_cid) == vhost_transport_get_local_cid()) - virtio_transport_recv_pkt(pkt); - else - virtio_transport_free_pkt(pkt); - - vhost_add_used(vq, head, len); - added = true; - } - if (added) - vhost_signal(&vsock->dev, vq); - mutex_unlock(&vq->mutex); -} - -static void vhost_vsock_handle_rx_kick(struct vhost_work *work) -{ - struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, - poll.work); - struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock, - dev); - - vhost_transport_do_send_pkt(vsock, vq); -} - -static int vhost_vsock_dev_open(struct inode *inode, struct file *file) -{ - struct vhost_virtqueue **vqs; - struct vhost_vsock *vsock; - int ret; - - vsock = kzalloc(sizeof(*vsock), GFP_KERNEL); - if (!vsock) - return -ENOMEM; - - pr_debug("%s:vsock=%p\n", __func__, vsock); - - vqs = kmalloc(VSOCK_VQ_MAX * sizeof(*vqs), GFP_KERNEL); - if (!vqs) { - ret = -ENOMEM; - goto out; - } - - vqs[VSOCK_VQ_CTRL] = &vsock->vqs[VSOCK_VQ_CTRL].vq; - vqs[VSOCK_VQ_TX] = &vsock->vqs[VSOCK_VQ_TX].vq; - vqs[VSOCK_VQ_RX] = &vsock->vqs[VSOCK_VQ_RX].vq; - vsock->vqs[VSOCK_VQ_CTRL].vq.handle_kick = vhost_vsock_handle_ctl_kick; - vsock->vqs[VSOCK_VQ_TX].vq.handle_kick = vhost_vsock_handle_tx_kick; - vsock->vqs[VSOCK_VQ_RX].vq.handle_kick = vhost_vsock_handle_rx_kick; - - vhost_dev_init(&vsock->dev, vqs, VSOCK_VQ_MAX); - - file->private_data = vsock; - init_waitqueue_head(&vsock->queue_wait); - 
INIT_LIST_HEAD(&vsock->send_pkt_list); - vhost_work_init(&vsock->send_pkt_work, vhost_transport_send_pkt_work); - - mutex_lock(&vhost_vsock_mutex); - list_add_tail(&vsock->list, &vhost_vsock_list); - mutex_unlock(&vhost_vsock_mutex); - return 0; - -out: - kfree(vsock); - return ret; -} - -static void vhost_vsock_flush(struct vhost_vsock *vsock) -{ - int i; - - for (i = 0; i < VSOCK_VQ_MAX; i++) - vhost_poll_flush(&vsock->vqs[i].vq.poll); - vhost_work_flush(&vsock->dev, &vsock->send_pkt_work); -} - -static int vhost_vsock_dev_release(struct inode *inode, struct file *file) -{ - struct vhost_vsock *vsock = file->private_data; - - mutex_lock(&vhost_vsock_mutex); - list_del(&vsock->list); - mutex_unlock(&vhost_vsock_mutex); - - vhost_dev_stop(&vsock->dev); - vhost_vsock_flush(vsock); - vhost_dev_cleanup(&vsock->dev, false); - kfree(vsock->dev.vqs); - kfree(vsock); - return 0; -} - -static int vhost_vsock_set_cid(struct vhost_vsock *vsock, u32 guest_cid) -{ - struct vhost_vsock *other; - - /* Refuse reserved CIDs */ - if (guest_cid <= VMADDR_CID_HOST) { - return -EINVAL; - } - - /* Refuse if CID is already in use */ - other = vhost_vsock_get(guest_cid); - if (other && other != vsock) { - return -EADDRINUSE; - } - - mutex_lock(&vhost_vsock_mutex); - vsock->guest_cid = guest_cid; - pr_debug("%s:guest_cid=%d\n", __func__, guest_cid); - mutex_unlock(&vhost_vsock_mutex); - - return 0; -} - -static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features) -{ - struct vhost_virtqueue *vq; - int i; - - if (features & ~VHOST_VSOCK_FEATURES) - return -EOPNOTSUPP; - - mutex_lock(&vsock->dev.mutex); - if ((features & (1 << VHOST_F_LOG_ALL)) && - !vhost_log_access_ok(&vsock->dev)) { - mutex_unlock(&vsock->dev.mutex); - return -EFAULT; - } - - for (i = 0; i < VSOCK_VQ_MAX; i++) { - vq = &vsock->vqs[i].vq; - mutex_lock(&vq->mutex); - vq->acked_features = features; - mutex_unlock(&vq->mutex); - } - mutex_unlock(&vsock->dev.mutex); - return 0; -} - -static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl, - unsigned long arg) -{ - struct vhost_vsock *vsock = f->private_data; - void __user *argp = (void __user *)arg; - u64 __user *featurep = argp; - u32 __user *cidp = argp; - u32 guest_cid; - u64 features; - int r; - - switch (ioctl) { - case VHOST_VSOCK_SET_GUEST_CID: - if (get_user(guest_cid, cidp)) - return -EFAULT; - return vhost_vsock_set_cid(vsock, guest_cid); - case VHOST_GET_FEATURES: - features = VHOST_VSOCK_FEATURES; - if (copy_to_user(featurep, &features, sizeof(features))) - return -EFAULT; - return 0; - case VHOST_SET_FEATURES: - if (copy_from_user(&features, featurep, sizeof(features))) - return -EFAULT; - return vhost_vsock_set_features(vsock, features); - default: - mutex_lock(&vsock->dev.mutex); - r = vhost_dev_ioctl(&vsock->dev, ioctl, argp); - if (r == -ENOIOCTLCMD) - r = vhost_vring_ioctl(&vsock->dev, ioctl, argp); - else - vhost_vsock_flush(vsock); - mutex_unlock(&vsock->dev.mutex); - return r; - } -} - -static const struct file_operations vhost_vsock_fops = { - .owner = THIS_MODULE, - .open = vhost_vsock_dev_open, - .release = vhost_vsock_dev_release, - .llseek = noop_llseek, - .unlocked_ioctl = vhost_vsock_dev_ioctl, -}; - -static struct miscdevice vhost_vsock_misc = { - .minor = MISC_DYNAMIC_MINOR, - .name = "vhost-vsock", - .fops = &vhost_vsock_fops, -}; - -static int -vhost_transport_socket_init(struct vsock_sock *vsk, struct vsock_sock *psk) -{ - struct virtio_transport *trans; - int ret; - - ret = virtio_transport_do_socket_init(vsk, psk); - if (ret) - return 
ret; - - trans = vsk->trans; - trans->ops = &vhost_ops; - - return ret; -} - -static struct vsock_transport vhost_transport = { - .get_local_cid = vhost_transport_get_local_cid, - - .init = vhost_transport_socket_init, - .destruct = virtio_transport_destruct, - .release = virtio_transport_release, - .connect = virtio_transport_connect, - .shutdown = virtio_transport_shutdown, - - .dgram_enqueue = virtio_transport_dgram_enqueue, - .dgram_dequeue = virtio_transport_dgram_dequeue, - .dgram_bind = virtio_transport_dgram_bind, - .dgram_allow = virtio_transport_dgram_allow, - - .stream_enqueue = virtio_transport_stream_enqueue, - .stream_dequeue = virtio_transport_stream_dequeue, - .stream_has_data = virtio_transport_stream_has_data, - .stream_has_space = virtio_transport_stream_has_space, - .stream_rcvhiwat = virtio_transport_stream_rcvhiwat, - .stream_is_active = virtio_transport_stream_is_active, - .stream_allow = virtio_transport_stream_allow, - - .notify_poll_in = virtio_transport_notify_poll_in, - .notify_poll_out = virtio_transport_notify_poll_out, - .notify_recv_init = virtio_transport_notify_recv_init, - .notify_recv_pre_block = virtio_transport_notify_recv_pre_block, - .notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue, - .notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue, - .notify_send_init = virtio_transport_notify_send_init, - .notify_send_pre_block = virtio_transport_notify_send_pre_block, - .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue, - .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue, - - .set_buffer_size = virtio_transport_set_buffer_size, - .set_min_buffer_size = virtio_transport_set_min_buffer_size, - .set_max_buffer_size = virtio_transport_set_max_buffer_size, - .get_buffer_size = virtio_transport_get_buffer_size, - .get_min_buffer_size = virtio_transport_get_min_buffer_size, - .get_max_buffer_size = virtio_transport_get_max_buffer_size, -}; - -static int __init vhost_vsock_init(void) -{ - int ret; - - ret = vsock_core_init(&vhost_transport); - if (ret < 0) - return ret; - return misc_register(&vhost_vsock_misc); -}; - -static void __exit vhost_vsock_exit(void) -{ - misc_deregister(&vhost_vsock_misc); - vsock_core_exit(); -}; - -module_init(vhost_vsock_init); -module_exit(vhost_vsock_exit); -MODULE_LICENSE("GPL v2"); -MODULE_AUTHOR("Asias He"); -MODULE_DESCRIPTION("vhost transport for vsock "); diff --git a/drivers/vhost/vsock.h b/drivers/vhost/vsock.h deleted file mode 100644 index 0ddb107b86ca..000000000000 --- a/drivers/vhost/vsock.h +++ /dev/null @@ -1,4 +0,0 @@ -#ifndef VHOST_VSOCK_H -#define VHOST_VSOCK_H -#define VHOST_VSOCK_SET_GUEST_CID _IOW(VHOST_VIRTIO, 0x60, __u32) -#endif diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h deleted file mode 100644 index a5f3ecc038f7..000000000000 --- a/include/linux/virtio_vsock.h +++ /dev/null @@ -1,209 +0,0 @@ -/* - * This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so - * anyone can use the definitions to implement compatible drivers/servers: - * - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of IBM nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * Copyright (C) Red Hat, Inc., 2013-2015 - * Copyright (C) Asias He , 2013 - * Copyright (C) Stefan Hajnoczi , 2015 - */ - -#ifndef _LINUX_VIRTIO_VSOCK_H -#define _LINUX_VIRTIO_VSOCK_H - -#include -#include -#include - -#define VIRTIO_VSOCK_DEFAULT_MIN_BUF_SIZE 128 -#define VIRTIO_VSOCK_DEFAULT_BUF_SIZE (1024 * 256) -#define VIRTIO_VSOCK_DEFAULT_MAX_BUF_SIZE (1024 * 256) -#define VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE (1024 * 4) -#define VIRTIO_VSOCK_MAX_BUF_SIZE 0xFFFFFFFFUL -#define VIRTIO_VSOCK_MAX_PKT_BUF_SIZE (1024 * 64) -#define VIRTIO_VSOCK_MAX_TX_BUF_SIZE (1024 * 1024 * 16) -#define VIRTIO_VSOCK_MAX_DGRAM_SIZE (1024 * 64) - -struct vsock_transport_recv_notify_data; -struct vsock_transport_send_notify_data; -struct sockaddr_vm; -struct vsock_sock; - -enum { - VSOCK_VQ_CTRL = 0, - VSOCK_VQ_RX = 1, /* for host to guest data */ - VSOCK_VQ_TX = 2, /* for guest to host data */ - VSOCK_VQ_MAX = 3, -}; - -/* virtio transport socket state */ -struct virtio_transport { - struct virtio_transport_pkt_ops *ops; - struct vsock_sock *vsk; - - u32 buf_size; - u32 buf_size_min; - u32 buf_size_max; - - struct mutex tx_lock; - struct mutex rx_lock; - - struct list_head rx_queue; - u32 rx_bytes; - - /* Protected by trans->tx_lock */ - u32 tx_cnt; - u32 buf_alloc; - u32 peer_fwd_cnt; - u32 peer_buf_alloc; - /* Protected by trans->rx_lock */ - u32 fwd_cnt; - - /* Protected by sk_lock */ - u16 dgram_id; - struct list_head incomplete_dgrams; /* dgram fragments */ -}; - -struct virtio_vsock_pkt { - struct virtio_vsock_hdr hdr; - struct virtio_transport *trans; - struct work_struct work; - struct list_head list; - void *buf; - u32 len; - u32 off; -}; - -struct virtio_vsock_pkt_info { - u32 remote_cid, remote_port; - struct msghdr *msg; - u32 pkt_len; - u16 type; - u16 op; - u32 flags; - u16 dgram_id; - u16 dgram_len; -}; - -struct virtio_transport_pkt_ops { - int (*send_pkt)(struct vsock_sock *vsk, - struct virtio_vsock_pkt_info *info); -}; - -void virtio_vsock_dumppkt(const char *func, - const struct virtio_vsock_pkt *pkt); - -struct sock * -virtio_transport_get_pending(struct sock *listener, - struct virtio_vsock_pkt *pkt); -struct virtio_vsock_pkt * -virtio_transport_alloc_pkt(struct vsock_sock *vsk, - struct virtio_vsock_pkt_info *info, - size_t len, - u32 src_cid, - u32 src_port, - u32 dst_cid, - u32 dst_port); -ssize_t 
-virtio_transport_stream_dequeue(struct vsock_sock *vsk, - struct msghdr *msg, - size_t len, - int type); -int -virtio_transport_dgram_dequeue(struct vsock_sock *vsk, - struct msghdr *msg, - size_t len, int flags); - -s64 virtio_transport_stream_has_data(struct vsock_sock *vsk); -s64 virtio_transport_stream_has_space(struct vsock_sock *vsk); - -int virtio_transport_do_socket_init(struct vsock_sock *vsk, - struct vsock_sock *psk); -u64 virtio_transport_get_buffer_size(struct vsock_sock *vsk); -u64 virtio_transport_get_min_buffer_size(struct vsock_sock *vsk); -u64 virtio_transport_get_max_buffer_size(struct vsock_sock *vsk); -void virtio_transport_set_buffer_size(struct vsock_sock *vsk, u64 val); -void virtio_transport_set_min_buffer_size(struct vsock_sock *vsk, u64 val); -void virtio_transport_set_max_buffer_size(struct vsock_sock *vs, u64 val); -int -virtio_transport_notify_poll_in(struct vsock_sock *vsk, - size_t target, - bool *data_ready_now); -int -virtio_transport_notify_poll_out(struct vsock_sock *vsk, - size_t target, - bool *space_available_now); - -int virtio_transport_notify_recv_init(struct vsock_sock *vsk, - size_t target, struct vsock_transport_recv_notify_data *data); -int virtio_transport_notify_recv_pre_block(struct vsock_sock *vsk, - size_t target, struct vsock_transport_recv_notify_data *data); -int virtio_transport_notify_recv_pre_dequeue(struct vsock_sock *vsk, - size_t target, struct vsock_transport_recv_notify_data *data); -int virtio_transport_notify_recv_post_dequeue(struct vsock_sock *vsk, - size_t target, ssize_t copied, bool data_read, - struct vsock_transport_recv_notify_data *data); -int virtio_transport_notify_send_init(struct vsock_sock *vsk, - struct vsock_transport_send_notify_data *data); -int virtio_transport_notify_send_pre_block(struct vsock_sock *vsk, - struct vsock_transport_send_notify_data *data); -int virtio_transport_notify_send_pre_enqueue(struct vsock_sock *vsk, - struct vsock_transport_send_notify_data *data); -int virtio_transport_notify_send_post_enqueue(struct vsock_sock *vsk, - ssize_t written, struct vsock_transport_send_notify_data *data); - -u64 virtio_transport_stream_rcvhiwat(struct vsock_sock *vsk); -bool virtio_transport_stream_is_active(struct vsock_sock *vsk); -bool virtio_transport_stream_allow(u32 cid, u32 port); -int virtio_transport_dgram_bind(struct vsock_sock *vsk, - struct sockaddr_vm *addr); -bool virtio_transport_dgram_allow(u32 cid, u32 port); - -int virtio_transport_connect(struct vsock_sock *vsk); - -int virtio_transport_shutdown(struct vsock_sock *vsk, int mode); - -void virtio_transport_release(struct vsock_sock *vsk); - -ssize_t -virtio_transport_stream_enqueue(struct vsock_sock *vsk, - struct msghdr *msg, - size_t len); -int -virtio_transport_dgram_enqueue(struct vsock_sock *vsk, - struct sockaddr_vm *remote_addr, - struct msghdr *msg, - size_t len); - -void virtio_transport_destruct(struct vsock_sock *vsk); - -void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt); -void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt); -void virtio_transport_inc_tx_pkt(struct virtio_vsock_pkt *pkt); -void virtio_transport_dec_tx_pkt(struct virtio_vsock_pkt *pkt); -u32 virtio_transport_get_credit(struct virtio_transport *trans, u32 wanted); -void virtio_transport_put_credit(struct virtio_transport *trans, u32 credit); -#endif /* _LINUX_VIRTIO_VSOCK_H */ diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index a0c8fa2ababf..e9eb2d6791b3 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ 
-175,10 +175,8 @@ void vsock_insert_connected(struct vsock_sock *vsk); void vsock_remove_bound(struct vsock_sock *vsk); void vsock_remove_connected(struct vsock_sock *vsk); struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr); -struct sock *vsock_find_unbound_socket(struct sockaddr_vm *addr); struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, struct sockaddr_vm *dst); void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)); -int vsock_bind_dgram_generic(struct vsock_sock *vsk, struct sockaddr_vm *addr); #endif /* __AF_VSOCK_H__ */ diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h index 16dcf5d06cd7..77925f587b15 100644 --- a/include/uapi/linux/virtio_ids.h +++ b/include/uapi/linux/virtio_ids.h @@ -39,7 +39,6 @@ #define VIRTIO_ID_9P 9 /* 9p virtio console */ #define VIRTIO_ID_RPROC_SERIAL 11 /* virtio remoteproc serial link */ #define VIRTIO_ID_CAIF 12 /* Virtio caif */ -#define VIRTIO_ID_VSOCK 13 /* virtio vsock transport */ #define VIRTIO_ID_GPU 16 /* virtio GPU */ #define VIRTIO_ID_INPUT 18 /* virtio input */ diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h deleted file mode 100644 index 8cf9b5682628..000000000000 --- a/include/uapi/linux/virtio_vsock.h +++ /dev/null @@ -1,89 +0,0 @@ -/* - * This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so - * anyone can use the definitions to implement compatible drivers/servers: - * - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of IBM nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * Copyright (C) Red Hat, Inc., 2013-2015 - * Copyright (C) Asias He , 2013 - * Copyright (C) Stefan Hajnoczi , 2015 - */ - -#ifndef _UAPI_LINUX_VIRTIO_VSOCK_H -#define _UAPI_LINUX_VIRTIO_VOSCK_H - -#include -#include -#include - -struct virtio_vsock_config { - __le32 guest_cid; - __le32 max_virtqueue_pairs; -}; - -struct virtio_vsock_hdr { - __le32 src_cid; - __le32 src_port; - __le32 dst_cid; - __le32 dst_port; - __le32 len; - __le16 type; /* enum virtio_vsock_type */ - __le16 op; /* enum virtio_vsock_op */ - __le32 flags; - __le32 buf_alloc; - __le32 fwd_cnt; -}; - -enum virtio_vsock_type { - VIRTIO_VSOCK_TYPE_STREAM = 1, - VIRTIO_VSOCK_TYPE_DGRAM = 2, -}; - -enum virtio_vsock_op { - VIRTIO_VSOCK_OP_INVALID = 0, - - /* Connect operations */ - VIRTIO_VSOCK_OP_REQUEST = 1, - VIRTIO_VSOCK_OP_RESPONSE = 2, - VIRTIO_VSOCK_OP_ACK = 3, - VIRTIO_VSOCK_OP_RST = 4, - VIRTIO_VSOCK_OP_SHUTDOWN = 5, - - /* To send payload */ - VIRTIO_VSOCK_OP_RW = 6, - - /* Tell the peer our credit info */ - VIRTIO_VSOCK_OP_CREDIT_UPDATE = 7, - /* Request the peer to send the credit info to us */ - VIRTIO_VSOCK_OP_CREDIT_REQUEST = 8, -}; - -/* VIRTIO_VSOCK_OP_SHUTDOWN flags values */ -enum virtio_vsock_shutdown { - VIRTIO_VSOCK_SHUTDOWN_RCV = 1, - VIRTIO_VSOCK_SHUTDOWN_SEND = 2, -}; - -#endif /* _UAPI_LINUX_VIRTIO_VSOCK_H */ diff --git a/net/vmw_vsock/Kconfig b/net/vmw_vsock/Kconfig index 74e0bc887a33..14810abedc2e 100644 --- a/net/vmw_vsock/Kconfig +++ b/net/vmw_vsock/Kconfig @@ -26,21 +26,3 @@ config VMWARE_VMCI_VSOCKETS To compile this driver as a module, choose M here: the module will be called vmw_vsock_vmci_transport. If unsure, say N. - -config VIRTIO_VSOCKETS - tristate "virtio transport for Virtual Sockets" - depends on VSOCKETS && VIRTIO - select VIRTIO_VSOCKETS_COMMON - help - This module implements a virtio transport for Virtual Sockets. - - Enable this transport if your Virtual Machine runs on Qemu/KVM. - - To compile this driver as a module, choose M here: the module - will be called virtio_vsock_transport. If unsure, say N. - -config VIRTIO_VSOCKETS_COMMON - tristate - ---help--- - This option is selected by any driver which needs to access - the virtio_vsock. 
diff --git a/net/vmw_vsock/Makefile b/net/vmw_vsock/Makefile index cf4c29439081..2ce52d70f224 100644 --- a/net/vmw_vsock/Makefile +++ b/net/vmw_vsock/Makefile @@ -1,7 +1,5 @@ obj-$(CONFIG_VSOCKETS) += vsock.o obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o -obj-$(CONFIG_VIRTIO_VSOCKETS) += virtio_transport.o -obj-$(CONFIG_VIRTIO_VSOCKETS_COMMON) += virtio_transport_common.o vsock-y += af_vsock.o vsock_addr.o diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 77247a2b670b..7fd1220fbfa0 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -223,17 +223,6 @@ static struct sock *__vsock_find_bound_socket(struct sockaddr_vm *addr) return NULL; } -static struct sock *__vsock_find_unbound_socket(struct sockaddr_vm *addr) -{ - struct vsock_sock *vsk; - - list_for_each_entry(vsk, vsock_unbound_sockets, bound_table) - if (addr->svm_port == vsk->local_addr.svm_port) - return sk_vsock(vsk); - - return NULL; -} - static struct sock *__vsock_find_connected_socket(struct sockaddr_vm *src, struct sockaddr_vm *dst) { @@ -309,21 +298,6 @@ struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr) } EXPORT_SYMBOL_GPL(vsock_find_bound_socket); -struct sock *vsock_find_unbound_socket(struct sockaddr_vm *addr) -{ - struct sock *sk; - - spin_lock_bh(&vsock_table_lock); - sk = __vsock_find_unbound_socket(addr); - if (sk) - sock_hold(sk); - - spin_unlock_bh(&vsock_table_lock); - - return sk; -} -EXPORT_SYMBOL_GPL(vsock_find_unbound_socket); - struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, struct sockaddr_vm *dst) { @@ -558,50 +532,6 @@ static int __vsock_bind_stream(struct vsock_sock *vsk, return 0; } -int vsock_bind_dgram_generic(struct vsock_sock *vsk, struct sockaddr_vm *addr) -{ - static u32 port = LAST_RESERVED_PORT + 1; - struct sockaddr_vm new_addr; - - vsock_addr_init(&new_addr, addr->svm_cid, addr->svm_port); - - if (addr->svm_port == VMADDR_PORT_ANY) { - bool found = false; - unsigned int i; - - for (i = 0; i < MAX_PORT_RETRIES; i++) { - if (port <= LAST_RESERVED_PORT) - port = LAST_RESERVED_PORT + 1; - - new_addr.svm_port = port++; - - if (!__vsock_find_unbound_socket(&new_addr)) { - found = true; - break; - } - } - - if (!found) - return -EADDRNOTAVAIL; - } else { - /* If port is in reserved range, ensure caller - * has necessary privileges. - */ - if (addr->svm_port <= LAST_RESERVED_PORT && - !capable(CAP_NET_BIND_SERVICE)) { - return -EACCES; - } - - if (__vsock_find_unbound_socket(&new_addr)) - return -EADDRINUSE; - } - - vsock_addr_init(&vsk->local_addr, new_addr.svm_cid, new_addr.svm_port); - - return 0; -} -EXPORT_SYMBOL_GPL(vsock_bind_dgram_generic); - static int __vsock_bind_dgram(struct vsock_sock *vsk, struct sockaddr_vm *addr) { diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c deleted file mode 100644 index df65dca55fa1..000000000000 --- a/net/vmw_vsock/virtio_transport.c +++ /dev/null @@ -1,466 +0,0 @@ -/* - * virtio transport for vsock - * - * Copyright (C) 2013-2015 Red Hat, Inc. - * Author: Asias He - * Stefan Hajnoczi - * - * Some of the code is take from Gerd Hoffmann 's - * early virtio-vsock proof-of-concept bits. - * - * This work is licensed under the terms of the GNU GPL, version 2. 
- */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static struct workqueue_struct *virtio_vsock_workqueue; -static struct virtio_vsock *the_virtio_vsock; -static DEFINE_MUTEX(the_virtio_vsock_mutex); /* protects the_virtio_vsock */ -static void virtio_vsock_rx_fill(struct virtio_vsock *vsock); - -struct virtio_vsock { - /* Virtio device */ - struct virtio_device *vdev; - /* Virtio virtqueue */ - struct virtqueue *vqs[VSOCK_VQ_MAX]; - /* Wait queue for send pkt */ - wait_queue_head_t queue_wait; - /* Work item to send pkt */ - struct work_struct tx_work; - /* Work item to recv pkt */ - struct work_struct rx_work; - /* Mutex to protect send pkt*/ - struct mutex tx_lock; - /* Mutex to protect recv pkt*/ - struct mutex rx_lock; - /* Number of recv buffers */ - int rx_buf_nr; - /* Number of max recv buffers */ - int rx_buf_max_nr; - /* Used for global tx buf limitation */ - u32 total_tx_buf; - /* Guest context id, just like guest ip address */ - u32 guest_cid; -}; - -static struct virtio_vsock *virtio_vsock_get(void) -{ - return the_virtio_vsock; -} - -static u32 virtio_transport_get_local_cid(void) -{ - struct virtio_vsock *vsock = virtio_vsock_get(); - - return vsock->guest_cid; -} - -static int -virtio_transport_send_pkt(struct vsock_sock *vsk, - struct virtio_vsock_pkt_info *info) -{ - u32 src_cid, src_port, dst_cid, dst_port; - int ret, in_sg = 0, out_sg = 0; - struct virtio_transport *trans; - struct virtio_vsock_pkt *pkt; - struct virtio_vsock *vsock; - struct scatterlist hdr, buf, *sgs[2]; - struct virtqueue *vq; - u32 pkt_len = info->pkt_len; - DEFINE_WAIT(wait); - - vsock = virtio_vsock_get(); - if (!vsock) - return -ENODEV; - - src_cid = virtio_transport_get_local_cid(); - src_port = vsk->local_addr.svm_port; - if (!info->remote_cid) { - dst_cid = vsk->remote_addr.svm_cid; - dst_port = vsk->remote_addr.svm_port; - } else { - dst_cid = info->remote_cid; - dst_port = info->remote_port; - } - - trans = vsk->trans; - vq = vsock->vqs[VSOCK_VQ_TX]; - - if (pkt_len > VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE) - pkt_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE; - pkt_len = virtio_transport_get_credit(trans, pkt_len); - /* Do not send zero length OP_RW pkt*/ - if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW) - return pkt_len; - - /* Respect global tx buf limitation */ - mutex_lock(&vsock->tx_lock); - while (pkt_len + vsock->total_tx_buf > VIRTIO_VSOCK_MAX_TX_BUF_SIZE) { - prepare_to_wait_exclusive(&vsock->queue_wait, &wait, - TASK_UNINTERRUPTIBLE); - mutex_unlock(&vsock->tx_lock); - schedule(); - mutex_lock(&vsock->tx_lock); - finish_wait(&vsock->queue_wait, &wait); - } - vsock->total_tx_buf += pkt_len; - mutex_unlock(&vsock->tx_lock); - - pkt = virtio_transport_alloc_pkt(vsk, info, pkt_len, - src_cid, src_port, - dst_cid, dst_port); - if (!pkt) { - mutex_lock(&vsock->tx_lock); - vsock->total_tx_buf -= pkt_len; - mutex_unlock(&vsock->tx_lock); - virtio_transport_put_credit(trans, pkt_len); - return -ENOMEM; - } - - pr_debug("%s:info->pkt_len= %d\n", __func__, info->pkt_len); - - /* Will be released in virtio_transport_send_pkt_work */ - sock_hold(&trans->vsk->sk); - virtio_transport_inc_tx_pkt(pkt); - - /* Put pkt in the virtqueue */ - sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr)); - sgs[out_sg++] = &hdr; - if (info->msg && info->pkt_len > 0) { - sg_init_one(&buf, pkt->buf, pkt->len); - sgs[out_sg++] = &buf; - } - - mutex_lock(&vsock->tx_lock); - while ((ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, pkt, - GFP_KERNEL)) < 0) { - 
prepare_to_wait_exclusive(&vsock->queue_wait, &wait, - TASK_UNINTERRUPTIBLE); - mutex_unlock(&vsock->tx_lock); - schedule(); - mutex_lock(&vsock->tx_lock); - finish_wait(&vsock->queue_wait, &wait); - } - virtqueue_kick(vq); - mutex_unlock(&vsock->tx_lock); - - return pkt_len; -} - -static struct virtio_transport_pkt_ops virtio_ops = { - .send_pkt = virtio_transport_send_pkt, -}; - -static void virtio_vsock_rx_fill(struct virtio_vsock *vsock) -{ - int buf_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE; - struct virtio_vsock_pkt *pkt; - struct scatterlist hdr, buf, *sgs[2]; - struct virtqueue *vq; - int ret; - - vq = vsock->vqs[VSOCK_VQ_RX]; - - do { - pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); - if (!pkt) { - pr_debug("%s: fail to allocate pkt\n", __func__); - goto out; - } - - /* TODO: use mergeable rx buffer */ - pkt->buf = kmalloc(buf_len, GFP_KERNEL); - if (!pkt->buf) { - pr_debug("%s: fail to allocate pkt->buf\n", __func__); - goto err; - } - - sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr)); - sgs[0] = &hdr; - - sg_init_one(&buf, pkt->buf, buf_len); - sgs[1] = &buf; - ret = virtqueue_add_sgs(vq, sgs, 0, 2, pkt, GFP_KERNEL); - if (ret) - goto err; - vsock->rx_buf_nr++; - } while (vq->num_free); - if (vsock->rx_buf_nr > vsock->rx_buf_max_nr) - vsock->rx_buf_max_nr = vsock->rx_buf_nr; -out: - virtqueue_kick(vq); - return; -err: - virtqueue_kick(vq); - virtio_transport_free_pkt(pkt); - return; -} - -static void virtio_transport_send_pkt_work(struct work_struct *work) -{ - struct virtio_vsock *vsock = - container_of(work, struct virtio_vsock, tx_work); - struct virtio_vsock_pkt *pkt; - bool added = false; - struct virtqueue *vq; - unsigned int len; - struct sock *sk; - - vq = vsock->vqs[VSOCK_VQ_TX]; - mutex_lock(&vsock->tx_lock); - do { - virtqueue_disable_cb(vq); - while ((pkt = virtqueue_get_buf(vq, &len)) != NULL) { - sk = &pkt->trans->vsk->sk; - virtio_transport_dec_tx_pkt(pkt); - /* Release refcnt taken in virtio_transport_send_pkt */ - sock_put(sk); - vsock->total_tx_buf -= pkt->len; - virtio_transport_free_pkt(pkt); - added = true; - } - } while (!virtqueue_enable_cb(vq)); - mutex_unlock(&vsock->tx_lock); - - if (added) - wake_up(&vsock->queue_wait); -} - -static void virtio_transport_recv_pkt_work(struct work_struct *work) -{ - struct virtio_vsock *vsock = - container_of(work, struct virtio_vsock, rx_work); - struct virtio_vsock_pkt *pkt; - struct virtqueue *vq; - unsigned int len; - - vq = vsock->vqs[VSOCK_VQ_RX]; - mutex_lock(&vsock->rx_lock); - do { - virtqueue_disable_cb(vq); - while ((pkt = virtqueue_get_buf(vq, &len)) != NULL) { - pkt->len = len; - virtio_transport_recv_pkt(pkt); - vsock->rx_buf_nr--; - } - } while (!virtqueue_enable_cb(vq)); - - if (vsock->rx_buf_nr < vsock->rx_buf_max_nr / 2) - virtio_vsock_rx_fill(vsock); - mutex_unlock(&vsock->rx_lock); -} - -static void virtio_vsock_ctrl_done(struct virtqueue *vq) -{ -} - -static void virtio_vsock_tx_done(struct virtqueue *vq) -{ - struct virtio_vsock *vsock = vq->vdev->priv; - - if (!vsock) - return; - queue_work(virtio_vsock_workqueue, &vsock->tx_work); -} - -static void virtio_vsock_rx_done(struct virtqueue *vq) -{ - struct virtio_vsock *vsock = vq->vdev->priv; - - if (!vsock) - return; - queue_work(virtio_vsock_workqueue, &vsock->rx_work); -} - -static int -virtio_transport_socket_init(struct vsock_sock *vsk, struct vsock_sock *psk) -{ - struct virtio_transport *trans; - int ret; - - ret = virtio_transport_do_socket_init(vsk, psk); - if (ret) - return ret; - - trans = vsk->trans; - trans->ops = &virtio_ops; - return ret; -} - 
-static struct vsock_transport virtio_transport = { - .get_local_cid = virtio_transport_get_local_cid, - - .init = virtio_transport_socket_init, - .destruct = virtio_transport_destruct, - .release = virtio_transport_release, - .connect = virtio_transport_connect, - .shutdown = virtio_transport_shutdown, - - .dgram_bind = virtio_transport_dgram_bind, - .dgram_dequeue = virtio_transport_dgram_dequeue, - .dgram_enqueue = virtio_transport_dgram_enqueue, - .dgram_allow = virtio_transport_dgram_allow, - - .stream_dequeue = virtio_transport_stream_dequeue, - .stream_enqueue = virtio_transport_stream_enqueue, - .stream_has_data = virtio_transport_stream_has_data, - .stream_has_space = virtio_transport_stream_has_space, - .stream_rcvhiwat = virtio_transport_stream_rcvhiwat, - .stream_is_active = virtio_transport_stream_is_active, - .stream_allow = virtio_transport_stream_allow, - - .notify_poll_in = virtio_transport_notify_poll_in, - .notify_poll_out = virtio_transport_notify_poll_out, - .notify_recv_init = virtio_transport_notify_recv_init, - .notify_recv_pre_block = virtio_transport_notify_recv_pre_block, - .notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue, - .notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue, - .notify_send_init = virtio_transport_notify_send_init, - .notify_send_pre_block = virtio_transport_notify_send_pre_block, - .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue, - .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue, - - .set_buffer_size = virtio_transport_set_buffer_size, - .set_min_buffer_size = virtio_transport_set_min_buffer_size, - .set_max_buffer_size = virtio_transport_set_max_buffer_size, - .get_buffer_size = virtio_transport_get_buffer_size, - .get_min_buffer_size = virtio_transport_get_min_buffer_size, - .get_max_buffer_size = virtio_transport_get_max_buffer_size, -}; - -static int virtio_vsock_probe(struct virtio_device *vdev) -{ - vq_callback_t *callbacks[] = { - virtio_vsock_ctrl_done, - virtio_vsock_rx_done, - virtio_vsock_tx_done, - }; - const char *names[] = { - "ctrl", - "rx", - "tx", - }; - struct virtio_vsock *vsock = NULL; - u32 guest_cid; - int ret; - - ret = mutex_lock_interruptible(&the_virtio_vsock_mutex); - if (ret) - return ret; - - /* Only one virtio-vsock device per guest is supported */ - if (the_virtio_vsock) { - ret = -EBUSY; - goto out; - } - - vsock = kzalloc(sizeof(*vsock), GFP_KERNEL); - if (!vsock) { - ret = -ENOMEM; - goto out; - } - - vsock->vdev = vdev; - - ret = vsock->vdev->config->find_vqs(vsock->vdev, VSOCK_VQ_MAX, - vsock->vqs, callbacks, names); - if (ret < 0) - goto out; - - vdev->config->get(vdev, offsetof(struct virtio_vsock_config, guest_cid), - &guest_cid, sizeof(guest_cid)); - vsock->guest_cid = le32_to_cpu(guest_cid); - pr_debug("%s:guest_cid=%d\n", __func__, vsock->guest_cid); - - ret = vsock_core_init(&virtio_transport); - if (ret < 0) - goto out_vqs; - - vsock->rx_buf_nr = 0; - vsock->rx_buf_max_nr = 0; - - vdev->priv = the_virtio_vsock = vsock; - init_waitqueue_head(&vsock->queue_wait); - mutex_init(&vsock->tx_lock); - mutex_init(&vsock->rx_lock); - INIT_WORK(&vsock->rx_work, virtio_transport_recv_pkt_work); - INIT_WORK(&vsock->tx_work, virtio_transport_send_pkt_work); - - mutex_lock(&vsock->rx_lock); - virtio_vsock_rx_fill(vsock); - mutex_unlock(&vsock->rx_lock); - - mutex_unlock(&the_virtio_vsock_mutex); - return 0; - -out_vqs: - vsock->vdev->config->del_vqs(vsock->vdev); -out: - kfree(vsock); - mutex_unlock(&the_virtio_vsock_mutex); - return ret; 
-} - -static void virtio_vsock_remove(struct virtio_device *vdev) -{ - struct virtio_vsock *vsock = vdev->priv; - - mutex_lock(&the_virtio_vsock_mutex); - the_virtio_vsock = NULL; - vsock_core_exit(); - mutex_unlock(&the_virtio_vsock_mutex); - - kfree(vsock); -} - -static struct virtio_device_id id_table[] = { - { VIRTIO_ID_VSOCK, VIRTIO_DEV_ANY_ID }, - { 0 }, -}; - -static unsigned int features[] = { -}; - -static struct virtio_driver virtio_vsock_driver = { - .feature_table = features, - .feature_table_size = ARRAY_SIZE(features), - .driver.name = KBUILD_MODNAME, - .driver.owner = THIS_MODULE, - .id_table = id_table, - .probe = virtio_vsock_probe, - .remove = virtio_vsock_remove, -}; - -static int __init virtio_vsock_init(void) -{ - int ret; - - virtio_vsock_workqueue = alloc_workqueue("virtio_vsock", 0, 0); - if (!virtio_vsock_workqueue) - return -ENOMEM; - ret = register_virtio_driver(&virtio_vsock_driver); - if (ret) - destroy_workqueue(virtio_vsock_workqueue); - return ret; -} - -static void __exit virtio_vsock_exit(void) -{ - unregister_virtio_driver(&virtio_vsock_driver); - destroy_workqueue(virtio_vsock_workqueue); -} - -module_init(virtio_vsock_init); -module_exit(virtio_vsock_exit); -MODULE_LICENSE("GPL v2"); -MODULE_AUTHOR("Asias He"); -MODULE_DESCRIPTION("virtio transport for vsock"); -MODULE_DEVICE_TABLE(virtio, id_table); diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c deleted file mode 100644 index 28f790da6f15..000000000000 --- a/net/vmw_vsock/virtio_transport_common.c +++ /dev/null @@ -1,1272 +0,0 @@ -/* - * common code for virtio vsock - * - * Copyright (C) 2013-2015 Red Hat, Inc. - * Author: Asias He - * Stefan Hajnoczi - * - * This work is licensed under the terms of the GNU GPL, version 2. 
- */ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#define COOKIEBITS 24 -#define COOKIEMASK (((u32)1 << COOKIEBITS) - 1) -#define VSOCK_TIMEOUT_INIT 4 - -#define SHA_MESSAGE_WORDS 16 -#define SHA_VSOCK_WORDS 5 - -static u32 vsockcookie_secret[2][SHA_MESSAGE_WORDS - SHA_VSOCK_WORDS + - SHA_DIGEST_WORDS]; - -static DEFINE_PER_CPU(__u32[SHA_MESSAGE_WORDS + SHA_DIGEST_WORDS + - SHA_WORKSPACE_WORDS], vsock_cookie_scratch); - -static u32 cookie_hash(u32 saddr, u32 daddr, u16 sport, u16 dport, - u32 count, int c) -{ - __u32 *tmp = this_cpu_ptr(vsock_cookie_scratch); - - memcpy(tmp + SHA_VSOCK_WORDS, vsockcookie_secret[c], - sizeof(vsockcookie_secret[c])); - tmp[0] = saddr; - tmp[1] = daddr; - tmp[2] = sport; - tmp[3] = dport; - tmp[4] = count; - sha_transform(tmp + SHA_MESSAGE_WORDS, (__u8 *)tmp, - tmp + SHA_MESSAGE_WORDS + SHA_DIGEST_WORDS); - - return tmp[17]; -} - -static u32 -virtio_vsock_secure_cookie(u32 saddr, u32 daddr, u32 sport, u32 dport, - u32 count) -{ - u32 h1, h2; - - h1 = cookie_hash(saddr, daddr, sport, dport, 0, 0); - h2 = cookie_hash(saddr, daddr, sport, dport, count, 1); - - return h1 + (count << COOKIEBITS) + (h2 & COOKIEMASK); -} - -static u32 -virtio_vsock_check_cookie(u32 saddr, u32 daddr, u32 sport, u32 dport, - u32 count, u32 cookie, u32 maxdiff) -{ - u32 diff; - u32 ret; - - cookie -= cookie_hash(saddr, daddr, sport, dport, 0, 0); - - diff = (count - (cookie >> COOKIEBITS)) & ((u32)-1 >> COOKIEBITS); - pr_debug("%s: diff=%x\n", __func__, diff); - if (diff >= maxdiff) - return (u32)-1; - - ret = (cookie - - cookie_hash(saddr, daddr, sport, dport, count - diff, 1)) - & COOKIEMASK; - pr_debug("%s: ret=%x\n", __func__, diff); - - return ret; -} - -void virtio_vsock_dumppkt(const char *func, const struct virtio_vsock_pkt *pkt) -{ - pr_debug("%s: pkt=%p, op=%d, len=%d, %d:%d---%d:%d, len=%d\n", - func, pkt, - le16_to_cpu(pkt->hdr.op), - le32_to_cpu(pkt->hdr.len), - le32_to_cpu(pkt->hdr.src_cid), - le32_to_cpu(pkt->hdr.src_port), - le32_to_cpu(pkt->hdr.dst_cid), - le32_to_cpu(pkt->hdr.dst_port), - pkt->len); -} -EXPORT_SYMBOL_GPL(virtio_vsock_dumppkt); - -struct virtio_vsock_pkt * -virtio_transport_alloc_pkt(struct vsock_sock *vsk, - struct virtio_vsock_pkt_info *info, - size_t len, - u32 src_cid, - u32 src_port, - u32 dst_cid, - u32 dst_port) -{ - struct virtio_transport *trans = vsk->trans; - struct virtio_vsock_pkt *pkt; - int err; - - BUG_ON(!trans); - - pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); - if (!pkt) - return NULL; - - pkt->hdr.type = cpu_to_le16(info->type); - pkt->hdr.op = cpu_to_le16(info->op); - pkt->hdr.src_cid = cpu_to_le32(src_cid); - pkt->hdr.src_port = cpu_to_le32(src_port); - pkt->hdr.dst_cid = cpu_to_le32(dst_cid); - pkt->hdr.dst_port = cpu_to_le32(dst_port); - pkt->hdr.flags = cpu_to_le32(info->flags); - pkt->len = len; - pkt->trans = trans; - if (info->type == VIRTIO_VSOCK_TYPE_DGRAM) - pkt->hdr.len = cpu_to_le32(len + (info->dgram_len << 16)); - else if (info->type == VIRTIO_VSOCK_TYPE_STREAM) - pkt->hdr.len = cpu_to_le32(len); - - if (info->msg && len > 0) { - pkt->buf = kmalloc(len, GFP_KERNEL); - if (!pkt->buf) - goto out_pkt; - err = memcpy_from_msg(pkt->buf, info->msg, len); - if (err) - goto out; - } - - return pkt; - -out: - kfree(pkt->buf); -out_pkt: - kfree(pkt); - return NULL; -} -EXPORT_SYMBOL_GPL(virtio_transport_alloc_pkt); - -struct sock * -virtio_transport_get_pending(struct sock *listener, - struct virtio_vsock_pkt *pkt) -{ - struct vsock_sock *vlistener; - struct 
vsock_sock *vpending; - struct sockaddr_vm src; - struct sockaddr_vm dst; - struct sock *pending; - - vsock_addr_init(&src, le32_to_cpu(pkt->hdr.src_cid), le32_to_cpu(pkt->hdr.src_port)); - vsock_addr_init(&dst, le32_to_cpu(pkt->hdr.dst_cid), le32_to_cpu(pkt->hdr.dst_port)); - - vlistener = vsock_sk(listener); - list_for_each_entry(vpending, &vlistener->pending_links, - pending_links) { - if (vsock_addr_equals_addr(&src, &vpending->remote_addr) && - vsock_addr_equals_addr(&dst, &vpending->local_addr)) { - pending = sk_vsock(vpending); - sock_hold(pending); - return pending; - } - } - - return NULL; -} -EXPORT_SYMBOL_GPL(virtio_transport_get_pending); - -static void virtio_transport_inc_rx_pkt(struct virtio_vsock_pkt *pkt) -{ - pkt->trans->rx_bytes += pkt->len; -} - -static void virtio_transport_dec_rx_pkt(struct virtio_vsock_pkt *pkt) -{ - pkt->trans->rx_bytes -= pkt->len; - pkt->trans->fwd_cnt += pkt->len; -} - -void virtio_transport_inc_tx_pkt(struct virtio_vsock_pkt *pkt) -{ - mutex_lock(&pkt->trans->tx_lock); - pkt->hdr.fwd_cnt = cpu_to_le32(pkt->trans->fwd_cnt); - pkt->hdr.buf_alloc = cpu_to_le32(pkt->trans->buf_alloc); - mutex_unlock(&pkt->trans->tx_lock); -} -EXPORT_SYMBOL_GPL(virtio_transport_inc_tx_pkt); - -void virtio_transport_dec_tx_pkt(struct virtio_vsock_pkt *pkt) -{ -} -EXPORT_SYMBOL_GPL(virtio_transport_dec_tx_pkt); - -u32 virtio_transport_get_credit(struct virtio_transport *trans, u32 credit) -{ - u32 ret; - - mutex_lock(&trans->tx_lock); - ret = trans->peer_buf_alloc - (trans->tx_cnt - trans->peer_fwd_cnt); - if (ret > credit) - ret = credit; - trans->tx_cnt += ret; - mutex_unlock(&trans->tx_lock); - - pr_debug("%s: ret=%d, buf_alloc=%d, peer_buf_alloc=%d," - "tx_cnt=%d, fwd_cnt=%d, peer_fwd_cnt=%d\n", __func__, - ret, trans->buf_alloc, trans->peer_buf_alloc, - trans->tx_cnt, trans->fwd_cnt, trans->peer_fwd_cnt); - - return ret; -} -EXPORT_SYMBOL_GPL(virtio_transport_get_credit); - -void virtio_transport_put_credit(struct virtio_transport *trans, u32 credit) -{ - mutex_lock(&trans->tx_lock); - trans->tx_cnt -= credit; - mutex_unlock(&trans->tx_lock); -} -EXPORT_SYMBOL_GPL(virtio_transport_put_credit); - -static int virtio_transport_send_credit_update(struct vsock_sock *vsk, int type, struct virtio_vsock_hdr *hdr) -{ - struct virtio_transport *trans = vsk->trans; - struct virtio_vsock_pkt_info info = { - .op = VIRTIO_VSOCK_OP_CREDIT_UPDATE, - .type = type, - }; - - if (hdr && type == VIRTIO_VSOCK_TYPE_DGRAM) { - info.remote_cid = le32_to_cpu(hdr->src_cid); - info.remote_port = le32_to_cpu(hdr->src_port); - } - - pr_debug("%s: sk=%p send_credit_update\n", __func__, vsk); - return trans->ops->send_pkt(vsk, &info); -} - -static int virtio_transport_send_credit_request(struct vsock_sock *vsk, int type) -{ - struct virtio_transport *trans = vsk->trans; - struct virtio_vsock_pkt_info info = { - .op = VIRTIO_VSOCK_OP_CREDIT_REQUEST, - .type = type, - }; - - pr_debug("%s: sk=%p send_credit_request\n", __func__, vsk); - return trans->ops->send_pkt(vsk, &info); -} - -static ssize_t -virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, - struct msghdr *msg, - size_t len) -{ - struct virtio_transport *trans = vsk->trans; - struct virtio_vsock_pkt *pkt; - size_t bytes, total = 0; - int err = -EFAULT; - - mutex_lock(&trans->rx_lock); - while (total < len && trans->rx_bytes > 0 && - !list_empty(&trans->rx_queue)) { - pkt = list_first_entry(&trans->rx_queue, - struct virtio_vsock_pkt, list); - - bytes = len - total; - if (bytes > pkt->len - pkt->off) - bytes = pkt->len - pkt->off; - 
- err = memcpy_to_msg(msg, pkt->buf + pkt->off, bytes); - if (err) - goto out; - total += bytes; - pkt->off += bytes; - if (pkt->off == pkt->len) { - virtio_transport_dec_rx_pkt(pkt); - list_del(&pkt->list); - virtio_transport_free_pkt(pkt); - } - } - mutex_unlock(&trans->rx_lock); - - /* Send a credit pkt to peer */ - virtio_transport_send_credit_update(vsk, VIRTIO_VSOCK_TYPE_STREAM, - NULL); - - return total; - -out: - mutex_unlock(&trans->rx_lock); - if (total) - err = total; - return err; -} - -ssize_t -virtio_transport_stream_dequeue(struct vsock_sock *vsk, - struct msghdr *msg, - size_t len, int flags) -{ - if (flags & MSG_PEEK) - return -EOPNOTSUPP; - - return virtio_transport_stream_do_dequeue(vsk, msg, len); -} -EXPORT_SYMBOL_GPL(virtio_transport_stream_dequeue); - -struct dgram_skb { - struct list_head list; - struct sk_buff *skb; - u16 id; -}; - -static struct dgram_skb *dgram_id_to_skb(struct virtio_transport *trans, - u16 id) -{ - struct dgram_skb *dgram_skb; - - list_for_each_entry(dgram_skb, &trans->incomplete_dgrams, list) { - if (dgram_skb->id == id) - return dgram_skb; - } - - return NULL; -} - -static void -virtio_transport_recv_dgram(struct sock *sk, - struct virtio_vsock_pkt *pkt) -{ - struct sk_buff *skb = NULL; - struct vsock_sock *vsk; - struct virtio_transport *trans; - size_t size; - u16 dgram_id, pkt_off, dgram_len, pkt_len; - u32 flags, len; - struct dgram_skb *dgram_skb; - - vsk = vsock_sk(sk); - trans = vsk->trans; - - /* len: dgram_len | pkt_len */ - len = le32_to_cpu(pkt->hdr.len); - dgram_len = len >> 16; - pkt_len = len & 0xFFFF; - - /* flags: dgram_id | pkt_off */ - flags = le32_to_cpu(pkt->hdr.flags); - dgram_id = flags >> 16; - pkt_off = flags & 0xFFFF; - - pr_debug("%s: dgram_len=%d, pkt_len=%d, id=%d, off=%d\n", __func__, - dgram_len, pkt_len, dgram_id, pkt_off); - - dgram_skb = dgram_id_to_skb(trans, dgram_id); - if (dgram_skb) { - /* This pkt is for a existing dgram */ - skb = dgram_skb->skb; - pr_debug("%s:found skb\n", __func__); - } - - /* Packet payload must be within datagram bounds */ - if (pkt_len > VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE) - goto drop; - if (pkt_len > dgram_len) - goto drop; - if (pkt_off > dgram_len) - goto drop; - if (dgram_len - pkt_off < pkt_len) - goto drop; - - if (!skb) { - /* This pkt is for a new dgram */ - pr_debug("%s:create skb\n", __func__); - - size = sizeof(pkt->hdr) + dgram_len; - /* Attach the packet to the socket's receive queue as an sk_buff. */ - dgram_skb = kzalloc(sizeof(struct dgram_skb), GFP_ATOMIC); - if (!dgram_skb) - goto drop; - - skb = alloc_skb(size, GFP_ATOMIC); - if (!skb) { - kfree(dgram_skb); - dgram_skb = NULL; - goto drop; - } - dgram_skb->id = dgram_id; - dgram_skb->skb = skb; - list_add_tail(&dgram_skb->list, &trans->incomplete_dgrams); - - /* sk_receive_skb() will do a sock_put(), so hold here. 
*/ - sock_hold(sk); - skb_put(skb, size); - memcpy(skb->data, &pkt->hdr, sizeof(pkt->hdr)); - } - - memcpy(skb->data + sizeof(pkt->hdr) + pkt_off, pkt->buf, pkt_len); - - pr_debug("%s:C, off=%d, pkt_len=%d, dgram_len=%d\n", __func__, - pkt_off, pkt_len, dgram_len); - - /* We are done with this dgram */ - if (pkt_off + pkt_len == dgram_len) { - pr_debug("%s:dgram_id=%d is done\n", __func__, dgram_id); - list_del(&dgram_skb->list); - kfree(dgram_skb); - sk_receive_skb(sk, skb, 0); - } - virtio_transport_free_pkt(pkt); - return; - -drop: - if (dgram_skb) { - list_del(&dgram_skb->list); - kfree(dgram_skb); - kfree_skb(skb); - sock_put(sk); - } - virtio_transport_free_pkt(pkt); -} - -int -virtio_transport_dgram_dequeue(struct vsock_sock *vsk, - struct msghdr *msg, - size_t len, int flags) -{ - struct virtio_vsock_hdr *hdr; - struct sk_buff *skb; - int noblock; - int err; - int dgram_len; - - noblock = flags & MSG_DONTWAIT; - - if (flags & MSG_OOB || flags & MSG_ERRQUEUE) - return -EOPNOTSUPP; - - /* Retrieve the head sk_buff from the socket's receive queue. */ - err = 0; - skb = skb_recv_datagram(&vsk->sk, flags, noblock, &err); - if (err) - return err; - if (!skb) - return -EAGAIN; - - hdr = (struct virtio_vsock_hdr *)skb->data; - if (!hdr) - goto out; - - dgram_len = le32_to_cpu(hdr->len) >> 16; - /* Place the datagram payload in the user's iovec. */ - err = skb_copy_datagram_msg(skb, sizeof(*hdr), msg, dgram_len); - if (err) - goto out; - - if (msg->msg_name) { - /* Provide the address of the sender. */ - DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr, msg->msg_name); - vsock_addr_init(vm_addr, le32_to_cpu(hdr->src_cid), le32_to_cpu(hdr->src_port)); - msg->msg_namelen = sizeof(*vm_addr); - } - err = dgram_len; - - /* Send a credit pkt to peer */ - virtio_transport_send_credit_update(vsk, VIRTIO_VSOCK_TYPE_DGRAM, hdr); - - pr_debug("%s:done, recved =%d\n", __func__, dgram_len); -out: - skb_free_datagram(&vsk->sk, skb); - return err; -} -EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue); - -s64 virtio_transport_stream_has_data(struct vsock_sock *vsk) -{ - struct virtio_transport *trans = vsk->trans; - s64 bytes; - - mutex_lock(&trans->rx_lock); - bytes = trans->rx_bytes; - mutex_unlock(&trans->rx_lock); - - return bytes; -} -EXPORT_SYMBOL_GPL(virtio_transport_stream_has_data); - -static s64 virtio_transport_has_space(struct vsock_sock *vsk) -{ - struct virtio_transport *trans = vsk->trans; - s64 bytes; - - bytes = trans->peer_buf_alloc - (trans->tx_cnt - trans->peer_fwd_cnt); - if (bytes < 0) - bytes = 0; - - return bytes; -} - -s64 virtio_transport_stream_has_space(struct vsock_sock *vsk) -{ - struct virtio_transport *trans = vsk->trans; - s64 bytes; - - mutex_lock(&trans->tx_lock); - bytes = virtio_transport_has_space(vsk); - mutex_unlock(&trans->tx_lock); - - pr_debug("%s: bytes=%lld\n", __func__, bytes); - - return bytes; -} -EXPORT_SYMBOL_GPL(virtio_transport_stream_has_space); - -int virtio_transport_do_socket_init(struct vsock_sock *vsk, - struct vsock_sock *psk) -{ - struct virtio_transport *trans; - - trans = kzalloc(sizeof(*trans), GFP_KERNEL); - if (!trans) - return -ENOMEM; - - vsk->trans = trans; - trans->vsk = vsk; - if (psk) { - struct virtio_transport *ptrans = psk->trans; - trans->buf_size = ptrans->buf_size; - trans->buf_size_min = ptrans->buf_size_min; - trans->buf_size_max = ptrans->buf_size_max; - trans->peer_buf_alloc = ptrans->peer_buf_alloc; - } else { - trans->buf_size = VIRTIO_VSOCK_DEFAULT_BUF_SIZE; - trans->buf_size_min = VIRTIO_VSOCK_DEFAULT_MIN_BUF_SIZE; - 
trans->buf_size_max = VIRTIO_VSOCK_DEFAULT_MAX_BUF_SIZE; - } - - trans->buf_alloc = trans->buf_size; - - pr_debug("%s: trans->buf_alloc=%d\n", __func__, trans->buf_alloc); - - mutex_init(&trans->rx_lock); - mutex_init(&trans->tx_lock); - INIT_LIST_HEAD(&trans->rx_queue); - INIT_LIST_HEAD(&trans->incomplete_dgrams); - - return 0; -} -EXPORT_SYMBOL_GPL(virtio_transport_do_socket_init); - -u64 virtio_transport_get_buffer_size(struct vsock_sock *vsk) -{ - struct virtio_transport *trans = vsk->trans; - - return trans->buf_size; -} -EXPORT_SYMBOL_GPL(virtio_transport_get_buffer_size); - -u64 virtio_transport_get_min_buffer_size(struct vsock_sock *vsk) -{ - struct virtio_transport *trans = vsk->trans; - - return trans->buf_size_min; -} -EXPORT_SYMBOL_GPL(virtio_transport_get_min_buffer_size); - -u64 virtio_transport_get_max_buffer_size(struct vsock_sock *vsk) -{ - struct virtio_transport *trans = vsk->trans; - - return trans->buf_size_max; -} -EXPORT_SYMBOL_GPL(virtio_transport_get_max_buffer_size); - -void virtio_transport_set_buffer_size(struct vsock_sock *vsk, u64 val) -{ - struct virtio_transport *trans = vsk->trans; - - if (val > VIRTIO_VSOCK_MAX_BUF_SIZE) - val = VIRTIO_VSOCK_MAX_BUF_SIZE; - if (val < trans->buf_size_min) - trans->buf_size_min = val; - if (val > trans->buf_size_max) - trans->buf_size_max = val; - trans->buf_size = val; - trans->buf_alloc = val; -} -EXPORT_SYMBOL_GPL(virtio_transport_set_buffer_size); - -void virtio_transport_set_min_buffer_size(struct vsock_sock *vsk, u64 val) -{ - struct virtio_transport *trans = vsk->trans; - - if (val > VIRTIO_VSOCK_MAX_BUF_SIZE) - val = VIRTIO_VSOCK_MAX_BUF_SIZE; - if (val > trans->buf_size) - trans->buf_size = val; - trans->buf_size_min = val; -} -EXPORT_SYMBOL_GPL(virtio_transport_set_min_buffer_size); - -void virtio_transport_set_max_buffer_size(struct vsock_sock *vsk, u64 val) -{ - struct virtio_transport *trans = vsk->trans; - - if (val > VIRTIO_VSOCK_MAX_BUF_SIZE) - val = VIRTIO_VSOCK_MAX_BUF_SIZE; - if (val < trans->buf_size) - trans->buf_size = val; - trans->buf_size_max = val; -} -EXPORT_SYMBOL_GPL(virtio_transport_set_max_buffer_size); - -int -virtio_transport_notify_poll_in(struct vsock_sock *vsk, - size_t target, - bool *data_ready_now) -{ - if (vsock_stream_has_data(vsk)) - *data_ready_now = true; - else - *data_ready_now = false; - - return 0; -} -EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_in); - -int -virtio_transport_notify_poll_out(struct vsock_sock *vsk, - size_t target, - bool *space_avail_now) -{ - s64 free_space; - - free_space = vsock_stream_has_space(vsk); - if (free_space > 0) - *space_avail_now = true; - else if (free_space == 0) - *space_avail_now = false; - - return 0; -} -EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_out); - -int virtio_transport_notify_recv_init(struct vsock_sock *vsk, - size_t target, struct vsock_transport_recv_notify_data *data) -{ - return 0; -} -EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_init); - -int virtio_transport_notify_recv_pre_block(struct vsock_sock *vsk, - size_t target, struct vsock_transport_recv_notify_data *data) -{ - return 0; -} -EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_block); - -int virtio_transport_notify_recv_pre_dequeue(struct vsock_sock *vsk, - size_t target, struct vsock_transport_recv_notify_data *data) -{ - return 0; -} -EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_dequeue); - -int virtio_transport_notify_recv_post_dequeue(struct vsock_sock *vsk, - size_t target, ssize_t copied, bool data_read, - struct vsock_transport_recv_notify_data 
*data) -{ - return 0; -} -EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_post_dequeue); - -int virtio_transport_notify_send_init(struct vsock_sock *vsk, - struct vsock_transport_send_notify_data *data) -{ - return 0; -} -EXPORT_SYMBOL_GPL(virtio_transport_notify_send_init); - -int virtio_transport_notify_send_pre_block(struct vsock_sock *vsk, - struct vsock_transport_send_notify_data *data) -{ - return 0; -} -EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_block); - -int virtio_transport_notify_send_pre_enqueue(struct vsock_sock *vsk, - struct vsock_transport_send_notify_data *data) -{ - return 0; -} -EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_enqueue); - -int virtio_transport_notify_send_post_enqueue(struct vsock_sock *vsk, - ssize_t written, struct vsock_transport_send_notify_data *data) -{ - return 0; -} -EXPORT_SYMBOL_GPL(virtio_transport_notify_send_post_enqueue); - -u64 virtio_transport_stream_rcvhiwat(struct vsock_sock *vsk) -{ - struct virtio_transport *trans = vsk->trans; - - return trans->buf_size; -} -EXPORT_SYMBOL_GPL(virtio_transport_stream_rcvhiwat); - -bool virtio_transport_stream_is_active(struct vsock_sock *vsk) -{ - return true; -} -EXPORT_SYMBOL_GPL(virtio_transport_stream_is_active); - -bool virtio_transport_stream_allow(u32 cid, u32 port) -{ - return true; -} -EXPORT_SYMBOL_GPL(virtio_transport_stream_allow); - -int virtio_transport_dgram_bind(struct vsock_sock *vsk, - struct sockaddr_vm *addr) -{ - return vsock_bind_dgram_generic(vsk, addr); -} -EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind); - -bool virtio_transport_dgram_allow(u32 cid, u32 port) -{ - return true; -} -EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow); - -int virtio_transport_connect(struct vsock_sock *vsk) -{ - struct virtio_transport *trans = vsk->trans; - struct virtio_vsock_pkt_info info = { - .op = VIRTIO_VSOCK_OP_REQUEST, - .type = VIRTIO_VSOCK_TYPE_STREAM, - }; - - pr_debug("%s: vsk=%p send_request\n", __func__, vsk); - return trans->ops->send_pkt(vsk, &info); -} -EXPORT_SYMBOL_GPL(virtio_transport_connect); - -int virtio_transport_shutdown(struct vsock_sock *vsk, int mode) -{ - struct virtio_transport *trans = vsk->trans; - struct virtio_vsock_pkt_info info = { - .op = VIRTIO_VSOCK_OP_SHUTDOWN, - .type = VIRTIO_VSOCK_TYPE_STREAM, - .flags = (mode & RCV_SHUTDOWN ? - VIRTIO_VSOCK_SHUTDOWN_RCV : 0) | - (mode & SEND_SHUTDOWN ? 
- VIRTIO_VSOCK_SHUTDOWN_SEND : 0), - }; - - pr_debug("%s: vsk=%p: send_shutdown\n", __func__, vsk); - return trans->ops->send_pkt(vsk, &info); -} -EXPORT_SYMBOL_GPL(virtio_transport_shutdown); - -void virtio_transport_release(struct vsock_sock *vsk) -{ - struct virtio_transport *trans = vsk->trans; - struct sock *sk = &vsk->sk; - struct dgram_skb *dgram_skb; - struct dgram_skb *dgram_skb_tmp; - - pr_debug("%s: vsk=%p\n", __func__, vsk); - - /* Tell other side to terminate connection */ - if (sk->sk_type == SOCK_STREAM && sk->sk_state == SS_CONNECTED) { - virtio_transport_shutdown(vsk, SHUTDOWN_MASK); - } - - /* Free incomplete dgrams */ - lock_sock(sk); - list_for_each_entry_safe(dgram_skb, dgram_skb_tmp, - &trans->incomplete_dgrams, list) { - list_del(&dgram_skb->list); - kfree_skb(dgram_skb->skb); - kfree(dgram_skb); - sock_put(sk); /* held in virtio_transport_recv_dgram() */ - } - release_sock(sk); -} -EXPORT_SYMBOL_GPL(virtio_transport_release); - -int -virtio_transport_dgram_enqueue(struct vsock_sock *vsk, - struct sockaddr_vm *remote_addr, - struct msghdr *msg, - size_t dgram_len) -{ - struct virtio_transport *trans = vsk->trans; - struct virtio_vsock_pkt_info info = { - .op = VIRTIO_VSOCK_OP_RW, - .type = VIRTIO_VSOCK_TYPE_DGRAM, - .msg = msg, - }; - size_t total_written = 0, pkt_off = 0, written; - u16 dgram_id; - - /* The max size of a single dgram we support is 64KB */ - if (dgram_len > VIRTIO_VSOCK_MAX_DGRAM_SIZE) - return -EMSGSIZE; - - info.dgram_len = dgram_len; - vsk->remote_addr = *remote_addr; - - dgram_id = trans->dgram_id++; - - /* TODO: To optimize, if we have enough credit to send the pkt already, - * do not ask the peer to send credit to use */ - virtio_transport_send_credit_request(vsk, VIRTIO_VSOCK_TYPE_DGRAM); - - while (total_written < dgram_len) { - info.pkt_len = dgram_len - total_written; - info.flags = dgram_id << 16 | pkt_off; - written = trans->ops->send_pkt(vsk, &info); - if (written < 0) - return -ENOMEM; - if (written == 0) { - /* TODO: if written = 0, we need a sleep & wakeup - * instead of sleep */ - pr_debug("%s: SHOULD WAIT written==0", __func__); - msleep(10); - } - total_written += written; - pkt_off += written; - pr_debug("%s:id=%d, dgram_len=%zu, off=%zu, total_written=%zu, written=%zu\n", - __func__, dgram_id, dgram_len, pkt_off, total_written, written); - } - - return dgram_len; -} -EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue); - -ssize_t -virtio_transport_stream_enqueue(struct vsock_sock *vsk, - struct msghdr *msg, - size_t len) -{ - struct virtio_transport *trans = vsk->trans; - struct virtio_vsock_pkt_info info = { - .op = VIRTIO_VSOCK_OP_RW, - .type = VIRTIO_VSOCK_TYPE_STREAM, - .msg = msg, - .pkt_len = len, - }; - - return trans->ops->send_pkt(vsk, &info); -} -EXPORT_SYMBOL_GPL(virtio_transport_stream_enqueue); - -void virtio_transport_destruct(struct vsock_sock *vsk) -{ - struct virtio_transport *trans = vsk->trans; - - pr_debug("%s: vsk=%p\n", __func__, vsk); - kfree(trans); -} -EXPORT_SYMBOL_GPL(virtio_transport_destruct); - -static int virtio_transport_send_ack(struct vsock_sock *vsk, u32 cookie) -{ - struct virtio_transport *trans = vsk->trans; - struct virtio_vsock_pkt_info info = { - .op = VIRTIO_VSOCK_OP_ACK, - .type = VIRTIO_VSOCK_TYPE_STREAM, - .flags = cpu_to_le32(cookie), - }; - - pr_debug("%s: sk=%p send_offer\n", __func__, vsk); - return trans->ops->send_pkt(vsk, &info); -} - -static int virtio_transport_send_reset(struct vsock_sock *vsk, - struct virtio_vsock_pkt *pkt) -{ - struct virtio_transport *trans = vsk->trans; - 
struct virtio_vsock_pkt_info info = { - .op = VIRTIO_VSOCK_OP_RST, - .type = VIRTIO_VSOCK_TYPE_STREAM, - }; - - pr_debug("%s\n", __func__); - - /* Send RST only if the original pkt is not a RST pkt */ - if (le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST) - return 0; - - return trans->ops->send_pkt(vsk, &info); -} - -static int -virtio_transport_recv_connecting(struct sock *sk, - struct virtio_vsock_pkt *pkt) -{ - struct vsock_sock *vsk = vsock_sk(sk); - int err; - int skerr; - u32 cookie; - - pr_debug("%s: vsk=%p\n", __func__, vsk); - switch (le16_to_cpu(pkt->hdr.op)) { - case VIRTIO_VSOCK_OP_RESPONSE: - cookie = le32_to_cpu(pkt->hdr.flags); - pr_debug("%s: got RESPONSE and send ACK, cookie=%x\n", __func__, cookie); - err = virtio_transport_send_ack(vsk, cookie); - if (err < 0) { - skerr = -err; - goto destroy; - } - sk->sk_state = SS_CONNECTED; - sk->sk_socket->state = SS_CONNECTED; - vsock_insert_connected(vsk); - sk->sk_state_change(sk); - break; - case VIRTIO_VSOCK_OP_INVALID: - pr_debug("%s: got invalid\n", __func__); - break; - case VIRTIO_VSOCK_OP_RST: - pr_debug("%s: got rst\n", __func__); - skerr = ECONNRESET; - err = 0; - goto destroy; - default: - pr_debug("%s: got def\n", __func__); - skerr = EPROTO; - err = -EINVAL; - goto destroy; - } - return 0; - -destroy: - virtio_transport_send_reset(vsk, pkt); - sk->sk_state = SS_UNCONNECTED; - sk->sk_err = skerr; - sk->sk_error_report(sk); - return err; -} - -static int -virtio_transport_recv_connected(struct sock *sk, - struct virtio_vsock_pkt *pkt) -{ - struct vsock_sock *vsk = vsock_sk(sk); - struct virtio_transport *trans = vsk->trans; - int err = 0; - - switch (le16_to_cpu(pkt->hdr.op)) { - case VIRTIO_VSOCK_OP_RW: - pkt->len = le32_to_cpu(pkt->hdr.len); - pkt->off = 0; - pkt->trans = trans; - - mutex_lock(&trans->rx_lock); - virtio_transport_inc_rx_pkt(pkt); - list_add_tail(&pkt->list, &trans->rx_queue); - mutex_unlock(&trans->rx_lock); - - sk->sk_data_ready(sk); - return err; - case VIRTIO_VSOCK_OP_CREDIT_UPDATE: - sk->sk_write_space(sk); - break; - case VIRTIO_VSOCK_OP_SHUTDOWN: - pr_debug("%s: got shutdown\n", __func__); - if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_RCV) - vsk->peer_shutdown |= RCV_SHUTDOWN; - if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_SEND) - vsk->peer_shutdown |= SEND_SHUTDOWN; - if (le32_to_cpu(pkt->hdr.flags)) - sk->sk_state_change(sk); - break; - case VIRTIO_VSOCK_OP_RST: - pr_debug("%s: got rst\n", __func__); - sock_set_flag(sk, SOCK_DONE); - vsk->peer_shutdown = SHUTDOWN_MASK; - if (vsock_stream_has_data(vsk) <= 0) - sk->sk_state = SS_DISCONNECTING; - sk->sk_state_change(sk); - break; - default: - err = -EINVAL; - break; - } - - virtio_transport_free_pkt(pkt); - return err; -} - -static int -virtio_transport_send_response(struct vsock_sock *vsk, - struct virtio_vsock_pkt *pkt) -{ - struct virtio_transport *trans = vsk->trans; - struct virtio_vsock_pkt_info info = { - .op = VIRTIO_VSOCK_OP_RESPONSE, - .type = VIRTIO_VSOCK_TYPE_STREAM, - .remote_cid = le32_to_cpu(pkt->hdr.src_cid), - .remote_port = le32_to_cpu(pkt->hdr.src_port), - }; - u32 cookie; - - cookie = virtio_vsock_secure_cookie(le32_to_cpu(pkt->hdr.src_cid), - le32_to_cpu(pkt->hdr.dst_cid), - le32_to_cpu(pkt->hdr.src_port), - le32_to_cpu(pkt->hdr.dst_port), - jiffies / (HZ * 60)); - info.flags = cpu_to_le32(cookie); - - pr_debug("%s: send_response, cookie=%x\n", __func__, le32_to_cpu(cookie)); - - return trans->ops->send_pkt(vsk, &info); -} - -/* Handle server socket */ -static int -virtio_transport_recv_listen(struct 
sock *sk, struct virtio_vsock_pkt *pkt) -{ - struct vsock_sock *vsk = vsock_sk(sk); - struct vsock_sock *vpending; - struct sock *pending; - int err; - u32 cookie; - - switch (le16_to_cpu(pkt->hdr.op)) { - case VIRTIO_VSOCK_OP_REQUEST: - err = virtio_transport_send_response(vsk, pkt); - if (err < 0) { - // FIXME vsk should be vpending - virtio_transport_send_reset(vsk, pkt); - return err; - } - break; - case VIRTIO_VSOCK_OP_ACK: - cookie = le32_to_cpu(pkt->hdr.flags); - err = virtio_vsock_check_cookie(le32_to_cpu(pkt->hdr.src_cid), - le32_to_cpu(pkt->hdr.dst_cid), - le32_to_cpu(pkt->hdr.src_port), - le32_to_cpu(pkt->hdr.dst_port), - jiffies / (HZ * 60), - le32_to_cpu(pkt->hdr.flags), - VSOCK_TIMEOUT_INIT); - pr_debug("%s: cookie=%x, err=%d\n", __func__, cookie, err); - if (err) - return err; - - /* So no pending socket are responsible for this pkt, create one */ - pr_debug("%s: create pending\n", __func__); - pending = __vsock_create(sock_net(sk), NULL, sk, GFP_KERNEL, - sk->sk_type, 0); - if (!pending) { - virtio_transport_send_reset(vsk, pkt); - return -ENOMEM; - } - sk->sk_ack_backlog++; - pending->sk_state = SS_CONNECTING; - - vpending = vsock_sk(pending); - vsock_addr_init(&vpending->local_addr, le32_to_cpu(pkt->hdr.dst_cid), - le32_to_cpu(pkt->hdr.dst_port)); - vsock_addr_init(&vpending->remote_addr, le32_to_cpu(pkt->hdr.src_cid), - le32_to_cpu(pkt->hdr.src_port)); - vsock_add_pending(sk, pending); - - pr_debug("%s: get pending\n", __func__); - pending = virtio_transport_get_pending(sk, pkt); - vpending = vsock_sk(pending); - lock_sock(pending); - switch (pending->sk_state) { - case SS_CONNECTING: - if (le16_to_cpu(pkt->hdr.op) != VIRTIO_VSOCK_OP_ACK) { - pr_debug("%s: op=%d != OP_ACK\n", __func__, - le16_to_cpu(pkt->hdr.op)); - virtio_transport_send_reset(vpending, pkt); - pending->sk_err = EPROTO; - pending->sk_state = SS_UNCONNECTED; - sock_put(pending); - } else { - pending->sk_state = SS_CONNECTED; - vsock_insert_connected(vpending); - - vsock_remove_pending(sk, pending); - vsock_enqueue_accept(sk, pending); - - sk->sk_data_ready(sk); - } - err = 0; - break; - default: - pr_debug("%s: sk->sk_ack_backlog=%d\n", __func__, - sk->sk_ack_backlog); - virtio_transport_send_reset(vpending, pkt); - err = -EINVAL; - break; - } - if (err < 0) - vsock_remove_pending(sk, pending); - release_sock(pending); - - /* Release refcnt obtained in virtio_transport_get_pending */ - sock_put(pending); - break; - default: - break; - } - - return 0; -} - -static void virtio_transport_space_update(struct sock *sk, - struct virtio_vsock_pkt *pkt) -{ - struct vsock_sock *vsk = vsock_sk(sk); - struct virtio_transport *trans = vsk->trans; - bool space_available; - - /* buf_alloc and fwd_cnt is always included in the hdr */ - mutex_lock(&trans->tx_lock); - trans->peer_buf_alloc = le32_to_cpu(pkt->hdr.buf_alloc); - trans->peer_fwd_cnt = le32_to_cpu(pkt->hdr.fwd_cnt); - space_available = virtio_transport_has_space(vsk); - mutex_unlock(&trans->tx_lock); - - if (space_available) - sk->sk_write_space(sk); -} - -/* We are under the virtio-vsock's vsock->rx_lock or - * vhost-vsock's vq->mutex lock */ -void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt) -{ - struct virtio_transport *trans; - struct sockaddr_vm src, dst; - struct vsock_sock *vsk; - struct sock *sk; - - vsock_addr_init(&src, le32_to_cpu(pkt->hdr.src_cid), le32_to_cpu(pkt->hdr.src_port)); - vsock_addr_init(&dst, le32_to_cpu(pkt->hdr.dst_cid), le32_to_cpu(pkt->hdr.dst_port)); - - virtio_vsock_dumppkt(__func__, pkt); - - if 
(le16_to_cpu(pkt->hdr.type) == VIRTIO_VSOCK_TYPE_DGRAM) { - sk = vsock_find_unbound_socket(&dst); - if (!sk) - goto free_pkt; - - vsk = vsock_sk(sk); - trans = vsk->trans; - BUG_ON(!trans); - - virtio_transport_space_update(sk, pkt); - - lock_sock(sk); - switch (le16_to_cpu(pkt->hdr.op)) { - case VIRTIO_VSOCK_OP_CREDIT_UPDATE: - virtio_transport_free_pkt(pkt); - break; - case VIRTIO_VSOCK_OP_CREDIT_REQUEST: - virtio_transport_send_credit_update(vsk, VIRTIO_VSOCK_TYPE_DGRAM, - &pkt->hdr); - virtio_transport_free_pkt(pkt); - break; - case VIRTIO_VSOCK_OP_RW: - virtio_transport_recv_dgram(sk, pkt); - break; - default: - virtio_transport_free_pkt(pkt); - break; - } - release_sock(sk); - - /* Release refcnt obtained when we fetched this socket out of - * the unbound list. - */ - sock_put(sk); - return; - } else if (le16_to_cpu(pkt->hdr.type) == VIRTIO_VSOCK_TYPE_STREAM) { - /* The socket must be in connected or bound table - * otherwise send reset back - */ - sk = vsock_find_connected_socket(&src, &dst); - if (!sk) { - sk = vsock_find_bound_socket(&dst); - if (!sk) { - pr_debug("%s: can not find bound_socket\n", __func__); - virtio_vsock_dumppkt(__func__, pkt); - /* Ignore this pkt instead of sending reset back */ - /* TODO send a RST unless this packet is a RST (to avoid infinite loops) */ - goto free_pkt; - } - } - - vsk = vsock_sk(sk); - trans = vsk->trans; - BUG_ON(!trans); - - virtio_transport_space_update(sk, pkt); - - lock_sock(sk); - switch (sk->sk_state) { - case VSOCK_SS_LISTEN: - virtio_transport_recv_listen(sk, pkt); - virtio_transport_free_pkt(pkt); - break; - case SS_CONNECTING: - virtio_transport_recv_connecting(sk, pkt); - virtio_transport_free_pkt(pkt); - break; - case SS_CONNECTED: - virtio_transport_recv_connected(sk, pkt); - break; - default: - virtio_transport_free_pkt(pkt); - break; - } - release_sock(sk); - - /* Release refcnt obtained when we fetched this socket out of the - * bound or connected list. - */ - sock_put(sk); - } - return; - -free_pkt: - virtio_transport_free_pkt(pkt); -} -EXPORT_SYMBOL_GPL(virtio_transport_recv_pkt); - -void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt) -{ - kfree(pkt->buf); - kfree(pkt); -} -EXPORT_SYMBOL_GPL(virtio_transport_free_pkt); - -static int __init virtio_vsock_common_init(void) -{ - get_random_bytes(vsockcookie_secret, sizeof(vsockcookie_secret)); - return 0; -} - -static void __exit virtio_vsock_common_exit(void) -{ -} - -module_init(virtio_vsock_common_init); -module_exit(virtio_vsock_common_exit); -MODULE_LICENSE("GPL v2"); -MODULE_AUTHOR("Asias He"); -MODULE_DESCRIPTION("common code for virtio vsock"); -- cgit v1.2.3-71-gd317 From 2a56a1fec290bf0bc4676bbf4efdb3744953a3e7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 7 Dec 2015 17:38:52 -0500 Subject: net: wrap sock->sk_cgrp_prioidx and ->sk_classid inside a struct Introduce sock->sk_cgrp_data which is a struct sock_cgroup_data. ->sk_cgroup_prioidx and ->sk_classid are moved into it. The struct and its accessors are defined in cgroup-defs.h. This is to prepare for overloading the fields with a cgroup pointer. This patch mostly performs equivalent conversions but the followings are noteworthy. * Equality test before updating classid is removed from sock_update_classid(). This shouldn't make any noticeable difference and a similar test will be implemented on the helper side later. * sock_update_netprioidx() now takes struct sock_cgroup_data and can be moved to netprio_cgroup.h without causing include dependency loop. Moved. 
* The dummy version of sock_update_netprioidx() converted to a static inline function while at it. Signed-off-by: Tejun Heo Signed-off-by: David S. Miller --- include/linux/cgroup-defs.h | 36 ++++++++++++++++++++++++++++++++++++ include/net/cls_cgroup.h | 11 +++++------ include/net/netprio_cgroup.h | 16 +++++++++++++--- include/net/sock.h | 11 +++-------- net/Kconfig | 6 ++++++ net/core/dev.c | 3 ++- net/core/netclassid_cgroup.c | 4 ++-- net/core/netprio_cgroup.c | 3 ++- net/core/scm.c | 4 ++-- net/core/sock.c | 15 ++------------- net/netfilter/nft_meta.c | 2 +- net/netfilter/xt_cgroup.c | 3 ++- 12 files changed, 76 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 504d8591b6d3..ed128fed0335 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -542,4 +542,40 @@ static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) {} #endif /* CONFIG_CGROUPS */ +#ifdef CONFIG_SOCK_CGROUP_DATA + +struct sock_cgroup_data { + u16 prioidx; + u32 classid; +}; + +static inline u16 sock_cgroup_prioidx(struct sock_cgroup_data *skcd) +{ + return skcd->prioidx; +} + +static inline u32 sock_cgroup_classid(struct sock_cgroup_data *skcd) +{ + return skcd->classid; +} + +static inline void sock_cgroup_set_prioidx(struct sock_cgroup_data *skcd, + u16 prioidx) +{ + skcd->prioidx = prioidx; +} + +static inline void sock_cgroup_set_classid(struct sock_cgroup_data *skcd, + u32 classid) +{ + skcd->classid = classid; +} + +#else /* CONFIG_SOCK_CGROUP_DATA */ + +struct sock_cgroup_data { +}; + +#endif /* CONFIG_SOCK_CGROUP_DATA */ + #endif /* _LINUX_CGROUP_DEFS_H */ diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h index ccd6d8bffa4d..c0a92e2c286d 100644 --- a/include/net/cls_cgroup.h +++ b/include/net/cls_cgroup.h @@ -41,13 +41,12 @@ static inline u32 task_cls_classid(struct task_struct *p) return classid; } -static inline void sock_update_classid(struct sock *sk) +static inline void sock_update_classid(struct sock_cgroup_data *skcd) { u32 classid; classid = task_cls_classid(current); - if (classid != sk->sk_classid) - sk->sk_classid = classid; + sock_cgroup_set_classid(skcd, classid); } static inline u32 task_get_classid(const struct sk_buff *skb) @@ -64,17 +63,17 @@ static inline u32 task_get_classid(const struct sk_buff *skb) * softirqs always disables bh. */ if (in_serving_softirq()) { - /* If there is an sk_classid we'll use that. */ + /* If there is an sock_cgroup_classid we'll use that. 
*/ if (!skb->sk) return 0; - classid = skb->sk->sk_classid; + classid = sock_cgroup_classid(&skb->sk->sk_cgrp_data); } return classid; } #else /* !CONFIG_CGROUP_NET_CLASSID */ -static inline void sock_update_classid(struct sock *sk) +static inline void sock_update_classid(struct sock_cgroup_data *skcd) { } diff --git a/include/net/netprio_cgroup.h b/include/net/netprio_cgroup.h index f2a9597ff53c..604190596cde 100644 --- a/include/net/netprio_cgroup.h +++ b/include/net/netprio_cgroup.h @@ -25,8 +25,6 @@ struct netprio_map { u32 priomap[]; }; -void sock_update_netprioidx(struct sock *sk); - static inline u32 task_netprioidx(struct task_struct *p) { struct cgroup_subsys_state *css; @@ -38,13 +36,25 @@ static inline u32 task_netprioidx(struct task_struct *p) rcu_read_unlock(); return idx; } + +static inline void sock_update_netprioidx(struct sock_cgroup_data *skcd) +{ + if (in_interrupt()) + return; + + sock_cgroup_set_prioidx(skcd, task_netprioidx(current)); +} + #else /* !CONFIG_CGROUP_NET_PRIO */ + static inline u32 task_netprioidx(struct task_struct *p) { return 0; } -#define sock_update_netprioidx(sk) +static inline void sock_update_netprioidx(struct sock_cgroup_data *skcd) +{ +} #endif /* CONFIG_CGROUP_NET_PRIO */ #endif /* _NET_CLS_CGROUP_H */ diff --git a/include/net/sock.h b/include/net/sock.h index a95bcf7d6efa..0ca22b014de1 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -59,6 +59,7 @@ #include #include #include +#include #include #include @@ -308,8 +309,7 @@ struct cg_proto; * @sk_send_head: front of stuff to transmit * @sk_security: used by security modules * @sk_mark: generic packet mark - * @sk_cgrp_prioidx: socket group's priority map index - * @sk_classid: this socket's cgroup classid + * @sk_cgrp_data: cgroup data for this cgroup * @sk_cgrp: this socket's cgroup-specific proto data * @sk_write_pending: a write to stream socket waits to start * @sk_state_change: callback to indicate change in the state of the sock @@ -443,12 +443,7 @@ struct sock { #ifdef CONFIG_SECURITY void *sk_security; #endif -#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) - u16 sk_cgrp_prioidx; -#endif -#ifdef CONFIG_CGROUP_NET_CLASSID - u32 sk_classid; -#endif + struct sock_cgroup_data sk_cgrp_data; struct cg_proto *sk_cgrp; void (*sk_state_change)(struct sock *sk); void (*sk_data_ready)(struct sock *sk); diff --git a/net/Kconfig b/net/Kconfig index 127da94ae25e..11f8c22af34d 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -250,9 +250,14 @@ config XPS depends on SMP default y +config SOCK_CGROUP_DATA + bool + default n + config CGROUP_NET_PRIO bool "Network priority cgroup" depends on CGROUPS + select SOCK_CGROUP_DATA ---help--- Cgroup subsystem for use in assigning processes to network priorities on a per-interface basis. @@ -260,6 +265,7 @@ config CGROUP_NET_PRIO config CGROUP_NET_CLASSID bool "Network classid cgroup" depends on CGROUPS + select SOCK_CGROUP_DATA ---help--- Cgroup subsystem for use as general purpose socket classid marker that is being used in cls_cgroup and for netfilter matching. 
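To make the conversion above easier to follow, here is a minimal standalone sketch of the accessor pattern that the include/linux/cgroup-defs.h hunk introduces. It mirrors struct sock_cgroup_data and its helpers in plain userspace C; the main() caller is hypothetical and the snippet is illustrative only, not part of the patch.

/* Illustrative userspace mirror of the sock_cgroup_data accessors
 * added above; not kernel code. */
#include <stdint.h>
#include <stdio.h>

struct sock_cgroup_data {
	uint16_t prioidx;
	uint32_t classid;
};

static inline uint16_t sock_cgroup_prioidx(struct sock_cgroup_data *skcd)
{
	return skcd->prioidx;
}

static inline void sock_cgroup_set_prioidx(struct sock_cgroup_data *skcd,
					   uint16_t prioidx)
{
	skcd->prioidx = prioidx;
}

static inline uint32_t sock_cgroup_classid(struct sock_cgroup_data *skcd)
{
	return skcd->classid;
}

static inline void sock_cgroup_set_classid(struct sock_cgroup_data *skcd,
					   uint32_t classid)
{
	skcd->classid = classid;
}

int main(void)
{
	/* After the patch, callers go through &sk->sk_cgrp_data instead of
	 * reading sk->sk_cgrp_prioidx or sk->sk_classid directly. */
	struct sock_cgroup_data skcd = { 0, 0 };

	sock_cgroup_set_prioidx(&skcd, 3);
	sock_cgroup_set_classid(&skcd, 0x100001);
	printf("prioidx=%u classid=%u\n",
	       (unsigned)sock_cgroup_prioidx(&skcd),
	       (unsigned)sock_cgroup_classid(&skcd));
	return 0;
}
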
diff --git a/net/core/dev.c b/net/core/dev.c index e5c395473eba..8f705fcedb94 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2929,7 +2929,8 @@ static void skb_update_prio(struct sk_buff *skb) struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap); if (!skb->priority && skb->sk && map) { - unsigned int prioidx = skb->sk->sk_cgrp_prioidx; + unsigned int prioidx = + sock_cgroup_prioidx(&skb->sk->sk_cgrp_data); if (prioidx < map->priomap_len) skb->priority = map->priomap[prioidx]; diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index 2e4df84c34a1..e60ded46b3ac 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -62,8 +62,8 @@ static int update_classid_sock(const void *v, struct file *file, unsigned n) struct socket *sock = sock_from_file(file, &err); if (sock) - sock->sk->sk_classid = (u32)(unsigned long)v; - + sock_cgroup_set_classid(&sock->sk->sk_cgrp_data, + (unsigned long)v); return 0; } diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 2b9159b7a28a..de42aa7f6c77 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -223,7 +223,8 @@ static int update_netprio(const void *v, struct file *file, unsigned n) int err; struct socket *sock = sock_from_file(file, &err); if (sock) - sock->sk->sk_cgrp_prioidx = (u32)(unsigned long)v; + sock_cgroup_set_prioidx(&sock->sk->sk_cgrp_data, + (unsigned long)v); return 0; } diff --git a/net/core/scm.c b/net/core/scm.c index 8a1741b14302..14596fb37172 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -289,8 +289,8 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) /* Bump the usage count and install the file. */ sock = sock_from_file(fp[i], &err); if (sock) { - sock_update_netprioidx(sock->sk); - sock_update_classid(sock->sk); + sock_update_netprioidx(&sock->sk->sk_cgrp_data); + sock_update_classid(&sock->sk->sk_cgrp_data); } fd_install(new_fd, get_file(fp[i])); } diff --git a/net/core/sock.c b/net/core/sock.c index 7965ef487375..947741dc43fa 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1393,17 +1393,6 @@ static void sk_prot_free(struct proto *prot, struct sock *sk) module_put(owner); } -#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) -void sock_update_netprioidx(struct sock *sk) -{ - if (in_interrupt()) - return; - - sk->sk_cgrp_prioidx = task_netprioidx(current); -} -EXPORT_SYMBOL_GPL(sock_update_netprioidx); -#endif - /** * sk_alloc - All socket objects are allocated here * @net: the applicable net namespace @@ -1432,8 +1421,8 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, sock_net_set(sk, net); atomic_set(&sk->sk_wmem_alloc, 1); - sock_update_classid(sk); - sock_update_netprioidx(sk); + sock_update_classid(&sk->sk_cgrp_data); + sock_update_netprioidx(&sk->sk_cgrp_data); } return sk; diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 9dfaf4d55ee0..1915cab7f32d 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -174,7 +174,7 @@ void nft_meta_get_eval(const struct nft_expr *expr, sk = skb_to_full_sk(skb); if (!sk || !sk_fullsock(sk)) goto err; - *dest = sk->sk_classid; + *dest = sock_cgroup_classid(&sk->sk_cgrp_data); break; #endif default: diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c index a1d126f29463..54eaeb45ce99 100644 --- a/net/netfilter/xt_cgroup.c +++ b/net/netfilter/xt_cgroup.c @@ -42,7 +42,8 @@ cgroup_mt(const struct sk_buff *skb, struct xt_action_param *par) if (skb->sk == NULL || !sk_fullsock(skb->sk)) return false; - return (info->id 
== skb->sk->sk_classid) ^ info->invert; + return (info->id == sock_cgroup_classid(&skb->sk->sk_cgrp_data)) ^ + info->invert; } static struct xt_match cgroup_mt_reg __read_mostly = { -- cgit v1.2.3-71-gd317 From bd1060a1d67128bb8fbe2e1384c518912cbe54e7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 7 Dec 2015 17:38:53 -0500 Subject: sock, cgroup: add sock->sk_cgroup In cgroup v1, dealing with cgroup membership was difficult because the number of membership associations was unbound. As a result, cgroup v1 grew several controllers whose primary purpose is either tagging membership or pull in configuration knobs from other subsystems so that cgroup membership test can be avoided. net_cls and net_prio controllers are examples of the latter. They allow configuring network-specific attributes from cgroup side so that network subsystem can avoid testing cgroup membership; unfortunately, these are not only cumbersome but also problematic. Both net_cls and net_prio aren't properly hierarchical. Both inherit configuration from the parent on creation but there's no interaction afterwards. An ancestor doesn't restrict the behavior in its subtree in anyway and configuration changes aren't propagated downwards. Especially when combined with cgroup delegation, this is problematic because delegatees can mess up whatever network configuration implemented at the system level. net_prio would allow the delegatees to set whatever priority value regardless of CAP_NET_ADMIN and net_cls the same for classid. While it is possible to solve these issues from controller side by implementing hierarchical allowable ranges in both controllers, it would involve quite a bit of complexity in the controllers and further obfuscate network configuration as it becomes even more difficult to tell what's actually being configured looking from the network side. While not much can be done for v1 at this point, as membership handling is sane on cgroup v2, it'd be better to make cgroup matching behave like other network matches and classifiers than introducing further complications. In preparation, this patch updates sock->sk_cgrp_data handling so that it points to the v2 cgroup that sock was created in until either net_prio or net_cls is used. Once either of the two is used, sock->sk_cgrp_data reverts to its previous role of carrying prioidx and classid. This is to avoid adding yet another cgroup related field to struct sock. As the mode switching can happen at most once per boot, the switching mechanism is aimed at lowering hot path overhead. It may leak a finite, likely small, number of cgroup refs and report spurious prioidx or classid on switching; however, dynamic updates of prioidx and classid have always been racy and lossy - socks between creation and fd installation are never updated, config changes don't update existing sockets at all, and prioidx may index with dead and recycled cgroup IDs. Non-critical inaccuracies from small race windows won't make any noticeable difference. This patch doesn't make use of the pointer yet. The following patch will implement netfilter match for cgroup2 membership. v2: Use sock_cgroup_data to avoid inflating struct sock w/ another cgroup specific field. v3: Add comments explaining why sock_data_prioidx() and sock_data_classid() use different fallback values. Signed-off-by: Tejun Heo Cc: Daniel Borkmann Cc: Daniel Wagner CC: Neil Horman Signed-off-by: David S. 
Miller --- include/linux/cgroup-defs.h | 88 +++++++++++++++++++++++++++++++++++++++++--- include/linux/cgroup.h | 41 +++++++++++++++++++++ kernel/cgroup.c | 55 ++++++++++++++++++++++++++- net/core/netclassid_cgroup.c | 7 +++- net/core/netprio_cgroup.c | 7 +++- net/core/sock.c | 2 + 6 files changed, 191 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index ed128fed0335..9dc226345e4e 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -544,31 +544,107 @@ static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) {} #ifdef CONFIG_SOCK_CGROUP_DATA +/* + * sock_cgroup_data is embedded at sock->sk_cgrp_data and contains + * per-socket cgroup information except for memcg association. + * + * On legacy hierarchies, net_prio and net_cls controllers directly set + * attributes on each sock which can then be tested by the network layer. + * On the default hierarchy, each sock is associated with the cgroup it was + * created in and the networking layer can match the cgroup directly. + * + * To avoid carrying all three cgroup related fields separately in sock, + * sock_cgroup_data overloads (prioidx, classid) and the cgroup pointer. + * On boot, sock_cgroup_data records the cgroup that the sock was created + * in so that cgroup2 matches can be made; however, once either net_prio or + * net_cls starts being used, the area is overriden to carry prioidx and/or + * classid. The two modes are distinguished by whether the lowest bit is + * set. Clear bit indicates cgroup pointer while set bit prioidx and + * classid. + * + * While userland may start using net_prio or net_cls at any time, once + * either is used, cgroup2 matching no longer works. There is no reason to + * mix the two and this is in line with how legacy and v2 compatibility is + * handled. On mode switch, cgroup references which are already being + * pointed to by socks may be leaked. While this can be remedied by adding + * synchronization around sock_cgroup_data, given that the number of leaked + * cgroups is bound and highly unlikely to be high, this seems to be the + * better trade-off. + */ struct sock_cgroup_data { - u16 prioidx; - u32 classid; + union { +#ifdef __LITTLE_ENDIAN + struct { + u8 is_data; + u8 padding; + u16 prioidx; + u32 classid; + } __packed; +#else + struct { + u32 classid; + u16 prioidx; + u8 padding; + u8 is_data; + } __packed; +#endif + u64 val; + }; }; +/* + * There's a theoretical window where the following accessors race with + * updaters and return part of the previous pointer as the prioidx or + * classid. Such races are short-lived and the result isn't critical. + */ static inline u16 sock_cgroup_prioidx(struct sock_cgroup_data *skcd) { - return skcd->prioidx; + /* fallback to 1 which is always the ID of the root cgroup */ + return (skcd->is_data & 1) ? skcd->prioidx : 1; } static inline u32 sock_cgroup_classid(struct sock_cgroup_data *skcd) { - return skcd->classid; + /* fallback to 0 which is the unconfigured default classid */ + return (skcd->is_data & 1) ? skcd->classid : 0; } +/* + * If invoked concurrently, the updaters may clobber each other. The + * caller is responsible for synchronization. 
+ */ static inline void sock_cgroup_set_prioidx(struct sock_cgroup_data *skcd, u16 prioidx) { - skcd->prioidx = prioidx; + struct sock_cgroup_data skcd_buf = { .val = READ_ONCE(skcd->val) }; + + if (sock_cgroup_prioidx(&skcd_buf) == prioidx) + return; + + if (!(skcd_buf.is_data & 1)) { + skcd_buf.val = 0; + skcd_buf.is_data = 1; + } + + skcd_buf.prioidx = prioidx; + WRITE_ONCE(skcd->val, skcd_buf.val); /* see sock_cgroup_ptr() */ } static inline void sock_cgroup_set_classid(struct sock_cgroup_data *skcd, u32 classid) { - skcd->classid = classid; + struct sock_cgroup_data skcd_buf = { .val = READ_ONCE(skcd->val) }; + + if (sock_cgroup_classid(&skcd_buf) == classid) + return; + + if (!(skcd_buf.is_data & 1)) { + skcd_buf.val = 0; + skcd_buf.is_data = 1; + } + + skcd_buf.classid = classid; + WRITE_ONCE(skcd->val, skcd_buf.val); /* see sock_cgroup_ptr() */ } #else /* CONFIG_SOCK_CGROUP_DATA */ diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 4c3ffab81ba7..a8ba1ea0ea5a 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -578,4 +578,45 @@ static inline int cgroup_init(void) { return 0; } #endif /* !CONFIG_CGROUPS */ +/* + * sock->sk_cgrp_data handling. For more info, see sock_cgroup_data + * definition in cgroup-defs.h. + */ +#ifdef CONFIG_SOCK_CGROUP_DATA + +#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID) +extern spinlock_t cgroup_sk_update_lock; +#endif + +void cgroup_sk_alloc_disable(void); +void cgroup_sk_alloc(struct sock_cgroup_data *skcd); +void cgroup_sk_free(struct sock_cgroup_data *skcd); + +static inline struct cgroup *sock_cgroup_ptr(struct sock_cgroup_data *skcd) +{ +#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID) + unsigned long v; + + /* + * @skcd->val is 64bit but the following is safe on 32bit too as we + * just need the lower ulong to be written and read atomically. + */ + v = READ_ONCE(skcd->val); + + if (v & 1) + return &cgrp_dfl_root.cgrp; + + return (struct cgroup *)(unsigned long)v ?: &cgrp_dfl_root.cgrp; +#else + return (struct cgroup *)(unsigned long)skcd->val; +#endif +} + +#else /* CONFIG_CGROUP_DATA */ + +static inline void cgroup_sk_alloc(struct sock_cgroup_data *skcd) {} +static inline void cgroup_sk_free(struct sock_cgroup_data *skcd) {} + +#endif /* CONFIG_CGROUP_DATA */ + #endif /* _LINUX_CGROUP_H */ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 3db5e8f5b702..4f8f7927b422 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -57,8 +57,8 @@ #include /* TODO: replace with more sophisticated array */ #include #include - #include +#include /* * pidlists linger the following amount before being destroyed. The goal @@ -5782,6 +5782,59 @@ struct cgroup *cgroup_get_from_path(const char *path) } EXPORT_SYMBOL_GPL(cgroup_get_from_path); +/* + * sock->sk_cgrp_data handling. For more info, see sock_cgroup_data + * definition in cgroup-defs.h. 
+ */ +#ifdef CONFIG_SOCK_CGROUP_DATA + +#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID) + +spinlock_t cgroup_sk_update_lock; +static bool cgroup_sk_alloc_disabled __read_mostly; + +void cgroup_sk_alloc_disable(void) +{ + if (cgroup_sk_alloc_disabled) + return; + pr_info("cgroup: disabling cgroup2 socket matching due to net_prio or net_cls activation\n"); + cgroup_sk_alloc_disabled = true; +} + +#else + +#define cgroup_sk_alloc_disabled false + +#endif + +void cgroup_sk_alloc(struct sock_cgroup_data *skcd) +{ + if (cgroup_sk_alloc_disabled) + return; + + rcu_read_lock(); + + while (true) { + struct css_set *cset; + + cset = task_css_set(current); + if (likely(cgroup_tryget(cset->dfl_cgrp))) { + skcd->val = (unsigned long)cset->dfl_cgrp; + break; + } + cpu_relax(); + } + + rcu_read_unlock(); +} + +void cgroup_sk_free(struct sock_cgroup_data *skcd) +{ + cgroup_put(sock_cgroup_ptr(skcd)); +} + +#endif /* CONFIG_SOCK_CGROUP_DATA */ + #ifdef CONFIG_CGROUP_DEBUG static struct cgroup_subsys_state * debug_css_alloc(struct cgroup_subsys_state *parent_css) diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index e60ded46b3ac..04257a0e3534 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -61,9 +61,12 @@ static int update_classid_sock(const void *v, struct file *file, unsigned n) int err; struct socket *sock = sock_from_file(file, &err); - if (sock) + if (sock) { + spin_lock(&cgroup_sk_update_lock); sock_cgroup_set_classid(&sock->sk->sk_cgrp_data, (unsigned long)v); + spin_unlock(&cgroup_sk_update_lock); + } return 0; } @@ -98,6 +101,8 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft, { struct cgroup_cls_state *cs = css_cls_state(css); + cgroup_sk_alloc_disable(); + cs->classid = (u32)value; update_classid(css, (void *)(unsigned long)cs->classid); diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index de42aa7f6c77..053d60c33395 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -209,6 +209,8 @@ static ssize_t write_priomap(struct kernfs_open_file *of, if (!dev) return -ENODEV; + cgroup_sk_alloc_disable(); + rtnl_lock(); ret = netprio_set_prio(of_css(of), dev, prio); @@ -222,9 +224,12 @@ static int update_netprio(const void *v, struct file *file, unsigned n) { int err; struct socket *sock = sock_from_file(file, &err); - if (sock) + if (sock) { + spin_lock(&cgroup_sk_update_lock); sock_cgroup_set_prioidx(&sock->sk->sk_cgrp_data, (unsigned long)v); + spin_unlock(&cgroup_sk_update_lock); + } return 0; } diff --git a/net/core/sock.c b/net/core/sock.c index 947741dc43fa..1278d7b7bd9a 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1363,6 +1363,7 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, if (!try_module_get(prot->owner)) goto out_free_sec; sk_tx_queue_clear(sk); + cgroup_sk_alloc(&sk->sk_cgrp_data); } return sk; @@ -1385,6 +1386,7 @@ static void sk_prot_free(struct proto *prot, struct sock *sk) owner = prot->owner; slab = prot->slab; + cgroup_sk_free(&sk->sk_cgrp_data); security_sk_free(sk); if (slab != NULL) kmem_cache_free(slab, sk); -- cgit v1.2.3-71-gd317 From ad2c8c73d29702c3193f739390f6661f9a4ecad9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 9 Dec 2015 12:30:46 -0500 Subject: cgroup: fix sock_cgroup_data initialization on earlier compilers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sock_cgroup_data is a struct containing an anonymous union. 
sock_cgroup_set_prioidx() and sock_cgroup_set_classid() were initializing a field inside the anonymous union as follows. struct sock_ccgroup_data skcd_buf = { .val = VAL }; While this is fine on more recent compilers, gcc-4.4.7 triggers the following errors. include/linux/cgroup-defs.h: In function ‘sock_cgroup_set_prioidx’: include/linux/cgroup-defs.h:619: error: unknown field ‘val’ specified in initializer include/linux/cgroup-defs.h:619: warning: missing braces around initializer include/linux/cgroup-defs.h:619: warning: (near initialization for ‘skcd_buf.’) This is because .val belongs to the anonymous union nested inside the struct but the initializer is missing the nesting. Fix it by adding an extra pair of braces. Signed-off-by: Tejun Heo Reported-by: Alaa Hleihel Fixes: bd1060a1d671 ("sock, cgroup: add sock->sk_cgroup") Signed-off-by: David S. Miller --- include/linux/cgroup-defs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 9dc226345e4e..097901a68671 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -616,7 +616,7 @@ static inline u32 sock_cgroup_classid(struct sock_cgroup_data *skcd) static inline void sock_cgroup_set_prioidx(struct sock_cgroup_data *skcd, u16 prioidx) { - struct sock_cgroup_data skcd_buf = { .val = READ_ONCE(skcd->val) }; + struct sock_cgroup_data skcd_buf = {{ .val = READ_ONCE(skcd->val) }}; if (sock_cgroup_prioidx(&skcd_buf) == prioidx) return; @@ -633,7 +633,7 @@ static inline void sock_cgroup_set_prioidx(struct sock_cgroup_data *skcd, static inline void sock_cgroup_set_classid(struct sock_cgroup_data *skcd, u32 classid) { - struct sock_cgroup_data skcd_buf = { .val = READ_ONCE(skcd->val) }; + struct sock_cgroup_data skcd_buf = {{ .val = READ_ONCE(skcd->val) }}; if (sock_cgroup_classid(&skcd_buf) == classid) return; -- cgit v1.2.3-71-gd317 From 899077791403ff7a2d8cfaa87bd1a82d729463e2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 8 Dec 2015 16:32:27 +0100 Subject: netcp: try to reduce type confusion in descriptors The netcp driver produces tons of warnings when CONFIG_LPAE is enabled on ARM: drivers/net/ethernet/ti/netcp_core.c: In function 'netcp_tx_map_skb': drivers/net/ethernet/ti/netcp_core.c:1084:13: warning: passing argument 1 of 'set_words' from incompatible pointer type [-Wincompatible-pointer-types] This is the result of trying to pass a pointer to a dma_addr_t to a function that expects a u32 pointer to copy that into a DMA descriptor. Looking at that code in more detail to fix the warnings, I see multiple related problems: * The conversion functions are not endian-safe, as the DMA descriptors are almost certainly fixed-endian, but the CPU is not. * On 64-bit machines, passing a pointer through a u32 variable is a bug, accessing an indirect pointer as a u32 pointer even more so. * The handling of epib and psdata mixes native-endian and device-endian data. In this patch, I try to sort out the types for most accesses here, adding le32_to_cpu/cpu_to_le32 where appropriate, and passing pointers through two 32-bit words in the descriptor padding, to make it plausible that the driver does the right thing if compiled for big-endian or 64-bit systems. Signed-off-by: Arnd Bergmann Signed-off-by: David S. 
Miller --- drivers/net/ethernet/ti/netcp_core.c | 123 ++++++++++++++++++++--------------- include/linux/soc/ti/knav_dma.h | 22 +++---- 2 files changed, 82 insertions(+), 63 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index e5e20e734f21..eb2585e777e1 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -109,69 +109,80 @@ module_param(netcp_debug_level, int, 0); MODULE_PARM_DESC(netcp_debug_level, "Netcp debug level (NETIF_MSG bits) (0=none,...,16=all)"); /* Helper functions - Get/Set */ -static void get_pkt_info(u32 *buff, u32 *buff_len, u32 *ndesc, +static void get_pkt_info(dma_addr_t *buff, u32 *buff_len, dma_addr_t *ndesc, struct knav_dma_desc *desc) { - *buff_len = desc->buff_len; - *buff = desc->buff; - *ndesc = desc->next_desc; + *buff_len = le32_to_cpu(desc->buff_len); + *buff = le32_to_cpu(desc->buff); + *ndesc = le32_to_cpu(desc->next_desc); } -static void get_pad_info(u32 *pad0, u32 *pad1, struct knav_dma_desc *desc) +static void get_pad_info(u32 *pad0, u32 *pad1, u32 *pad2, struct knav_dma_desc *desc) { - *pad0 = desc->pad[0]; - *pad1 = desc->pad[1]; + *pad0 = le32_to_cpu(desc->pad[0]); + *pad1 = le32_to_cpu(desc->pad[1]); + *pad2 = le32_to_cpu(desc->pad[2]); } -static void get_org_pkt_info(u32 *buff, u32 *buff_len, +static void get_pad_ptr(void **padptr, struct knav_dma_desc *desc) +{ + u64 pad64; + + pad64 = le32_to_cpu(desc->pad[0]) + + ((u64)le32_to_cpu(desc->pad[1]) << 32); + *padptr = (void *)(uintptr_t)pad64; +} + +static void get_org_pkt_info(dma_addr_t *buff, u32 *buff_len, struct knav_dma_desc *desc) { - *buff = desc->orig_buff; - *buff_len = desc->orig_len; + *buff = le32_to_cpu(desc->orig_buff); + *buff_len = le32_to_cpu(desc->orig_len); } -static void get_words(u32 *words, int num_words, u32 *desc) +static void get_words(dma_addr_t *words, int num_words, __le32 *desc) { int i; for (i = 0; i < num_words; i++) - words[i] = desc[i]; + words[i] = le32_to_cpu(desc[i]); } -static void set_pkt_info(u32 buff, u32 buff_len, u32 ndesc, +static void set_pkt_info(dma_addr_t buff, u32 buff_len, u32 ndesc, struct knav_dma_desc *desc) { - desc->buff_len = buff_len; - desc->buff = buff; - desc->next_desc = ndesc; + desc->buff_len = cpu_to_le32(buff_len); + desc->buff = cpu_to_le32(buff); + desc->next_desc = cpu_to_le32(ndesc); } static void set_desc_info(u32 desc_info, u32 pkt_info, struct knav_dma_desc *desc) { - desc->desc_info = desc_info; - desc->packet_info = pkt_info; + desc->desc_info = cpu_to_le32(desc_info); + desc->packet_info = cpu_to_le32(pkt_info); } -static void set_pad_info(u32 pad0, u32 pad1, struct knav_dma_desc *desc) +static void set_pad_info(u32 pad0, u32 pad1, u32 pad2, struct knav_dma_desc *desc) { - desc->pad[0] = pad0; - desc->pad[1] = pad1; + desc->pad[0] = cpu_to_le32(pad0); + desc->pad[1] = cpu_to_le32(pad1); + desc->pad[2] = cpu_to_le32(pad1); } -static void set_org_pkt_info(u32 buff, u32 buff_len, +static void set_org_pkt_info(dma_addr_t buff, u32 buff_len, struct knav_dma_desc *desc) { - desc->orig_buff = buff; - desc->orig_len = buff_len; + desc->orig_buff = cpu_to_le32(buff); + desc->orig_len = cpu_to_le32(buff_len); } -static void set_words(u32 *words, int num_words, u32 *desc) +static void set_words(u32 *words, int num_words, __le32 *desc) { int i; for (i = 0; i < num_words; i++) - desc[i] = words[i]; + desc[i] = cpu_to_le32(words[i]); } /* Read the e-fuse value as 32 bit values to be endian independent */ @@ -570,7 
+581,7 @@ static void netcp_free_rx_desc_chain(struct netcp_intf *netcp, dma_addr_t dma_desc, dma_buf; unsigned int buf_len, dma_sz = sizeof(*ndesc); void *buf_ptr; - u32 tmp; + u32 pad[2]; get_words(&dma_desc, 1, &desc->next_desc); @@ -580,14 +591,15 @@ static void netcp_free_rx_desc_chain(struct netcp_intf *netcp, dev_err(netcp->ndev_dev, "failed to unmap Rx desc\n"); break; } - get_pkt_info(&dma_buf, &tmp, &dma_desc, ndesc); - get_pad_info((u32 *)&buf_ptr, &tmp, ndesc); + get_pad_ptr(&buf_ptr, ndesc); dma_unmap_page(netcp->dev, dma_buf, PAGE_SIZE, DMA_FROM_DEVICE); __free_page(buf_ptr); knav_pool_desc_put(netcp->rx_pool, desc); } - get_pad_info((u32 *)&buf_ptr, &buf_len, desc); + get_pad_info(&pad[0], &pad[1], &buf_len, desc); + buf_ptr = (void *)(uintptr_t)(pad[0] + ((u64)pad[1] << 32)); + if (buf_ptr) netcp_frag_free(buf_len <= PAGE_SIZE, buf_ptr); knav_pool_desc_put(netcp->rx_pool, desc); @@ -626,7 +638,6 @@ static int netcp_process_one_rx_packet(struct netcp_intf *netcp) struct netcp_packet p_info; struct sk_buff *skb; void *org_buf_ptr; - u32 tmp; dma_desc = knav_queue_pop(netcp->rx_queue, &dma_sz); if (!dma_desc) @@ -639,7 +650,7 @@ static int netcp_process_one_rx_packet(struct netcp_intf *netcp) } get_pkt_info(&dma_buff, &buf_len, &dma_desc, desc); - get_pad_info((u32 *)&org_buf_ptr, &org_buf_len, desc); + get_pad_ptr(&org_buf_ptr, desc); if (unlikely(!org_buf_ptr)) { dev_err(netcp->ndev_dev, "NULL bufptr in desc\n"); @@ -664,6 +675,7 @@ static int netcp_process_one_rx_packet(struct netcp_intf *netcp) /* Fill in the page fragment list */ while (dma_desc) { struct page *page; + void *ptr; ndesc = knav_pool_desc_unmap(netcp->rx_pool, dma_desc, dma_sz); if (unlikely(!ndesc)) { @@ -672,14 +684,15 @@ static int netcp_process_one_rx_packet(struct netcp_intf *netcp) } get_pkt_info(&dma_buff, &buf_len, &dma_desc, ndesc); - get_pad_info((u32 *)&page, &tmp, ndesc); + get_pad_ptr(ptr, ndesc); + page = ptr; if (likely(dma_buff && buf_len && page)) { dma_unmap_page(netcp->dev, dma_buff, PAGE_SIZE, DMA_FROM_DEVICE); } else { - dev_err(netcp->ndev_dev, "Bad Rx desc dma_buff(%p), len(%d), page(%p)\n", - (void *)dma_buff, buf_len, page); + dev_err(netcp->ndev_dev, "Bad Rx desc dma_buff(%pad), len(%d), page(%p)\n", + &dma_buff, buf_len, page); goto free_desc; } @@ -750,7 +763,6 @@ static void netcp_free_rx_buf(struct netcp_intf *netcp, int fdq) unsigned int buf_len, dma_sz; dma_addr_t dma; void *buf_ptr; - u32 tmp; /* Allocate descriptor */ while ((dma = knav_queue_pop(netcp->rx_fdq[fdq], &dma_sz))) { @@ -761,7 +773,7 @@ static void netcp_free_rx_buf(struct netcp_intf *netcp, int fdq) } get_org_pkt_info(&dma, &buf_len, desc); - get_pad_info((u32 *)&buf_ptr, &tmp, desc); + get_pad_ptr(buf_ptr, desc); if (unlikely(!dma)) { dev_err(netcp->ndev_dev, "NULL orig_buff in desc\n"); @@ -813,7 +825,7 @@ static int netcp_allocate_rx_buf(struct netcp_intf *netcp, int fdq) struct page *page; dma_addr_t dma; void *bufptr; - u32 pad[2]; + u32 pad[3]; /* Allocate descriptor */ hwdesc = knav_pool_desc_get(netcp->rx_pool); @@ -830,7 +842,7 @@ static int netcp_allocate_rx_buf(struct netcp_intf *netcp, int fdq) SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); bufptr = netdev_alloc_frag(primary_buf_len); - pad[1] = primary_buf_len; + pad[2] = primary_buf_len; if (unlikely(!bufptr)) { dev_warn_ratelimited(netcp->ndev_dev, @@ -842,7 +854,8 @@ static int netcp_allocate_rx_buf(struct netcp_intf *netcp, int fdq) if (unlikely(dma_mapping_error(netcp->dev, dma))) goto fail; - pad[0] = (u32)bufptr; + pad[0] = 
lower_32_bits((uintptr_t)bufptr); + pad[1] = upper_32_bits((uintptr_t)bufptr); } else { /* Allocate a secondary receive queue entry */ @@ -853,8 +866,9 @@ static int netcp_allocate_rx_buf(struct netcp_intf *netcp, int fdq) } buf_len = PAGE_SIZE; dma = dma_map_page(netcp->dev, page, 0, buf_len, DMA_TO_DEVICE); - pad[0] = (u32)page; - pad[1] = 0; + pad[0] = lower_32_bits(dma); + pad[1] = upper_32_bits(dma); + pad[2] = 0; } desc_info = KNAV_DMA_DESC_PS_INFO_IN_DESC; @@ -864,7 +878,7 @@ static int netcp_allocate_rx_buf(struct netcp_intf *netcp, int fdq) pkt_info |= (netcp->rx_queue_id & KNAV_DMA_DESC_RETQ_MASK) << KNAV_DMA_DESC_RETQ_SHIFT; set_org_pkt_info(dma, buf_len, hwdesc); - set_pad_info(pad[0], pad[1], hwdesc); + set_pad_info(pad[0], pad[1], pad[2], hwdesc); set_desc_info(desc_info, pkt_info, hwdesc); /* Push to FDQs */ @@ -935,8 +949,8 @@ static void netcp_free_tx_desc_chain(struct netcp_intf *netcp, dma_unmap_single(netcp->dev, dma_buf, buf_len, DMA_TO_DEVICE); else - dev_warn(netcp->ndev_dev, "bad Tx desc buf(%p), len(%d)\n", - (void *)dma_buf, buf_len); + dev_warn(netcp->ndev_dev, "bad Tx desc buf(%pad), len(%d)\n", + &dma_buf, buf_len); knav_pool_desc_put(netcp->tx_pool, ndesc); ndesc = NULL; @@ -953,11 +967,11 @@ static int netcp_process_tx_compl_packets(struct netcp_intf *netcp, unsigned int budget) { struct knav_dma_desc *desc; + void *ptr; struct sk_buff *skb; unsigned int dma_sz; dma_addr_t dma; int pkts = 0; - u32 tmp; while (budget--) { dma = knav_queue_pop(netcp->tx_compl_q, &dma_sz); @@ -970,7 +984,8 @@ static int netcp_process_tx_compl_packets(struct netcp_intf *netcp, continue; } - get_pad_info((u32 *)&skb, &tmp, desc); + get_pad_ptr(&ptr, desc); + skb = ptr; netcp_free_tx_desc_chain(netcp, desc, dma_sz); if (!skb) { dev_err(netcp->ndev_dev, "No skb in Tx desc\n"); @@ -1059,6 +1074,7 @@ netcp_tx_map_skb(struct sk_buff *skb, struct netcp_intf *netcp) u32 page_offset = frag->page_offset; u32 buf_len = skb_frag_size(frag); dma_addr_t desc_dma; + u32 desc_dma_32; u32 pkt_info; dma_addr = dma_map_page(dev, page, page_offset, buf_len, @@ -1075,13 +1091,13 @@ netcp_tx_map_skb(struct sk_buff *skb, struct netcp_intf *netcp) goto free_descs; } - desc_dma = knav_pool_desc_virt_to_dma(netcp->tx_pool, - (void *)ndesc); + desc_dma = knav_pool_desc_virt_to_dma(netcp->tx_pool, ndesc); pkt_info = (netcp->tx_compl_qid & KNAV_DMA_DESC_RETQ_MASK) << KNAV_DMA_DESC_RETQ_SHIFT; set_pkt_info(dma_addr, buf_len, 0, ndesc); - set_words(&desc_dma, 1, &pdesc->next_desc); + desc_dma_32 = (u32)desc_dma; + set_words(&desc_dma_32, 1, &pdesc->next_desc); pkt_len += buf_len; if (pdesc != desc) knav_pool_desc_map(netcp->tx_pool, pdesc, @@ -1173,11 +1189,14 @@ static int netcp_tx_submit_skb(struct netcp_intf *netcp, } set_words(&tmp, 1, &desc->packet_info); - set_words((u32 *)&skb, 1, &desc->pad[0]); + tmp = lower_32_bits((uintptr_t)&skb); + set_words(&tmp, 1, &desc->pad[0]); + tmp = upper_32_bits((uintptr_t)&skb); + set_words(&tmp, 1, &desc->pad[1]); if (tx_pipe->flags & SWITCH_TO_PORT_IN_TAGINFO) { tmp = tx_pipe->switch_to_port; - set_words((u32 *)&tmp, 1, &desc->tag_info); + set_words(&tmp, 1, &desc->tag_info); } /* submit packet descriptor */ diff --git a/include/linux/soc/ti/knav_dma.h b/include/linux/soc/ti/knav_dma.h index dad035c16d94..343c13ac4f71 100644 --- a/include/linux/soc/ti/knav_dma.h +++ b/include/linux/soc/ti/knav_dma.h @@ -144,17 +144,17 @@ struct knav_dma_cfg { * @psdata: Protocol specific */ struct knav_dma_desc { - u32 desc_info; - u32 tag_info; - u32 packet_info; - u32 buff_len; - u32 
buff; - u32 next_desc; - u32 orig_len; - u32 orig_buff; - u32 epib[KNAV_DMA_NUM_EPIB_WORDS]; - u32 psdata[KNAV_DMA_NUM_PS_WORDS]; - u32 pad[4]; + __le32 desc_info; + __le32 tag_info; + __le32 packet_info; + __le32 buff_len; + __le32 buff; + __le32 next_desc; + __le32 orig_len; + __le32 orig_buff; + __le32 epib[KNAV_DMA_NUM_EPIB_WORDS]; + __le32 psdata[KNAV_DMA_NUM_PS_WORDS]; + __le32 pad[4]; } ____cacheline_aligned; #if IS_ENABLED(CONFIG_KEYSTONE_NAVIGATOR_DMA) -- cgit v1.2.3-71-gd317 From 26a8145390b36cbe97a5bd0b9e97249f21af6aea Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 10 Dec 2015 17:12:39 +0200 Subject: net/mlx5_core: Introduce flow steering firmware commands Introduce new Flow Steering (FS) firmware commands, in-order to support the new flow steering infrastructure. Signed-off-by: Maor Gottlieb Signed-off-by: Moni Shoua Signed-off-by: Matan Barak Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/Makefile | 2 +- drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 239 ++++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h | 65 ++++++ drivers/net/ethernet/mellanox/mlx5/core/fs_core.h | 81 ++++++++ include/linux/mlx5/fs.h | 47 +++++ include/linux/mlx5/mlx5_ifc.h | 32 ++- 6 files changed, 455 insertions(+), 11 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/fs_core.h create mode 100644 include/linux/mlx5/fs.h (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index a0755919ccaf..be10592e0518 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -2,7 +2,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ - mad.o transobj.o vport.o sriov.o + mad.o transobj.o vport.o sriov.o fs_cmd.o mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o flow_table.o eswitch.o \ en_main.o en_flow_table.o en_ethtool.o en_tx.o en_rx.o \ en_txrx.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c new file mode 100644 index 000000000000..5096f4f336bd --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -0,0 +1,239 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
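The netcp patch above stops smuggling CPU pointers through single 32-bit descriptor words and instead spreads them across two little-endian padding words. A standalone sketch of that round trip (kernel-style C, assuming <linux/kernel.h>, <linux/types.h> and <asm/byteorder.h>; the helper names are made up, the driver's own helpers are set_pad_info() and get_pad_ptr()):

#include <linux/kernel.h>	/* lower_32_bits() / upper_32_bits() */
#include <linux/types.h>	/* __le32, u64, uintptr_t */
#include <asm/byteorder.h>	/* cpu_to_le32() / le32_to_cpu() */

/* Store a CPU pointer as two fixed-endian 32-bit descriptor words. */
static void pad_store_ptr(__le32 pad[2], void *ptr)
{
	pad[0] = cpu_to_le32(lower_32_bits((uintptr_t)ptr));
	pad[1] = cpu_to_le32(upper_32_bits((uintptr_t)ptr));
}

/* Rebuild the pointer; this works for 32-bit and 64-bit CPUs of either
 * endianness, because the descriptor layout no longer depends on them. */
static void *pad_load_ptr(const __le32 pad[2])
{
	u64 v = le32_to_cpu(pad[0]) | ((u64)le32_to_cpu(pad[1]) << 32);

	return (void *)(uintptr_t)v;
}
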
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include + +#include "fs_core.h" +#include "fs_cmd.h" +#include "mlx5_core.h" + +int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, + enum fs_flow_table_type type, unsigned int level, + unsigned int log_size, unsigned int *table_id) +{ + u32 out[MLX5_ST_SZ_DW(create_flow_table_out)]; + u32 in[MLX5_ST_SZ_DW(create_flow_table_in)]; + int err; + + memset(in, 0, sizeof(in)); + + MLX5_SET(create_flow_table_in, in, opcode, + MLX5_CMD_OP_CREATE_FLOW_TABLE); + + MLX5_SET(create_flow_table_in, in, table_type, type); + MLX5_SET(create_flow_table_in, in, level, level); + MLX5_SET(create_flow_table_in, in, log_size, log_size); + + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, + sizeof(out)); + + if (!err) + *table_id = MLX5_GET(create_flow_table_out, out, + table_id); + return err; +} + +int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft) +{ + u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)]; + u32 out[MLX5_ST_SZ_DW(destroy_flow_table_out)]; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(destroy_flow_table_in, in, opcode, + MLX5_CMD_OP_DESTROY_FLOW_TABLE); + MLX5_SET(destroy_flow_table_in, in, table_type, ft->type); + MLX5_SET(destroy_flow_table_in, in, table_id, ft->id); + + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, + sizeof(out)); +} + +int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + u32 *in, + unsigned int *group_id) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + u32 out[MLX5_ST_SZ_DW(create_flow_group_out)]; + int err; + + memset(out, 0, sizeof(out)); + + MLX5_SET(create_flow_group_in, in, opcode, + MLX5_CMD_OP_CREATE_FLOW_GROUP); + MLX5_SET(create_flow_group_in, in, table_type, ft->type); + MLX5_SET(create_flow_group_in, in, table_id, ft->id); + + err = mlx5_cmd_exec_check_status(dev, in, + inlen, out, + sizeof(out)); + if (!err) + *group_id = MLX5_GET(create_flow_group_out, out, + group_id); + + return err; +} + +int mlx5_cmd_destroy_flow_group(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + unsigned int group_id) +{ + u32 out[MLX5_ST_SZ_DW(destroy_flow_group_out)]; + u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)]; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(destroy_flow_group_in, in, opcode, + MLX5_CMD_OP_DESTROY_FLOW_GROUP); + MLX5_SET(destroy_flow_group_in, in, table_type, ft->type); + MLX5_SET(destroy_flow_group_in, in, table_id, ft->id); + MLX5_SET(destroy_flow_group_in, in, group_id, group_id); + + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, + sizeof(out)); +} + +static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, + int opmod, int modify_mask, + struct mlx5_flow_table *ft, + unsigned group_id, + struct fs_fte *fte) +{ + unsigned int inlen = MLX5_ST_SZ_BYTES(set_fte_in) + + fte->dests_size * MLX5_ST_SZ_BYTES(dest_format_struct); + u32 out[MLX5_ST_SZ_DW(set_fte_out)]; + struct mlx5_flow_rule *dst; + void *in_flow_context; + void 
*in_match_value; + void *in_dests; + u32 *in; + int err; + + in = mlx5_vzalloc(inlen); + if (!in) { + mlx5_core_warn(dev, "failed to allocate inbox\n"); + return -ENOMEM; + } + + MLX5_SET(set_fte_in, in, opcode, MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY); + MLX5_SET(set_fte_in, in, op_mod, opmod); + MLX5_SET(set_fte_in, in, modify_enable_mask, modify_mask); + MLX5_SET(set_fte_in, in, table_type, ft->type); + MLX5_SET(set_fte_in, in, table_id, ft->id); + MLX5_SET(set_fte_in, in, flow_index, fte->index); + + in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context); + MLX5_SET(flow_context, in_flow_context, group_id, group_id); + MLX5_SET(flow_context, in_flow_context, flow_tag, fte->flow_tag); + MLX5_SET(flow_context, in_flow_context, action, fte->action); + MLX5_SET(flow_context, in_flow_context, destination_list_size, + fte->dests_size); + in_match_value = MLX5_ADDR_OF(flow_context, in_flow_context, + match_value); + memcpy(in_match_value, &fte->val, MLX5_ST_SZ_BYTES(fte_match_param)); + + in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination); + list_for_each_entry(dst, &fte->node.children, node.list) { + unsigned int id; + + MLX5_SET(dest_format_struct, in_dests, destination_type, + dst->dest_attr.type); + if (dst->dest_attr.type == + MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) + id = dst->dest_attr.ft->id; + else + id = dst->dest_attr.tir_num; + MLX5_SET(dest_format_struct, in_dests, destination_id, id); + in_dests += MLX5_ST_SZ_BYTES(dest_format_struct); + } + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec_check_status(dev, in, inlen, out, + sizeof(out)); + kvfree(in); + + return err; +} + +int mlx5_cmd_create_fte(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + unsigned group_id, + struct fs_fte *fte) +{ + return mlx5_cmd_set_fte(dev, 0, 0, ft, group_id, fte); +} + +int mlx5_cmd_update_fte(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + unsigned group_id, + struct fs_fte *fte) +{ + int opmod; + int modify_mask; + int atomic_mod_cap = MLX5_CAP_FLOWTABLE(dev, + flow_table_properties_nic_receive. + flow_modify_en); + if (!atomic_mod_cap) + return -ENOTSUPP; + opmod = 1; + modify_mask = 1 << + MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST; + + return mlx5_cmd_set_fte(dev, opmod, modify_mask, ft, group_id, fte); +} + +int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + unsigned int index) +{ + u32 out[MLX5_ST_SZ_DW(delete_fte_out)]; + u32 in[MLX5_ST_SZ_DW(delete_fte_in)]; + int err; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(delete_fte_in, in, opcode, MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY); + MLX5_SET(delete_fte_in, in, table_type, ft->type); + MLX5_SET(delete_fte_in, in, table_id, ft->id); + MLX5_SET(delete_fte_in, in, flow_index, index); + + err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); + + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h new file mode 100644 index 000000000000..f39304ede186 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _MLX5_FS_CMD_ +#define _MLX5_FS_CMD_ + +int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, + enum fs_flow_table_type type, unsigned int level, + unsigned int log_size, unsigned int *table_id); + +int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft); + +int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + u32 *in, unsigned int *group_id); + +int mlx5_cmd_destroy_flow_group(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + unsigned int group_id); + +int mlx5_cmd_create_fte(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + unsigned group_id, + struct fs_fte *fte); + +int mlx5_cmd_update_fte(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + unsigned group_id, + struct fs_fte *fte); + +int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + unsigned int index); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h new file mode 100644 index 000000000000..e8b34a9b147b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _MLX5_FS_CORE_ +#define _MLX5_FS_CORE_ + +#include + +enum fs_node_type { + FS_TYPE_NAMESPACE, + FS_TYPE_PRIO, + FS_TYPE_FLOW_TABLE, + FS_TYPE_FLOW_GROUP, + FS_TYPE_FLOW_ENTRY, + FS_TYPE_FLOW_DEST +}; + +enum fs_flow_table_type { + FS_FT_NIC_RX = 0x0, +}; + +enum fs_fte_status { + FS_FTE_STATUS_EXISTING = 1UL << 0, +}; + +struct fs_node { + struct list_head list; + struct list_head children; + enum fs_node_type type; +}; + +struct mlx5_flow_rule { + struct fs_node node; + struct mlx5_flow_destination dest_attr; +}; + +struct mlx5_flow_table { + struct fs_node node; + u32 id; + enum fs_flow_table_type type; +}; + +struct fs_fte { + struct fs_node node; + u32 val[MLX5_ST_SZ_DW(fte_match_param)]; + u32 dests_size; + u32 flow_tag; + u32 index; + u32 action; +}; + +#endif diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h new file mode 100644 index 000000000000..34fd8dc0b3e1 --- /dev/null +++ b/include/linux/mlx5/fs.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef _MLX5_FS_ +#define _MLX5_FS_ + +#include + +struct mlx5_flow_table; + +struct mlx5_flow_destination { + enum mlx5_flow_destination_type type; + union { + u32 tir_num; + struct mlx5_flow_table *ft; + }; +}; +#endif diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index f5d94495758a..131a2737cfa3 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -256,25 +256,27 @@ struct mlx5_ifc_flow_table_fields_supported_bits { struct mlx5_ifc_flow_table_prop_layout_bits { u8 ft_support[0x1]; - u8 reserved_0[0x1f]; + u8 reserved_0[0x2]; + u8 flow_modify_en[0x1]; + u8 reserved_1[0x1c]; - u8 reserved_1[0x2]; + u8 reserved_2[0x2]; u8 log_max_ft_size[0x6]; - u8 reserved_2[0x10]; + u8 reserved_3[0x10]; u8 max_ft_level[0x8]; - u8 reserved_3[0x20]; + u8 reserved_4[0x20]; - u8 reserved_4[0x18]; + u8 reserved_5[0x18]; u8 log_max_ft_num[0x8]; - u8 reserved_5[0x18]; + u8 reserved_6[0x18]; u8 log_max_destination[0x8]; - u8 reserved_6[0x18]; + u8 reserved_7[0x18]; u8 log_max_flow[0x8]; - u8 reserved_7[0x40]; + u8 reserved_8[0x40]; struct mlx5_ifc_flow_table_fields_supported_bits ft_field_support; @@ -2843,6 +2845,13 @@ struct mlx5_ifc_set_hca_cap_in_bits { union mlx5_ifc_hca_cap_union_bits capability; }; +enum { + MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION = 0x0, + MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_TAG = 0x1, + MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST = 0x2, + MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS = 0x3 +}; + struct mlx5_ifc_set_fte_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; @@ -2867,11 +2876,14 @@ struct mlx5_ifc_set_fte_in_bits { u8 reserved_4[0x8]; u8 table_id[0x18]; - u8 reserved_5[0x40]; + u8 reserved_5[0x18]; + u8 modify_enable_mask[0x8]; + + u8 reserved_6[0x20]; u8 flow_index[0x20]; - u8 reserved_6[0xe0]; + u8 reserved_7[0xe0]; struct mlx5_ifc_flow_context_bits flow_context; }; -- cgit v1.2.3-71-gd317 From 2530236303d9e705db6a28eb9a10c8d79b288b37 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 10 Dec 2015 17:12:43 +0200 Subject: net/mlx5_core: Flow steering tree initialization Flow steering initialization is based on static tree which illustrates the flow steering tree when the driver is loaded. The initialization considers the max supported flow table level of the device, a minimum of 2 kernel flow tables(vlan and mac) are required to have kernel flow table functionality. The tree structures when the driver is loaded: root_namespace(receive nic) | priority-0 (kernel priority) | namespace(kernel namespace) | priority-0 (flow tables priority) In the following patches, When the EN driver will use the flow steering API, it create two flow tables and their flow groups under priority-0(flow tables priority). Signed-off-by: Maor Gottlieb Signed-off-by: Moni Shoua Signed-off-by: Matan Barak Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 374 ++++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/fs_core.h | 4 + include/linux/mlx5/driver.h | 2 + include/linux/mlx5/fs.h | 8 + 4 files changed, 388 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 1828351102c5..4264e8b34b76 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -37,6 +37,54 @@ #include "fs_core.h" #include "fs_cmd.h" +#define INIT_TREE_NODE_ARRAY_SIZE(...) 
(sizeof((struct init_tree_node[]){__VA_ARGS__}) /\ + sizeof(struct init_tree_node)) + +#define INIT_PRIO(min_level_val, max_ft_val,\ + start_level_val, ...) {.type = FS_TYPE_PRIO,\ + .min_ft_level = min_level_val,\ + .start_level = start_level_val,\ + .max_ft = max_ft_val,\ + .children = (struct init_tree_node[]) {__VA_ARGS__},\ + .ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \ +} + +#define ADD_PRIO(min_level_val, max_ft_val, start_level_val, ...)\ + INIT_PRIO(min_level_val, max_ft_val, start_level_val,\ + __VA_ARGS__)\ + +#define ADD_FT_PRIO(max_ft_val, start_level_val, ...)\ + INIT_PRIO(0, max_ft_val, start_level_val,\ + __VA_ARGS__)\ + +#define ADD_NS(...) {.type = FS_TYPE_NAMESPACE,\ + .children = (struct init_tree_node[]) {__VA_ARGS__},\ + .ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \ +} + +#define KERNEL_START_LEVEL 0 +#define KERNEL_P0_START_LEVEL KERNEL_START_LEVEL +#define KERNEL_MAX_FT 2 +#define KENREL_MIN_LEVEL 2 +static struct init_tree_node { + enum fs_node_type type; + struct init_tree_node *children; + int ar_size; + int min_ft_level; + int prio; + int max_ft; + int start_level; +} root_fs = { + .type = FS_TYPE_NAMESPACE, + .ar_size = 1, + .children = (struct init_tree_node[]) { + ADD_PRIO(KENREL_MIN_LEVEL, KERNEL_MAX_FT, + KERNEL_START_LEVEL, + ADD_NS(ADD_FT_PRIO(KERNEL_MAX_FT, + KERNEL_P0_START_LEVEL))), + } +}; + static void del_rule(struct fs_node *node); static void del_flow_table(struct fs_node *node); static void del_flow_group(struct fs_node *node); @@ -671,3 +719,329 @@ static void mlx5_destroy_flow_group(struct mlx5_flow_group *fg) mlx5_core_warn(get_dev(&fg->node), "Flow group %d wasn't destroyed, refcount > 1\n", fg->id); } + +static struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, + enum mlx5_flow_namespace_type type) +{ + struct mlx5_flow_root_namespace *root_ns = dev->priv.root_ns; + int prio; + static struct fs_prio *fs_prio; + struct mlx5_flow_namespace *ns; + + if (!root_ns) + return NULL; + + switch (type) { + case MLX5_FLOW_NAMESPACE_KERNEL: + prio = 0; + break; + case MLX5_FLOW_NAMESPACE_FDB: + if (dev->priv.fdb_root_ns) + return &dev->priv.fdb_root_ns->ns; + else + return NULL; + default: + return NULL; + } + + fs_prio = find_prio(&root_ns->ns, prio); + if (!fs_prio) + return NULL; + + ns = list_first_entry(&fs_prio->node.children, + typeof(*ns), + node.list); + + return ns; +} + +static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns, + unsigned prio, int max_ft, + int start_level) +{ + struct fs_prio *fs_prio; + + fs_prio = kzalloc(sizeof(*fs_prio), GFP_KERNEL); + if (!fs_prio) + return ERR_PTR(-ENOMEM); + + fs_prio->node.type = FS_TYPE_PRIO; + tree_init_node(&fs_prio->node, 1, NULL); + tree_add_node(&fs_prio->node, &ns->node); + fs_prio->max_ft = max_ft; + fs_prio->prio = prio; + fs_prio->start_level = start_level; + list_add_tail(&fs_prio->node.list, &ns->node.children); + + return fs_prio; +} + +static struct mlx5_flow_namespace *fs_init_namespace(struct mlx5_flow_namespace + *ns) +{ + ns->node.type = FS_TYPE_NAMESPACE; + + return ns; +} + +static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio) +{ + struct mlx5_flow_namespace *ns; + + ns = kzalloc(sizeof(*ns), GFP_KERNEL); + if (!ns) + return ERR_PTR(-ENOMEM); + + fs_init_namespace(ns); + tree_init_node(&ns->node, 1, NULL); + tree_add_node(&ns->node, &prio->node); + list_add_tail(&ns->node.list, &prio->node.children); + + return ns; +} + +static int init_root_tree_recursive(int max_ft_level, struct init_tree_node 
*init_node, + struct fs_node *fs_parent_node, + struct init_tree_node *init_parent_node, + int index) +{ + struct mlx5_flow_namespace *fs_ns; + struct fs_prio *fs_prio; + struct fs_node *base; + int i; + int err; + + if (init_node->type == FS_TYPE_PRIO) { + if (init_node->min_ft_level > max_ft_level) + return -ENOTSUPP; + + fs_get_obj(fs_ns, fs_parent_node); + fs_prio = fs_create_prio(fs_ns, index, init_node->max_ft, + init_node->start_level); + if (IS_ERR(fs_prio)) + return PTR_ERR(fs_prio); + base = &fs_prio->node; + } else if (init_node->type == FS_TYPE_NAMESPACE) { + fs_get_obj(fs_prio, fs_parent_node); + fs_ns = fs_create_namespace(fs_prio); + if (IS_ERR(fs_ns)) + return PTR_ERR(fs_ns); + base = &fs_ns->node; + } else { + return -EINVAL; + } + for (i = 0; i < init_node->ar_size; i++) { + err = init_root_tree_recursive(max_ft_level, + &init_node->children[i], base, + init_node, i); + if (err) + return err; + } + + return 0; +} + +static int init_root_tree(int max_ft_level, struct init_tree_node *init_node, + struct fs_node *fs_parent_node) +{ + int i; + struct mlx5_flow_namespace *fs_ns; + int err; + + fs_get_obj(fs_ns, fs_parent_node); + for (i = 0; i < init_node->ar_size; i++) { + err = init_root_tree_recursive(max_ft_level, + &init_node->children[i], + &fs_ns->node, + init_node, i); + if (err) + return err; + } + return 0; +} + +static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_core_dev *dev, + enum fs_flow_table_type + table_type) +{ + struct mlx5_flow_root_namespace *root_ns; + struct mlx5_flow_namespace *ns; + + /* create the root namespace */ + root_ns = mlx5_vzalloc(sizeof(*root_ns)); + if (!root_ns) + return NULL; + + root_ns->dev = dev; + root_ns->table_type = table_type; + + ns = &root_ns->ns; + fs_init_namespace(ns); + tree_init_node(&ns->node, 1, NULL); + tree_add_node(&ns->node, NULL); + + return root_ns; +} + +static int init_root_ns(struct mlx5_core_dev *dev) +{ + int max_ft_level = MLX5_CAP_FLOWTABLE(dev, + flow_table_properties_nic_receive. 
+ max_ft_level); + + dev->priv.root_ns = create_root_ns(dev, FS_FT_NIC_RX); + if (IS_ERR_OR_NULL(dev->priv.root_ns)) + goto cleanup; + + if (init_root_tree(max_ft_level, &root_fs, &dev->priv.root_ns->ns.node)) + goto cleanup; + + return 0; + +cleanup: + mlx5_cleanup_fs(dev); + return -ENOMEM; +} + +static void cleanup_single_prio_root_ns(struct mlx5_core_dev *dev, + struct mlx5_flow_root_namespace *root_ns) +{ + struct fs_node *prio; + + if (!root_ns) + return; + + if (!list_empty(&root_ns->ns.node.children)) { + prio = list_first_entry(&root_ns->ns.node.children, + struct fs_node, + list); + if (tree_remove_node(prio)) + mlx5_core_warn(dev, + "Flow steering priority wasn't destroyed, refcount > 1\n"); + } + if (tree_remove_node(&root_ns->ns.node)) + mlx5_core_warn(dev, + "Flow steering namespace wasn't destroyed, refcount > 1\n"); + root_ns = NULL; +} + +static void cleanup_root_ns(struct mlx5_core_dev *dev) +{ + struct mlx5_flow_root_namespace *root_ns = dev->priv.root_ns; + struct fs_prio *iter_prio; + + if (!MLX5_CAP_GEN(dev, nic_flow_table)) + return; + + if (!root_ns) + return; + + /* stage 1 */ + fs_for_each_prio(iter_prio, &root_ns->ns) { + struct fs_node *node; + struct mlx5_flow_namespace *iter_ns; + + fs_for_each_ns_or_ft(node, iter_prio) { + if (node->type == FS_TYPE_FLOW_TABLE) + continue; + fs_get_obj(iter_ns, node); + while (!list_empty(&iter_ns->node.children)) { + struct fs_prio *obj_iter_prio2; + struct fs_node *iter_prio2 = + list_first_entry(&iter_ns->node.children, + struct fs_node, + list); + + fs_get_obj(obj_iter_prio2, iter_prio2); + if (tree_remove_node(iter_prio2)) { + mlx5_core_warn(dev, + "Priority %d wasn't destroyed, refcount > 1\n", + obj_iter_prio2->prio); + return; + } + } + } + } + + /* stage 2 */ + fs_for_each_prio(iter_prio, &root_ns->ns) { + while (!list_empty(&iter_prio->node.children)) { + struct fs_node *iter_ns = + list_first_entry(&iter_prio->node.children, + struct fs_node, + list); + if (tree_remove_node(iter_ns)) { + mlx5_core_warn(dev, + "Namespace wasn't destroyed, refcount > 1\n"); + return; + } + } + } + + /* stage 3 */ + while (!list_empty(&root_ns->ns.node.children)) { + struct fs_prio *obj_prio_node; + struct fs_node *prio_node = + list_first_entry(&root_ns->ns.node.children, + struct fs_node, + list); + + fs_get_obj(obj_prio_node, prio_node); + if (tree_remove_node(prio_node)) { + mlx5_core_warn(dev, + "Priority %d wasn't destroyed, refcount > 1\n", + obj_prio_node->prio); + return; + } + } + + if (tree_remove_node(&root_ns->ns.node)) { + mlx5_core_warn(dev, + "root namespace wasn't destroyed, refcount > 1\n"); + return; + } + + dev->priv.root_ns = NULL; +} + +void mlx5_cleanup_fs(struct mlx5_core_dev *dev) +{ + cleanup_root_ns(dev); + cleanup_single_prio_root_ns(dev, dev->priv.fdb_root_ns); +} + +static int init_fdb_root_ns(struct mlx5_core_dev *dev) +{ + struct fs_prio *prio; + + dev->priv.fdb_root_ns = create_root_ns(dev, FS_FT_FDB); + if (!dev->priv.fdb_root_ns) + return -ENOMEM; + + /* create 1 prio*/ + prio = fs_create_prio(&dev->priv.fdb_root_ns->ns, 0, 1, 0); + if (IS_ERR(prio)) { + cleanup_single_prio_root_ns(dev, dev->priv.fdb_root_ns); + return PTR_ERR(prio); + } else { + return 0; + } +} + +int mlx5_init_fs(struct mlx5_core_dev *dev) +{ + int err = 0; + + if (MLX5_CAP_GEN(dev, nic_flow_table)) { + err = init_root_ns(dev); + if (err) + return err; + } + if (MLX5_CAP_GEN(dev, eswitch_flow_table)) { + err = init_fdb_root_ns(dev); + if (err) + cleanup_root_ns(dev); + } + + return err; +} diff --git 
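The static init_tree_node tree built earlier with the ADD_PRIO/ADD_NS/ADD_FT_PRIO macros is easier to follow once expanded by hand. Roughly (a manual expansion for readability only, using KERNEL_START_LEVEL = 0, KERNEL_MAX_FT = 2 and KENREL_MIN_LEVEL = 2 as defined above), root_fs describes the receive root namespace holding one kernel priority, which holds one namespace with a single flow-table priority:

static struct init_tree_node root_fs_expanded = {
	.type     = FS_TYPE_NAMESPACE,
	.ar_size  = 1,
	.children = (struct init_tree_node[]) {
		{	/* priority-0: the kernel priority */
			.type         = FS_TYPE_PRIO,
			.min_ft_level = 2,	/* KENREL_MIN_LEVEL */
			.max_ft       = 2,	/* KERNEL_MAX_FT */
			.start_level  = 0,
			.ar_size      = 1,
			.children     = (struct init_tree_node[]) {
				{	/* the kernel namespace */
					.type     = FS_TYPE_NAMESPACE,
					.ar_size  = 1,
					.children = (struct init_tree_node[]) {
						{	/* priority-0: flow tables priority */
							.type        = FS_TYPE_PRIO,
							.max_ft      = 2,
							.start_level = 0,
						},
					},
				},
			},
		},
	},
};
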
a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 6c27b8ef42b7..4ebb97fd5544 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -46,6 +46,7 @@ enum fs_node_type { enum fs_flow_table_type { FS_FT_NIC_RX = 0x0, + FS_FT_FDB = 0X4, }; enum fs_fte_status { @@ -125,6 +126,9 @@ struct mlx5_flow_root_namespace { struct mlx5_core_dev *dev; }; +int mlx5_init_fs(struct mlx5_core_dev *dev); +void mlx5_cleanup_fs(struct mlx5_core_dev *dev); + #define fs_get_obj(v, _node) {v = container_of((_node), typeof(*v), node); } #define fs_list_for_each_entry(pos, root) \ diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index ac098b6b97bf..2fd7019f69db 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -502,6 +502,8 @@ struct mlx5_priv { struct mlx5_eswitch *eswitch; struct mlx5_core_sriov sriov; unsigned long pci_dev_data; + struct mlx5_flow_root_namespace *root_ns; + struct mlx5_flow_root_namespace *fdb_root_ns; }; enum mlx5_device_state { diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 34fd8dc0b3e1..16ae5233dc7b 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -35,6 +35,13 @@ #include +#define MLX5_FS_DEFAULT_FLOW_TAG 0x0 + +enum mlx5_flow_namespace_type { + MLX5_FLOW_NAMESPACE_KERNEL, + MLX5_FLOW_NAMESPACE_FDB, +}; + struct mlx5_flow_table; struct mlx5_flow_destination { @@ -42,6 +49,7 @@ struct mlx5_flow_destination { union { u32 tir_num; struct mlx5_flow_table *ft; + u32 vport_num; }; }; #endif -- cgit v1.2.3-71-gd317 From 86d722ad2c3bd2f0536b196b7fd67ae2a7e2a492 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 10 Dec 2015 17:12:44 +0200 Subject: net/mlx5: Use flow steering infrastructure for mlx5_en Expose the new flow steering API and remove the old one. Few changes are required: 1. The Ethernet flow steering follows the existing implementation, but uses the new steering API. The old flow steering implementation is removed. 2. Move the E-switch FDB management to use the new API. 3. When driver is loaded call to mlx5_init_fs which initialize the flow steering tree structure, open namespaces for NIC receive and for E-switch FDB. 4. Call to mlx5_cleanup_fs when the driver is unloaded. Signed-off-by: Maor Gottlieb Signed-off-by: Moni Shoua Signed-off-by: Matan Barak Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/Makefile | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en.h | 23 +- .../ethernet/mellanox/mlx5/core/en_flow_table.c | 824 +++++++++++++-------- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 291 ++------ drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 15 +- .../net/ethernet/mellanox/mlx5/core/flow_table.c | 422 ----------- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 26 +- drivers/net/ethernet/mellanox/mlx5/core/main.c | 9 + include/linux/mlx5/flow_table.h | 63 -- include/linux/mlx5/fs.h | 38 + 11 files changed, 633 insertions(+), 1082 deletions(-) delete mode 100644 drivers/net/ethernet/mellanox/mlx5/core/flow_table.c delete mode 100644 include/linux/mlx5/flow_table.h (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 7fc5e2388dec..11ee062965c5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -3,6 +3,6 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o -mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o flow_table.o eswitch.o \ +mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o \ en_main.o en_flow_table.o en_ethtool.o en_tx.o en_rx.o \ en_txrx.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 89313d46952d..f689ce580b44 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -64,6 +64,8 @@ #define MLX5E_UPDATE_STATS_INTERVAL 200 /* msecs */ #define MLX5E_SQ_BF_BUDGET 16 +#define MLX5E_NUM_MAIN_GROUPS 9 + static const char vport_strings[][ETH_GSTRING_LEN] = { /* vport statistics */ "rx_packets", @@ -442,7 +444,7 @@ enum mlx5e_rqt_ix { struct mlx5e_eth_addr_info { u8 addr[ETH_ALEN + 2]; u32 tt_vec; - u32 ft_ix[MLX5E_NUM_TT]; /* flow table index per traffic type */ + struct mlx5_flow_rule *ft_rule[MLX5E_NUM_TT]; }; #define MLX5E_ETH_ADDR_HASH_SIZE (1 << BITS_PER_BYTE) @@ -466,15 +468,22 @@ enum { struct mlx5e_vlan_db { unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; - u32 active_vlans_ft_ix[VLAN_N_VID]; - u32 untagged_rule_ft_ix; - u32 any_vlan_rule_ft_ix; + struct mlx5_flow_rule *active_vlans_rule[VLAN_N_VID]; + struct mlx5_flow_rule *untagged_rule; + struct mlx5_flow_rule *any_vlan_rule; bool filter_disabled; }; struct mlx5e_flow_table { - void *vlan; - void *main; + int num_groups; + struct mlx5_flow_table *t; + struct mlx5_flow_group **g; +}; + +struct mlx5e_flow_tables { + struct mlx5_flow_namespace *ns; + struct mlx5e_flow_table vlan; + struct mlx5e_flow_table main; }; struct mlx5e_priv { @@ -497,7 +506,7 @@ struct mlx5e_priv { u32 rqtn[MLX5E_NUM_RQT]; u32 tirn[MLX5E_NUM_TT]; - struct mlx5e_flow_table ft; + struct mlx5e_flow_tables fts; struct mlx5e_eth_addr_db eth_addr; struct mlx5e_vlan_db vlan; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c b/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c index 5b93c9c6e341..80d81abc4820 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c @@ -34,9 +34,11 @@ #include #include #include -#include +#include #include "en.h" +#define MLX5_SET_CFG(p, f, v) 
MLX5_SET(create_flow_group_in, p, f, v) + enum { MLX5E_FULLMATCH = 0, MLX5E_ALLMULTI = 1, @@ -103,44 +105,38 @@ static void mlx5e_del_eth_addr_from_hash(struct mlx5e_eth_addr_hash_node *hn) static void mlx5e_del_eth_addr_from_flow_table(struct mlx5e_priv *priv, struct mlx5e_eth_addr_info *ai) { - void *ft = priv->ft.main; - if (ai->tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_ESP)) - mlx5_del_flow_table_entry(ft, - ai->ft_ix[MLX5E_TT_IPV6_IPSEC_ESP]); + mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV6_IPSEC_ESP]); if (ai->tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_ESP)) - mlx5_del_flow_table_entry(ft, - ai->ft_ix[MLX5E_TT_IPV4_IPSEC_ESP]); + mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV4_IPSEC_ESP]); if (ai->tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_AH)) - mlx5_del_flow_table_entry(ft, - ai->ft_ix[MLX5E_TT_IPV6_IPSEC_AH]); + mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV6_IPSEC_AH]); if (ai->tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_AH)) - mlx5_del_flow_table_entry(ft, - ai->ft_ix[MLX5E_TT_IPV4_IPSEC_AH]); + mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV4_IPSEC_AH]); if (ai->tt_vec & BIT(MLX5E_TT_IPV6_TCP)) - mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV6_TCP]); + mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV6_TCP]); if (ai->tt_vec & BIT(MLX5E_TT_IPV4_TCP)) - mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV4_TCP]); + mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV4_TCP]); if (ai->tt_vec & BIT(MLX5E_TT_IPV6_UDP)) - mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV6_UDP]); + mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV6_UDP]); if (ai->tt_vec & BIT(MLX5E_TT_IPV4_UDP)) - mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV4_UDP]); + mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV4_UDP]); if (ai->tt_vec & BIT(MLX5E_TT_IPV6)) - mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV6]); + mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV6]); if (ai->tt_vec & BIT(MLX5E_TT_IPV4)) - mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV4]); + mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV4]); if (ai->tt_vec & BIT(MLX5E_TT_ANY)) - mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_ANY]); + mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_ANY]); } static int mlx5e_get_eth_addr_type(u8 *addr) @@ -240,44 +236,34 @@ static u32 mlx5e_get_tt_vec(struct mlx5e_eth_addr_info *ai, int type) } static int __mlx5e_add_eth_addr_rule(struct mlx5e_priv *priv, - struct mlx5e_eth_addr_info *ai, int type, - void *flow_context, void *match_criteria) + struct mlx5e_eth_addr_info *ai, + int type, u32 *mc, u32 *mv) { + struct mlx5_flow_destination dest; u8 match_criteria_enable = 0; - void *match_value; - void *dest; - u8 *dmac; - u8 *match_criteria_dmac; - void *ft = priv->ft.main; - u32 *tirn = priv->tirn; - u32 *ft_ix; - u32 tt_vec; - int err; - - match_value = MLX5_ADDR_OF(flow_context, flow_context, match_value); - dmac = MLX5_ADDR_OF(fte_match_param, match_value, - outer_headers.dmac_47_16); - match_criteria_dmac = MLX5_ADDR_OF(fte_match_param, match_criteria, - outer_headers.dmac_47_16); - dest = MLX5_ADDR_OF(flow_context, flow_context, destination); - - MLX5_SET(flow_context, flow_context, action, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST); - MLX5_SET(flow_context, flow_context, destination_list_size, 1); - MLX5_SET(dest_format_struct, dest, destination_type, - MLX5_FLOW_CONTEXT_DEST_TYPE_TIR); + struct mlx5_flow_rule **rule_p; + struct mlx5_flow_table *ft = priv->fts.main.t; + u8 *mc_dmac = MLX5_ADDR_OF(fte_match_param, mc, + outer_headers.dmac_47_16); + u8 *mv_dmac = MLX5_ADDR_OF(fte_match_param, mv, + outer_headers.dmac_47_16); + u32 *tirn = priv->tirn; + u32 tt_vec; + int err = 0; + + 
dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; switch (type) { case MLX5E_FULLMATCH: match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - memset(match_criteria_dmac, 0xff, ETH_ALEN); - ether_addr_copy(dmac, ai->addr); + eth_broadcast_addr(mc_dmac); + ether_addr_copy(mv_dmac, ai->addr); break; case MLX5E_ALLMULTI: match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - match_criteria_dmac[0] = 0x01; - dmac[0] = 0x01; + mc_dmac[0] = 0x01; + mv_dmac[0] = 0x01; break; case MLX5E_PROMISC: @@ -286,190 +272,165 @@ static int __mlx5e_add_eth_addr_rule(struct mlx5e_priv *priv, tt_vec = mlx5e_get_tt_vec(ai, type); - ft_ix = &ai->ft_ix[MLX5E_TT_ANY]; if (tt_vec & BIT(MLX5E_TT_ANY)) { - MLX5_SET(dest_format_struct, dest, destination_id, - tirn[MLX5E_TT_ANY]); - err = mlx5_add_flow_table_entry(ft, match_criteria_enable, - match_criteria, flow_context, - ft_ix); - if (err) + rule_p = &ai->ft_rule[MLX5E_TT_ANY]; + dest.tir_num = tirn[MLX5E_TT_ANY]; + *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, &dest); + if (IS_ERR_OR_NULL(*rule_p)) goto err_del_ai; - ai->tt_vec |= BIT(MLX5E_TT_ANY); } match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, match_criteria, - outer_headers.ethertype); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); - ft_ix = &ai->ft_ix[MLX5E_TT_IPV4]; if (tt_vec & BIT(MLX5E_TT_IPV4)) { - MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, + rule_p = &ai->ft_rule[MLX5E_TT_IPV4]; + dest.tir_num = tirn[MLX5E_TT_IPV4]; + MLX5_SET(fte_match_param, mv, outer_headers.ethertype, ETH_P_IP); - MLX5_SET(dest_format_struct, dest, destination_id, - tirn[MLX5E_TT_IPV4]); - err = mlx5_add_flow_table_entry(ft, match_criteria_enable, - match_criteria, flow_context, - ft_ix); - if (err) + *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, &dest); + if (IS_ERR_OR_NULL(*rule_p)) goto err_del_ai; - ai->tt_vec |= BIT(MLX5E_TT_IPV4); } - ft_ix = &ai->ft_ix[MLX5E_TT_IPV6]; if (tt_vec & BIT(MLX5E_TT_IPV6)) { - MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, + rule_p = &ai->ft_rule[MLX5E_TT_IPV6]; + dest.tir_num = tirn[MLX5E_TT_IPV6]; + MLX5_SET(fte_match_param, mv, outer_headers.ethertype, ETH_P_IPV6); - MLX5_SET(dest_format_struct, dest, destination_id, - tirn[MLX5E_TT_IPV6]); - err = mlx5_add_flow_table_entry(ft, match_criteria_enable, - match_criteria, flow_context, - ft_ix); - if (err) + *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, &dest); + if (IS_ERR_OR_NULL(*rule_p)) goto err_del_ai; - ai->tt_vec |= BIT(MLX5E_TT_IPV6); } - MLX5_SET_TO_ONES(fte_match_param, match_criteria, - outer_headers.ip_protocol); - MLX5_SET(fte_match_param, match_value, outer_headers.ip_protocol, - IPPROTO_UDP); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); + MLX5_SET(fte_match_param, mv, outer_headers.ip_protocol, IPPROTO_UDP); - ft_ix = &ai->ft_ix[MLX5E_TT_IPV4_UDP]; if (tt_vec & BIT(MLX5E_TT_IPV4_UDP)) { - MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, + rule_p = &ai->ft_rule[MLX5E_TT_IPV4_UDP]; + dest.tir_num = tirn[MLX5E_TT_IPV4_UDP]; + MLX5_SET(fte_match_param, mv, outer_headers.ethertype, ETH_P_IP); - MLX5_SET(dest_format_struct, dest, destination_id, - tirn[MLX5E_TT_IPV4_UDP]); - err = mlx5_add_flow_table_entry(ft, match_criteria_enable, - match_criteria, flow_context, - ft_ix); - 
if (err) + *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, &dest); + if (IS_ERR_OR_NULL(*rule_p)) goto err_del_ai; - ai->tt_vec |= BIT(MLX5E_TT_IPV4_UDP); } - ft_ix = &ai->ft_ix[MLX5E_TT_IPV6_UDP]; if (tt_vec & BIT(MLX5E_TT_IPV6_UDP)) { - MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, + rule_p = &ai->ft_rule[MLX5E_TT_IPV6_UDP]; + dest.tir_num = tirn[MLX5E_TT_IPV6_UDP]; + MLX5_SET(fte_match_param, mv, outer_headers.ethertype, ETH_P_IPV6); - MLX5_SET(dest_format_struct, dest, destination_id, - tirn[MLX5E_TT_IPV6_UDP]); - err = mlx5_add_flow_table_entry(ft, match_criteria_enable, - match_criteria, flow_context, - ft_ix); - if (err) + *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, &dest); + if (IS_ERR_OR_NULL(*rule_p)) goto err_del_ai; - ai->tt_vec |= BIT(MLX5E_TT_IPV6_UDP); } - MLX5_SET(fte_match_param, match_value, outer_headers.ip_protocol, - IPPROTO_TCP); + MLX5_SET(fte_match_param, mv, outer_headers.ip_protocol, IPPROTO_TCP); - ft_ix = &ai->ft_ix[MLX5E_TT_IPV4_TCP]; if (tt_vec & BIT(MLX5E_TT_IPV4_TCP)) { - MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, + rule_p = &ai->ft_rule[MLX5E_TT_IPV4_TCP]; + dest.tir_num = tirn[MLX5E_TT_IPV4_TCP]; + MLX5_SET(fte_match_param, mv, outer_headers.ethertype, ETH_P_IP); - MLX5_SET(dest_format_struct, dest, destination_id, - tirn[MLX5E_TT_IPV4_TCP]); - err = mlx5_add_flow_table_entry(ft, match_criteria_enable, - match_criteria, flow_context, - ft_ix); - if (err) + *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, &dest); + if (IS_ERR_OR_NULL(*rule_p)) goto err_del_ai; - ai->tt_vec |= BIT(MLX5E_TT_IPV4_TCP); } - ft_ix = &ai->ft_ix[MLX5E_TT_IPV6_TCP]; if (tt_vec & BIT(MLX5E_TT_IPV6_TCP)) { - MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, + rule_p = &ai->ft_rule[MLX5E_TT_IPV6_TCP]; + dest.tir_num = tirn[MLX5E_TT_IPV6_TCP]; + MLX5_SET(fte_match_param, mv, outer_headers.ethertype, ETH_P_IPV6); - MLX5_SET(dest_format_struct, dest, destination_id, - tirn[MLX5E_TT_IPV6_TCP]); - err = mlx5_add_flow_table_entry(ft, match_criteria_enable, - match_criteria, flow_context, - ft_ix); - if (err) + *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, &dest); + if (IS_ERR_OR_NULL(*rule_p)) goto err_del_ai; ai->tt_vec |= BIT(MLX5E_TT_IPV6_TCP); } - MLX5_SET(fte_match_param, match_value, outer_headers.ip_protocol, - IPPROTO_AH); + MLX5_SET(fte_match_param, mv, outer_headers.ip_protocol, IPPROTO_AH); - ft_ix = &ai->ft_ix[MLX5E_TT_IPV4_IPSEC_AH]; if (tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_AH)) { - MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, + rule_p = &ai->ft_rule[MLX5E_TT_IPV4_IPSEC_AH]; + dest.tir_num = tirn[MLX5E_TT_IPV4_IPSEC_AH]; + MLX5_SET(fte_match_param, mv, outer_headers.ethertype, ETH_P_IP); - MLX5_SET(dest_format_struct, dest, destination_id, - tirn[MLX5E_TT_IPV4_IPSEC_AH]); - err = mlx5_add_flow_table_entry(ft, match_criteria_enable, - match_criteria, flow_context, - ft_ix); - if (err) + *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, &dest); + if (IS_ERR_OR_NULL(*rule_p)) goto err_del_ai; - ai->tt_vec |= BIT(MLX5E_TT_IPV4_IPSEC_AH); } - ft_ix = &ai->ft_ix[MLX5E_TT_IPV6_IPSEC_AH]; if (tt_vec & 
BIT(MLX5E_TT_IPV6_IPSEC_AH)) { - MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, + rule_p = &ai->ft_rule[MLX5E_TT_IPV6_IPSEC_AH]; + dest.tir_num = tirn[MLX5E_TT_IPV6_IPSEC_AH]; + MLX5_SET(fte_match_param, mv, outer_headers.ethertype, ETH_P_IPV6); - MLX5_SET(dest_format_struct, dest, destination_id, - tirn[MLX5E_TT_IPV6_IPSEC_AH]); - err = mlx5_add_flow_table_entry(ft, match_criteria_enable, - match_criteria, flow_context, - ft_ix); - if (err) + *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, &dest); + if (IS_ERR_OR_NULL(*rule_p)) goto err_del_ai; - ai->tt_vec |= BIT(MLX5E_TT_IPV6_IPSEC_AH); } - MLX5_SET(fte_match_param, match_value, outer_headers.ip_protocol, - IPPROTO_ESP); + MLX5_SET(fte_match_param, mv, outer_headers.ip_protocol, IPPROTO_ESP); - ft_ix = &ai->ft_ix[MLX5E_TT_IPV4_IPSEC_ESP]; if (tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_ESP)) { - MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, + rule_p = &ai->ft_rule[MLX5E_TT_IPV4_IPSEC_ESP]; + dest.tir_num = tirn[MLX5E_TT_IPV4_IPSEC_ESP]; + MLX5_SET(fte_match_param, mv, outer_headers.ethertype, ETH_P_IP); - MLX5_SET(dest_format_struct, dest, destination_id, - tirn[MLX5E_TT_IPV4_IPSEC_ESP]); - err = mlx5_add_flow_table_entry(ft, match_criteria_enable, - match_criteria, flow_context, - ft_ix); - if (err) + *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, &dest); + if (IS_ERR_OR_NULL(*rule_p)) goto err_del_ai; - ai->tt_vec |= BIT(MLX5E_TT_IPV4_IPSEC_ESP); } - ft_ix = &ai->ft_ix[MLX5E_TT_IPV6_IPSEC_ESP]; if (tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_ESP)) { - MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, + rule_p = &ai->ft_rule[MLX5E_TT_IPV6_IPSEC_ESP]; + dest.tir_num = tirn[MLX5E_TT_IPV6_IPSEC_ESP]; + MLX5_SET(fte_match_param, mv, outer_headers.ethertype, ETH_P_IPV6); - MLX5_SET(dest_format_struct, dest, destination_id, - tirn[MLX5E_TT_IPV6_IPSEC_ESP]); - err = mlx5_add_flow_table_entry(ft, match_criteria_enable, - match_criteria, flow_context, - ft_ix); - if (err) + *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, &dest); + if (IS_ERR_OR_NULL(*rule_p)) goto err_del_ai; - ai->tt_vec |= BIT(MLX5E_TT_IPV6_IPSEC_ESP); } return 0; err_del_ai: + err = PTR_ERR(*rule_p); + *rule_p = NULL; mlx5e_del_eth_addr_from_flow_table(priv, ai); return err; @@ -478,27 +439,25 @@ err_del_ai: static int mlx5e_add_eth_addr_rule(struct mlx5e_priv *priv, struct mlx5e_eth_addr_info *ai, int type) { - u32 *flow_context; u32 *match_criteria; - int err; + u32 *match_value; + int err = 0; - flow_context = mlx5_vzalloc(MLX5_ST_SZ_BYTES(flow_context) + - MLX5_ST_SZ_BYTES(dest_format_struct)); - match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); - if (!flow_context || !match_criteria) { + match_value = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); + match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); + if (!match_value || !match_criteria) { netdev_err(priv->netdev, "%s: alloc failed\n", __func__); err = -ENOMEM; goto add_eth_addr_rule_out; } - err = __mlx5e_add_eth_addr_rule(priv, ai, type, flow_context, - match_criteria); - if (err) - netdev_err(priv->netdev, "%s: failed\n", __func__); + err = __mlx5e_add_eth_addr_rule(priv, ai, type, match_criteria, + match_value); add_eth_addr_rule_out: kvfree(match_criteria); - kvfree(flow_context); + kvfree(match_value); + return err; 
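Each of the per-traffic-type branches above repeats the same shape with a different TIR and ethertype; condensed into one helper (a sketch only, mlx5e_add_tt_rule_sketch is a made-up name and not part of the patch), the pattern is:

/* mc/mv are the caller-allocated match-criteria and match-value buffers,
 * as in mlx5e_add_eth_addr_rule(); ft is priv->fts.main.t. */
static struct mlx5_flow_rule *
mlx5e_add_tt_rule_sketch(struct mlx5_flow_table *ft, u32 *mc, u32 *mv,
			 u32 tirn, u16 ethertype)
{
	struct mlx5_flow_destination dest = {
		.type    = MLX5_FLOW_DESTINATION_TYPE_TIR,
		.tir_num = tirn,
	};

	/* match on the outer ethertype only */
	MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype);
	MLX5_SET(fte_match_param, mv, outer_headers.ethertype, ethertype);

	return mlx5_add_flow_rule(ft, MLX5_MATCH_OUTER_HEADERS, mc, mv,
				  MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
				  MLX5_FS_DEFAULT_FLOW_TAG, &dest);
}

On failure the caller checks IS_ERR_OR_NULL() on the returned rule and unwinds every rule installed so far via mlx5e_del_eth_addr_from_flow_table(), exactly as the err_del_ai path above does.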
} @@ -551,72 +510,77 @@ enum mlx5e_vlan_rule_type { MLX5E_VLAN_RULE_TYPE_MATCH_VID, }; -static int mlx5e_add_vlan_rule(struct mlx5e_priv *priv, - enum mlx5e_vlan_rule_type rule_type, u16 vid) +static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, + enum mlx5e_vlan_rule_type rule_type, + u16 vid, u32 *mc, u32 *mv) { + struct mlx5_flow_table *ft = priv->fts.vlan.t; + struct mlx5_flow_destination dest; u8 match_criteria_enable = 0; - u32 *flow_context; - void *match_value; - void *dest; - u32 *match_criteria; - u32 *ft_ix; - int err; + struct mlx5_flow_rule **rule_p; + int err = 0; - flow_context = mlx5_vzalloc(MLX5_ST_SZ_BYTES(flow_context) + - MLX5_ST_SZ_BYTES(dest_format_struct)); - match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); - if (!flow_context || !match_criteria) { - netdev_err(priv->netdev, "%s: alloc failed\n", __func__); - err = -ENOMEM; - goto add_vlan_rule_out; - } - match_value = MLX5_ADDR_OF(flow_context, flow_context, match_value); - dest = MLX5_ADDR_OF(flow_context, flow_context, destination); - - MLX5_SET(flow_context, flow_context, action, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST); - MLX5_SET(flow_context, flow_context, destination_list_size, 1); - MLX5_SET(dest_format_struct, dest, destination_type, - MLX5_FLOW_CONTEXT_DEST_TYPE_FLOW_TABLE); - MLX5_SET(dest_format_struct, dest, destination_id, - mlx5_get_flow_table_id(priv->ft.main)); + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = priv->fts.main.t; match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, match_criteria, - outer_headers.vlan_tag); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.vlan_tag); switch (rule_type) { case MLX5E_VLAN_RULE_TYPE_UNTAGGED: - ft_ix = &priv->vlan.untagged_rule_ft_ix; + rule_p = &priv->vlan.untagged_rule; break; case MLX5E_VLAN_RULE_TYPE_ANY_VID: - ft_ix = &priv->vlan.any_vlan_rule_ft_ix; - MLX5_SET(fte_match_param, match_value, outer_headers.vlan_tag, - 1); + rule_p = &priv->vlan.any_vlan_rule; + MLX5_SET(fte_match_param, mv, outer_headers.vlan_tag, 1); break; default: /* MLX5E_VLAN_RULE_TYPE_MATCH_VID */ - err = mlx5e_vport_context_update_vlans(priv); - if (err) - goto add_vlan_rule_out; - - ft_ix = &priv->vlan.active_vlans_ft_ix[vid]; - MLX5_SET(fte_match_param, match_value, outer_headers.vlan_tag, - 1); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, - outer_headers.first_vid); - MLX5_SET(fte_match_param, match_value, outer_headers.first_vid, - vid); + rule_p = &priv->vlan.active_vlans_rule[vid]; + MLX5_SET(fte_match_param, mv, outer_headers.vlan_tag, 1); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.first_vid); + MLX5_SET(fte_match_param, mv, outer_headers.first_vid, vid); break; } - err = mlx5_add_flow_table_entry(priv->ft.vlan, match_criteria_enable, - match_criteria, flow_context, ft_ix); - if (err) - netdev_err(priv->netdev, "%s: failed\n", __func__); + *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, + &dest); + + if (IS_ERR(*rule_p)) { + err = PTR_ERR(*rule_p); + *rule_p = NULL; + netdev_err(priv->netdev, "%s: add rule failed\n", __func__); + } + + return err; +} + +static int mlx5e_add_vlan_rule(struct mlx5e_priv *priv, + enum mlx5e_vlan_rule_type rule_type, u16 vid) +{ + u32 *match_criteria; + u32 *match_value; + int err = 0; + + match_value = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); + match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); + if (!match_value || !match_criteria) { + 
netdev_err(priv->netdev, "%s: alloc failed\n", __func__); + err = -ENOMEM; + goto add_vlan_rule_out; + } + + if (rule_type == MLX5E_VLAN_RULE_TYPE_MATCH_VID) + mlx5e_vport_context_update_vlans(priv); + + err = __mlx5e_add_vlan_rule(priv, rule_type, vid, match_criteria, + match_value); add_vlan_rule_out: kvfree(match_criteria); - kvfree(flow_context); + kvfree(match_value); + return err; } @@ -625,16 +589,23 @@ static void mlx5e_del_vlan_rule(struct mlx5e_priv *priv, { switch (rule_type) { case MLX5E_VLAN_RULE_TYPE_UNTAGGED: - mlx5_del_flow_table_entry(priv->ft.vlan, - priv->vlan.untagged_rule_ft_ix); + if (priv->vlan.untagged_rule) { + mlx5_del_flow_rule(priv->vlan.untagged_rule); + priv->vlan.untagged_rule = NULL; + } break; case MLX5E_VLAN_RULE_TYPE_ANY_VID: - mlx5_del_flow_table_entry(priv->ft.vlan, - priv->vlan.any_vlan_rule_ft_ix); + if (priv->vlan.any_vlan_rule) { + mlx5_del_flow_rule(priv->vlan.any_vlan_rule); + priv->vlan.any_vlan_rule = NULL; + } break; case MLX5E_VLAN_RULE_TYPE_MATCH_VID: - mlx5_del_flow_table_entry(priv->ft.vlan, - priv->vlan.active_vlans_ft_ix[vid]); + mlx5e_vport_context_update_vlans(priv); + if (priv->vlan.active_vlans_rule[vid]) { + mlx5_del_flow_rule(priv->vlan.active_vlans_rule[vid]); + priv->vlan.active_vlans_rule[vid] = NULL; + } mlx5e_vport_context_update_vlans(priv); break; } @@ -889,151 +860,358 @@ void mlx5e_set_rx_mode_work(struct work_struct *work) mlx5e_vport_context_update(priv); } +static void mlx5e_destroy_groups(struct mlx5e_flow_table *ft) +{ + int i; + + for (i = ft->num_groups - 1; i >= 0; i--) { + if (!IS_ERR_OR_NULL(ft->g[i])) + mlx5_destroy_flow_group(ft->g[i]); + ft->g[i] = NULL; + } + ft->num_groups = 0; +} + void mlx5e_init_eth_addr(struct mlx5e_priv *priv) { ether_addr_copy(priv->eth_addr.broadcast.addr, priv->netdev->broadcast); } -static int mlx5e_create_main_flow_table(struct mlx5e_priv *priv) +#define MLX5E_MAIN_GROUP0_SIZE BIT(3) +#define MLX5E_MAIN_GROUP1_SIZE BIT(1) +#define MLX5E_MAIN_GROUP2_SIZE BIT(0) +#define MLX5E_MAIN_GROUP3_SIZE BIT(14) +#define MLX5E_MAIN_GROUP4_SIZE BIT(13) +#define MLX5E_MAIN_GROUP5_SIZE BIT(11) +#define MLX5E_MAIN_GROUP6_SIZE BIT(2) +#define MLX5E_MAIN_GROUP7_SIZE BIT(1) +#define MLX5E_MAIN_GROUP8_SIZE BIT(0) +#define MLX5E_MAIN_TABLE_SIZE (MLX5E_MAIN_GROUP0_SIZE +\ + MLX5E_MAIN_GROUP1_SIZE +\ + MLX5E_MAIN_GROUP2_SIZE +\ + MLX5E_MAIN_GROUP3_SIZE +\ + MLX5E_MAIN_GROUP4_SIZE +\ + MLX5E_MAIN_GROUP5_SIZE +\ + MLX5E_MAIN_GROUP6_SIZE +\ + MLX5E_MAIN_GROUP7_SIZE +\ + MLX5E_MAIN_GROUP8_SIZE) + +static int __mlx5e_create_main_groups(struct mlx5e_flow_table *ft, u32 *in, + int inlen) { - struct mlx5_flow_table_group *g; - u8 *dmac; + u8 *mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + u8 *dmac = MLX5_ADDR_OF(create_flow_group_in, in, + match_criteria.outer_headers.dmac_47_16); + int err; + int ix = 0; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_MAIN_GROUP0_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += 
MLX5E_MAIN_GROUP1_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_MAIN_GROUP2_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); + eth_broadcast_addr(dmac); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_MAIN_GROUP3_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + eth_broadcast_addr(dmac); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_MAIN_GROUP4_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + eth_broadcast_addr(dmac); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_MAIN_GROUP5_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); + dmac[0] = 0x01; + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_MAIN_GROUP6_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + dmac[0] = 0x01; + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_MAIN_GROUP7_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + dmac[0] = 0x01; + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_MAIN_GROUP8_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + return 0; + +err_destroy_groups: + err = PTR_ERR(ft->g[ft->num_groups]); + ft->g[ft->num_groups] = NULL; + mlx5e_destroy_groups(ft); + + return err; +} - g = kcalloc(9, sizeof(*g), GFP_KERNEL); - if (!g) +static int mlx5e_create_main_groups(struct mlx5e_flow_table *ft) +{ + u32 *in; + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + int err; + + in = 
mlx5_vzalloc(inlen); + if (!in) return -ENOMEM; - g[0].log_sz = 3; - g[0].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, g[0].match_criteria, - outer_headers.ethertype); - MLX5_SET_TO_ONES(fte_match_param, g[0].match_criteria, - outer_headers.ip_protocol); - - g[1].log_sz = 1; - g[1].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, g[1].match_criteria, - outer_headers.ethertype); - - g[2].log_sz = 0; - - g[3].log_sz = 14; - g[3].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - dmac = MLX5_ADDR_OF(fte_match_param, g[3].match_criteria, - outer_headers.dmac_47_16); - memset(dmac, 0xff, ETH_ALEN); - MLX5_SET_TO_ONES(fte_match_param, g[3].match_criteria, - outer_headers.ethertype); - MLX5_SET_TO_ONES(fte_match_param, g[3].match_criteria, - outer_headers.ip_protocol); - - g[4].log_sz = 13; - g[4].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - dmac = MLX5_ADDR_OF(fte_match_param, g[4].match_criteria, - outer_headers.dmac_47_16); - memset(dmac, 0xff, ETH_ALEN); - MLX5_SET_TO_ONES(fte_match_param, g[4].match_criteria, - outer_headers.ethertype); - - g[5].log_sz = 11; - g[5].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - dmac = MLX5_ADDR_OF(fte_match_param, g[5].match_criteria, - outer_headers.dmac_47_16); - memset(dmac, 0xff, ETH_ALEN); - - g[6].log_sz = 2; - g[6].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - dmac = MLX5_ADDR_OF(fte_match_param, g[6].match_criteria, - outer_headers.dmac_47_16); - dmac[0] = 0x01; - MLX5_SET_TO_ONES(fte_match_param, g[6].match_criteria, - outer_headers.ethertype); - MLX5_SET_TO_ONES(fte_match_param, g[6].match_criteria, - outer_headers.ip_protocol); - - g[7].log_sz = 1; - g[7].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - dmac = MLX5_ADDR_OF(fte_match_param, g[7].match_criteria, - outer_headers.dmac_47_16); - dmac[0] = 0x01; - MLX5_SET_TO_ONES(fte_match_param, g[7].match_criteria, - outer_headers.ethertype); + err = __mlx5e_create_main_groups(ft, in, inlen); - g[8].log_sz = 0; - g[8].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - dmac = MLX5_ADDR_OF(fte_match_param, g[8].match_criteria, - outer_headers.dmac_47_16); - dmac[0] = 0x01; - priv->ft.main = mlx5_create_flow_table(priv->mdev, 1, - MLX5_FLOW_TABLE_TYPE_NIC_RCV, - 9, g); - kfree(g); + kvfree(in); + return err; +} - return priv->ft.main ? 
0 : -ENOMEM; +static int mlx5e_create_main_flow_table(struct mlx5e_priv *priv) +{ + struct mlx5e_flow_table *ft = &priv->fts.main; + int err; + + ft->num_groups = 0; + ft->t = mlx5_create_flow_table(priv->fts.ns, 0, MLX5E_MAIN_TABLE_SIZE); + + if (IS_ERR(ft->t)) { + err = PTR_ERR(ft->t); + ft->t = NULL; + return err; + } + ft->g = kcalloc(MLX5E_NUM_MAIN_GROUPS, sizeof(*ft->g), GFP_KERNEL); + if (!ft->g) { + err = -ENOMEM; + goto err_destroy_main_flow_table; + } + + err = mlx5e_create_main_groups(ft); + if (err) + goto err_free_g; + return 0; + +err_free_g: + kfree(ft->g); + +err_destroy_main_flow_table: + mlx5_destroy_flow_table(ft->t); + ft->t = NULL; + + return err; +} + +static void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft) +{ + mlx5e_destroy_groups(ft); + kfree(ft->g); + mlx5_destroy_flow_table(ft->t); + ft->t = NULL; } static void mlx5e_destroy_main_flow_table(struct mlx5e_priv *priv) { - mlx5_destroy_flow_table(priv->ft.main); + mlx5e_destroy_flow_table(&priv->fts.main); } -static int mlx5e_create_vlan_flow_table(struct mlx5e_priv *priv) +#define MLX5E_NUM_VLAN_GROUPS 2 +#define MLX5E_VLAN_GROUP0_SIZE BIT(12) +#define MLX5E_VLAN_GROUP1_SIZE BIT(1) +#define MLX5E_VLAN_TABLE_SIZE (MLX5E_VLAN_GROUP0_SIZE +\ + MLX5E_VLAN_GROUP1_SIZE) + +static int __mlx5e_create_vlan_groups(struct mlx5e_flow_table *ft, u32 *in, + int inlen) +{ + int err; + int ix = 0; + u8 *mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.vlan_tag); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.first_vid); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_VLAN_GROUP0_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.vlan_tag); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_VLAN_GROUP1_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + return 0; + +err_destroy_groups: + err = PTR_ERR(ft->g[ft->num_groups]); + ft->g[ft->num_groups] = NULL; + mlx5e_destroy_groups(ft); + + return err; +} + +static int mlx5e_create_vlan_groups(struct mlx5e_flow_table *ft) { - struct mlx5_flow_table_group *g; + u32 *in; + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + int err; - g = kcalloc(2, sizeof(*g), GFP_KERNEL); - if (!g) + in = mlx5_vzalloc(inlen); + if (!in) return -ENOMEM; - g[0].log_sz = 12; - g[0].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, g[0].match_criteria, - outer_headers.vlan_tag); - MLX5_SET_TO_ONES(fte_match_param, g[0].match_criteria, - outer_headers.first_vid); - - /* untagged + any vlan id */ - g[1].log_sz = 1; - g[1].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, g[1].match_criteria, - outer_headers.vlan_tag); - - priv->ft.vlan = mlx5_create_flow_table(priv->mdev, 0, - MLX5_FLOW_TABLE_TYPE_NIC_RCV, - 2, g); - - kfree(g); - return priv->ft.vlan ? 
0 : -ENOMEM; + err = __mlx5e_create_vlan_groups(ft, in, inlen); + + kvfree(in); + return err; +} + +static int mlx5e_create_vlan_flow_table(struct mlx5e_priv *priv) +{ + struct mlx5e_flow_table *ft = &priv->fts.vlan; + int err; + + ft->num_groups = 0; + ft->t = mlx5_create_flow_table(priv->fts.ns, 0, MLX5E_VLAN_TABLE_SIZE); + + if (IS_ERR(ft->t)) { + err = PTR_ERR(ft->t); + ft->t = NULL; + return err; + } + ft->g = kcalloc(MLX5E_NUM_VLAN_GROUPS, sizeof(*ft->g), GFP_KERNEL); + if (!ft->g) { + err = -ENOMEM; + goto err_destroy_vlan_flow_table; + } + + err = mlx5e_create_vlan_groups(ft); + if (err) + goto err_free_g; + + return 0; + +err_free_g: + kfree(ft->g); + +err_destroy_vlan_flow_table: + mlx5_destroy_flow_table(ft->t); + ft->t = NULL; + + return err; } static void mlx5e_destroy_vlan_flow_table(struct mlx5e_priv *priv) { - mlx5_destroy_flow_table(priv->ft.vlan); + mlx5e_destroy_flow_table(&priv->fts.vlan); } int mlx5e_create_flow_tables(struct mlx5e_priv *priv) { int err; - err = mlx5e_create_main_flow_table(priv); + priv->fts.ns = mlx5_get_flow_namespace(priv->mdev, + MLX5_FLOW_NAMESPACE_KERNEL); + + if (!priv->fts.ns) + return -EINVAL; + + err = mlx5e_create_vlan_flow_table(priv); if (err) return err; - err = mlx5e_create_vlan_flow_table(priv); + err = mlx5e_create_main_flow_table(priv); if (err) - goto err_destroy_main_flow_table; + goto err_destroy_vlan_flow_table; err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); if (err) - goto err_destroy_vlan_flow_table; + goto err_destroy_main_flow_table; return 0; -err_destroy_vlan_flow_table: - mlx5e_destroy_vlan_flow_table(priv); - err_destroy_main_flow_table: mlx5e_destroy_main_flow_table(priv); +err_destroy_vlan_flow_table: + mlx5e_destroy_vlan_flow_table(priv); return err; } @@ -1041,6 +1219,6 @@ err_destroy_main_flow_table: void mlx5e_destroy_flow_tables(struct mlx5e_priv *priv) { mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); - mlx5e_destroy_vlan_flow_table(priv); mlx5e_destroy_main_flow_table(priv); + mlx5e_destroy_vlan_flow_table(priv); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index a20be56df553..d4601a564699 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -30,7 +30,7 @@ * SOFTWARE. 
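
As a hedged recap (illustrative only, not taken from the patch) of the bring-up order the new constructors follow — namespace, then table, then flow groups carved out by start/end_flow_index. example_create_table() and MY_TABLE_SIZE are made-up placeholders; MLX5_SET_CFG() is the create_flow_group_in helper already used above, and ft->g[] is assumed to have been kcalloc()'ed as in the patch.

static int example_create_table(struct mlx5e_priv *priv,
				struct mlx5e_flow_table *ft)
{
	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
	u32 *in;
	int err = 0;

	/* 1) resolve the kernel RX steering namespace */
	priv->fts.ns = mlx5_get_flow_namespace(priv->mdev,
					       MLX5_FLOW_NAMESPACE_KERNEL);
	if (!priv->fts.ns)
		return -EINVAL;

	/* 2) create the table in that namespace (priority 0, fixed size) */
	ft->t = mlx5_create_flow_table(priv->fts.ns, 0, MY_TABLE_SIZE);
	if (IS_ERR(ft->t))
		return PTR_ERR(ft->t);

	/* 3) one wildcard group spanning the whole table
	 * (the patch carves several, each with its own criteria)
	 */
	in = mlx5_vzalloc(inlen);
	if (!in)
		return -ENOMEM;	/* table teardown omitted for brevity */
	MLX5_SET_CFG(in, start_flow_index, 0);
	MLX5_SET_CFG(in, end_flow_index, MY_TABLE_SIZE - 1);
	ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
	if (IS_ERR(ft->g[ft->num_groups]))
		err = PTR_ERR(ft->g[ft->num_groups]);
	else
		ft->num_groups++;
	kvfree(in);
	return err;
}
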
*/ -#include +#include #include "en.h" #include "eswitch.h" diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index d8939e597c54..bc3d9f8a75c1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include "mlx5_core.h" #include "eswitch.h" @@ -321,220 +321,6 @@ static void del_l2_table_entry(struct mlx5_core_dev *dev, u32 index) free_l2_table_index(l2_table, index); } -/* E-Switch FDB flow steering */ -struct dest_node { - struct list_head list; - struct mlx5_flow_destination dest; -}; - -static int _mlx5_flow_rule_apply(struct mlx5_flow_rule *fr) -{ - bool was_valid = fr->valid; - struct dest_node *dest_n; - u32 dest_list_size = 0; - void *in_match_value; - u32 *flow_context; - u32 flow_index; - int err; - int i; - - if (list_empty(&fr->dest_list)) { - if (fr->valid) - mlx5_del_flow_table_entry(fr->ft, fr->fi); - fr->valid = false; - return 0; - } - - list_for_each_entry(dest_n, &fr->dest_list, list) - dest_list_size++; - - flow_context = mlx5_vzalloc(MLX5_ST_SZ_BYTES(flow_context) + - MLX5_ST_SZ_BYTES(dest_format_struct) * - dest_list_size); - if (!flow_context) - return -ENOMEM; - - MLX5_SET(flow_context, flow_context, flow_tag, fr->flow_tag); - MLX5_SET(flow_context, flow_context, action, fr->action); - MLX5_SET(flow_context, flow_context, destination_list_size, - dest_list_size); - - i = 0; - list_for_each_entry(dest_n, &fr->dest_list, list) { - void *dest_addr = MLX5_ADDR_OF(flow_context, flow_context, - destination[i++]); - - MLX5_SET(dest_format_struct, dest_addr, destination_type, - dest_n->dest.type); - MLX5_SET(dest_format_struct, dest_addr, destination_id, - dest_n->dest.vport_num); - } - - in_match_value = MLX5_ADDR_OF(flow_context, flow_context, match_value); - memcpy(in_match_value, fr->match_value, MLX5_ST_SZ_BYTES(fte_match_param)); - - err = mlx5_add_flow_table_entry(fr->ft, fr->match_criteria_enable, - fr->match_criteria, flow_context, - &flow_index); - if (!err) { - if (was_valid) - mlx5_del_flow_table_entry(fr->ft, fr->fi); - fr->fi = flow_index; - fr->valid = true; - } - kfree(flow_context); - return err; -} - -static int mlx5_flow_rule_add_dest(struct mlx5_flow_rule *fr, - struct mlx5_flow_destination *new_dest) -{ - struct dest_node *dest_n; - int err; - - dest_n = kzalloc(sizeof(*dest_n), GFP_KERNEL); - if (!dest_n) - return -ENOMEM; - - memcpy(&dest_n->dest, new_dest, sizeof(dest_n->dest)); - mutex_lock(&fr->mutex); - list_add(&dest_n->list, &fr->dest_list); - err = _mlx5_flow_rule_apply(fr); - if (err) { - list_del(&dest_n->list); - kfree(dest_n); - } - mutex_unlock(&fr->mutex); - return err; -} - -static int mlx5_flow_rule_del_dest(struct mlx5_flow_rule *fr, - struct mlx5_flow_destination *dest) -{ - struct dest_node *dest_n; - struct dest_node *n; - int err; - - mutex_lock(&fr->mutex); - list_for_each_entry_safe(dest_n, n, &fr->dest_list, list) { - if (dest->vport_num == dest_n->dest.vport_num) - goto found; - } - mutex_unlock(&fr->mutex); - return -ENOENT; - -found: - list_del(&dest_n->list); - err = _mlx5_flow_rule_apply(fr); - mutex_unlock(&fr->mutex); - kfree(dest_n); - - return err; -} - -static struct mlx5_flow_rule *find_fr(struct mlx5_eswitch *esw, - u8 match_criteria_enable, - u32 *match_value) -{ - struct hlist_head *hash = esw->mc_table; - struct esw_mc_addr *esw_mc; - u8 *dmac_v; - - dmac_v = MLX5_ADDR_OF(fte_match_param, match_value, - 
outer_headers.dmac_47_16); - - /* UNICAST FULL MATCH */ - if (!is_multicast_ether_addr(dmac_v)) - return NULL; - - /* MULTICAST FULL MATCH */ - esw_mc = l2addr_hash_find(hash, dmac_v, struct esw_mc_addr); - - return esw_mc ? esw_mc->uplink_rule : NULL; -} - -static struct mlx5_flow_rule *alloc_fr(void *ft, - u8 match_criteria_enable, - u32 *match_criteria, - u32 *match_value, - u32 action, - u32 flow_tag) -{ - struct mlx5_flow_rule *fr = kzalloc(sizeof(*fr), GFP_KERNEL); - - if (!fr) - return NULL; - - fr->match_criteria = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); - fr->match_value = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); - if (!fr->match_criteria || !fr->match_value) { - kfree(fr->match_criteria); - kfree(fr->match_value); - kfree(fr); - return NULL; - } - - memcpy(fr->match_criteria, match_criteria, MLX5_ST_SZ_BYTES(fte_match_param)); - memcpy(fr->match_value, match_value, MLX5_ST_SZ_BYTES(fte_match_param)); - fr->match_criteria_enable = match_criteria_enable; - fr->flow_tag = flow_tag; - fr->action = action; - - mutex_init(&fr->mutex); - INIT_LIST_HEAD(&fr->dest_list); - atomic_set(&fr->refcount, 0); - fr->ft = ft; - return fr; -} - -static void deref_fr(struct mlx5_flow_rule *fr) -{ - if (!atomic_dec_and_test(&fr->refcount)) - return; - - kfree(fr->match_criteria); - kfree(fr->match_value); - kfree(fr); -} - -static struct mlx5_flow_rule * -mlx5_add_flow_rule(struct mlx5_eswitch *esw, - u8 match_criteria_enable, - u32 *match_criteria, - u32 *match_value, - u32 action, - u32 flow_tag, - struct mlx5_flow_destination *dest) -{ - struct mlx5_flow_rule *fr; - int err; - - fr = find_fr(esw, match_criteria_enable, match_value); - fr = fr ? fr : alloc_fr(esw->fdb_table.fdb, match_criteria_enable, match_criteria, - match_value, action, flow_tag); - if (!fr) - return NULL; - - atomic_inc(&fr->refcount); - - err = mlx5_flow_rule_add_dest(fr, dest); - if (err) { - deref_fr(fr); - return NULL; - } - - return fr; -} - -static void mlx5_del_flow_rule(struct mlx5_flow_rule *fr, u32 vport) -{ - struct mlx5_flow_destination dest; - - dest.vport_num = vport; - mlx5_flow_rule_del_dest(fr, &dest); - deref_fr(fr); -} - /* E-Switch FDB */ static struct mlx5_flow_rule * esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport) @@ -569,7 +355,7 @@ esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport) "\tFDB add rule dmac_v(%pM) dmac_c(%pM) -> vport(%d)\n", dmac_v, dmac_c, vport); flow_rule = - mlx5_add_flow_rule(esw, + mlx5_add_flow_rule(esw->fdb_table.fdb, match_header, match_c, match_v, @@ -589,33 +375,61 @@ out: static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports) { + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_core_dev *dev = esw->dev; - struct mlx5_flow_table_group g; + struct mlx5_flow_namespace *root_ns; struct mlx5_flow_table *fdb; + struct mlx5_flow_group *g; + void *match_criteria; + int table_size; + u32 *flow_group_in; u8 *dmac; + int err = 0; esw_debug(dev, "Create FDB log_max_size(%d)\n", MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); - memset(&g, 0, sizeof(g)); - /* UC MC Full match rules*/ - g.log_sz = MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size); - g.match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - dmac = MLX5_ADDR_OF(fte_match_param, g.match_criteria, - outer_headers.dmac_47_16); - /* Match criteria mask */ - memset(dmac, 0xff, 6); + root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); + if (!root_ns) { + esw_warn(dev, "Failed to get FDB flow namespace\n"); + 
return -ENOMEM; + } - fdb = mlx5_create_flow_table(dev, 0, - MLX5_FLOW_TABLE_TYPE_ESWITCH, - 1, &g); - if (fdb) - esw_debug(dev, "ESW: FDB Table created fdb->id %d\n", mlx5_get_flow_table_id(fdb)); - else - esw_warn(dev, "ESW: Failed to create FDB Table\n"); + flow_group_in = mlx5_vzalloc(inlen); + if (!flow_group_in) + return -ENOMEM; + memset(flow_group_in, 0, inlen); + + table_size = BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); + fdb = mlx5_create_flow_table(root_ns, 0, table_size); + if (IS_ERR_OR_NULL(fdb)) { + err = PTR_ERR(fdb); + esw_warn(dev, "Failed to create FDB Table err %d\n", err); + goto out; + } + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_OUTER_HEADERS); + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); + dmac = MLX5_ADDR_OF(fte_match_param, match_criteria, outer_headers.dmac_47_16); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 1); + eth_broadcast_addr(dmac); + + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR_OR_NULL(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create flow group err(%d)\n", err); + goto out; + } + + esw->fdb_table.addr_grp = g; esw->fdb_table.fdb = fdb; - return fdb ? 0 : -ENOMEM; +out: + kfree(flow_group_in); + if (err && !IS_ERR_OR_NULL(fdb)) + mlx5_destroy_flow_table(fdb); + return err; } static void esw_destroy_fdb_table(struct mlx5_eswitch *esw) @@ -623,10 +437,11 @@ static void esw_destroy_fdb_table(struct mlx5_eswitch *esw) if (!esw->fdb_table.fdb) return; - esw_debug(esw->dev, "Destroy FDB Table fdb(%d)\n", - mlx5_get_flow_table_id(esw->fdb_table.fdb)); + esw_debug(esw->dev, "Destroy FDB Table\n"); + mlx5_destroy_flow_group(esw->fdb_table.addr_grp); mlx5_destroy_flow_table(esw->fdb_table.fdb); esw->fdb_table.fdb = NULL; + esw->fdb_table.addr_grp = NULL; } /* E-Switch vport UC/MC lists management */ @@ -689,7 +504,7 @@ static int esw_del_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) del_l2_table_entry(esw->dev, esw_uc->table_index); if (vaddr->flow_rule) - mlx5_del_flow_rule(vaddr->flow_rule, vport); + mlx5_del_flow_rule(vaddr->flow_rule); vaddr->flow_rule = NULL; l2addr_hash_del(esw_uc); @@ -750,14 +565,14 @@ static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) esw_mc->uplink_rule); if (vaddr->flow_rule) - mlx5_del_flow_rule(vaddr->flow_rule, vport); + mlx5_del_flow_rule(vaddr->flow_rule); vaddr->flow_rule = NULL; if (--esw_mc->refcnt) return 0; if (esw_mc->uplink_rule) - mlx5_del_flow_rule(esw_mc->uplink_rule, UPLINK_VPORT); + mlx5_del_flow_rule(esw_mc->uplink_rule); l2addr_hash_del(esw_mc); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 02ff3eade026..3416a428f70f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -88,20 +88,6 @@ struct l2addr_node { kfree(ptr); \ }) -struct mlx5_flow_rule { - void *ft; - u32 fi; - u8 match_criteria_enable; - u32 *match_criteria; - u32 *match_value; - u32 action; - u32 flow_tag; - bool valid; - atomic_t refcount; - struct mutex mutex; /* protect flow rule updates */ - struct list_head dest_list; -}; - struct mlx5_vport { struct mlx5_core_dev *dev; int vport; @@ -126,6 +112,7 @@ struct mlx5_l2_table { struct mlx5_eswitch_fdb { void *fdb; + struct mlx5_flow_group *addr_grp; }; struct mlx5_eswitch { diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/flow_table.c b/drivers/net/ethernet/mellanox/mlx5/core/flow_table.c deleted file mode 100644 index ca90b9bc3b95..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/flow_table.c +++ /dev/null @@ -1,422 +0,0 @@ -/* - * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include "mlx5_core.h" - -struct mlx5_ftg { - struct mlx5_flow_table_group g; - u32 id; - u32 start_ix; -}; - -struct mlx5_flow_table { - struct mlx5_core_dev *dev; - u8 level; - u8 type; - u32 id; - struct mutex mutex; /* sync bitmap alloc */ - u16 num_groups; - struct mlx5_ftg *group; - unsigned long *bitmap; - u32 size; -}; - -static int mlx5_set_flow_entry_cmd(struct mlx5_flow_table *ft, u32 group_ix, - u32 flow_index, void *flow_context) -{ - u32 out[MLX5_ST_SZ_DW(set_fte_out)]; - u32 *in; - void *in_flow_context; - int fcdls = - MLX5_GET(flow_context, flow_context, destination_list_size) * - MLX5_ST_SZ_BYTES(dest_format_struct); - int inlen = MLX5_ST_SZ_BYTES(set_fte_in) + fcdls; - int err; - - in = mlx5_vzalloc(inlen); - if (!in) { - mlx5_core_warn(ft->dev, "failed to allocate inbox\n"); - return -ENOMEM; - } - - MLX5_SET(set_fte_in, in, table_type, ft->type); - MLX5_SET(set_fte_in, in, table_id, ft->id); - MLX5_SET(set_fte_in, in, flow_index, flow_index); - MLX5_SET(set_fte_in, in, opcode, MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY); - - in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context); - memcpy(in_flow_context, flow_context, - MLX5_ST_SZ_BYTES(flow_context) + fcdls); - - MLX5_SET(flow_context, in_flow_context, group_id, - ft->group[group_ix].id); - - memset(out, 0, sizeof(out)); - err = mlx5_cmd_exec_check_status(ft->dev, in, inlen, out, - sizeof(out)); - kvfree(in); - - return err; -} - -static void mlx5_del_flow_entry_cmd(struct mlx5_flow_table *ft, u32 flow_index) -{ - u32 in[MLX5_ST_SZ_DW(delete_fte_in)]; - u32 out[MLX5_ST_SZ_DW(delete_fte_out)]; - - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); - -#define MLX5_SET_DFTEI(p, x, v) MLX5_SET(delete_fte_in, p, x, v) - MLX5_SET_DFTEI(in, table_type, ft->type); - MLX5_SET_DFTEI(in, table_id, ft->id); - MLX5_SET_DFTEI(in, flow_index, 
flow_index); - MLX5_SET_DFTEI(in, opcode, MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY); - - mlx5_cmd_exec_check_status(ft->dev, in, sizeof(in), out, sizeof(out)); -} - -static void mlx5_destroy_flow_group_cmd(struct mlx5_flow_table *ft, int i) -{ - u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)]; - u32 out[MLX5_ST_SZ_DW(destroy_flow_group_out)]; - - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); - -#define MLX5_SET_DFGI(p, x, v) MLX5_SET(destroy_flow_group_in, p, x, v) - MLX5_SET_DFGI(in, table_type, ft->type); - MLX5_SET_DFGI(in, table_id, ft->id); - MLX5_SET_DFGI(in, opcode, MLX5_CMD_OP_DESTROY_FLOW_GROUP); - MLX5_SET_DFGI(in, group_id, ft->group[i].id); - mlx5_cmd_exec_check_status(ft->dev, in, sizeof(in), out, sizeof(out)); -} - -static int mlx5_create_flow_group_cmd(struct mlx5_flow_table *ft, int i) -{ - u32 out[MLX5_ST_SZ_DW(create_flow_group_out)]; - u32 *in; - void *in_match_criteria; - int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); - struct mlx5_flow_table_group *g = &ft->group[i].g; - u32 start_ix = ft->group[i].start_ix; - u32 end_ix = start_ix + (1 << g->log_sz) - 1; - int err; - - in = mlx5_vzalloc(inlen); - if (!in) { - mlx5_core_warn(ft->dev, "failed to allocate inbox\n"); - return -ENOMEM; - } - in_match_criteria = MLX5_ADDR_OF(create_flow_group_in, in, - match_criteria); - - memset(out, 0, sizeof(out)); - -#define MLX5_SET_CFGI(p, x, v) MLX5_SET(create_flow_group_in, p, x, v) - MLX5_SET_CFGI(in, table_type, ft->type); - MLX5_SET_CFGI(in, table_id, ft->id); - MLX5_SET_CFGI(in, opcode, MLX5_CMD_OP_CREATE_FLOW_GROUP); - MLX5_SET_CFGI(in, start_flow_index, start_ix); - MLX5_SET_CFGI(in, end_flow_index, end_ix); - MLX5_SET_CFGI(in, match_criteria_enable, g->match_criteria_enable); - - memcpy(in_match_criteria, g->match_criteria, - MLX5_ST_SZ_BYTES(fte_match_param)); - - err = mlx5_cmd_exec_check_status(ft->dev, in, inlen, out, - sizeof(out)); - if (!err) - ft->group[i].id = MLX5_GET(create_flow_group_out, out, - group_id); - - kvfree(in); - - return err; -} - -static void mlx5_destroy_flow_table_groups(struct mlx5_flow_table *ft) -{ - int i; - - for (i = 0; i < ft->num_groups; i++) - mlx5_destroy_flow_group_cmd(ft, i); -} - -static int mlx5_create_flow_table_groups(struct mlx5_flow_table *ft) -{ - int err; - int i; - - for (i = 0; i < ft->num_groups; i++) { - err = mlx5_create_flow_group_cmd(ft, i); - if (err) - goto err_destroy_flow_table_groups; - } - - return 0; - -err_destroy_flow_table_groups: - for (i--; i >= 0; i--) - mlx5_destroy_flow_group_cmd(ft, i); - - return err; -} - -static int mlx5_create_flow_table_cmd(struct mlx5_flow_table *ft) -{ - u32 in[MLX5_ST_SZ_DW(create_flow_table_in)]; - u32 out[MLX5_ST_SZ_DW(create_flow_table_out)]; - int err; - - memset(in, 0, sizeof(in)); - - MLX5_SET(create_flow_table_in, in, table_type, ft->type); - MLX5_SET(create_flow_table_in, in, level, ft->level); - MLX5_SET(create_flow_table_in, in, log_size, order_base_2(ft->size)); - - MLX5_SET(create_flow_table_in, in, opcode, - MLX5_CMD_OP_CREATE_FLOW_TABLE); - - memset(out, 0, sizeof(out)); - err = mlx5_cmd_exec_check_status(ft->dev, in, sizeof(in), out, - sizeof(out)); - if (err) - return err; - - ft->id = MLX5_GET(create_flow_table_out, out, table_id); - - return 0; -} - -static void mlx5_destroy_flow_table_cmd(struct mlx5_flow_table *ft) -{ - u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)]; - u32 out[MLX5_ST_SZ_DW(destroy_flow_table_out)]; - - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); - -#define MLX5_SET_DFTI(p, x, v) MLX5_SET(destroy_flow_table_in, p, x, v) - 
MLX5_SET_DFTI(in, table_type, ft->type); - MLX5_SET_DFTI(in, table_id, ft->id); - MLX5_SET_DFTI(in, opcode, MLX5_CMD_OP_DESTROY_FLOW_TABLE); - - mlx5_cmd_exec_check_status(ft->dev, in, sizeof(in), out, sizeof(out)); -} - -static int mlx5_find_group(struct mlx5_flow_table *ft, u8 match_criteria_enable, - u32 *match_criteria, int *group_ix) -{ - void *mc_outer = MLX5_ADDR_OF(fte_match_param, match_criteria, - outer_headers); - void *mc_misc = MLX5_ADDR_OF(fte_match_param, match_criteria, - misc_parameters); - void *mc_inner = MLX5_ADDR_OF(fte_match_param, match_criteria, - inner_headers); - int mc_outer_sz = MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4); - int mc_misc_sz = MLX5_ST_SZ_BYTES(fte_match_set_misc); - int mc_inner_sz = MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4); - int i; - - for (i = 0; i < ft->num_groups; i++) { - struct mlx5_flow_table_group *g = &ft->group[i].g; - void *gmc_outer = MLX5_ADDR_OF(fte_match_param, - g->match_criteria, - outer_headers); - void *gmc_misc = MLX5_ADDR_OF(fte_match_param, - g->match_criteria, - misc_parameters); - void *gmc_inner = MLX5_ADDR_OF(fte_match_param, - g->match_criteria, - inner_headers); - - if (g->match_criteria_enable != match_criteria_enable) - continue; - - if (match_criteria_enable & MLX5_MATCH_OUTER_HEADERS) - if (memcmp(mc_outer, gmc_outer, mc_outer_sz)) - continue; - - if (match_criteria_enable & MLX5_MATCH_MISC_PARAMETERS) - if (memcmp(mc_misc, gmc_misc, mc_misc_sz)) - continue; - - if (match_criteria_enable & MLX5_MATCH_INNER_HEADERS) - if (memcmp(mc_inner, gmc_inner, mc_inner_sz)) - continue; - - *group_ix = i; - return 0; - } - - return -EINVAL; -} - -static int alloc_flow_index(struct mlx5_flow_table *ft, int group_ix, u32 *ix) -{ - struct mlx5_ftg *g = &ft->group[group_ix]; - int err = 0; - - mutex_lock(&ft->mutex); - - *ix = find_next_zero_bit(ft->bitmap, ft->size, g->start_ix); - if (*ix >= (g->start_ix + (1 << g->g.log_sz))) - err = -ENOSPC; - else - __set_bit(*ix, ft->bitmap); - - mutex_unlock(&ft->mutex); - - return err; -} - -static void mlx5_free_flow_index(struct mlx5_flow_table *ft, u32 ix) -{ - __clear_bit(ix, ft->bitmap); -} - -int mlx5_add_flow_table_entry(void *flow_table, u8 match_criteria_enable, - void *match_criteria, void *flow_context, - u32 *flow_index) -{ - struct mlx5_flow_table *ft = flow_table; - int group_ix; - int err; - - err = mlx5_find_group(ft, match_criteria_enable, match_criteria, - &group_ix); - if (err) { - mlx5_core_warn(ft->dev, "mlx5_find_group failed\n"); - return err; - } - - err = alloc_flow_index(ft, group_ix, flow_index); - if (err) { - mlx5_core_warn(ft->dev, "alloc_flow_index failed\n"); - return err; - } - - return mlx5_set_flow_entry_cmd(ft, group_ix, *flow_index, flow_context); -} -EXPORT_SYMBOL(mlx5_add_flow_table_entry); - -void mlx5_del_flow_table_entry(void *flow_table, u32 flow_index) -{ - struct mlx5_flow_table *ft = flow_table; - - mlx5_del_flow_entry_cmd(ft, flow_index); - mlx5_free_flow_index(ft, flow_index); -} -EXPORT_SYMBOL(mlx5_del_flow_table_entry); - -void *mlx5_create_flow_table(struct mlx5_core_dev *dev, u8 level, u8 table_type, - u16 num_groups, - struct mlx5_flow_table_group *group) -{ - struct mlx5_flow_table *ft; - u32 start_ix = 0; - u32 ft_size = 0; - void *gr; - void *bm; - int err; - int i; - - for (i = 0; i < num_groups; i++) - ft_size += (1 << group[i].log_sz); - - ft = kzalloc(sizeof(*ft), GFP_KERNEL); - gr = kcalloc(num_groups, sizeof(struct mlx5_ftg), GFP_KERNEL); - bm = kcalloc(BITS_TO_LONGS(ft_size), sizeof(uintptr_t), GFP_KERNEL); - if (!ft || !gr || !bm) - 
goto err_free_ft; - - ft->group = gr; - ft->bitmap = bm; - ft->num_groups = num_groups; - ft->level = level; - ft->type = table_type; - ft->size = ft_size; - ft->dev = dev; - mutex_init(&ft->mutex); - - for (i = 0; i < ft->num_groups; i++) { - memcpy(&ft->group[i].g, &group[i], sizeof(*group)); - ft->group[i].start_ix = start_ix; - start_ix += 1 << group[i].log_sz; - } - - err = mlx5_create_flow_table_cmd(ft); - if (err) - goto err_free_ft; - - err = mlx5_create_flow_table_groups(ft); - if (err) - goto err_destroy_flow_table_cmd; - - return ft; - -err_destroy_flow_table_cmd: - mlx5_destroy_flow_table_cmd(ft); - -err_free_ft: - mlx5_core_warn(dev, "failed to alloc flow table\n"); - kfree(bm); - kfree(gr); - kfree(ft); - - return NULL; -} -EXPORT_SYMBOL(mlx5_create_flow_table); - -void mlx5_destroy_flow_table(void *flow_table) -{ - struct mlx5_flow_table *ft = flow_table; - - mlx5_destroy_flow_table_groups(ft); - mlx5_destroy_flow_table_cmd(ft); - kfree(ft->bitmap); - kfree(ft->group); - kfree(ft); -} -EXPORT_SYMBOL(mlx5_destroy_flow_table); - -u32 mlx5_get_flow_table_id(void *flow_table) -{ - struct mlx5_flow_table *ft = flow_table; - - return ft->id; -} -EXPORT_SYMBOL(mlx5_get_flow_table_id); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 4264e8b34b76..f7d62fe595f6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -436,9 +436,9 @@ static struct mlx5_flow_table *alloc_flow_table(int level, int max_fte, return ft; } -static struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns, - int prio, - int max_fte) +struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns, + int prio, + int max_fte) { struct mlx5_flow_table *ft; int err; @@ -491,8 +491,8 @@ unlock_prio: return ERR_PTR(err); } -static struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft, - u32 *fg_in) +struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft, + u32 *fg_in) { struct mlx5_flow_group *fg; struct mlx5_core_dev *dev = get_dev(&ft->node); @@ -669,7 +669,7 @@ unlock_fg: return rule; } -static struct mlx5_flow_rule * +struct mlx5_flow_rule * mlx5_add_flow_rule(struct mlx5_flow_table *ft, u8 match_criteria_enable, u32 *match_criteria, @@ -699,12 +699,12 @@ put: return rule; } -static void mlx5_del_flow_rule(struct mlx5_flow_rule *rule) +void mlx5_del_flow_rule(struct mlx5_flow_rule *rule) { tree_remove_node(&rule->node); } -static int mlx5_destroy_flow_table(struct mlx5_flow_table *ft) +int mlx5_destroy_flow_table(struct mlx5_flow_table *ft) { if (tree_remove_node(&ft->node)) mlx5_core_warn(get_dev(&ft->node), "Flow table %d wasn't destroyed, refcount > 1\n", @@ -713,15 +713,15 @@ static int mlx5_destroy_flow_table(struct mlx5_flow_table *ft) return 0; } -static void mlx5_destroy_flow_group(struct mlx5_flow_group *fg) +void mlx5_destroy_flow_group(struct mlx5_flow_group *fg) { if (tree_remove_node(&fg->node)) mlx5_core_warn(get_dev(&fg->node), "Flow group %d wasn't destroyed, refcount > 1\n", fg->id); } -static struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, - enum mlx5_flow_namespace_type type) +struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, + enum mlx5_flow_namespace_type type) { struct mlx5_flow_root_namespace *root_ns = dev->priv.root_ns; int prio; @@ -867,7 +867,7 @@ static struct mlx5_flow_root_namespace *create_root_ns(struct 
mlx5_core_dev *dev struct mlx5_flow_root_namespace *root_ns; struct mlx5_flow_namespace *ns; - /* create the root namespace */ + /* Create the root namespace */ root_ns = mlx5_vzalloc(sizeof(*root_ns)); if (!root_ns) return NULL; @@ -1018,7 +1018,7 @@ static int init_fdb_root_ns(struct mlx5_core_dev *dev) if (!dev->priv.fdb_root_ns) return -ENOMEM; - /* create 1 prio*/ + /* Create single prio */ prio = fs_create_prio(&dev->priv.fdb_root_ns->ns, 0, 1, 0); if (IS_ERR(prio)) { cleanup_single_prio_root_ns(dev, dev->priv.fdb_root_ns); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index c6de3240f76f..789882b7b711 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -49,6 +49,7 @@ #include #include #include "mlx5_core.h" +#include "fs_core.h" #ifdef CONFIG_MLX5_CORE_EN #include "eswitch.h" #endif @@ -1055,6 +1056,11 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) mlx5_init_srq_table(dev); mlx5_init_mr_table(dev); + err = mlx5_init_fs(dev); + if (err) { + dev_err(&pdev->dev, "Failed to init flow steering\n"); + goto err_fs; + } #ifdef CONFIG_MLX5_CORE_EN err = mlx5_eswitch_init(dev); if (err) { @@ -1093,6 +1099,8 @@ err_sriov: mlx5_eswitch_cleanup(dev->priv.eswitch); #endif err_reg_dev: + mlx5_cleanup_fs(dev); +err_fs: mlx5_cleanup_mr_table(dev); mlx5_cleanup_srq_table(dev); mlx5_cleanup_qp_table(dev); @@ -1165,6 +1173,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) mlx5_eswitch_cleanup(dev->priv.eswitch); #endif + mlx5_cleanup_fs(dev); mlx5_cleanup_mr_table(dev); mlx5_cleanup_srq_table(dev); mlx5_cleanup_qp_table(dev); diff --git a/include/linux/mlx5/flow_table.h b/include/linux/mlx5/flow_table.h deleted file mode 100644 index 0f2a15cf3317..000000000000 --- a/include/linux/mlx5/flow_table.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef MLX5_FLOW_TABLE_H -#define MLX5_FLOW_TABLE_H - -#include - -struct mlx5_flow_table_group { - u8 log_sz; - u8 match_criteria_enable; - u32 match_criteria[MLX5_ST_SZ_DW(fte_match_param)]; -}; - -struct mlx5_flow_destination { - enum mlx5_flow_destination_type type; - union { - u32 tir_num; - void *ft; - u32 vport_num; - }; -}; - -void *mlx5_create_flow_table(struct mlx5_core_dev *dev, u8 level, u8 table_type, - u16 num_groups, - struct mlx5_flow_table_group *group); -void mlx5_destroy_flow_table(void *flow_table); -int mlx5_add_flow_table_entry(void *flow_table, u8 match_criteria_enable, - void *match_criteria, void *flow_context, - u32 *flow_index); -void mlx5_del_flow_table_entry(void *flow_table, u32 flow_index); -u32 mlx5_get_flow_table_id(void *flow_table); - -#endif /* MLX5_FLOW_TABLE_H */ diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 16ae5233dc7b..bc7ad019afde 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -33,6 +33,7 @@ #ifndef _MLX5_FS_ #define _MLX5_FS_ +#include #include #define MLX5_FS_DEFAULT_FLOW_TAG 0x0 @@ -43,6 +44,9 @@ enum mlx5_flow_namespace_type { }; struct mlx5_flow_table; +struct mlx5_flow_group; +struct mlx5_flow_rule; +struct mlx5_flow_namespace; struct mlx5_flow_destination { enum mlx5_flow_destination_type type; @@ -52,4 +56,38 @@ struct mlx5_flow_destination { u32 vport_num; }; }; + +struct mlx5_flow_namespace * +mlx5_get_flow_namespace(struct mlx5_core_dev *dev, + enum mlx5_flow_namespace_type type); + +struct mlx5_flow_table * +mlx5_create_flow_table(struct mlx5_flow_namespace *ns, + int prio, + int num_flow_table_entries); +int mlx5_destroy_flow_table(struct mlx5_flow_table *ft); + +/* inbox should be set with the following values: + * start_flow_index + * end_flow_index + * match_criteria_enable + * match_criteria + */ +struct mlx5_flow_group * +mlx5_create_flow_group(struct mlx5_flow_table *ft, u32 *in); +void mlx5_destroy_flow_group(struct mlx5_flow_group *fg); + +/* Single destination per rule. + * Group ID is implied by the match criteria. + */ +struct mlx5_flow_rule * +mlx5_add_flow_rule(struct mlx5_flow_table *ft, + u8 match_criteria_enable, + u32 *match_criteria, + u32 *match_value, + u32 action, + u32 flow_tag, + struct mlx5_flow_destination *dest); +void mlx5_del_flow_rule(struct mlx5_flow_rule *fr); + #endif -- cgit v1.2.3-71-gd317 From bda13fed677bdb423b97dcf054f68b9eb4c6dbfb Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Sun, 13 Dec 2015 16:53:02 +0900 Subject: net: Fix typo in skb_fclone_busy This patch fix a typo found within comment of skb_fclone_busy. Signed-off-by: Masanari Iida Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9b9b9ead7bb3..af4f6ac025b6 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -833,7 +833,7 @@ struct sk_buff_fclones { * skb_fclone_busy - check if fclone is busy * @skb: buffer * - * Returns true is skb is a fast clone, and its clone is not freed. + * Returns true if skb is a fast clone, and its clone is not freed. * Some drivers call skb_orphan() in their ndo_start_xmit(), * so we also check that this didnt happen. 
*/ -- cgit v1.2.3-71-gd317 From 55dc5a9f2f2afd32d7b1bda44a5fc95e67a3371f Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 14 Dec 2015 11:19:40 -0800 Subject: net: Add skb_inner_transport_offset function Same thing as skb_transport_offset but returns the offset of the inner transport header (when skb->encpasulation is set). Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/skbuff.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index af4f6ac025b6..2393373c9d08 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1939,6 +1939,11 @@ static inline unsigned char *skb_inner_transport_header(const struct sk_buff return skb->head + skb->inner_transport_header; } +static inline int skb_inner_transport_offset(const struct sk_buff *skb) +{ + return skb_inner_transport_header(skb) - skb->data; +} + static inline void skb_reset_inner_transport_header(struct sk_buff *skb) { skb->inner_transport_header = skb->data - skb->head; -- cgit v1.2.3-71-gd317 From 53692b1de419c1b59106909c7f6b4dd3dbc768ac Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 14 Dec 2015 11:19:41 -0800 Subject: sctp: Rename NETIF_F_SCTP_CSUM to NETIF_F_SCTP_CRC The SCTP checksum is really a CRC and is very different from the standards 1's complement checksum that serves as the checksum for IP protocols. This offload interface is also very different. Rename NETIF_F_SCTP_CSUM to NETIF_F_SCTP_CRC to highlight these differences. The term CSUM should be reserved in the stack to refer to the standard 1's complement IP checksum. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +- drivers/net/ethernet/intel/i40evf/i40evf_main.c | 2 +- drivers/net/ethernet/intel/igb/igb_main.c | 4 ++-- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 4 ++-- drivers/net/loopback.c | 2 +- include/linux/netdev_features.h | 4 ++-- net/8021q/vlan_dev.c | 2 +- net/core/ethtool.c | 4 ++-- net/netfilter/ipvs/ip_vs_proto_sctp.c | 2 +- net/sctp/output.c | 2 +- 10 files changed, 14 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index b118deb08ce6..a63d980f478e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -8842,7 +8842,7 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) netdev->features = NETIF_F_SG | NETIF_F_IP_CSUM | - NETIF_F_SCTP_CSUM | + NETIF_F_SCTP_CRC | NETIF_F_HIGHDMA | NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_GRE | diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 455394cf7f80..4d05ff6f0423 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -2321,7 +2321,7 @@ int i40evf_process_config(struct i40evf_adapter *adapter) netdev->features |= NETIF_F_HIGHDMA | NETIF_F_SG | NETIF_F_IP_CSUM | - NETIF_F_SCTP_CSUM | + NETIF_F_SCTP_CRC | NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6 | diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 7afde455326d..31e5f3942839 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -2379,8 +2379,8 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } if (hw->mac.type >= e1000_82576) { - netdev->hw_features |= 
NETIF_F_SCTP_CSUM; - netdev->features |= NETIF_F_SCTP_CSUM; + netdev->hw_features |= NETIF_F_SCTP_CRC; + netdev->features |= NETIF_F_SCTP_CRC; } netdev->priv_flags |= IFF_UNICAST_FLT; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 66c64a376719..9f27001cac1f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -8995,8 +8995,8 @@ skip_sriov: case ixgbe_mac_X540: case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: - netdev->features |= NETIF_F_SCTP_CSUM; - netdev->hw_features |= NETIF_F_SCTP_CSUM | + netdev->features |= NETIF_F_SCTP_CRC; + netdev->hw_features |= NETIF_F_SCTP_CRC | NETIF_F_NTUPLE; break; default: diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index dc7d970bd1c0..a400288cb37b 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -175,7 +175,7 @@ static void loopback_setup(struct net_device *dev) | NETIF_F_UFO | NETIF_F_HW_CSUM | NETIF_F_RXCSUM - | NETIF_F_SCTP_CSUM + | NETIF_F_SCTP_CRC | NETIF_F_HIGHDMA | NETIF_F_LLTX | NETIF_F_NETNS_LOCAL diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index f0d87347df19..6395f8309393 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -52,7 +52,7 @@ enum { NETIF_F_GSO_TUNNEL_REMCSUM_BIT, NETIF_F_FCOE_CRC_BIT, /* FCoE CRC32 */ - NETIF_F_SCTP_CSUM_BIT, /* SCTP checksum offload */ + NETIF_F_SCTP_CRC_BIT, /* SCTP checksum offload */ NETIF_F_FCOE_MTU_BIT, /* Supports max FCoE MTU, 2158 bytes*/ NETIF_F_NTUPLE_BIT, /* N-tuple filters supported */ NETIF_F_RXHASH_BIT, /* Receive hashing offload */ @@ -103,7 +103,7 @@ enum { #define NETIF_F_NTUPLE __NETIF_F(NTUPLE) #define NETIF_F_RXCSUM __NETIF_F(RXCSUM) #define NETIF_F_RXHASH __NETIF_F(RXHASH) -#define NETIF_F_SCTP_CSUM __NETIF_F(SCTP_CSUM) +#define NETIF_F_SCTP_CRC __NETIF_F(SCTP_CRC) #define NETIF_F_SG __NETIF_F(SG) #define NETIF_F_TSO6 __NETIF_F(TSO6) #define NETIF_F_TSO_ECN __NETIF_F(TSO_ECN) diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 9f4bd137e045..45b74e875381 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -545,7 +545,7 @@ static int vlan_dev_init(struct net_device *dev) dev->hw_features = NETIF_F_ALL_CSUM | NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_GSO_SOFTWARE | - NETIF_F_HIGHDMA | NETIF_F_SCTP_CSUM | + NETIF_F_HIGHDMA | NETIF_F_SCTP_CRC | NETIF_F_ALL_FCOE; dev->features |= real_dev->vlan_features | NETIF_F_LLTX | diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 29edf74846fc..4a0cab85d67d 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -87,7 +87,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation", [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc", - [NETIF_F_SCTP_CSUM_BIT] = "tx-checksum-sctp", + [NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp", [NETIF_F_FCOE_MTU_BIT] = "fcoe-mtu", [NETIF_F_NTUPLE_BIT] = "rx-ntuple-filter", [NETIF_F_RXHASH_BIT] = "rx-hashing", @@ -235,7 +235,7 @@ static netdev_features_t ethtool_get_feature_mask(u32 eth_cmd) switch (eth_cmd) { case ETHTOOL_GTXCSUM: case ETHTOOL_STXCSUM: - return NETIF_F_ALL_CSUM | NETIF_F_SCTP_CSUM; + return NETIF_F_ALL_CSUM | NETIF_F_SCTP_CRC; case ETHTOOL_GRXCSUM: case ETHTOOL_SRXCSUM: return NETIF_F_RXCSUM; diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 010ddeec135f..d952d67f904d 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ 
b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -169,7 +169,7 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, /* Only update csum if we really have to */ if (sctph->dest != cp->dport || payload_csum || (skb->ip_summed == CHECKSUM_PARTIAL && - !(skb_dst(skb)->dev->features & NETIF_F_SCTP_CSUM))) { + !(skb_dst(skb)->dev->features & NETIF_F_SCTP_CRC))) { sctph->dest = cp->dport; sctp_nat_csum(skb, sctph, sctphoff); } else if (skb->ip_summed != CHECKSUM_PARTIAL) { diff --git a/net/sctp/output.c b/net/sctp/output.c index abe7c2db2412..9d610eddd19e 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -534,7 +534,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) * by CRC32-C as described in . */ if (!sctp_checksum_disable) { - if (!(dst->dev->features & NETIF_F_SCTP_CSUM) || + if (!(dst->dev->features & NETIF_F_SCTP_CRC) || (dst_xfrm(dst) != NULL) || packet->ipfragok) { sh->checksum = sctp_compute_cksum(nskb, 0); } else { -- cgit v1.2.3-71-gd317 From a188222b6ed29404ac2d4232d35d1fe0e77af370 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 14 Dec 2015 11:19:43 -0800 Subject: net: Rename NETIF_F_ALL_CSUM to NETIF_F_CSUM_MASK The name NETIF_F_ALL_CSUM is a misnomer. This does not correspond to the set of features for offloading all checksums. This is a mask of the checksum offload related features bits. It is incorrect to set both NETIF_F_HW_CSUM and NETIF_F_IP_CSUM or NETIF_F_IPV6 at the same time for features of a device. This patch: - Changes instances of NETIF_F_ALL_CSUM to NETIF_F_CSUM_MASK (where NETIF_F_ALL_CSUM is being used as a mask). - Changes bonding, sfc/efx, ipvlan, macvlan, vlan, and team drivers to use NEITF_F_HW_CSUM in features list instead of NETIF_F_ALL_CSUM. Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- drivers/net/bonding/bond_main.c | 7 +++---- drivers/net/ethernet/emulex/benet/be_main.c | 2 +- drivers/net/ethernet/ibm/ibmveth.c | 5 +++-- drivers/net/ethernet/intel/fm10k/fm10k_netdev.c | 2 +- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 2 +- drivers/net/ethernet/jme.c | 2 +- drivers/net/ethernet/marvell/sky2.c | 2 +- drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 4 ++-- drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_param.c | 2 +- drivers/net/ethernet/sfc/efx.c | 2 +- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 4 ++-- drivers/net/ipvlan/ipvlan_main.c | 2 +- drivers/net/macvlan.c | 2 +- drivers/net/macvtap.c | 2 +- drivers/net/team/team.c | 3 +-- drivers/net/usb/r8152.c | 2 +- drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c | 2 +- include/linux/netdev_features.h | 7 ++++++- include/linux/netdevice.h | 6 +++--- include/net/vxlan.h | 2 +- net/8021q/vlan_dev.c | 2 +- net/core/dev.c | 10 +++++----- net/core/ethtool.c | 2 +- net/ipv4/tcp.c | 4 ++-- 25 files changed, 43 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index fe0e7a6f4d72..cab99fd44c8e 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1067,12 +1067,12 @@ static netdev_features_t bond_fix_features(struct net_device *dev, return features; } -#define BOND_VLAN_FEATURES (NETIF_F_ALL_CSUM | NETIF_F_SG | \ +#define BOND_VLAN_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \ NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | \ NETIF_F_HIGHDMA | NETIF_F_LRO) -#define BOND_ENC_FEATURES (NETIF_F_ALL_CSUM | NETIF_F_SG | NETIF_F_RXCSUM |\ - NETIF_F_ALL_TSO) +#define BOND_ENC_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \ + NETIF_F_RXCSUM | NETIF_F_ALL_TSO) static void bond_compute_features(struct bonding *bond) { @@ -4182,7 +4182,6 @@ void bond_setup(struct net_device *bond_dev) NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER; - bond_dev->hw_features &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_HW_CSUM); bond_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL; bond_dev->features |= bond_dev->hw_features; } diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 4cab8879f5ae..34e324f20d80 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -5289,7 +5289,7 @@ static netdev_features_t be_features_check(struct sk_buff *skb, skb->inner_protocol != htons(ETH_P_TEB) || skb_inner_mac_header(skb) - skb_transport_header(skb) != sizeof(struct udphdr) + sizeof(struct vxlanhdr)) - return features & ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK); + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); return features; } diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index 7af870a3c549..6691b5a45b9d 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -763,7 +763,7 @@ static netdev_features_t ibmveth_fix_features(struct net_device *dev, */ if (!(features & NETIF_F_RXCSUM)) - features &= ~NETIF_F_ALL_CSUM; + features &= ~NETIF_F_CSUM_MASK; return features; } @@ -928,7 +928,8 @@ static int ibmveth_set_features(struct net_device *dev, rc1 = ibmveth_set_csum_offload(dev, rx_csum); if (rc1 && !adapter->rx_csum) dev->features = - features & ~(NETIF_F_ALL_CSUM | NETIF_F_RXCSUM); + features & ~(NETIF_F_CSUM_MASK | + NETIF_F_RXCSUM); } if (large_send != adapter->large_send) { diff --git 
a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c index d9854d39576d..83ddf362ea77 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c @@ -1357,7 +1357,7 @@ static netdev_features_t fm10k_features_check(struct sk_buff *skb, if (!skb->encapsulation || fm10k_tx_encap_offload(skb)) return features; - return features & ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK); + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); } static const struct net_device_ops fm10k_netdev_ops = { diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index a63d980f478e..c284e4341c7c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -8766,7 +8766,7 @@ static netdev_features_t i40e_features_check(struct sk_buff *skb, if (skb->encapsulation && (skb_inner_mac_header(skb) - skb_transport_header(skb) > I40E_MAX_TUNNEL_HDR_LEN)) - return features & ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK); + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); return features; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 9f27001cac1f..fca35aa90d0f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -8598,7 +8598,7 @@ ixgbe_features_check(struct sk_buff *skb, struct net_device *dev, if (unlikely(skb_inner_mac_header(skb) - skb_transport_header(skb) > IXGBE_MAX_TUNNEL_HDR_LEN)) - return features & ~NETIF_F_ALL_CSUM; + return features & ~NETIF_F_CSUM_MASK; return features; } diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c index 060dd3922974..b1de7afd4116 100644 --- a/drivers/net/ethernet/jme.c +++ b/drivers/net/ethernet/jme.c @@ -2753,7 +2753,7 @@ static netdev_features_t jme_fix_features(struct net_device *netdev, netdev_features_t features) { if (netdev->mtu > 1900) - features &= ~(NETIF_F_ALL_TSO | NETIF_F_ALL_CSUM); + features &= ~(NETIF_F_ALL_TSO | NETIF_F_CSUM_MASK); return features; } diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index 5606a043063e..ec0a22119e09 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -4380,7 +4380,7 @@ static netdev_features_t sky2_fix_features(struct net_device *dev, */ if (dev->mtu > ETH_DATA_LEN && hw->chip_id == CHIP_ID_YUKON_EC_U) { netdev_info(dev, "checksum offload not possible with jumbo frames\n"); - features &= ~(NETIF_F_TSO|NETIF_F_SG|NETIF_F_ALL_CSUM); + features &= ~(NETIF_F_TSO | NETIF_F_SG | NETIF_F_CSUM_MASK); } /* Some hardware requires receive checksum for RSS to work. 
*/ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 038ac6b14a60..7060539d276a 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -2071,7 +2071,7 @@ nfp_net_features_check(struct sk_buff *skb, struct net_device *dev, l4_hdr = ipv6_hdr(skb)->nexthdr; break; default: - return features & ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK); + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); } if (skb->inner_protocol_type != ENCAP_TYPE_ETHER || @@ -2080,7 +2080,7 @@ nfp_net_features_check(struct sk_buff *skb, struct net_device *dev, (l4_hdr == IPPROTO_UDP && (skb_inner_mac_header(skb) - skb_transport_header(skb) != sizeof(struct udphdr) + sizeof(struct vxlanhdr)))) - return features & ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK); + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); return features; } diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_param.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_param.c index 08d4be616064..e097e6baaac4 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_param.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_param.c @@ -500,7 +500,7 @@ void pch_gbe_check_options(struct pch_gbe_adapter *adapter) val = XsumTX; pch_gbe_validate_option(&val, &opt, adapter); if (!val) - dev->features &= ~NETIF_F_ALL_CSUM; + dev->features &= ~NETIF_F_CSUM_MASK; } { /* Flow Control */ static const struct pch_gbe_option opt = { diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index b405349a570c..1fe13c733c1e 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -3131,7 +3131,7 @@ static int efx_pci_probe(struct pci_dev *pci_dev, if (efx->type->offload_features & NETIF_F_V6_CSUM) net_dev->features |= NETIF_F_TSO6; /* Mask for features that also apply to VLAN devices */ - net_dev->vlan_features |= (NETIF_F_ALL_CSUM | NETIF_F_SG | + net_dev->vlan_features |= (NETIF_F_HW_CSUM | NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_ALL_TSO | NETIF_F_RXCSUM); /* All offloads can be toggled */ diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 3c6549aee11d..0b0fea73a7a7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2402,7 +2402,7 @@ static netdev_features_t stmmac_fix_features(struct net_device *dev, features &= ~NETIF_F_RXCSUM; if (!priv->plat->tx_coe) - features &= ~NETIF_F_ALL_CSUM; + features &= ~NETIF_F_CSUM_MASK; /* Some GMAC devices have a bugged Jumbo frame support that * needs to have the Tx COE disabled for oversized frames @@ -2410,7 +2410,7 @@ static netdev_features_t stmmac_fix_features(struct net_device *dev, * the TX csum insertionin the TDES and not use SF. 
*/ if (priv->plat->bugged_jumbo && (dev->mtu > ETH_DATA_LEN)) - features &= ~NETIF_F_ALL_CSUM; + features &= ~NETIF_F_CSUM_MASK; return features; } diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index a9268db4e349..f94392d07126 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -88,7 +88,7 @@ static struct lock_class_key ipvlan_netdev_xmit_lock_key; static struct lock_class_key ipvlan_netdev_addr_lock_key; #define IPVLAN_FEATURES \ - (NETIF_F_SG | NETIF_F_ALL_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \ + (NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \ NETIF_F_GSO | NETIF_F_TSO | NETIF_F_UFO | NETIF_F_GSO_ROBUST | \ NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_GRO | NETIF_F_RXCSUM | \ NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 06c8bfeaccd6..ae3b486fb663 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -762,7 +762,7 @@ static struct lock_class_key macvlan_netdev_addr_lock_key; NETIF_F_GSO_ROBUST) #define MACVLAN_FEATURES \ - (NETIF_F_SG | NETIF_F_ALL_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \ + (NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \ NETIF_F_GSO | NETIF_F_TSO | NETIF_F_UFO | NETIF_F_LRO | \ NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_GRO | NETIF_F_RXCSUM | \ NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER) diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 0fc521941c71..d636d051fac8 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -388,7 +388,7 @@ static rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb) * check, we either support them all or none. */ if (skb->ip_summed == CHECKSUM_PARTIAL && - !(features & NETIF_F_ALL_CSUM) && + !(features & NETIF_F_CSUM_MASK) && skb_checksum_help(skb)) goto drop; skb_queue_tail(&q->sk.sk_receive_queue, skb); diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 059c0f60a2b2..915f60fce186 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -981,7 +981,7 @@ static void team_port_disable(struct team *team, team_lower_state_changed(port); } -#define TEAM_VLAN_FEATURES (NETIF_F_ALL_CSUM | NETIF_F_SG | \ +#define TEAM_VLAN_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \ NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | \ NETIF_F_HIGHDMA | NETIF_F_LRO) @@ -2091,7 +2091,6 @@ static void team_setup(struct net_device *dev) NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER; - dev->hw_features &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_HW_CSUM); dev->features |= dev->hw_features; } diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index d9427ca3dba7..34642a9583e0 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -1986,7 +1986,7 @@ rtl8152_features_check(struct sk_buff *skb, struct net_device *dev, int offset = skb_transport_offset(skb); if ((mss || skb->ip_summed == CHECKSUM_PARTIAL) && offset > max_offset) - features &= ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK); + features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); else if ((skb->len + sizeof(struct tx_desc)) > agg_buf_sz) features &= ~NETIF_F_GSO_MASK; diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c index 679785b0209c..9de4f23910d8 100644 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c +++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c @@ -69,7 +69,7 @@ ksocknal_lib_zc_capable(ksock_conn_t *conn) /* ZC if 
the socket supports scatter/gather and doesn't need software * checksums */ - return ((caps & NETIF_F_SG) != 0 && (caps & NETIF_F_ALL_CSUM) != 0); + return ((caps & NETIF_F_SG) != 0 && (caps & NETIF_F_CSUM_MASK) != 0); } int diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 6395f8309393..2c4e94ab88da 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -149,7 +149,12 @@ enum { #define NETIF_F_GEN_CSUM NETIF_F_HW_CSUM #define NETIF_F_V4_CSUM (NETIF_F_GEN_CSUM | NETIF_F_IP_CSUM) #define NETIF_F_V6_CSUM (NETIF_F_GEN_CSUM | NETIF_F_IPV6_CSUM) -#define NETIF_F_ALL_CSUM (NETIF_F_V4_CSUM | NETIF_F_V6_CSUM) + +/* List of IP checksum features. Note that NETIF_HW_CSUM should not be + * set in features when NETIF_F_IP_CSUM or NETIF_F_IPV6_CSUM are set-- + * this would be contradictory + */ +#define NETIF_F_CSUM_MASK (NETIF_F_V4_CSUM | NETIF_F_V6_CSUM) #define NETIF_F_ALL_TSO (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1bb21ff0fa64..a54223a113b1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3763,12 +3763,12 @@ static inline netdev_features_t netdev_intersect_features(netdev_features_t f1, netdev_features_t f2) { if (f1 & NETIF_F_GEN_CSUM) - f1 |= (NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM); + f1 |= (NETIF_F_CSUM_MASK & ~NETIF_F_GEN_CSUM); if (f2 & NETIF_F_GEN_CSUM) - f2 |= (NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM); + f2 |= (NETIF_F_CSUM_MASK & ~NETIF_F_GEN_CSUM); f1 &= f2; if (f1 & NETIF_F_GEN_CSUM) - f1 &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM); + f1 &= ~(NETIF_F_CSUM_MASK & ~NETIF_F_GEN_CSUM); return f1; } diff --git a/include/net/vxlan.h b/include/net/vxlan.h index c1c899c3a51b..b5a1aec1a167 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -232,7 +232,7 @@ static inline netdev_features_t vxlan_features_check(struct sk_buff *skb, skb->inner_protocol != htons(ETH_P_TEB) || (skb_inner_mac_header(skb) - skb_transport_header(skb) != sizeof(struct udphdr) + sizeof(struct vxlanhdr)))) - return features & ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK); + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); return features; } diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 45b74e875381..ad5e2fd1012c 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -543,7 +543,7 @@ static int vlan_dev_init(struct net_device *dev) (1<<__LINK_STATE_DORMANT))) | (1<<__LINK_STATE_PRESENT); - dev->hw_features = NETIF_F_ALL_CSUM | NETIF_F_SG | + dev->hw_features = NETIF_F_HW_CSUM | NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_GSO_SOFTWARE | NETIF_F_HIGHDMA | NETIF_F_SCTP_CRC | NETIF_F_ALL_FCOE; diff --git a/net/core/dev.c b/net/core/dev.c index 8f705fcedb94..5a3b5a404642 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2645,7 +2645,7 @@ static netdev_features_t harmonize_features(struct sk_buff *skb, if (skb->ip_summed != CHECKSUM_NONE && !can_checksum_protocol(features, type)) { - features &= ~NETIF_F_ALL_CSUM; + features &= ~NETIF_F_CSUM_MASK; } else if (illegal_highdma(skb->dev, skb)) { features &= ~NETIF_F_SG; } @@ -2792,7 +2792,7 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device else skb_set_transport_header(skb, skb_checksum_start_offset(skb)); - if (!(features & NETIF_F_ALL_CSUM) && + if (!(features & NETIF_F_CSUM_MASK) && skb_checksum_help(skb)) goto out_kfree_skb; } @@ -7572,15 +7572,15 @@ netdev_features_t netdev_increment_features(netdev_features_t all, netdev_features_t one, 
netdev_features_t mask) { if (mask & NETIF_F_GEN_CSUM) - mask |= NETIF_F_ALL_CSUM; + mask |= NETIF_F_CSUM_MASK; mask |= NETIF_F_VLAN_CHALLENGED; - all |= one & (NETIF_F_ONE_FOR_ALL|NETIF_F_ALL_CSUM) & mask; + all |= one & (NETIF_F_ONE_FOR_ALL | NETIF_F_CSUM_MASK) & mask; all &= one | ~NETIF_F_ALL_FOR_ALL; /* If one device supports hw checksumming, set for all. */ if (all & NETIF_F_GEN_CSUM) - all &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM); + all &= ~(NETIF_F_CSUM_MASK & ~NETIF_F_GEN_CSUM); return all; } diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 4a0cab85d67d..09948a726347 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -235,7 +235,7 @@ static netdev_features_t ethtool_get_feature_mask(u32 eth_cmd) switch (eth_cmd) { case ETHTOOL_GTXCSUM: case ETHTOOL_STXCSUM: - return NETIF_F_ALL_CSUM | NETIF_F_SCTP_CRC; + return NETIF_F_CSUM_MASK | NETIF_F_SCTP_CRC; case ETHTOOL_GRXCSUM: case ETHTOOL_SRXCSUM: return NETIF_F_RXCSUM; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c82cca18c90f..cf7ef7be79f0 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1018,7 +1018,7 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset, ssize_t res; if (!(sk->sk_route_caps & NETIF_F_SG) || - !(sk->sk_route_caps & NETIF_F_ALL_CSUM)) + !(sk->sk_route_caps & NETIF_F_CSUM_MASK)) return sock_no_sendpage(sk->sk_socket, page, offset, size, flags); @@ -1175,7 +1175,7 @@ new_segment: /* * Check whether we can use HW checksum. */ - if (sk->sk_route_caps & NETIF_F_ALL_CSUM) + if (sk->sk_route_caps & NETIF_F_CSUM_MASK) skb->ip_summed = CHECKSUM_PARTIAL; skb_entail(sk, skb); -- cgit v1.2.3-71-gd317 From c8cd0989bd151fda87bbf10887b3df18021284bc Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 14 Dec 2015 11:19:44 -0800 Subject: net: Eliminate NETIF_F_GEN_CSUM and NETIF_F_V[46]_CSUM These netif flags are unnecessary convolutions. It is more straightforward to just use NETIF_F_HW_CSUM, NETIF_F_IP_CSUM, and NETIF_F_IPV6_CSUM directly. This patch also: - Cleans up can_checksum_protocol - Simplifies netdev_intersect_features Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- drivers/net/ethernet/sfc/efx.c | 2 +- drivers/net/macvlan.c | 2 +- include/linux/if_vlan.h | 2 +- include/linux/netdev_features.h | 9 +++---- include/linux/netdevice.h | 40 ++++++++++++++++++++------------ net/core/dev.c | 12 +++++----- net/core/pktgen.c | 4 ++-- net/ipv4/ip_output.c | 2 +- net/ipv4/netfilter/nf_nat_l3proto_ipv4.c | 3 ++- net/ipv4/udp.c | 3 ++- net/ipv4/udp_offload.c | 5 ++-- net/ipv6/ip6_output.c | 2 +- net/ipv6/netfilter/nf_nat_l3proto_ipv6.c | 3 ++- 13 files changed, 50 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 1fe13c733c1e..6f697438545d 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -3128,7 +3128,7 @@ static int efx_pci_probe(struct pci_dev *pci_dev, net_dev->features |= (efx->type->offload_features | NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_TSO | NETIF_F_RXCSUM); - if (efx->type->offload_features & NETIF_F_V6_CSUM) + if (efx->type->offload_features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM)) net_dev->features |= NETIF_F_TSO6; /* Mask for features that also apply to VLAN devices */ net_dev->vlan_features |= (NETIF_F_HW_CSUM | NETIF_F_SG | diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index ae3b486fb663..6a57a005e0ca 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -758,7 +758,7 @@ static struct lock_class_key macvlan_netdev_xmit_lock_key; static struct lock_class_key macvlan_netdev_addr_lock_key; #define ALWAYS_ON_FEATURES \ - (NETIF_F_SG | NETIF_F_GEN_CSUM | NETIF_F_GSO_SOFTWARE | NETIF_F_LLTX | \ + (NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE | NETIF_F_LLTX | \ NETIF_F_GSO_ROBUST) #define MACVLAN_FEATURES \ diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 05f5879821b8..a5f6ce6b578c 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -621,7 +621,7 @@ static inline netdev_features_t vlan_features_check(const struct sk_buff *skb, NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | - NETIF_F_GEN_CSUM | + NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX); diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 2c4e94ab88da..d9654f0eecb3 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -146,15 +146,12 @@ enum { #define NETIF_F_GSO_SOFTWARE (NETIF_F_TSO | NETIF_F_TSO_ECN | \ NETIF_F_TSO6 | NETIF_F_UFO) -#define NETIF_F_GEN_CSUM NETIF_F_HW_CSUM -#define NETIF_F_V4_CSUM (NETIF_F_GEN_CSUM | NETIF_F_IP_CSUM) -#define NETIF_F_V6_CSUM (NETIF_F_GEN_CSUM | NETIF_F_IPV6_CSUM) - -/* List of IP checksum features. Note that NETIF_HW_CSUM should not be +/* List of IP checksum features. 
Note that NETIF_F_ HW_CSUM should not be * set in features when NETIF_F_IP_CSUM or NETIF_F_IPV6_CSUM are set-- * this would be contradictory */ -#define NETIF_F_CSUM_MASK (NETIF_F_V4_CSUM | NETIF_F_V6_CSUM) +#define NETIF_F_CSUM_MASK (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | \ + NETIF_F_HW_CSUM) #define NETIF_F_ALL_TSO (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a54223a113b1..283984b67cd9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3691,13 +3691,24 @@ __be16 skb_network_protocol(struct sk_buff *skb, int *depth); static inline bool can_checksum_protocol(netdev_features_t features, __be16 protocol) { - return ((features & NETIF_F_GEN_CSUM) || - ((features & NETIF_F_V4_CSUM) && - protocol == htons(ETH_P_IP)) || - ((features & NETIF_F_V6_CSUM) && - protocol == htons(ETH_P_IPV6)) || - ((features & NETIF_F_FCOE_CRC) && - protocol == htons(ETH_P_FCOE))); + if (protocol == htons(ETH_P_FCOE)) + return !!(features & NETIF_F_FCOE_CRC); + + /* Assume this is an IP checksum (not SCTP CRC) */ + + if (features & NETIF_F_HW_CSUM) { + /* Can checksum everything */ + return true; + } + + switch (protocol) { + case htons(ETH_P_IP): + return !!(features & NETIF_F_IP_CSUM); + case htons(ETH_P_IPV6): + return !!(features & NETIF_F_IPV6_CSUM); + default: + return false; + } } #ifdef CONFIG_BUG @@ -3762,15 +3773,14 @@ void linkwatch_run_queue(void); static inline netdev_features_t netdev_intersect_features(netdev_features_t f1, netdev_features_t f2) { - if (f1 & NETIF_F_GEN_CSUM) - f1 |= (NETIF_F_CSUM_MASK & ~NETIF_F_GEN_CSUM); - if (f2 & NETIF_F_GEN_CSUM) - f2 |= (NETIF_F_CSUM_MASK & ~NETIF_F_GEN_CSUM); - f1 &= f2; - if (f1 & NETIF_F_GEN_CSUM) - f1 &= ~(NETIF_F_CSUM_MASK & ~NETIF_F_GEN_CSUM); + if ((f1 ^ f2) & NETIF_F_HW_CSUM) { + if (f1 & NETIF_F_HW_CSUM) + f1 |= (NETIF_F_IP_CSUM|NETIF_F_IP_CSUM); + else + f2 |= (NETIF_F_IP_CSUM|NETIF_F_IP_CSUM); + } - return f1; + return f1 & f2; } static inline netdev_features_t netdev_get_wanted_features( diff --git a/net/core/dev.c b/net/core/dev.c index 5a3b5a404642..45b013f27625 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6467,9 +6467,9 @@ static netdev_features_t netdev_fix_features(struct net_device *dev, /* UFO needs SG and checksumming */ if (features & NETIF_F_UFO) { /* maybe split UFO into V4 and V6? */ - if (!((features & NETIF_F_GEN_CSUM) || - (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)) - == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { + if (!(features & NETIF_F_HW_CSUM) && + ((features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) != + (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))) { netdev_dbg(dev, "Dropping NETIF_F_UFO since no checksum offload features.\n"); features &= ~NETIF_F_UFO; @@ -7571,7 +7571,7 @@ static int dev_cpu_callback(struct notifier_block *nfb, netdev_features_t netdev_increment_features(netdev_features_t all, netdev_features_t one, netdev_features_t mask) { - if (mask & NETIF_F_GEN_CSUM) + if (mask & NETIF_F_HW_CSUM) mask |= NETIF_F_CSUM_MASK; mask |= NETIF_F_VLAN_CHALLENGED; @@ -7579,8 +7579,8 @@ netdev_features_t netdev_increment_features(netdev_features_t all, all &= one | ~NETIF_F_ALL_FOR_ALL; /* If one device supports hw checksumming, set for all. 
*/ - if (all & NETIF_F_GEN_CSUM) - all &= ~(NETIF_F_CSUM_MASK & ~NETIF_F_GEN_CSUM); + if (all & NETIF_F_HW_CSUM) + all &= ~(NETIF_F_CSUM_MASK & ~NETIF_F_HW_CSUM); return all; } diff --git a/net/core/pktgen.c b/net/core/pktgen.c index de8d5cc5eb24..2be144498bcf 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2898,7 +2898,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, if (!(pkt_dev->flags & F_UDPCSUM)) { skb->ip_summed = CHECKSUM_NONE; - } else if (odev->features & NETIF_F_V4_CSUM) { + } else if (odev->features & (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM)) { skb->ip_summed = CHECKSUM_PARTIAL; skb->csum = 0; udp4_hwcsum(skb, iph->saddr, iph->daddr); @@ -3032,7 +3032,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, if (!(pkt_dev->flags & F_UDPCSUM)) { skb->ip_summed = CHECKSUM_NONE; - } else if (odev->features & NETIF_F_V6_CSUM) { + } else if (odev->features & (NETIF_F_HW_CSUM | NETIF_F_IPV6_CSUM)) { skb->ip_summed = CHECKSUM_PARTIAL; skb->csum_start = skb_transport_header(skb) - skb->head; skb->csum_offset = offsetof(struct udphdr, check); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index e0b94cd843d7..568e2bc0d93d 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -911,7 +911,7 @@ static int __ip_append_data(struct sock *sk, */ if (transhdrlen && length + fragheaderlen <= mtu && - rt->dst.dev->features & NETIF_F_V4_CSUM && + rt->dst.dev->features & (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM) && !(flags & MSG_MORE) && !exthdrlen) csummode = CHECKSUM_PARTIAL; diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c index 5075b7ecd26d..61c7cc22ea68 100644 --- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c @@ -132,7 +132,8 @@ static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb, if (skb->ip_summed != CHECKSUM_PARTIAL) { if (!(rt->rt_flags & RTCF_LOCAL) && - (!skb->dev || skb->dev->features & NETIF_F_V4_CSUM)) { + (!skb->dev || skb->dev->features & + (NETIF_F_IP_CSUM | NETIF_F_HW_CSUM))) { skb->ip_summed = CHECKSUM_PARTIAL; skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) + diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 0c7b0e61b917..8841e984f8bf 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -772,7 +772,8 @@ void udp_set_csum(bool nocheck, struct sk_buff *skb, else if (skb_is_gso(skb)) uh->check = ~udp_v4_check(len, saddr, daddr, 0); else if (skb_dst(skb) && skb_dst(skb)->dev && - (skb_dst(skb)->dev->features & NETIF_F_V4_CSUM)) { + (skb_dst(skb)->dev->features & + (NETIF_F_IP_CSUM | NETIF_F_HW_CSUM))) { BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index f9386160cbee..130042660181 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -60,8 +60,9 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, /* Try to offload checksum if possible */ offload_csum = !!(need_csum && - (skb->dev->features & - (is_ipv6 ? NETIF_F_V6_CSUM : NETIF_F_V4_CSUM))); + ((skb->dev->features & NETIF_F_HW_CSUM) || + (skb->dev->features & (is_ipv6 ? + NETIF_F_IPV6_CSUM : NETIF_F_IP_CSUM)))); /* segment inner packet. 
*/ enc_features = skb->dev->hw_enc_features & features; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index e6a7bd15b9b7..2f748452b4aa 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1322,7 +1322,7 @@ emsgsize: headersize == sizeof(struct ipv6hdr) && length < mtu - headersize && !(flags & MSG_MORE) && - rt->dst.dev->features & NETIF_F_V6_CSUM) + rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM)) csummode = CHECKSUM_PARTIAL; if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) { diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c index 238e70c3f7b7..6ce309928841 100644 --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c @@ -136,7 +136,8 @@ static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb, if (skb->ip_summed != CHECKSUM_PARTIAL) { if (!(rt->rt6i_flags & RTF_LOCAL) && - (!skb->dev || skb->dev->features & NETIF_F_V6_CSUM)) { + (!skb->dev || skb->dev->features & + (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))) { skb->ip_summed = CHECKSUM_PARTIAL; skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) + -- cgit v1.2.3-71-gd317 From 6ae23ad36253a8033c5714c52b691b84456487c5 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 14 Dec 2015 11:19:46 -0800 Subject: net: Add driver helper functions to determine checksum offloadability Add skb_csum_offload_chk driver helper function to determine if a device with limited checksum offload capabilities is able to offload the checksum for a given packet. This patch includes: - The skb_csum_offload_chk function. Returns true if checksum is offloadable, else false. Optionally, in the case that the checksum is not offloable, the function can call skb_checksum_help to resolve the checksum. skb_csum_offload_chk also returns whether the checksum refers to an encapsulated checksum. - Definition of skb_csum_offl_spec structure that caller uses to indicate rules about what it can offload (e.g. IPv4/v6, TCP/UDP only, whether encapsulated checksums can be offloaded, whether checksum with IPv6 extension headers can be offloaded). - Ancilary functions called skb_csum_offload_chk_help, skb_csum_off_chk_help_cmn, skb_csum_off_chk_help_cmn_v4_only. Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 78 ++++++++++++++++++++++++++ net/core/dev.c | 136 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 214 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 283984b67cd9..9fb6395967de 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2522,6 +2522,71 @@ static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb, remcsum_unadjust((__sum16 *)ptr, grc->delta); } +struct skb_csum_offl_spec { + __u16 ipv4_okay:1, + ipv6_okay:1, + encap_okay:1, + ip_options_okay:1, + ext_hdrs_okay:1, + tcp_okay:1, + udp_okay:1, + sctp_okay:1, + vlan_okay:1, + no_encapped_ipv6:1, + no_not_encapped:1; +}; + +bool __skb_csum_offload_chk(struct sk_buff *skb, + const struct skb_csum_offl_spec *spec, + bool *csum_encapped, + bool csum_help); + +static inline bool skb_csum_offload_chk(struct sk_buff *skb, + const struct skb_csum_offl_spec *spec, + bool *csum_encapped, + bool csum_help) +{ + if (skb->ip_summed != CHECKSUM_PARTIAL) + return false; + + return __skb_csum_offload_chk(skb, spec, csum_encapped, csum_help); +} + +static inline bool skb_csum_offload_chk_help(struct sk_buff *skb, + const struct skb_csum_offl_spec *spec) +{ + bool csum_encapped; + + return skb_csum_offload_chk(skb, spec, &csum_encapped, true); +} + +static inline bool skb_csum_off_chk_help_cmn(struct sk_buff *skb) +{ + static const struct skb_csum_offl_spec csum_offl_spec = { + .ipv4_okay = 1, + .ip_options_okay = 1, + .ipv6_okay = 1, + .vlan_okay = 1, + .tcp_okay = 1, + .udp_okay = 1, + }; + + return skb_csum_offload_chk_help(skb, &csum_offl_spec); +} + +static inline bool skb_csum_off_chk_help_cmn_v4_only(struct sk_buff *skb) +{ + static const struct skb_csum_offl_spec csum_offl_spec = { + .ipv4_okay = 1, + .ip_options_okay = 1, + .tcp_okay = 1, + .udp_okay = 1, + .vlan_okay = 1, + }; + + return skb_csum_offload_chk_help(skb, &csum_offl_spec); +} + static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, @@ -3711,6 +3776,19 @@ static inline bool can_checksum_protocol(netdev_features_t features, } } +/* Map an ethertype into IP protocol if possible */ +static inline int eproto_to_ipproto(int eproto) +{ + switch (eproto) { + case htons(ETH_P_IP): + return IPPROTO_IP; + case htons(ETH_P_IPV6): + return IPPROTO_IPV6; + default: + return -1; + } +} + #ifdef CONFIG_BUG void netdev_rx_csum_fault(struct net_device *dev); #else diff --git a/net/core/dev.c b/net/core/dev.c index 45b013f27625..914b4a24c654 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -138,6 +138,7 @@ #include #include #include +#include #include "net-sysfs.h" @@ -2471,6 +2472,141 @@ out: } EXPORT_SYMBOL(skb_checksum_help); +/* skb_csum_offload_check - Driver helper function to determine if a device + * with limited checksum offload capabilities is able to offload the checksum + * for a given packet. + * + * Arguments: + * skb - sk_buff for the packet in question + * spec - contains the description of what device can offload + * csum_encapped - returns true if the checksum being offloaded is + * encpasulated. That is it is checksum for the transport header + * in the inner headers. 
+ * checksum_help - when set indicates that helper function should + * call skb_checksum_help if offload checks fail + * + * Returns: + * true: Packet has passed the checksum checks and should be offloadable to + * the device (a driver may still need to check for additional + * restrictions of its device) + * false: Checksum is not offloadable. If checksum_help was set then + * skb_checksum_help was called to resolve checksum for non-GSO + * packets and when IP protocol is not SCTP + */ +bool __skb_csum_offload_chk(struct sk_buff *skb, + const struct skb_csum_offl_spec *spec, + bool *csum_encapped, + bool csum_help) +{ + struct iphdr *iph; + struct ipv6hdr *ipv6; + void *nhdr; + int protocol; + u8 ip_proto; + + if (skb->protocol == htons(ETH_P_8021Q) || + skb->protocol == htons(ETH_P_8021AD)) { + if (!spec->vlan_okay) + goto need_help; + } + + /* We check whether the checksum refers to a transport layer checksum in + * the outermost header or an encapsulated transport layer checksum that + * corresponds to the inner headers of the skb. If the checksum is for + * something else in the packet we need help. + */ + if (skb_checksum_start_offset(skb) == skb_transport_offset(skb)) { + /* Non-encapsulated checksum */ + protocol = eproto_to_ipproto(vlan_get_protocol(skb)); + nhdr = skb_network_header(skb); + *csum_encapped = false; + if (spec->no_not_encapped) + goto need_help; + } else if (skb->encapsulation && spec->encap_okay && + skb_checksum_start_offset(skb) == + skb_inner_transport_offset(skb)) { + /* Encapsulated checksum */ + *csum_encapped = true; + switch (skb->inner_protocol_type) { + case ENCAP_TYPE_ETHER: + protocol = eproto_to_ipproto(skb->inner_protocol); + break; + case ENCAP_TYPE_IPPROTO: + protocol = skb->inner_protocol; + break; + } + nhdr = skb_inner_network_header(skb); + } else { + goto need_help; + } + + switch (protocol) { + case IPPROTO_IP: + if (!spec->ipv4_okay) + goto need_help; + iph = nhdr; + ip_proto = iph->protocol; + if (iph->ihl != 5 && !spec->ip_options_okay) + goto need_help; + break; + case IPPROTO_IPV6: + if (!spec->ipv6_okay) + goto need_help; + if (spec->no_encapped_ipv6 && *csum_encapped) + goto need_help; + ipv6 = nhdr; + nhdr += sizeof(*ipv6); + ip_proto = ipv6->nexthdr; + break; + default: + goto need_help; + } + +ip_proto_again: + switch (ip_proto) { + case IPPROTO_TCP: + if (!spec->tcp_okay || + skb->csum_offset != offsetof(struct tcphdr, check)) + goto need_help; + break; + case IPPROTO_UDP: + if (!spec->udp_okay || + skb->csum_offset != offsetof(struct udphdr, check)) + goto need_help; + break; + case IPPROTO_SCTP: + if (!spec->sctp_okay || + skb->csum_offset != offsetof(struct sctphdr, checksum)) + goto cant_help; + break; + case NEXTHDR_HOP: + case NEXTHDR_ROUTING: + case NEXTHDR_DEST: { + u8 *opthdr = nhdr; + + if (protocol != IPPROTO_IPV6 || !spec->ext_hdrs_okay) + goto need_help; + + ip_proto = opthdr[0]; + nhdr += (opthdr[1] + 1) << 3; + + goto ip_proto_again; + } + default: + goto need_help; + } + + /* Passed the tests for offloading checksum */ + return true; + +need_help: + if (csum_help && !skb_shinfo(skb)->gso_size) + skb_checksum_help(skb); +cant_help: + return false; +} +EXPORT_SYMBOL(__skb_csum_offload_chk); + __be16 skb_network_protocol(struct sk_buff *skb, int *depth) { __be16 type = skb->protocol; -- cgit v1.2.3-71-gd317 From 7a6ae71b2490586ed55105893a18dfc648e5fcbb Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 14 Dec 2015 11:19:47 -0800 Subject: net: Elaborate on checksum offload interface description Add specifics and 
details the description of the interface between the stack and drivers for doing checksum offload. This description is meant to be as specific and complete as possible. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/skbuff.h | 138 ++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 109 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2393373c9d08..6b6bd42d6134 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -39,11 +39,55 @@ #include #include -/* A. Checksumming of received packets by device. +/* The interface for checksum offload between the stack and networking drivers + * is as follows... + * + * A. IP checksum related features + * + * Drivers advertise checksum offload capabilities in the features of a device. + * From the stack's point of view these are capabilities offered by the driver, + * a driver typically only advertises features that it is capable of offloading + * to its device. + * + * The checksum related features are: + * + * NETIF_F_HW_CSUM - The driver (or its device) is able to compute one + * IP (one's complement) checksum for any combination + * of protocols or protocol layering. The checksum is + * computed and set in a packet per the CHECKSUM_PARTIAL + * interface (see below). + * + * NETIF_F_IP_CSUM - Driver (device) is only able to checksum plain + * TCP or UDP packets over IPv4. These are specifically + * unencapsulated packets of the form IPv4|TCP or + * IPv4|UDP where the Protocol field in the IPv4 header + * is TCP or UDP. The IPv4 header may contain IP options + * This feature cannot be set in features for a device + * with NETIF_F_HW_CSUM also set. This feature is being + * DEPRECATED (see below). + * + * NETIF_F_IPV6_CSUM - Driver (device) is only able to checksum plain + * TCP or UDP packets over IPv6. These are specifically + * unencapsulated packets of the form IPv6|TCP or + * IPv4|UDP where the Next Header field in the IPv6 + * header is either TCP or UDP. IPv6 extension headers + * are not supported with this feature. This feature + * cannot be set in features for a device with + * NETIF_F_HW_CSUM also set. This feature is being + * DEPRECATED (see below). + * + * NETIF_F_RXCSUM - Driver (device) performs receive checksum offload. + * This flag is used only used to disable the RX checksum + * feature for a device. The stack will accept receive + * checksum indication in packets received on a device + * regardless of whether NETIF_F_RXCSUM is set. + * + * B. Checksumming of received packets by device. Indication of checksum + * verification is in set skb->ip_summed. Possible values are: * * CHECKSUM_NONE: * - * Device failed to checksum this packet e.g. due to lack of capabilities. + * Device did not checksum this packet e.g. due to lack of capabilities. * The packet contains full (though not verified) checksum in packet but * not in skb->csum. Thus, skb->csum is undefined in this case. * @@ -53,9 +97,8 @@ * (as in CHECKSUM_COMPLETE), but it does parse headers and verify checksums * for specific protocols. For such packets it will set CHECKSUM_UNNECESSARY * if their checksums are okay. skb->csum is still undefined in this case - * though. It is a bad option, but, unfortunately, nowadays most vendors do - * this. Apparently with the secret goal to sell you new devices, when you - * will add new protocol to your host, f.e. IPv6 8) + * though. 
A driver or device must never modify the checksum field in the + * packet even if checksum is verified. * * CHECKSUM_UNNECESSARY is applicable to following protocols: * TCP: IPv6 and IPv4. @@ -96,40 +139,77 @@ * packet that are after the checksum being offloaded are not considered to * be verified. * - * B. Checksumming on output. - * - * CHECKSUM_NONE: - * - * The skb was already checksummed by the protocol, or a checksum is not - * required. + * C. Checksumming on transmit for non-GSO. The stack requests checksum offload + * in the skb->ip_summed for a packet. Values are: * * CHECKSUM_PARTIAL: * - * The device is required to checksum the packet as seen by hard_start_xmit() + * The driver is required to checksum the packet as seen by hard_start_xmit() * from skb->csum_start up to the end, and to record/write the checksum at - * offset skb->csum_start + skb->csum_offset. + * offset skb->csum_start + skb->csum_offset. A driver may verify that the + * csum_start and csum_offset values are valid values given the length and + * offset of the packet, however they should not attempt to validate that the + * checksum refers to a legitimate transport layer checksum-- it is the + * purview of the stack to validate that csum_start and csum_offset are set + * correctly. + * + * When the stack requests checksum offload for a packet, the driver MUST + * ensure that the checksum is set correctly. A driver can either offload the + * checksum calculation to the device, or call skb_checksum_help (in the case + * that the device does not support offload for a particular checksum). + * + * NETIF_F_IP_CSUM and NETIF_F_IPV6_CSUM are being deprecated in favor of + * NETIF_F_HW_CSUM. New devices should use NETIF_F_HW_CSUM to indicate + * checksum offload capability. If a device has limited checksum capabilities + * (for instance can only perform NETIF_F_IP_CSUM or NETIF_F_IPV6_CSUM as + * described above) a helper function can be called to resolve + * CHECKSUM_PARTIAL. The helper functions are skb_csum_off_chk*. The helper + * function takes a spec argument that describes the protocol layer that is + * supported for checksum offload and can be called for each packet. If a + * packet does not match the specification for offload, skb_checksum_help + * is called to resolve the checksum. * - * The device must show its capabilities in dev->features, set up at device - * setup time, e.g. netdev_features.h: + * CHECKSUM_NONE: * - * NETIF_F_HW_CSUM - It's a clever device, it's able to checksum everything. - * NETIF_F_IP_CSUM - Device is dumb, it's able to checksum only TCP/UDP over - * IPv4. Sigh. Vendors like this way for an unknown reason. - * Though, see comment above about CHECKSUM_UNNECESSARY. 8) - * NETIF_F_IPV6_CSUM - About as dumb as the last one but does IPv6 instead. - * NETIF_F_... - Well, you get the picture. + * The skb was already checksummed by the protocol, or a checksum is not + * required. * * CHECKSUM_UNNECESSARY: * - * Normally, the device will do per protocol specific checksumming. Protocol - * implementations that do not want the NIC to perform the checksum - * calculation should use this flag in their outgoing skbs. + * This has the same meaning on as CHECKSUM_NONE for checksum offload on + * output. * - * NETIF_F_FCOE_CRC - This indicates that the device can do FCoE FC CRC - * offload. Correspondingly, the FCoE protocol driver - * stack should use CHECKSUM_UNNECESSARY. - * - * Any questions? No questions, good. --ANK + * CHECKSUM_COMPLETE: + * Not used in checksum output. 
If a driver observes a packet with this value + * set in skbuff, if should treat as CHECKSUM_NONE being set. + * + * D. Non-IP checksum (CRC) offloads + * + * NETIF_F_SCTP_CRC - This feature indicates that a device is capable of + * offloading the SCTP CRC in a packet. To perform this offload the stack + * will set ip_summed to CHECKSUM_PARTIAL and set csum_start and csum_offset + * accordingly. Note the there is no indication in the skbuff that the + * CHECKSUM_PARTIAL refers to an SCTP checksum, a driver that supports + * both IP checksum offload and SCTP CRC offload must verify which offload + * is configured for a packet presumably by inspecting packet headers. + * + * NETIF_F_FCOE_CRC - This feature indicates that a device is capable of + * offloading the FCOE CRC in a packet. To perform this offload the stack + * will set ip_summed to CHECKSUM_PARTIAL and set csum_start and csum_offset + * accordingly. Note the there is no indication in the skbuff that the + * CHECKSUM_PARTIAL refers to an FCOE checksum, a driver that supports + * both IP checksum offload and FCOE CRC offload must verify which offload + * is configured for a packet presumably by inspecting packet headers. + * + * E. Checksumming on output with GSO. + * + * In the case of a GSO packet (skb_is_gso(skb) is true), checksum offload + * is implied by the SKB_GSO_* flags in gso_type. Most obviously, if the + * gso_type is SKB_GSO_TCPV4 or SKB_GSO_TCPV6, TCP checksum offload as + * part of the GSO operation is implied. If a checksum is being offloaded + * with GSO then ip_summed is CHECKSUM_PARTIAL, csum_start and csum_offset + * are set to refer to the outermost checksum being offload (two offloaded + * checksums are possible with UDP encapsulation). */ /* Don't change this without changing skb_csum_unnecessary! */ -- cgit v1.2.3-71-gd317 From 3502cad73c4bbf8f6365d539e814159275252c59 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 15 Dec 2015 15:41:36 -0800 Subject: rhashtable: add function to replace an element Add the rhashtable_replace_fast function. This replaces one object in the table with another atomically. The hashes of the new and old objects must be equal. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 82 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 843ceca9a21e..77deece15fb3 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -819,4 +819,86 @@ out: return err; } +/* Internal function, please use rhashtable_replace_fast() instead */ +static inline int __rhashtable_replace_fast( + struct rhashtable *ht, struct bucket_table *tbl, + struct rhash_head *obj_old, struct rhash_head *obj_new, + const struct rhashtable_params params) +{ + struct rhash_head __rcu **pprev; + struct rhash_head *he; + spinlock_t *lock; + unsigned int hash; + int err = -ENOENT; + + /* Minimally, the old and new objects must have same hash + * (which should mean identifiers are the same). 
+ */ + hash = rht_head_hashfn(ht, tbl, obj_old, params); + if (hash != rht_head_hashfn(ht, tbl, obj_new, params)) + return -EINVAL; + + lock = rht_bucket_lock(tbl, hash); + + spin_lock_bh(lock); + + pprev = &tbl->buckets[hash]; + rht_for_each(he, tbl, hash) { + if (he != obj_old) { + pprev = &he->next; + continue; + } + + rcu_assign_pointer(obj_new->next, obj_old->next); + rcu_assign_pointer(*pprev, obj_new); + err = 0; + break; + } + + spin_unlock_bh(lock); + + return err; +} + +/** + * rhashtable_replace_fast - replace an object in hash table + * @ht: hash table + * @obj_old: pointer to hash head inside object being replaced + * @obj_new: pointer to hash head inside object which is new + * @params: hash table parameters + * + * Replacing an object doesn't affect the number of elements in the hash table + * or bucket, so we don't need to worry about shrinking or expanding the + * table here. + * + * Returns zero on success, -ENOENT if the entry could not be found, + * -EINVAL if hash is not the same for the old and new objects. + */ +static inline int rhashtable_replace_fast( + struct rhashtable *ht, struct rhash_head *obj_old, + struct rhash_head *obj_new, + const struct rhashtable_params params) +{ + struct bucket_table *tbl; + int err; + + rcu_read_lock(); + + tbl = rht_dereference_rcu(ht->tbl, ht); + + /* Because we have already taken (and released) the bucket + * lock in old_tbl, if we find that future_tbl is not yet + * visible then that guarantees the entry to still be in + * the old tbl if it exists. + */ + while ((err = __rhashtable_replace_fast(ht, tbl, obj_old, + obj_new, params)) && + (tbl = rht_dereference_rcu(tbl->future_tbl, ht))) + ; + + rcu_read_unlock(); + + return err; +} + #endif /* _LINUX_RHASHTABLE_H */ -- cgit v1.2.3-71-gd317 From fc9e50f5a5a4e1fa9ba2756f745a13e693cf6a06 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 15 Dec 2015 15:41:37 -0800 Subject: netlink: add a start callback for starting a netlink dump The start callback allows the caller to set up a context for the dump callbacks. Presumably, the context can then be destroyed in the done callback. Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- include/linux/netlink.h | 2 ++ include/net/genetlink.h | 2 ++ net/netlink/af_netlink.c | 4 ++++ net/netlink/genetlink.c | 16 ++++++++++++++++ 4 files changed, 24 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 639e9b8b0e4d..0b41959aab9f 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -131,6 +131,7 @@ netlink_skb_clone(struct sk_buff *skb, gfp_t gfp_mask) struct netlink_callback { struct sk_buff *skb; const struct nlmsghdr *nlh; + int (*start)(struct netlink_callback *); int (*dump)(struct sk_buff * skb, struct netlink_callback *cb); int (*done)(struct netlink_callback *cb); @@ -153,6 +154,7 @@ struct nlmsghdr * __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags); struct netlink_dump_control { + int (*start)(struct netlink_callback *); int (*dump)(struct sk_buff *skb, struct netlink_callback *); int (*done)(struct netlink_callback *); void *data; diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 1b6b6dcb018d..43c0e771f417 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -114,6 +114,7 @@ static inline void genl_info_net_set(struct genl_info *info, struct net *net) * @flags: flags * @policy: attribute validation policy * @doit: standard command callback + * @start: start callback for dumps * @dumpit: callback for dumpers * @done: completion callback for dumps * @ops_list: operations list @@ -122,6 +123,7 @@ struct genl_ops { const struct nla_policy *policy; int (*doit)(struct sk_buff *skb, struct genl_info *info); + int (*start)(struct netlink_callback *cb); int (*dumpit)(struct sk_buff *skb, struct netlink_callback *cb); int (*done)(struct netlink_callback *cb); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 59651af8cc27..81dc1bb6e016 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2915,6 +2915,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, cb = &nlk->cb; memset(cb, 0, sizeof(*cb)); + cb->start = control->start; cb->dump = control->dump; cb->done = control->done; cb->nlh = nlh; @@ -2927,6 +2928,9 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, mutex_unlock(nlk->cb_mutex); + if (cb->start) + cb->start(cb); + ret = netlink_dump(sk); sock_put(sk); diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index bc0e504f33a6..8e63662c6fb0 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -513,6 +513,20 @@ void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, } EXPORT_SYMBOL(genlmsg_put); +static int genl_lock_start(struct netlink_callback *cb) +{ + /* our ops are always const - netlink API doesn't propagate that */ + const struct genl_ops *ops = cb->data; + int rc = 0; + + if (ops->start) { + genl_lock(); + rc = ops->start(cb); + genl_unlock(); + } + return rc; +} + static int genl_lock_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { /* our ops are always const - netlink API doesn't propagate that */ @@ -577,6 +591,7 @@ static int genl_family_rcv_msg(struct genl_family *family, .module = family->module, /* we have const, but the netlink API doesn't */ .data = (void *)ops, + .start = genl_lock_start, .dump = genl_lock_dumpit, .done = genl_lock_done, }; @@ -588,6 +603,7 @@ static int genl_family_rcv_msg(struct genl_family *family, } else { struct netlink_dump_control c = { .module = family->module, + .start = ops->start, .dump = ops->dumpit, .done = ops->done, }; -- cgit v1.2.3-71-gd317 From 
b613f56ec9baf30edf5d9d607b822532a273dad7 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Wed, 16 Dec 2015 12:30:02 +0900 Subject: net: diag: split inet_diag_dump_one_icsk into two Currently, inet_diag_dump_one_icsk finds a socket and then dumps its information to userspace. Split it into a part that finds the socket and a part that dumps the information. Signed-off-by: Lorenzo Colitti Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/inet_diag.h | 5 +++++ net/ipv4/inet_diag.c | 42 +++++++++++++++++++++++++++--------------- 2 files changed, 32 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 0e707f0c1a3e..e7032f041982 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -3,6 +3,7 @@ #include +struct net; struct sock; struct inet_hashinfo; struct nlattr; @@ -41,6 +42,10 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_skb, const struct nlmsghdr *nlh, const struct inet_diag_req_v2 *req); +struct sock *inet_diag_find_one_icsk(struct net *net, + struct inet_hashinfo *hashinfo, + const struct inet_diag_req_v2 *req); + int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk); extern int inet_diag_register(const struct inet_diag_handler *handler); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index ab9f8a66615d..cfabb8f8f0a0 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -350,17 +350,12 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, nlmsg_flags, unlh); } -int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, - struct sk_buff *in_skb, - const struct nlmsghdr *nlh, - const struct inet_diag_req_v2 *req) +struct sock *inet_diag_find_one_icsk(struct net *net, + struct inet_hashinfo *hashinfo, + const struct inet_diag_req_v2 *req) { - struct net *net = sock_net(in_skb->sk); - struct sk_buff *rep; struct sock *sk; - int err; - err = -EINVAL; if (req->sdiag_family == AF_INET) sk = inet_lookup(net, hashinfo, req->id.idiag_dst[0], req->id.idiag_dport, req->id.idiag_src[0], @@ -375,15 +370,33 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, req->id.idiag_if); #endif else - goto out_nosk; + return ERR_PTR(-EINVAL); - err = -ENOENT; if (!sk) - goto out_nosk; + return ERR_PTR(-ENOENT); - err = sock_diag_check_cookie(sk, req->id.idiag_cookie); - if (err) - goto out; + if (sock_diag_check_cookie(sk, req->id.idiag_cookie)) { + sock_gen_put(sk); + return ERR_PTR(-ENOENT); + } + + return sk; +} +EXPORT_SYMBOL_GPL(inet_diag_find_one_icsk); + +int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, + struct sk_buff *in_skb, + const struct nlmsghdr *nlh, + const struct inet_diag_req_v2 *req) +{ + struct net *net = sock_net(in_skb->sk); + struct sk_buff *rep; + struct sock *sk; + int err; + + sk = inet_diag_find_one_icsk(net, hashinfo, req); + if (IS_ERR(sk)) + return PTR_ERR(sk); rep = nlmsg_new(inet_sk_attr_size(), GFP_KERNEL); if (!rep) { @@ -409,7 +422,6 @@ out: if (sk) sock_gen_put(sk); -out_nosk: return err; } EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk); -- cgit v1.2.3-71-gd317 From 64be0aed59ad519d6f2160868734f7e278290ac1 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Wed, 16 Dec 2015 12:30:03 +0900 Subject: net: diag: Add the ability to destroy a socket. This patch adds a SOCK_DESTROY operation, a destroy function pointer to sock_diag_handler, and a diag_destroy function pointer. It does not include any implementation code. 
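For illustration only (not part of this patch, which deliberately adds no implementation): a protocol wanting to support SOCK_DESTROY would point the new .diag_destroy hook in its struct proto at a handler along the following lines. The function name and body are hypothetical; the capability check is handled by the generic sock_diag_destroy() helper added in the diff below.

static int example_diag_destroy(struct sock *sk, int err)
{
	/* Hypothetical sketch only: report the requested error to the
	 * socket owner. A real implementation would also tear the
	 * connection down in a protocol-specific way.
	 */
	lock_sock(sk);
	sk->sk_err = err;
	sk->sk_error_report(sk);
	release_sock(sk);
	return 0;
}

/* wired up in the protocol's struct proto:
 *	.diag_destroy	= example_diag_destroy,
 */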
Signed-off-by: Lorenzo Colitti Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/sock_diag.h | 2 ++ include/net/sock.h | 1 + include/uapi/linux/sock_diag.h | 1 + net/core/sock_diag.c | 23 ++++++++++++++++++++--- 4 files changed, 24 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h index fddebc617469..4018b48f2b3b 100644 --- a/include/linux/sock_diag.h +++ b/include/linux/sock_diag.h @@ -15,6 +15,7 @@ struct sock_diag_handler { __u8 family; int (*dump)(struct sk_buff *skb, struct nlmsghdr *nlh); int (*get_info)(struct sk_buff *skb, struct sock *sk); + int (*destroy)(struct sk_buff *skb, struct nlmsghdr *nlh); }; int sock_diag_register(const struct sock_diag_handler *h); @@ -68,4 +69,5 @@ bool sock_diag_has_destroy_listeners(const struct sock *sk) } void sock_diag_broadcast_destroy(struct sock *sk); +int sock_diag_destroy(struct sock *sk, int err); #endif diff --git a/include/net/sock.h b/include/net/sock.h index ab0269f4b2cc..6e6e8a25d997 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1060,6 +1060,7 @@ struct proto { void (*destroy_cgroup)(struct mem_cgroup *memcg); struct cg_proto *(*proto_cgroup)(struct mem_cgroup *memcg); #endif + int (*diag_destroy)(struct sock *sk, int err); }; int proto_register(struct proto *prot, int alloc_slab); diff --git a/include/uapi/linux/sock_diag.h b/include/uapi/linux/sock_diag.h index 49230d36f9ce..bae2d80034d4 100644 --- a/include/uapi/linux/sock_diag.h +++ b/include/uapi/linux/sock_diag.h @@ -4,6 +4,7 @@ #include #define SOCK_DIAG_BY_FAMILY 20 +#define SOCK_DESTROY 21 struct sock_diag_req { __u8 sdiag_family; diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 0c1d58d43f67..a996ce8c8fb2 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -214,7 +214,7 @@ void sock_diag_unregister(const struct sock_diag_handler *hnld) } EXPORT_SYMBOL_GPL(sock_diag_unregister); -static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +static int __sock_diag_cmd(struct sk_buff *skb, struct nlmsghdr *nlh) { int err; struct sock_diag_req *req = nlmsg_data(nlh); @@ -234,8 +234,12 @@ static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) hndl = sock_diag_handlers[req->sdiag_family]; if (hndl == NULL) err = -ENOENT; - else + else if (nlh->nlmsg_type == SOCK_DIAG_BY_FAMILY) err = hndl->dump(skb, nlh); + else if (nlh->nlmsg_type == SOCK_DESTROY && hndl->destroy) + err = hndl->destroy(skb, nlh); + else + err = -EOPNOTSUPP; mutex_unlock(&sock_diag_table_mutex); return err; @@ -261,7 +265,8 @@ static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return ret; case SOCK_DIAG_BY_FAMILY: - return __sock_diag_rcv_msg(skb, nlh); + case SOCK_DESTROY: + return __sock_diag_cmd(skb, nlh); default: return -EINVAL; } @@ -295,6 +300,18 @@ static int sock_diag_bind(struct net *net, int group) return 0; } +int sock_diag_destroy(struct sock *sk, int err) +{ + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + if (!sk->sk_prot->diag_destroy) + return -EOPNOTSUPP; + + return sk->sk_prot->diag_destroy(sk, err); +} +EXPORT_SYMBOL_GPL(sock_diag_destroy); + static int __net_init diag_net_init(struct net *net) { struct netlink_kernel_cfg cfg = { -- cgit v1.2.3-71-gd317 From 6eb5d2e08f071c05ecbe135369c9ad418826cab2 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Wed, 16 Dec 2015 12:30:04 +0900 Subject: net: diag: Support SOCK_DESTROY for inet sockets. 
This passes the SOCK_DESTROY operation to the underlying protocol diag handler, or returns -EOPNOTSUPP if that handler does not define a destroy operation. Most of this patch is just renaming functions. This is not strictly necessary, but it would be fairly counterintuitive to have the code to destroy inet sockets be in a function whose name starts with inet_diag_get. Signed-off-by: Lorenzo Colitti Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/inet_diag.h | 4 ++++ net/ipv4/inet_diag.c | 23 +++++++++++++++-------- 2 files changed, 19 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index e7032f041982..7c27fa1030e8 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -24,6 +24,10 @@ struct inet_diag_handler { void (*idiag_get_info)(struct sock *sk, struct inet_diag_msg *r, void *info); + + int (*destroy)(struct sk_buff *in_skb, + const struct inet_diag_req_v2 *req); + __u16 idiag_type; __u16 idiag_info_size; }; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index cfabb8f8f0a0..8bb8e7ad8548 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -426,7 +426,7 @@ out: } EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk); -static int inet_diag_get_exact(struct sk_buff *in_skb, +static int inet_diag_cmd_exact(int cmd, struct sk_buff *in_skb, const struct nlmsghdr *nlh, const struct inet_diag_req_v2 *req) { @@ -436,8 +436,12 @@ static int inet_diag_get_exact(struct sk_buff *in_skb, handler = inet_diag_lock_handler(req->sdiag_protocol); if (IS_ERR(handler)) err = PTR_ERR(handler); - else + else if (cmd == SOCK_DIAG_BY_FAMILY) err = handler->dump_one(in_skb, nlh, req); + else if (cmd == SOCK_DESTROY && handler->destroy) + err = handler->destroy(in_skb, req); + else + err = -EOPNOTSUPP; inet_diag_unlock_handler(handler); return err; @@ -950,7 +954,7 @@ static int inet_diag_get_exact_compat(struct sk_buff *in_skb, req.idiag_states = rc->idiag_states; req.id = rc->id; - return inet_diag_get_exact(in_skb, nlh, &req); + return inet_diag_cmd_exact(SOCK_DIAG_BY_FAMILY, in_skb, nlh, &req); } static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) @@ -984,7 +988,7 @@ static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) return inet_diag_get_exact_compat(skb, nlh); } -static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) +static int inet_diag_handler_cmd(struct sk_buff *skb, struct nlmsghdr *h) { int hdrlen = sizeof(struct inet_diag_req_v2); struct net *net = sock_net(skb->sk); @@ -992,7 +996,8 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) if (nlmsg_len(h) < hdrlen) return -EINVAL; - if (h->nlmsg_flags & NLM_F_DUMP) { + if (h->nlmsg_type == SOCK_DIAG_BY_FAMILY && + h->nlmsg_flags & NLM_F_DUMP) { if (nlmsg_attrlen(h, hdrlen)) { struct nlattr *attr; @@ -1011,7 +1016,7 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) } } - return inet_diag_get_exact(skb, h, nlmsg_data(h)); + return inet_diag_cmd_exact(h->nlmsg_type, skb, h, nlmsg_data(h)); } static @@ -1062,14 +1067,16 @@ int inet_diag_handler_get_info(struct sk_buff *skb, struct sock *sk) static const struct sock_diag_handler inet_diag_handler = { .family = AF_INET, - .dump = inet_diag_handler_dump, + .dump = inet_diag_handler_cmd, .get_info = inet_diag_handler_get_info, + .destroy = inet_diag_handler_cmd, }; static const struct sock_diag_handler inet6_diag_handler = { .family = AF_INET6, - .dump = 
inet_diag_handler_dump, + .dump = inet_diag_handler_cmd, .get_info = inet_diag_handler_get_info, + .destroy = inet_diag_handler_cmd, }; int inet_diag_register(const struct inet_diag_handler *h) -- cgit v1.2.3-71-gd317 From 541c9a84cd85203244307d9ebb821102eed82789 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Wed, 9 Dec 2015 23:36:51 +0100 Subject: ssb: pick SoC invariants code from MIPS BCM47xx arch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is code in ssb fetching "invariants" that is basically a set of board specific data. Every host requires its own implementation of the reading function. In ssb we have support for PCI, PCMCIA & SDIO. For some (historical?) reason code reading "invariants" for SoC was placed in arch code and provided by a callback. This is not needed nowadays, so let's move that into ssb. This way we keep all "invariants" functions in a single module, making the code cleaner. Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- arch/mips/bcm47xx/setup.c | 39 +-------------------------------------- drivers/ssb/Kconfig | 2 +- drivers/ssb/host_soc.c | 37 +++++++++++++++++++++++++++++++++++++ drivers/ssb/main.c | 5 ++--- drivers/ssb/ssb_private.h | 3 +++ include/linux/ssb/ssb.h | 10 +++------- 6 files changed, 47 insertions(+), 49 deletions(-) (limited to 'include/linux') diff --git a/arch/mips/bcm47xx/setup.c b/arch/mips/bcm47xx/setup.c index 6d38948f0f1e..c807e32d6d81 100644 --- a/arch/mips/bcm47xx/setup.c +++ b/arch/mips/bcm47xx/setup.c @@ -101,50 +101,13 @@ static void bcm47xx_machine_halt(void) } #ifdef CONFIG_BCM47XX_SSB -static int bcm47xx_get_invariants(struct ssb_bus *bus, - struct ssb_init_invariants *iv) -{ - char buf[20]; - int len, err; - - /* Fill boardinfo structure */ - memset(&iv->boardinfo, 0 , sizeof(struct ssb_boardinfo)); - - len = bcm47xx_nvram_getenv("boardvendor", buf, sizeof(buf)); - if (len > 0) { - err = kstrtou16(strim(buf), 0, &iv->boardinfo.vendor); - if (err) - pr_warn("Couldn't parse nvram board vendor entry with value \"%s\"\n", - buf); - } - if (!iv->boardinfo.vendor) - iv->boardinfo.vendor = SSB_BOARDVENDOR_BCM; - - len = bcm47xx_nvram_getenv("boardtype", buf, sizeof(buf)); - if (len > 0) { - err = kstrtou16(strim(buf), 0, &iv->boardinfo.type); - if (err) - pr_warn("Couldn't parse nvram board type entry with value \"%s\"\n", - buf); - } - - memset(&iv->sprom, 0, sizeof(struct ssb_sprom)); - bcm47xx_fill_sprom(&iv->sprom, NULL, false); - - if (bcm47xx_nvram_getenv("cardbus", buf, sizeof(buf)) >= 0) - iv->has_cardbus_slot = !!simple_strtoul(buf, NULL, 10); - - return 0; -} - static void __init bcm47xx_register_ssb(void) { int err; char buf[100]; struct ssb_mipscore *mcore; - err = ssb_bus_ssbbus_register(&bcm47xx_bus.ssb, SSB_ENUM_BASE, - bcm47xx_get_invariants); + err = ssb_bus_host_soc_register(&bcm47xx_bus.ssb, SSB_ENUM_BASE); if (err) panic("Failed to initialize SSB bus (err %d)", err); diff --git a/drivers/ssb/Kconfig b/drivers/ssb/Kconfig index 149214beeda9..0c675861623f 100644 --- a/drivers/ssb/Kconfig +++ b/drivers/ssb/Kconfig @@ -82,7 +82,7 @@ config SSB_SDIOHOST config SSB_HOST_SOC bool "Support for SSB bus on SoC" - depends on SSB + depends on SSB && BCM47XX_NVRAM help Host interface for a SSB directly mapped into memory. This is for some Broadcom SoCs from the BCM47xx and BCM53xx lines.
diff --git a/drivers/ssb/host_soc.c b/drivers/ssb/host_soc.c index c809f255af34..d62992dc08b2 100644 --- a/drivers/ssb/host_soc.c +++ b/drivers/ssb/host_soc.c @@ -8,6 +8,7 @@ * Licensed under the GNU/GPL. See COPYING for details. */ +#include #include #include "ssb_private.h" @@ -171,3 +172,39 @@ const struct ssb_bus_ops ssb_host_soc_ops = { .block_write = ssb_host_soc_block_write, #endif }; + +int ssb_host_soc_get_invariants(struct ssb_bus *bus, + struct ssb_init_invariants *iv) +{ + char buf[20]; + int len, err; + + /* Fill boardinfo structure */ + memset(&iv->boardinfo, 0, sizeof(struct ssb_boardinfo)); + + len = bcm47xx_nvram_getenv("boardvendor", buf, sizeof(buf)); + if (len > 0) { + err = kstrtou16(strim(buf), 0, &iv->boardinfo.vendor); + if (err) + pr_warn("Couldn't parse nvram board vendor entry with value \"%s\"\n", + buf); + } + if (!iv->boardinfo.vendor) + iv->boardinfo.vendor = SSB_BOARDVENDOR_BCM; + + len = bcm47xx_nvram_getenv("boardtype", buf, sizeof(buf)); + if (len > 0) { + err = kstrtou16(strim(buf), 0, &iv->boardinfo.type); + if (err) + pr_warn("Couldn't parse nvram board type entry with value \"%s\"\n", + buf); + } + + memset(&iv->sprom, 0, sizeof(struct ssb_sprom)); + ssb_fill_sprom_with_fallback(bus, &iv->sprom); + + if (bcm47xx_nvram_getenv("cardbus", buf, sizeof(buf)) >= 0) + iv->has_cardbus_slot = !!simple_strtoul(buf, NULL, 10); + + return 0; +} diff --git a/drivers/ssb/main.c b/drivers/ssb/main.c index 5d1e9a0fc389..cde5ff7529eb 100644 --- a/drivers/ssb/main.c +++ b/drivers/ssb/main.c @@ -762,15 +762,14 @@ EXPORT_SYMBOL(ssb_bus_sdiobus_register); #endif /* CONFIG_SSB_PCMCIAHOST */ #ifdef CONFIG_SSB_HOST_SOC -int ssb_bus_ssbbus_register(struct ssb_bus *bus, unsigned long baseaddr, - ssb_invariants_func_t get_invariants) +int ssb_bus_host_soc_register(struct ssb_bus *bus, unsigned long baseaddr) { int err; bus->bustype = SSB_BUSTYPE_SSB; bus->ops = &ssb_host_soc_ops; - err = ssb_bus_register(bus, get_invariants, baseaddr); + err = ssb_bus_register(bus, ssb_host_soc_get_invariants, baseaddr); if (!err) { ssb_info("Sonics Silicon Backplane found at address 0x%08lX\n", baseaddr); diff --git a/drivers/ssb/ssb_private.h b/drivers/ssb/ssb_private.h index 15bfd5c7d2d7..c2f5d3969c8b 100644 --- a/drivers/ssb/ssb_private.h +++ b/drivers/ssb/ssb_private.h @@ -163,6 +163,9 @@ static inline int ssb_sdio_init(struct ssb_bus *bus) #ifdef CONFIG_SSB_HOST_SOC extern const struct ssb_bus_ops ssb_host_soc_ops; + +extern int ssb_host_soc_get_invariants(struct ssb_bus *bus, + struct ssb_init_invariants *iv); #endif /* scan.c */ diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h index c3d1a525bacc..26a0b3c3ce5f 100644 --- a/include/linux/ssb/ssb.h +++ b/include/linux/ssb/ssb.h @@ -524,13 +524,9 @@ struct ssb_init_invariants { typedef int (*ssb_invariants_func_t)(struct ssb_bus *bus, struct ssb_init_invariants *iv); -/* Register a SSB system bus. get_invariants() is called after the - * basic system devices are initialized. - * The invariants are usually fetched from some NVRAM. - * Put the invariants into the struct pointed to by iv. */ -extern int ssb_bus_ssbbus_register(struct ssb_bus *bus, - unsigned long baseaddr, - ssb_invariants_func_t get_invariants); +/* Register SoC bus. 
*/ +extern int ssb_bus_host_soc_register(struct ssb_bus *bus, + unsigned long baseaddr); #ifdef CONFIG_SSB_PCIHOST extern int ssb_bus_pcibus_register(struct ssb_bus *bus, struct pci_dev *host_pci); -- cgit v1.2.3-71-gd317 From a8170d2b9e8d38a1f3fa3b40b6f8cd34a87d5382 Mon Sep 17 00:00:00 2001 From: "Singhai, Anjali" Date: Mon, 14 Dec 2015 12:21:17 -0800 Subject: geneve: Add geneve udp port offload for ethernet devices Add ndo_ops to add/del UDP ports to a device that supports geneve offload. v2: Comment fix. Signed-off-by: Anjali Singhai Jain Signed-off-by: Kiran Patil Signed-off-by: David S. Miller --- drivers/net/geneve.c | 23 +++++++++++++++++++++++ include/linux/netdevice.h | 20 +++++++++++++++++++- 2 files changed, 42 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 0750d7a93878..89325e483ecf 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -380,8 +380,11 @@ static struct socket *geneve_create_sock(struct net *net, bool ipv6, static void geneve_notify_add_rx_port(struct geneve_sock *gs) { + struct net_device *dev; struct sock *sk = gs->sock->sk; + struct net *net = sock_net(sk); sa_family_t sa_family = sk->sk_family; + __be16 port = inet_sk(sk)->inet_sport; int err; if (sa_family == AF_INET) { @@ -390,6 +393,14 @@ static void geneve_notify_add_rx_port(struct geneve_sock *gs) pr_warn("geneve: udp_add_offload failed with status %d\n", err); } + + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { + if (dev->netdev_ops->ndo_add_geneve_port) + dev->netdev_ops->ndo_add_geneve_port(dev, sa_family, + port); + } + rcu_read_unlock(); } static int geneve_hlen(struct genevehdr *gh) @@ -530,8 +541,20 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, static void geneve_notify_del_rx_port(struct geneve_sock *gs) { + struct net_device *dev; struct sock *sk = gs->sock->sk; + struct net *net = sock_net(sk); sa_family_t sa_family = sk->sk_family; + __be16 port = inet_sk(sk)->inet_sport; + + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { + if (dev->netdev_ops->ndo_del_geneve_port) + dev->netdev_ops->ndo_del_geneve_port(dev, sa_family, + port); + } + + rcu_read_unlock(); if (sa_family == AF_INET) udp_del_offload(&gs->udp_offloads); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9fb6395967de..81b26a543a3c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1013,6 +1013,19 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * a new port starts listening. The operation is protected by the * vxlan_net->sock_lock. * + * void (*ndo_add_geneve_port)(struct net_device *dev, + * sa_family_t sa_family, __be16 port); + * Called by geneve to notify a driver about the UDP port and socket + * address family that geneve is listnening to. It is called only when + * a new port starts listening. The operation is protected by the + * geneve_net->sock_lock. + * + * void (*ndo_del_geneve_port)(struct net_device *dev, + * sa_family_t sa_family, __be16 port); + * Called by geneve to notify the driver about a UDP port and socket + * address family that geneve is not listening to anymore. The operation + * is protected by the geneve_net->sock_lock. 
+ * * void (*ndo_del_vxlan_port)(struct net_device *dev, * sa_family_t sa_family, __be16 port); * Called by vxlan to notify the driver about a UDP port and socket @@ -1217,7 +1230,12 @@ struct net_device_ops { void (*ndo_del_vxlan_port)(struct net_device *dev, sa_family_t sa_family, __be16 port); - + void (*ndo_add_geneve_port)(struct net_device *dev, + sa_family_t sa_family, + __be16 port); + void (*ndo_del_geneve_port)(struct net_device *dev, + sa_family_t sa_family, + __be16 port); void* (*ndo_dfwd_add_station)(struct net_device *pdev, struct net_device *dev); void (*ndo_dfwd_del_station)(struct net_device *pdev, -- cgit v1.2.3-71-gd317 From 7b8002a1511fcbcb0596cac90d67ad5c8182d0aa Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 15 Dec 2015 18:41:56 +0100 Subject: netfilter: nfnetlink: pass down netns pointer to call() and call_rcu() Adapt callsites to avoid recurrent lookup of the netns pointer. Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nfnetlink.h | 8 +-- net/netfilter/ipset/ip_set_core.c | 108 +++++++++++++++-------------------- net/netfilter/nf_conntrack_netlink.c | 96 ++++++++++++++----------------- net/netfilter/nf_tables_api.c | 30 ++++------ net/netfilter/nfnetlink.c | 6 +- net/netfilter/nfnetlink_acct.c | 21 +++---- net/netfilter/nfnetlink_cthelper.c | 18 +++--- net/netfilter/nfnetlink_cttimeout.c | 42 ++++++-------- net/netfilter/nfnetlink_log.c | 15 ++--- net/netfilter/nfnetlink_queue.c | 36 +++++------- net/netfilter/nft_compat.c | 6 +- net/netfilter/xt_osf.c | 7 ++- 12 files changed, 173 insertions(+), 220 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 5646b24bfc64..ceacbf5dcb73 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -8,12 +8,12 @@ #include struct nfnl_callback { - int (*call)(struct sock *nl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[]); - int (*call_rcu)(struct sock *nl, struct sk_buff *skb, + int (*call)(struct net *net, struct sock *nl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const cda[]); + int (*call_rcu)(struct net *net, struct sock *nl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]); int (*call_batch)(struct net *net, struct sock *nl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const cda[]); diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 54f3d7cb23e6..95db43fc0303 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -825,20 +825,17 @@ find_free_id(struct ip_set_net *inst, const char *name, ip_set_id_t *index, return 0; } -static int -ip_set_none(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) +static int ip_set_none(struct net *net, struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) { return -EOPNOTSUPP; } -static int -ip_set_create(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) +static int ip_set_create(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) { - struct net *net = sock_net(ctnl); struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *set, *clash = NULL; ip_set_id_t index = IPSET_INVALID_ID; @@ -976,12 +973,11 @@ 
ip_set_destroy_set(struct ip_set *set) kfree(set); } -static int -ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) +static int ip_set_destroy(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) { - struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); + struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *s; ip_set_id_t i; int ret = 0; @@ -1052,12 +1048,11 @@ ip_set_flush_set(struct ip_set *set) spin_unlock_bh(&set->lock); } -static int -ip_set_flush(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) +static int ip_set_flush(struct net *net, struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) { - struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); + struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *s; ip_set_id_t i; @@ -1092,12 +1087,11 @@ ip_set_setname2_policy[IPSET_ATTR_CMD_MAX + 1] = { .len = IPSET_MAXNAMELEN - 1 }, }; -static int -ip_set_rename(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) +static int ip_set_rename(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) { - struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); + struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *set, *s; const char *name2; ip_set_id_t i; @@ -1142,12 +1136,11 @@ out: * so the ip_set_list always contains valid pointers to the sets. */ -static int -ip_set_swap(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) +static int ip_set_swap(struct net *net, struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) { - struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); + struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *from, *to; ip_set_id_t from_id, to_id; char from_name[IPSET_MAXNAMELEN]; @@ -1413,10 +1406,9 @@ out: return ret < 0 ? 
ret : skb->len; } -static int -ip_set_dump(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) +static int ip_set_dump(struct net *net, struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) { if (unlikely(protocol_failed(attr))) return -IPSET_ERR_PROTOCOL; @@ -1500,12 +1492,11 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, return ret; } -static int -ip_set_uadd(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) +static int ip_set_uadd(struct net *net, struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) { - struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); + struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *set; struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {}; const struct nlattr *nla; @@ -1555,12 +1546,11 @@ ip_set_uadd(struct sock *ctnl, struct sk_buff *skb, return ret; } -static int -ip_set_udel(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) +static int ip_set_udel(struct net *net, struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) { - struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); + struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *set; struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {}; const struct nlattr *nla; @@ -1610,12 +1600,11 @@ ip_set_udel(struct sock *ctnl, struct sk_buff *skb, return ret; } -static int -ip_set_utest(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) +static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) { - struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); + struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *set; struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {}; int ret = 0; @@ -1646,12 +1635,11 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb, /* Get headed data of a set */ -static int -ip_set_header(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) +static int ip_set_header(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) { - struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); + struct ip_set_net *inst = ip_set_pernet(net); const struct ip_set *set; struct sk_buff *skb2; struct nlmsghdr *nlh2; @@ -1703,10 +1691,9 @@ static const struct nla_policy ip_set_type_policy[IPSET_ATTR_CMD_MAX + 1] = { [IPSET_ATTR_FAMILY] = { .type = NLA_U8 }, }; -static int -ip_set_type(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) +static int ip_set_type(struct net *net, struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) { struct sk_buff *skb2; struct nlmsghdr *nlh2; @@ -1762,10 +1749,9 @@ ip_set_protocol_policy[IPSET_ATTR_CMD_MAX + 1] = { [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, }; -static int -ip_set_protocol(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) +static int ip_set_protocol(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const 
attr[]) { struct sk_buff *skb2; struct nlmsghdr *nlh2; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 9f5272968abb..dbb1bb3edb45 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1113,12 +1113,11 @@ static int ctnetlink_flush_conntrack(struct net *net, return 0; } -static int -ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) +static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl, + struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { - struct net *net = sock_net(ctnl); struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple tuple; struct nf_conn *ct; @@ -1168,12 +1167,11 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, return 0; } -static int -ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) +static int ctnetlink_get_conntrack(struct net *net, struct sock *ctnl, + struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { - struct net *net = sock_net(ctnl); struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple tuple; struct nf_conn *ct; @@ -1330,10 +1328,10 @@ ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb) return ctnetlink_dump_list(skb, cb, true); } -static int -ctnetlink_get_ct_dying(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) +static int ctnetlink_get_ct_dying(struct net *net, struct sock *ctnl, + struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { @@ -1352,10 +1350,10 @@ ctnetlink_dump_unconfirmed(struct sk_buff *skb, struct netlink_callback *cb) return ctnetlink_dump_list(skb, cb, false); } -static int -ctnetlink_get_ct_unconfirmed(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) +static int ctnetlink_get_ct_unconfirmed(struct net *net, struct sock *ctnl, + struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { @@ -1865,12 +1863,11 @@ err1: return ERR_PTR(err); } -static int -ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) +static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl, + struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { - struct net *net = sock_net(ctnl); struct nf_conntrack_tuple otuple, rtuple; struct nf_conntrack_tuple_hash *h = NULL; struct nfgenmsg *nfmsg = nlmsg_data(nlh); @@ -2034,10 +2031,10 @@ ctnetlink_ct_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } -static int -ctnetlink_stat_ct_cpu(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) +static int ctnetlink_stat_ct_cpu(struct net *net, struct sock *ctnl, + struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { @@ -2080,10 +2077,9 @@ nlmsg_failure: return -1; } -static int -ctnetlink_stat_ct(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const 
struct nlattr * const cda[]) +static int ctnetlink_stat_ct(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { struct sk_buff *skb2; int err; @@ -2729,12 +2725,12 @@ out: return skb->len; } -static int ctnetlink_dump_exp_ct(struct sock *ctnl, struct sk_buff *skb, +static int ctnetlink_dump_exp_ct(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const cda[]) { int err; - struct net *net = sock_net(ctnl); struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int8_t u3 = nfmsg->nfgen_family; struct nf_conntrack_tuple tuple; @@ -2768,12 +2764,10 @@ static int ctnetlink_dump_exp_ct(struct sock *ctnl, struct sk_buff *skb, return err; } -static int -ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) +static int ctnetlink_get_expect(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { - struct net *net = sock_net(ctnl); struct nf_conntrack_tuple tuple; struct nf_conntrack_expect *exp; struct sk_buff *skb2; @@ -2784,7 +2778,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, if (nlh->nlmsg_flags & NLM_F_DUMP) { if (cda[CTA_EXPECT_MASTER]) - return ctnetlink_dump_exp_ct(ctnl, skb, nlh, cda); + return ctnetlink_dump_exp_ct(net, ctnl, skb, nlh, cda); else { struct netlink_dump_control c = { .dump = ctnetlink_exp_dump_table, @@ -2850,12 +2844,10 @@ out: return err == -EAGAIN ? -ENOBUFS : err; } -static int -ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) +static int ctnetlink_del_expect(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { - struct net *net = sock_net(ctnl); struct nf_conntrack_expect *exp; struct nf_conntrack_tuple tuple; struct nfgenmsg *nfmsg = nlmsg_data(nlh); @@ -3136,12 +3128,10 @@ err_ct: return err; } -static int -ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) +static int ctnetlink_new_expect(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { - struct net *net = sock_net(ctnl); struct nf_conntrack_tuple tuple; struct nf_conntrack_expect *exp; struct nfgenmsg *nfmsg = nlmsg_data(nlh); @@ -3242,10 +3232,10 @@ ctnetlink_exp_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } -static int -ctnetlink_stat_exp_cpu(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) +static int ctnetlink_stat_exp_cpu(struct net *net, struct sock *ctnl, + struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 28cbc457f1f3..69cb5be9a174 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -543,15 +543,14 @@ done: return skb->len; } -static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb, - const struct nlmsghdr *nlh, +static int nf_tables_gettable(struct net *net, struct sock *nlsk, + struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = 
nlmsg_data(nlh); const struct nft_af_info *afi; const struct nft_table *table; struct sk_buff *skb2; - struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; int err; @@ -1097,8 +1096,8 @@ done: return skb->len; } -static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb, - const struct nlmsghdr *nlh, +static int nf_tables_getchain(struct net *net, struct sock *nlsk, + struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); @@ -1106,7 +1105,6 @@ static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb, const struct nft_table *table; const struct nft_chain *chain; struct sk_buff *skb2; - struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; int err; @@ -1923,8 +1921,8 @@ done: return skb->len; } -static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb, - const struct nlmsghdr *nlh, +static int nf_tables_getrule(struct net *net, struct sock *nlsk, + struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); @@ -1933,7 +1931,6 @@ static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb, const struct nft_chain *chain; const struct nft_rule *rule; struct sk_buff *skb2; - struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; int err; @@ -2604,11 +2601,10 @@ static int nf_tables_dump_sets_done(struct netlink_callback *cb) return 0; } -static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb, - const struct nlmsghdr *nlh, +static int nf_tables_getset(struct net *net, struct sock *nlsk, + struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { - struct net *net = sock_net(skb->sk); const struct nft_set *set; struct nft_ctx ctx; struct sk_buff *skb2; @@ -3190,11 +3186,10 @@ nla_put_failure: return -ENOSPC; } -static int nf_tables_getsetelem(struct sock *nlsk, struct sk_buff *skb, - const struct nlmsghdr *nlh, +static int nf_tables_getsetelem(struct net *net, struct sock *nlsk, + struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { - struct net *net = sock_net(skb->sk); const struct nft_set *set; struct nft_ctx ctx; int err; @@ -3723,11 +3718,10 @@ err: return err; } -static int nf_tables_getgen(struct sock *nlsk, struct sk_buff *skb, - const struct nlmsghdr *nlh, +static int nf_tables_getgen(struct net *net, struct sock *nlsk, + struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { - struct net *net = sock_net(skb->sk); struct sk_buff *skb2; int err; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 9ed453465167..7012154b28ca 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -206,7 +206,7 @@ replay: } if (nc->call_rcu) { - err = nc->call_rcu(net->nfnl, skb, nlh, + err = nc->call_rcu(net, net->nfnl, skb, nlh, (const struct nlattr **)cda); rcu_read_unlock(); } else { @@ -216,8 +216,8 @@ replay: nfnetlink_find_client(type, ss) != nc) err = -EAGAIN; else if (nc->call) - err = nc->call(net->nfnl, skb, nlh, - (const struct nlattr **)cda); + err = nc->call(net, net->nfnl, skb, nlh, + (const struct nlattr **)cda); else err = -EINVAL; nfnl_unlock(subsys_id); diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index fefbf5f0b28d..5274b04c42a6 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -46,12 +46,11 @@ struct nfacct_filter { #define NFACCT_F_QUOTA 
(NFACCT_F_QUOTA_PKTS | NFACCT_F_QUOTA_BYTES) #define NFACCT_OVERQUOTA_BIT 2 /* NFACCT_F_OVERQUOTA */ -static int -nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, const struct nlattr * const tb[]) +static int nfnl_acct_new(struct net *net, struct sock *nfnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const tb[]) { struct nf_acct *nfacct, *matching = NULL; - struct net *net = sock_net(nfnl); char *acct_name; unsigned int size = 0; u32 flags = 0; @@ -253,11 +252,10 @@ nfacct_filter_alloc(const struct nlattr * const attr) return filter; } -static int -nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, const struct nlattr * const tb[]) +static int nfnl_acct_get(struct net *net, struct sock *nfnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const tb[]) { - struct net *net = sock_net(nfnl); int ret = -ENOENT; struct nf_acct *cur; char *acct_name; @@ -333,11 +331,10 @@ static int nfnl_acct_try_del(struct nf_acct *cur) return ret; } -static int -nfnl_acct_del(struct sock *nfnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, const struct nlattr * const tb[]) +static int nfnl_acct_del(struct net *net, struct sock *nfnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const tb[]) { - struct net *net = sock_net(nfnl); char *acct_name; struct nf_acct *cur; int ret = -ENOENT; diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index 54330fb5efaf..e924e95fcc7f 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -286,9 +286,9 @@ nfnl_cthelper_update(const struct nlattr * const tb[], return 0; } -static int -nfnl_cthelper_new(struct sock *nfnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, const struct nlattr * const tb[]) +static int nfnl_cthelper_new(struct net *net, struct sock *nfnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const tb[]) { const char *helper_name; struct nf_conntrack_helper *cur, *helper = NULL; @@ -498,9 +498,9 @@ out: return skb->len; } -static int -nfnl_cthelper_get(struct sock *nfnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, const struct nlattr * const tb[]) +static int nfnl_cthelper_get(struct net *net, struct sock *nfnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const tb[]) { int ret = -ENOENT, i; struct nf_conntrack_helper *cur; @@ -570,9 +570,9 @@ nfnl_cthelper_get(struct sock *nfnl, struct sk_buff *skb, return ret; } -static int -nfnl_cthelper_del(struct sock *nfnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, const struct nlattr * const tb[]) +static int nfnl_cthelper_del(struct net *net, struct sock *nfnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const tb[]) { char *helper_name = NULL; struct nf_conntrack_helper *cur; diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 3921d544f5ba..5d010f27ac01 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -65,16 +65,15 @@ ctnl_timeout_parse_policy(void *timeouts, struct nf_conntrack_l4proto *l4proto, return ret; } -static int -cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) +static int cttimeout_new_timeout(struct net *net, struct sock *ctnl, + struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const 
cda[]) { __u16 l3num; __u8 l4num; struct nf_conntrack_l4proto *l4proto; struct ctnl_timeout *timeout, *matching = NULL; - struct net *net = sock_net(skb->sk); char *name; int ret; @@ -239,12 +238,11 @@ ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } -static int -cttimeout_get_timeout(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) +static int cttimeout_get_timeout(struct net *net, struct sock *ctnl, + struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { - struct net *net = sock_net(skb->sk); int ret = -ENOENT; char *name; struct ctnl_timeout *cur; @@ -339,15 +337,14 @@ static int ctnl_timeout_try_del(struct net *net, struct ctnl_timeout *timeout) return ret; } -static int -cttimeout_del_timeout(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) +static int cttimeout_del_timeout(struct net *net, struct sock *ctnl, + struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { - struct net *net = sock_net(skb->sk); - char *name; struct ctnl_timeout *cur; int ret = -ENOENT; + char *name; if (!cda[CTA_TIMEOUT_NAME]) { list_for_each_entry(cur, &net->nfct_timeout_list, head) @@ -370,15 +367,14 @@ cttimeout_del_timeout(struct sock *ctnl, struct sk_buff *skb, return ret; } -static int -cttimeout_default_set(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) +static int cttimeout_default_set(struct net *net, struct sock *ctnl, + struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { __u16 l3num; __u8 l4num; struct nf_conntrack_l4proto *l4proto; - struct net *net = sock_net(skb->sk); unsigned int *timeouts; int ret; @@ -460,14 +456,14 @@ nla_put_failure: return -1; } -static int cttimeout_default_get(struct sock *ctnl, struct sk_buff *skb, +static int cttimeout_default_get(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const cda[]) { __u16 l3num; __u8 l4num; struct nf_conntrack_l4proto *l4proto; - struct net *net = sock_net(skb->sk); struct sk_buff *skb2; int ret, err; diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 70b6bd3b781e..6a57f10a4e0b 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -785,10 +785,9 @@ static struct notifier_block nfulnl_rtnl_notifier = { .notifier_call = nfulnl_rcv_nl_event, }; -static int -nfulnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const nfqa[]) +static int nfulnl_recv_unsupp(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const nfqa[]) { return -ENOTSUPP; } @@ -809,16 +808,14 @@ static const struct nla_policy nfula_cfg_policy[NFULA_CFG_MAX+1] = { [NFULA_CFG_FLAGS] = { .type = NLA_U16 }, }; -static int -nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const nfula[]) +static int nfulnl_recv_config(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const nfula[]) { struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int16_t group_num = ntohs(nfmsg->res_id); struct nfulnl_instance *inst; struct nfulnl_msg_config_cmd *cmd = NULL; - struct net *net = sock_net(ctnl); struct nfnl_log_net *log = 
nfnl_log_pernet(net); int ret = 0; u16 flags = 0; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 861c6615253b..3d1f16cf5cd0 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -957,10 +957,10 @@ static int nfq_id_after(unsigned int id, unsigned int max) return (int)(id - max) > 0; } -static int -nfqnl_recv_verdict_batch(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const nfqa[]) +static int nfqnl_recv_verdict_batch(struct net *net, struct sock *ctnl, + struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nfqa[]) { struct nfgenmsg *nfmsg = nlmsg_data(nlh); struct nf_queue_entry *entry, *tmp; @@ -969,8 +969,6 @@ nfqnl_recv_verdict_batch(struct sock *ctnl, struct sk_buff *skb, struct nfqnl_instance *queue; LIST_HEAD(batch_list); u16 queue_num = ntohs(nfmsg->res_id); - - struct net *net = sock_net(ctnl); struct nfnl_queue_net *q = nfnl_queue_pernet(net); queue = verdict_instance_lookup(q, queue_num, @@ -1029,14 +1027,13 @@ static struct nf_conn *nfqnl_ct_parse(struct nfnl_ct_hook *nfnl_ct, return ct; } -static int -nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const nfqa[]) +static int nfqnl_recv_verdict(struct net *net, struct sock *ctnl, + struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nfqa[]) { struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int16_t queue_num = ntohs(nfmsg->res_id); - struct nfqnl_msg_verdict_hdr *vhdr; struct nfqnl_instance *queue; unsigned int verdict; @@ -1044,8 +1041,6 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, enum ip_conntrack_info uninitialized_var(ctinfo); struct nfnl_ct_hook *nfnl_ct; struct nf_conn *ct = NULL; - - struct net *net = sock_net(ctnl); struct nfnl_queue_net *q = nfnl_queue_pernet(net); queue = instance_lookup(q, queue_num); @@ -1092,10 +1087,9 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, return 0; } -static int -nfqnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const nfqa[]) +static int nfqnl_recv_unsupp(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const nfqa[]) { return -ENOTSUPP; } @@ -1110,16 +1104,14 @@ static const struct nf_queue_handler nfqh = { .nf_hook_drop = &nfqnl_nf_hook_drop, }; -static int -nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const nfqa[]) +static int nfqnl_recv_config(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const nfqa[]) { struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int16_t queue_num = ntohs(nfmsg->res_id); struct nfqnl_instance *queue; struct nfqnl_msg_config_cmd *cmd = NULL; - struct net *net = sock_net(ctnl); struct nfnl_queue_net *q = nfnl_queue_pernet(net); int ret = 0; diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 9c8fab00164b..454841baa4d0 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -519,9 +519,9 @@ nla_put_failure: return -1; } -static int -nfnl_compat_get(struct sock *nfnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, const struct nlattr * const tb[]) +static int nfnl_compat_get(struct net *net, struct sock *nfnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const tb[]) { int ret = 0, 
target; struct nfgenmsg *nfmsg; diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index df8801e02a32..4e3c3affd285 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -61,8 +61,8 @@ static const struct nla_policy xt_osf_policy[OSF_ATTR_MAX + 1] = { [OSF_ATTR_FINGER] = { .len = sizeof(struct xt_osf_user_finger) }, }; -static int xt_osf_add_callback(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, +static int xt_osf_add_callback(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const osf_attrs[]) { struct xt_osf_user_finger *f; @@ -104,7 +104,8 @@ static int xt_osf_add_callback(struct sock *ctnl, struct sk_buff *skb, return err; } -static int xt_osf_remove_callback(struct sock *ctnl, struct sk_buff *skb, +static int xt_osf_remove_callback(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const osf_attrs[]) { -- cgit v1.2.3-71-gd317 From 5913beaf0d70f97135ed7191c028fd88b3848864 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 15 Dec 2015 19:41:57 +0100 Subject: netfilter: nfnetlink: pass down netns pointer to commit() and abort() callbacks Adapt callsites to avoid recurrent lookup of the netns pointer. Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nfnetlink.h | 4 ++-- net/netfilter/nf_tables_api.c | 6 ++---- net/netfilter/nfnetlink.c | 6 +++--- 3 files changed, 7 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index ceacbf5dcb73..ba0d9789eb6e 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -26,8 +26,8 @@ struct nfnetlink_subsystem { __u8 subsys_id; /* nfnetlink subsystem ID */ __u8 cb_count; /* number of callbacks */ const struct nfnl_callback *cb; /* callback for individual types */ - int (*commit)(struct sk_buff *skb); - int (*abort)(struct sk_buff *skb); + int (*commit)(struct net *net, struct sk_buff *skb); + int (*abort)(struct net *net, struct sk_buff *skb); }; int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 69cb5be9a174..f5c397158e29 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3865,9 +3865,8 @@ static void nf_tables_commit_release(struct nft_trans *trans) kfree(trans); } -static int nf_tables_commit(struct sk_buff *skb) +static int nf_tables_commit(struct net *net, struct sk_buff *skb) { - struct net *net = sock_net(skb->sk); struct nft_trans *trans, *next; struct nft_trans_elem *te; @@ -4002,9 +4001,8 @@ static void nf_tables_abort_release(struct nft_trans *trans) kfree(trans); } -static int nf_tables_abort(struct sk_buff *skb) +static int nf_tables_abort(struct net *net, struct sk_buff *skb) { - struct net *net = sock_net(skb->sk); struct nft_trans *trans, *next; struct nft_trans_elem *te; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 7012154b28ca..a7ba23353dab 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -425,15 +425,15 @@ next: } done: if (status & NFNL_BATCH_REPLAY) { - ss->abort(oskb); + ss->abort(net, oskb); nfnl_err_reset(&err_list); nfnl_unlock(subsys_id); kfree_skb(skb); goto replay; } else if (status == NFNL_BATCH_DONE) { - ss->commit(oskb); + ss->commit(net, oskb); } else { - ss->abort(oskb); + ss->abort(net, oskb); } nfnl_err_deliver(&err_list, oskb); 
-- cgit v1.2.3-71-gd317 From 7be4fb643ef2d1058b897ba9dbe17bf5ced04391 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Wed, 23 Dec 2015 23:45:24 +0100 Subject: nfc: microread: Fix header comment microread platform_data header had an NXP header. Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- include/linux/platform_data/microread.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/microread.h b/include/linux/platform_data/microread.h index cfda59b226ee..ca13992089b8 100644 --- a/include/linux/platform_data/microread.h +++ b/include/linux/platform_data/microread.h @@ -1,5 +1,5 @@ /* - * Driver include for the PN544 NFC chip. + * Driver include for the Inside Secure microread NFC Chip. * * Copyright (C) 2011 Tieto Poland * Copyright (C) 2012 Intel Corporation. All rights reserved. -- cgit v1.2.3-71-gd317 From f3a4094558ddf8afa8bb58250d548e15e059c65a Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 30 Dec 2015 16:28:25 +0100 Subject: ethtool: Add phy statistics Ethernet PHYs can maintain statistics, for example errors while idle and receive errors. Add an ethtool mechanism to retrieve these statistics, using the same model as MAC statistics. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy.h | 6 ++++ include/uapi/linux/ethtool.h | 3 ++ net/core/ethtool.c | 81 +++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 89 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 05fde31b6dc6..a89cb0eef911 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -589,6 +589,12 @@ struct phy_driver { int (*module_eeprom)(struct phy_device *dev, struct ethtool_eeprom *ee, u8 *data); + /* Get statistics from the phy using ethtool */ + int (*get_sset_count)(struct phy_device *dev); + void (*get_strings)(struct phy_device *dev, u8 *data); + void (*get_stats)(struct phy_device *dev, + struct ethtool_stats *stats, u64 *data); + struct device_driver driver; }; #define to_phy_driver(d) container_of(d, struct phy_driver, driver) diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index cd1629170103..57fa39005e79 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -542,6 +542,7 @@ struct ethtool_pauseparam { * now deprecated * @ETH_SS_FEATURES: Device feature names * @ETH_SS_RSS_HASH_FUNCS: RSS hush function names + * @ETH_SS_PHY_STATS: Statistic names, for use with %ETHTOOL_GPHYSTATS */ enum ethtool_stringset { ETH_SS_TEST = 0, @@ -551,6 +552,7 @@ enum ethtool_stringset { ETH_SS_FEATURES, ETH_SS_RSS_HASH_FUNCS, ETH_SS_TUNABLES, + ETH_SS_PHY_STATS, }; /** @@ -1225,6 +1227,7 @@ enum ethtool_sfeatures_retval_bits { #define ETHTOOL_SRSSH 0x00000047 /* Set RX flow hash configuration */ #define ETHTOOL_GTUNABLE 0x00000048 /* Get tunable configuration */ #define ETHTOOL_STUNABLE 0x00000049 /* Set tunable configuration */ +#define ETHTOOL_GPHYSTATS 0x0000004a /* get PHY-specific statistics */ /* compatibility with older code */ #define SPARC_ETH_GSET ETHTOOL_GSET diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 09948a726347..daf04709dd3c 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -191,6 +191,23 @@ static int ethtool_set_features(struct net_device *dev, void __user *useraddr) return ret; } +static int phy_get_sset_count(struct phy_device *phydev) +{ + int ret; + + if (phydev->drv->get_sset_count && + 
phydev->drv->get_strings && + phydev->drv->get_stats) { + mutex_lock(&phydev->lock); + ret = phydev->drv->get_sset_count(phydev); + mutex_unlock(&phydev->lock); + + return ret; + } + + return -EOPNOTSUPP; +} + static int __ethtool_get_sset_count(struct net_device *dev, int sset) { const struct ethtool_ops *ops = dev->ethtool_ops; @@ -204,6 +221,13 @@ static int __ethtool_get_sset_count(struct net_device *dev, int sset) if (sset == ETH_SS_TUNABLES) return ARRAY_SIZE(tunable_strings); + if (sset == ETH_SS_PHY_STATS) { + if (dev->phydev) + return phy_get_sset_count(dev->phydev); + else + return -EOPNOTSUPP; + } + if (ops->get_sset_count && ops->get_strings) return ops->get_sset_count(dev, sset); else @@ -223,7 +247,17 @@ static void __ethtool_get_strings(struct net_device *dev, sizeof(rss_hash_func_strings)); else if (stringset == ETH_SS_TUNABLES) memcpy(data, tunable_strings, sizeof(tunable_strings)); - else + else if (stringset == ETH_SS_PHY_STATS) { + struct phy_device *phydev = dev->phydev; + + if (phydev) { + mutex_lock(&phydev->lock); + phydev->drv->get_strings(phydev, data); + mutex_unlock(&phydev->lock); + } else { + return; + } + } else /* ops->get_strings is valid because checked earlier */ ops->get_strings(dev, stringset, data); } @@ -1401,6 +1435,47 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr) return ret; } +static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr) +{ + struct ethtool_stats stats; + struct phy_device *phydev = dev->phydev; + u64 *data; + int ret, n_stats; + + if (!phydev) + return -EOPNOTSUPP; + + n_stats = phy_get_sset_count(phydev); + + if (n_stats < 0) + return n_stats; + WARN_ON(n_stats == 0); + + if (copy_from_user(&stats, useraddr, sizeof(stats))) + return -EFAULT; + + stats.n_stats = n_stats; + data = kmalloc_array(n_stats, sizeof(u64), GFP_USER); + if (!data) + return -ENOMEM; + + mutex_lock(&phydev->lock); + phydev->drv->get_stats(phydev, &stats, data); + mutex_unlock(&phydev->lock); + + ret = -EFAULT; + if (copy_to_user(useraddr, &stats, sizeof(stats))) + goto out; + useraddr += sizeof(stats); + if (copy_to_user(useraddr, data, stats.n_stats * sizeof(u64))) + goto out; + ret = 0; + + out: + kfree(data); + return ret; +} + static int ethtool_get_perm_addr(struct net_device *dev, void __user *useraddr) { struct ethtool_perm_addr epaddr; @@ -1779,6 +1854,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GSSET_INFO: case ETHTOOL_GSTRINGS: case ETHTOOL_GSTATS: + case ETHTOOL_GPHYSTATS: case ETHTOOL_GTSO: case ETHTOOL_GPERMADDR: case ETHTOOL_GUFO: @@ -1991,6 +2067,9 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_STUNABLE: rc = ethtool_set_tunable(dev, useraddr); break; + case ETHTOOL_GPHYSTATS: + rc = ethtool_get_phy_stats(dev, useraddr); + break; default: rc = -EOPNOTSUPP; } -- cgit v1.2.3-71-gd317 From 888cc8c20cf265fcd1302f6c5d6be07628ba66c7 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Mon, 28 Dec 2015 02:07:08 +0300 Subject: sh_eth: remove EDMAC_BIG_ENDIAN Commit 71557a37adb5 ("[netdrvr] sh_eth: Add SH7619 support") added support for the big-endian EDMAC descriptors. However, it was never used and never worked right until the recent driver fixes. I think we now can just remove this support, it was only burdening the driver from the start. It should be easy to do without disturbing the SH platform code, at least for now... Signed-off-by: Sergei Shtylyov Acked-by: Simon Horman Signed-off-by: David S. 
Miller --- drivers/net/ethernet/renesas/sh_eth.c | 18 ++---------------- drivers/net/ethernet/renesas/sh_eth.h | 1 - include/linux/sh_eth.h | 2 +- 3 files changed, 3 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 8ca040113a09..2c7dd8a68d84 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -971,24 +971,12 @@ static void sh_eth_set_receive_align(struct sk_buff *skb) /* CPU <-> EDMAC endian convert */ static inline __u32 cpu_to_edmac(struct sh_eth_private *mdp, u32 x) { - switch (mdp->edmac_endian) { - case EDMAC_LITTLE_ENDIAN: - return cpu_to_le32(x); - case EDMAC_BIG_ENDIAN: - return cpu_to_be32(x); - } - return x; + return cpu_to_le32(x); } static inline __u32 edmac_to_cpu(struct sh_eth_private *mdp, u32 x) { - switch (mdp->edmac_endian) { - case EDMAC_LITTLE_ENDIAN: - return le32_to_cpu(x); - case EDMAC_BIG_ENDIAN: - return be32_to_cpu(x); - } - return x; + return le32_to_cpu(x); } /* Program the hardware MAC address from dev->dev_addr. */ @@ -3097,8 +3085,6 @@ static int sh_eth_drv_probe(struct platform_device *pdev) /* get PHY ID */ mdp->phy_id = pd->phy; mdp->phy_interface = pd->phy_interface; - /* EDMAC endian */ - mdp->edmac_endian = pd->edmac_endian; mdp->no_ether_link = pd->no_ether_link; mdp->ether_link_active_low = pd->ether_link_active_low; diff --git a/drivers/net/ethernet/renesas/sh_eth.h b/drivers/net/ethernet/renesas/sh_eth.h index 72fcfc924589..8fa4ef3a7fdd 100644 --- a/drivers/net/ethernet/renesas/sh_eth.h +++ b/drivers/net/ethernet/renesas/sh_eth.h @@ -513,7 +513,6 @@ struct sh_eth_private { u32 cur_rx, dirty_rx; /* Producer/consumer ring indices */ u32 cur_tx, dirty_tx; u32 rx_buf_sz; /* Based on MTU+slack. */ - int edmac_endian; struct napi_struct napi; bool irq_enabled; /* MII transceiver section. */ diff --git a/include/linux/sh_eth.h b/include/linux/sh_eth.h index 8c9131db2b25..f2e27e078362 100644 --- a/include/linux/sh_eth.h +++ b/include/linux/sh_eth.h @@ -4,7 +4,7 @@ #include #include -enum {EDMAC_LITTLE_ENDIAN, EDMAC_BIG_ENDIAN}; +enum {EDMAC_LITTLE_ENDIAN}; struct sh_eth_plat_data { int phy; -- cgit v1.2.3-71-gd317 From 538950a1b7527a0a52ccd9337e3fcd304f027f13 Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Mon, 4 Jan 2016 17:41:47 -0500 Subject: soreuseport: setsockopt SO_ATTACH_REUSEPORT_[CE]BPF Expose socket options for setting a classic or extended BPF program for use when selecting sockets in an SO_REUSEPORT group. These options can be used on the first socket to belong to a group before bind or on any socket in the group after bind. This change includes refactoring of the existing sk_filter code to allow reuse of the existing BPF filter validation checks. Signed-off-by: Craig Gallek Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- arch/alpha/include/uapi/asm/socket.h | 3 + arch/avr32/include/uapi/asm/socket.h | 3 + arch/frv/include/uapi/asm/socket.h | 3 + arch/ia64/include/uapi/asm/socket.h | 3 + arch/m32r/include/uapi/asm/socket.h | 3 + arch/mips/include/uapi/asm/socket.h | 3 + arch/mn10300/include/uapi/asm/socket.h | 3 + arch/parisc/include/uapi/asm/socket.h | 3 + arch/powerpc/include/uapi/asm/socket.h | 3 + arch/s390/include/uapi/asm/socket.h | 3 + arch/sparc/include/uapi/asm/socket.h | 3 + arch/xtensa/include/uapi/asm/socket.h | 3 + include/linux/filter.h | 2 + include/net/sock_reuseport.h | 10 ++- include/net/udp.h | 5 +- include/uapi/asm-generic/socket.h | 3 + net/core/filter.c | 121 +++++++++++++++++++++++++++------ net/core/sock.c | 29 ++++++++ net/core/sock_reuseport.c | 88 ++++++++++++++++++++++-- net/ipv4/udp.c | 14 ++-- net/ipv4/udp_diag.c | 4 +- net/ipv6/udp.c | 14 ++-- 22 files changed, 282 insertions(+), 44 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index 9a20821b111c..c5fb9e6bc3a5 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -92,4 +92,7 @@ #define SO_ATTACH_BPF 50 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 51 +#define SO_ATTACH_REUSEPORT_EBPF 52 + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/avr32/include/uapi/asm/socket.h b/arch/avr32/include/uapi/asm/socket.h index 2b65ed6b277c..9de0796240a0 100644 --- a/arch/avr32/include/uapi/asm/socket.h +++ b/arch/avr32/include/uapi/asm/socket.h @@ -85,4 +85,7 @@ #define SO_ATTACH_BPF 50 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 51 +#define SO_ATTACH_REUSEPORT_EBPF 52 + #endif /* _UAPI__ASM_AVR32_SOCKET_H */ diff --git a/arch/frv/include/uapi/asm/socket.h b/arch/frv/include/uapi/asm/socket.h index 4823ad125578..f02e4849ae83 100644 --- a/arch/frv/include/uapi/asm/socket.h +++ b/arch/frv/include/uapi/asm/socket.h @@ -85,5 +85,8 @@ #define SO_ATTACH_BPF 50 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 51 +#define SO_ATTACH_REUSEPORT_EBPF 52 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h index 59be3d87f86d..bce29166de1b 100644 --- a/arch/ia64/include/uapi/asm/socket.h +++ b/arch/ia64/include/uapi/asm/socket.h @@ -94,4 +94,7 @@ #define SO_ATTACH_BPF 50 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 51 +#define SO_ATTACH_REUSEPORT_EBPF 52 + #endif /* _ASM_IA64_SOCKET_H */ diff --git a/arch/m32r/include/uapi/asm/socket.h b/arch/m32r/include/uapi/asm/socket.h index 7bc4cb273856..14aa4a6bccf1 100644 --- a/arch/m32r/include/uapi/asm/socket.h +++ b/arch/m32r/include/uapi/asm/socket.h @@ -85,4 +85,7 @@ #define SO_ATTACH_BPF 50 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 51 +#define SO_ATTACH_REUSEPORT_EBPF 52 + #endif /* _ASM_M32R_SOCKET_H */ diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index dec3c850f36b..5910fe294e93 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -103,4 +103,7 @@ #define SO_ATTACH_BPF 50 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 51 +#define SO_ATTACH_REUSEPORT_EBPF 52 + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/mn10300/include/uapi/asm/socket.h b/arch/mn10300/include/uapi/asm/socket.h index cab7d6d50051..58b1aa01ab9f 100644 --- a/arch/mn10300/include/uapi/asm/socket.h 
+++ b/arch/mn10300/include/uapi/asm/socket.h @@ -85,4 +85,7 @@ #define SO_ATTACH_BPF 50 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 51 +#define SO_ATTACH_REUSEPORT_EBPF 52 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index a5cd40cd8ee1..f9cf1223422c 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -84,4 +84,7 @@ #define SO_ATTACH_BPF 0x402B #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 0x402C +#define SO_ATTACH_REUSEPORT_EBPF 0x402D + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h index c046666038f8..dd54f28ecdec 100644 --- a/arch/powerpc/include/uapi/asm/socket.h +++ b/arch/powerpc/include/uapi/asm/socket.h @@ -92,4 +92,7 @@ #define SO_ATTACH_BPF 50 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 51 +#define SO_ATTACH_REUSEPORT_EBPF 52 + #endif /* _ASM_POWERPC_SOCKET_H */ diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h index 296942d56e6a..d02e89d14fef 100644 --- a/arch/s390/include/uapi/asm/socket.h +++ b/arch/s390/include/uapi/asm/socket.h @@ -91,4 +91,7 @@ #define SO_ATTACH_BPF 50 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 51 +#define SO_ATTACH_REUSEPORT_EBPF 52 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index e6a16c40be5f..d270ee91968e 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -81,6 +81,9 @@ #define SO_ATTACH_BPF 0x0034 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 0x0035 +#define SO_ATTACH_REUSEPORT_EBPF 0x0036 + /* Security levels - as per NRL IPv6 - don't actually do anything */ #define SO_SECURITY_AUTHENTICATION 0x5001 #define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002 diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h index 4120af086160..fd3b96d1153f 100644 --- a/arch/xtensa/include/uapi/asm/socket.h +++ b/arch/xtensa/include/uapi/asm/socket.h @@ -96,4 +96,7 @@ #define SO_ATTACH_BPF 50 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 51 +#define SO_ATTACH_REUSEPORT_EBPF 52 + #endif /* _XTENSA_SOCKET_H */ diff --git a/include/linux/filter.h b/include/linux/filter.h index 4165e9ac9e36..294c3cdf07b3 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -447,6 +447,8 @@ void bpf_prog_destroy(struct bpf_prog *fp); int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); int sk_attach_bpf(u32 ufd, struct sock *sk); +int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk); +int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk); int sk_detach_filter(struct sock *sk); int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, unsigned int len); diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h index 67d1eb8fd7af..7dda3d7adba8 100644 --- a/include/net/sock_reuseport.h +++ b/include/net/sock_reuseport.h @@ -1,6 +1,8 @@ #ifndef _SOCK_REUSEPORT_H #define _SOCK_REUSEPORT_H +#include +#include #include #include @@ -9,12 +11,18 @@ struct sock_reuseport { u16 max_socks; /* length of socks */ u16 num_socks; /* elements in socks */ + struct bpf_prog __rcu *prog; /* optional BPF sock selector */ struct sock *socks[0]; /* array of sock pointers */ }; extern int 
reuseport_alloc(struct sock *sk); extern int reuseport_add_sock(struct sock *sk, const struct sock *sk2); extern void reuseport_detach_sock(struct sock *sk); -extern struct sock *reuseport_select_sock(struct sock *sk, u32 hash); +extern struct sock *reuseport_select_sock(struct sock *sk, + u32 hash, + struct sk_buff *skb, + int hdr_len); +extern struct bpf_prog *reuseport_attach_prog(struct sock *sk, + struct bpf_prog *prog); #endif /* _SOCK_REUSEPORT_H */ diff --git a/include/net/udp.h b/include/net/udp.h index 3b5d7f93bc23..2842541e28e7 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -258,7 +258,7 @@ struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif); struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif, - struct udp_table *tbl); + struct udp_table *tbl, struct sk_buff *skb); struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, @@ -266,7 +266,8 @@ struct sock *udp6_lib_lookup(struct net *net, struct sock *__udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, - int dif, struct udp_table *tbl); + int dif, struct udp_table *tbl, + struct sk_buff *skb); /* * SNMP statistics for UDP and UDP-Lite diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index 5c15c2a5c123..fb8a41668382 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -87,4 +87,7 @@ #define SO_ATTACH_BPF 50 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 51 +#define SO_ATTACH_REUSEPORT_EBPF 52 + #endif /* __ASM_GENERIC_SOCKET_H */ diff --git a/net/core/filter.c b/net/core/filter.c index c770196ae8d5..35e6fed28709 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -50,6 +50,7 @@ #include #include #include +#include /** * sk_filter - run a packet through a socket filter @@ -1167,17 +1168,32 @@ static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk) return 0; } -/** - * sk_attach_filter - attach a socket filter - * @fprog: the filter program - * @sk: the socket to use - * - * Attach the user's filter code. We first run some sanity checks on - * it to make sure it does not explode on us later. If an error - * occurs or there is insufficient memory for the filter a negative - * errno code is returned. On success the return is zero. - */ -int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) +static int __reuseport_attach_prog(struct bpf_prog *prog, struct sock *sk) +{ + struct bpf_prog *old_prog; + int err; + + if (bpf_prog_size(prog->len) > sysctl_optmem_max) + return -ENOMEM; + + if (sk_unhashed(sk)) { + err = reuseport_alloc(sk); + if (err) + return err; + } else if (!rcu_access_pointer(sk->sk_reuseport_cb)) { + /* The socket wasn't bound with SO_REUSEPORT */ + return -EINVAL; + } + + old_prog = reuseport_attach_prog(sk, prog); + if (old_prog) + bpf_prog_destroy(old_prog); + + return 0; +} + +static +struct bpf_prog *__get_filter(struct sock_fprog *fprog, struct sock *sk) { unsigned int fsize = bpf_classic_proglen(fprog); unsigned int bpf_fsize = bpf_prog_size(fprog->len); @@ -1185,19 +1201,19 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) int err; if (sock_flag(sk, SOCK_FILTER_LOCKED)) - return -EPERM; + return ERR_PTR(-EPERM); /* Make sure new filter is there and in the right amounts. 
*/ if (fprog->filter == NULL) - return -EINVAL; + return ERR_PTR(-EINVAL); prog = bpf_prog_alloc(bpf_fsize, 0); if (!prog) - return -ENOMEM; + return ERR_PTR(-ENOMEM); if (copy_from_user(prog->insns, fprog->filter, fsize)) { __bpf_prog_free(prog); - return -EFAULT; + return ERR_PTR(-EFAULT); } prog->len = fprog->len; @@ -1205,13 +1221,30 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) err = bpf_prog_store_orig_filter(prog, fprog); if (err) { __bpf_prog_free(prog); - return -ENOMEM; + return ERR_PTR(-ENOMEM); } /* bpf_prepare_filter() already takes care of freeing * memory in case something goes wrong. */ - prog = bpf_prepare_filter(prog, NULL); + return bpf_prepare_filter(prog, NULL); +} + +/** + * sk_attach_filter - attach a socket filter + * @fprog: the filter program + * @sk: the socket to use + * + * Attach the user's filter code. We first run some sanity checks on + * it to make sure it does not explode on us later. If an error + * occurs or there is insufficient memory for the filter a negative + * errno code is returned. On success the return is zero. + */ +int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) +{ + struct bpf_prog *prog = __get_filter(fprog, sk); + int err; + if (IS_ERR(prog)) return PTR_ERR(prog); @@ -1225,23 +1258,50 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) } EXPORT_SYMBOL_GPL(sk_attach_filter); -int sk_attach_bpf(u32 ufd, struct sock *sk) +int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk) { - struct bpf_prog *prog; + struct bpf_prog *prog = __get_filter(fprog, sk); int err; + if (IS_ERR(prog)) + return PTR_ERR(prog); + + err = __reuseport_attach_prog(prog, sk); + if (err < 0) { + __bpf_prog_release(prog); + return err; + } + + return 0; +} + +static struct bpf_prog *__get_bpf(u32 ufd, struct sock *sk) +{ + struct bpf_prog *prog; + if (sock_flag(sk, SOCK_FILTER_LOCKED)) - return -EPERM; + return ERR_PTR(-EPERM); prog = bpf_prog_get(ufd); if (IS_ERR(prog)) - return PTR_ERR(prog); + return prog; if (prog->type != BPF_PROG_TYPE_SOCKET_FILTER) { bpf_prog_put(prog); - return -EINVAL; + return ERR_PTR(-EINVAL); } + return prog; +} + +int sk_attach_bpf(u32 ufd, struct sock *sk) +{ + struct bpf_prog *prog = __get_bpf(ufd, sk); + int err; + + if (IS_ERR(prog)) + return PTR_ERR(prog); + err = __sk_attach_prog(prog, sk); if (err < 0) { bpf_prog_put(prog); @@ -1251,6 +1311,23 @@ int sk_attach_bpf(u32 ufd, struct sock *sk) return 0; } +int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk) +{ + struct bpf_prog *prog = __get_bpf(ufd, sk); + int err; + + if (IS_ERR(prog)) + return PTR_ERR(prog); + + err = __reuseport_attach_prog(prog, sk); + if (err < 0) { + bpf_prog_put(prog); + return err; + } + + return 0; +} + #define BPF_RECOMPUTE_CSUM(flags) ((flags) & 1) #define BPF_LDST_LEN 16U diff --git a/net/core/sock.c b/net/core/sock.c index 565bab7baca9..51270238e269 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -134,6 +134,7 @@ #include #include +#include #include @@ -932,6 +933,32 @@ set_rcvbuf: } break; + case SO_ATTACH_REUSEPORT_CBPF: + ret = -EINVAL; + if (optlen == sizeof(struct sock_fprog)) { + struct sock_fprog fprog; + + ret = -EFAULT; + if (copy_from_user(&fprog, optval, sizeof(fprog))) + break; + + ret = sk_reuseport_attach_filter(&fprog, sk); + } + break; + + case SO_ATTACH_REUSEPORT_EBPF: + ret = -EINVAL; + if (optlen == sizeof(u32)) { + u32 ufd; + + ret = -EFAULT; + if (copy_from_user(&ufd, optval, sizeof(ufd))) + break; + + ret = sk_reuseport_attach_bpf(ufd, sk); + } + break; + case 
SO_DETACH_FILTER: ret = sk_detach_filter(sk); break; @@ -1443,6 +1470,8 @@ void sk_destruct(struct sock *sk) sk_filter_uncharge(sk, filter); RCU_INIT_POINTER(sk->sk_filter, NULL); } + if (rcu_access_pointer(sk->sk_reuseport_cb)) + reuseport_detach_sock(sk); sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP); diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c index 963c8d5f3027..ae0969c0fc2e 100644 --- a/net/core/sock_reuseport.c +++ b/net/core/sock_reuseport.c @@ -1,10 +1,12 @@ /* * To speed up listener socket lookup, create an array to store all sockets * listening on the same port. This allows a decision to be made after finding - * the first socket. + * the first socket. An optional BPF program can also be configured for + * selecting the socket index from the array of available sockets. */ #include +#include #include #define INIT_SOCKS 128 @@ -22,6 +24,7 @@ static struct sock_reuseport *__reuseport_alloc(u16 max_socks) reuse->max_socks = max_socks; + RCU_INIT_POINTER(reuse->prog, NULL); return reuse; } @@ -67,6 +70,7 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse) more_reuse->max_socks = more_socks_size; more_reuse->num_socks = reuse->num_socks; + more_reuse->prog = reuse->prog; memcpy(more_reuse->socks, reuse->socks, reuse->num_socks * sizeof(struct sock *)); @@ -75,6 +79,10 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse) rcu_assign_pointer(reuse->socks[i]->sk_reuseport_cb, more_reuse); + /* Note: we use kfree_rcu here instead of reuseport_free_rcu so + * that reuse and more_reuse can temporarily share a reference + * to prog. + */ kfree_rcu(reuse, rcu); return more_reuse; } @@ -116,6 +124,16 @@ int reuseport_add_sock(struct sock *sk, const struct sock *sk2) } EXPORT_SYMBOL(reuseport_add_sock); +static void reuseport_free_rcu(struct rcu_head *head) +{ + struct sock_reuseport *reuse; + + reuse = container_of(head, struct sock_reuseport, rcu); + if (reuse->prog) + bpf_prog_destroy(reuse->prog); + kfree(reuse); +} + void reuseport_detach_sock(struct sock *sk) { struct sock_reuseport *reuse; @@ -131,7 +149,7 @@ void reuseport_detach_sock(struct sock *sk) reuse->socks[i] = reuse->socks[reuse->num_socks - 1]; reuse->num_socks--; if (reuse->num_socks == 0) - kfree_rcu(reuse, rcu); + call_rcu(&reuse->rcu, reuseport_free_rcu); break; } } @@ -139,15 +157,53 @@ void reuseport_detach_sock(struct sock *sk) } EXPORT_SYMBOL(reuseport_detach_sock); +static struct sock *run_bpf(struct sock_reuseport *reuse, u16 socks, + struct bpf_prog *prog, struct sk_buff *skb, + int hdr_len) +{ + struct sk_buff *nskb = NULL; + u32 index; + + if (skb_shared(skb)) { + nskb = skb_clone(skb, GFP_ATOMIC); + if (!nskb) + return NULL; + skb = nskb; + } + + /* temporarily advance data past protocol header */ + if (!pskb_pull(skb, hdr_len)) { + consume_skb(nskb); + return NULL; + } + index = bpf_prog_run_save_cb(prog, skb); + __skb_push(skb, hdr_len); + + consume_skb(nskb); + + if (index >= socks) + return NULL; + + return reuse->socks[index]; +} + /** * reuseport_select_sock - Select a socket from an SO_REUSEPORT group. * @sk: First socket in the group. - * @hash: Use this hash to select. + * @hash: When no BPF filter is available, use this hash to select. + * @skb: skb to run through BPF filter. + * @hdr_len: BPF filter expects skb data pointer at payload data. If + * the skb does not yet point at the payload, this parameter represents + * how far the pointer needs to advance to reach the payload. 
* Returns a socket that should receive the packet (or NULL on error). */ -struct sock *reuseport_select_sock(struct sock *sk, u32 hash) +struct sock *reuseport_select_sock(struct sock *sk, + u32 hash, + struct sk_buff *skb, + int hdr_len) { struct sock_reuseport *reuse; + struct bpf_prog *prog; struct sock *sk2 = NULL; u16 socks; @@ -158,12 +214,16 @@ struct sock *reuseport_select_sock(struct sock *sk, u32 hash) if (!reuse) goto out; + prog = rcu_dereference(reuse->prog); socks = READ_ONCE(reuse->num_socks); if (likely(socks)) { /* paired with smp_wmb() in reuseport_add_sock() */ smp_rmb(); - sk2 = reuse->socks[reciprocal_scale(hash, socks)]; + if (prog && skb) + sk2 = run_bpf(reuse, socks, prog, skb, hdr_len); + else + sk2 = reuse->socks[reciprocal_scale(hash, socks)]; } out: @@ -171,3 +231,21 @@ out: return sk2; } EXPORT_SYMBOL(reuseport_select_sock); + +struct bpf_prog * +reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog) +{ + struct sock_reuseport *reuse; + struct bpf_prog *old_prog; + + spin_lock_bh(&reuseport_lock); + reuse = rcu_dereference_protected(sk->sk_reuseport_cb, + lockdep_is_held(&reuseport_lock)); + old_prog = rcu_dereference_protected(reuse->prog, + lockdep_is_held(&reuseport_lock)); + rcu_assign_pointer(reuse->prog, prog); + spin_unlock_bh(&reuseport_lock); + + return old_prog; +} +EXPORT_SYMBOL(reuseport_attach_prog); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 762b01f55707..835378365f25 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -514,7 +514,7 @@ begin: struct sock *sk2; hash = udp_ehashfn(net, daddr, hnum, saddr, sport); - sk2 = reuseport_select_sock(sk, hash); + sk2 = reuseport_select_sock(sk, hash, NULL, 0); if (sk2) { result = sk2; goto found; @@ -553,7 +553,7 @@ found: */ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, - int dif, struct udp_table *udptable) + int dif, struct udp_table *udptable, struct sk_buff *skb) { struct sock *sk, *result; struct hlist_nulls_node *node; @@ -602,7 +602,8 @@ begin: struct sock *sk2; hash = udp_ehashfn(net, daddr, hnum, saddr, sport); - sk2 = reuseport_select_sock(sk, hash); + sk2 = reuseport_select_sock(sk, hash, skb, + sizeof(struct udphdr)); if (sk2) { result = sk2; goto found; @@ -647,14 +648,14 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, return __udp4_lib_lookup(dev_net(skb_dst(skb)->dev), iph->saddr, sport, iph->daddr, dport, inet_iif(skb), - udptable); + udptable, skb); } struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif) { return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, - &udp_table); + &udp_table, NULL); } EXPORT_SYMBOL_GPL(udp4_lib_lookup); @@ -702,7 +703,8 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) struct net *net = dev_net(skb->dev); sk = __udp4_lib_lookup(net, iph->daddr, uh->dest, - iph->saddr, uh->source, skb->dev->ifindex, udptable); + iph->saddr, uh->source, skb->dev->ifindex, udptable, + NULL); if (!sk) { ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); return; /* No socket for error */ diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 6116604bf6e8..df1966f3b6ec 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -44,7 +44,7 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, sk = __udp4_lib_lookup(net, req->id.idiag_src[0], req->id.idiag_sport, req->id.idiag_dst[0], req->id.idiag_dport, - req->id.idiag_if, tbl); + req->id.idiag_if, tbl, NULL); #if 
IS_ENABLED(CONFIG_IPV6) else if (req->sdiag_family == AF_INET6) sk = __udp6_lib_lookup(net, @@ -52,7 +52,7 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, req->id.idiag_sport, (struct in6_addr *)req->id.idiag_dst, req->id.idiag_dport, - req->id.idiag_if, tbl); + req->id.idiag_if, tbl, NULL); #endif else goto out_nosk; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 6204b8992de4..56fcb55fda31 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -272,7 +272,7 @@ begin: struct sock *sk2; hash = udp6_ehashfn(net, daddr, hnum, saddr, sport); - sk2 = reuseport_select_sock(sk, hash); + sk2 = reuseport_select_sock(sk, hash, NULL, 0); if (sk2) { result = sk2; goto found; @@ -310,7 +310,8 @@ found: struct sock *__udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, - int dif, struct udp_table *udptable) + int dif, struct udp_table *udptable, + struct sk_buff *skb) { struct sock *sk, *result; struct hlist_nulls_node *node; @@ -358,7 +359,8 @@ begin: struct sock *sk2; hash = udp6_ehashfn(net, daddr, hnum, saddr, sport); - sk2 = reuseport_select_sock(sk, hash); + sk2 = reuseport_select_sock(sk, hash, skb, + sizeof(struct udphdr)); if (sk2) { result = sk2; goto found; @@ -407,13 +409,13 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb, return sk; return __udp6_lib_lookup(dev_net(skb_dst(skb)->dev), &iph->saddr, sport, &iph->daddr, dport, inet6_iif(skb), - udptable); + udptable, skb); } struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, int dif) { - return __udp6_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table); + return __udp6_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table, NULL); } EXPORT_SYMBOL_GPL(udp6_lib_lookup); @@ -580,7 +582,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct net *net = dev_net(skb->dev); sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source, - inet6_iif(skb), udptable); + inet6_iif(skb), udptable, skb); if (!sk) { ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); -- cgit v1.2.3-71-gd317 From b0844444590e18704644f707ea88bff1b976b0e7 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Tue, 29 Dec 2015 14:58:30 +0200 Subject: net/mlx5_core: Introduce access function to read internal timer A preparation step which adds support for reading the hardware internal timer and the hardware timestamping from the CQE. In addition, advertize device_frequency_khz HCA capability. Signed-off-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 13 +++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h | 1 + include/linux/mlx5/device.h | 20 +++++++++++++++++--- include/linux/mlx5/mlx5_ifc.h | 6 +++--- 4 files changed, 34 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 789882b7b711..67676cf0d507 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -504,6 +504,19 @@ int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id) return mlx5_cmd_status_to_err_v2(out); } +cycle_t mlx5_read_internal_timer(struct mlx5_core_dev *dev) +{ + u32 timer_h, timer_h1, timer_l; + + timer_h = ioread32be(&dev->iseg->internal_timer_h); + timer_l = ioread32be(&dev->iseg->internal_timer_l); + timer_h1 = ioread32be(&dev->iseg->internal_timer_h); + if (timer_h != timer_h1) /* wrap around */ + timer_l = ioread32be(&dev->iseg->internal_timer_l); + + return (cycle_t)timer_l | (cycle_t)timer_h1 << 32; +} + static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) { struct mlx5_priv *priv = &mdev->priv; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index ea6a137fd76c..0336847ec9a1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -98,6 +98,7 @@ int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs); int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id); int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id); int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev); +cycle_t mlx5_read_internal_timer(struct mlx5_core_dev *dev); void mlx5e_init(void); void mlx5e_cleanup(void); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 7d3a85faefb7..df2f79ef3cac 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -443,9 +443,12 @@ struct mlx5_init_seg { __be32 rsvd1[120]; __be32 initializing; struct health_buffer health; - __be32 rsvd2[884]; + __be32 rsvd2[880]; + __be32 internal_timer_h; + __be32 internal_timer_l; + __be32 rsrv3[2]; __be32 health_counter; - __be32 rsvd3[1019]; + __be32 rsvd4[1019]; __be64 ieee1588_clk; __be32 ieee1588_clk_type; __be32 clr_intx; @@ -601,7 +604,8 @@ struct mlx5_cqe64 { __be32 imm_inval_pkey; u8 rsvd40[4]; __be32 byte_cnt; - __be64 timestamp; + __be32 timestamp_h; + __be32 timestamp_l; __be32 sop_drop_qpn; __be16 wqe_counter; u8 signature; @@ -623,6 +627,16 @@ static inline int cqe_has_vlan(struct mlx5_cqe64 *cqe) return !!(cqe->l4_hdr_type_etc & 0x1); } +static inline u64 get_cqe_ts(struct mlx5_cqe64 *cqe) +{ + u32 hi, lo; + + hi = be32_to_cpu(cqe->timestamp_h); + lo = be32_to_cpu(cqe->timestamp_l); + + return (u64)lo | ((u64)hi << 32); +} + enum { CQE_L4_HDR_TYPE_NONE = 0x0, CQE_L4_HDR_TYPE_TCP_NO_ACK = 0x1, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 131a2737cfa3..1780a85a8797 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -829,9 +829,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_66[0x8]; u8 log_uar_page_sz[0x10]; - u8 reserved_67[0xe0]; - - u8 reserved_68[0x1f]; + u8 reserved_67[0x40]; + u8 device_frequency_khz[0x20]; + u8 reserved_68[0x5f]; u8 cqe_zip[0x1]; u8 cqe_zip_timeout[0x10]; -- cgit v1.2.3-71-gd317 From cdba756f5803a2f0a8bbc6605acc166dd817979e Mon Sep 
17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Jan 2016 06:53:50 -0800 Subject: net: move ndo_features_check() close to ndo_start_xmit() TX fast path uses ndo_start_xmit(), ndo_features_check() and ndo_select_queue(). Move ndo_features_check() close to ndo_start_xmit() to increase data locality. All "struct net_device_ops" should now be using C99 initializers. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c20b814e46a0..8d8e5ca951b4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -812,6 +812,12 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * (can also return NETDEV_TX_LOCKED iff NETIF_F_LLTX) * Required can not be NULL. * + * netdev_features_t (*ndo_fix_features)(struct net_device *dev, + * netdev_features_t features); + * Adjusts the requested feature flags according to device-specific + * constraints, and returns the resulting flags. Must not modify + * the device state. + * * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb, * void *accel_priv, select_queue_fallback_t fallback); * Called to decide which queue to when device supports multiple @@ -959,12 +965,6 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * Called to release previously enslaved netdev. * * Feature/offload setting functions. - * netdev_features_t (*ndo_fix_features)(struct net_device *dev, - * netdev_features_t features); - * Adjusts the requested feature flags according to device-specific - * constraints, and returns the resulting flags. Must not modify - * the device state. - * * int (*ndo_set_features)(struct net_device *dev, netdev_features_t features); * Called to update device configuration to new features. Passed * feature set might be less than what was returned by ndo_fix_features()). @@ -1081,8 +1081,11 @@ struct net_device_ops { void (*ndo_uninit)(struct net_device *dev); int (*ndo_open)(struct net_device *dev); int (*ndo_stop)(struct net_device *dev); - netdev_tx_t (*ndo_start_xmit) (struct sk_buff *skb, - struct net_device *dev); + netdev_tx_t (*ndo_start_xmit)(struct sk_buff *skb, + struct net_device *dev); + netdev_features_t (*ndo_features_check)(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features); u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb, void *accel_priv, @@ -1245,9 +1248,6 @@ struct net_device_ops { struct net_device *dev, void *priv); int (*ndo_get_lock_subclass)(struct net_device *dev); - netdev_features_t (*ndo_features_check) (struct sk_buff *skb, - struct net_device *dev, - netdev_features_t features); int (*ndo_set_tx_maxrate)(struct net_device *dev, int queue_index, u32 maxrate); -- cgit v1.2.3-71-gd317 From c7f5d105495a38ed09e70d825f75d9d7d5407264 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 5 Nov 2015 11:34:57 -0500 Subject: net: Add eth_platform_get_mac_address() helper. A repeating pattern in drivers has become to use OF node information and, if not found, platform specific host information to extract the ethernet address for a given device. Currently this is done with a call to of_get_mac_address() and then some ifdef'd stuff for SPARC. Consolidate this into a portable routine, and provide the arch_get_platform_mac_address() weak function hook for all architectures to implement if they want. Signed-off-by: David S. 
Miller --- arch/sparc/kernel/idprom.c | 7 +++++++ include/linux/etherdevice.h | 3 +++ net/ethernet/eth.c | 31 +++++++++++++++++++++++++++++++ 3 files changed, 41 insertions(+) (limited to 'include/linux') diff --git a/arch/sparc/kernel/idprom.c b/arch/sparc/kernel/idprom.c index 6bd75012109d..f95dd11b75ea 100644 --- a/arch/sparc/kernel/idprom.c +++ b/arch/sparc/kernel/idprom.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -60,6 +61,12 @@ static void __init display_system_type(unsigned char machtype) { } #endif + +unsigned char *arch_get_platform_mac_address(void) +{ + return idprom->id_ethaddr; +} + /* Calculate the IDPROM checksum (xor of the data bytes). */ static unsigned char __init calc_idprom_cksum(struct idprom *idprom) { diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index eb049c622208..37ff4a6faa9a 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -29,6 +29,9 @@ #include #ifdef __KERNEL__ +struct device; +int eth_platform_get_mac_address(struct device *dev, u8 *mac_addr); +unsigned char *arch_get_platform_get_mac_address(void); u32 eth_get_headlen(void *data, unsigned int max_len); __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev); extern const struct header_ops eth_header_ops; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 9e63f252a89e..103871784e50 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -52,6 +52,8 @@ #include #include #include +#include +#include #include #include #include @@ -485,3 +487,32 @@ static int __init eth_offload_init(void) } fs_initcall(eth_offload_init); + +unsigned char * __weak arch_get_platform_mac_address(void) +{ + return NULL; +} + +int eth_platform_get_mac_address(struct device *dev, u8 *mac_addr) +{ + const unsigned char *addr; + struct device_node *dp; + + if (dev_is_pci(dev)) + dp = pci_device_to_OF_node(to_pci_dev(dev)); + else + dp = dev->of_node; + + addr = NULL; + if (dp) + addr = of_get_mac_address(dp); + if (!addr) + addr = arch_get_platform_mac_address(); + + if (!addr) + return -ENODEV; + + ether_addr_copy(mac_addr, addr); + return 0; +} +EXPORT_SYMBOL(eth_platform_get_mac_address); -- cgit v1.2.3-71-gd317 From ccaa953e9fc7ebb90fba4e4815966683bef4866f Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 6 Jan 2016 20:11:06 +0100 Subject: phy: Consistently use addr for address on an MII bus Within phy.h, an address on an MII bus has been called both addr and phy_id. phy_id is particularly confusing, since it also means the ID found in register 3, if the device on the bus is a phy. Consistently use addr. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- include/linux/phy.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index a89cb0eef911..77b5e56e2a92 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -158,8 +158,8 @@ struct mii_bus { const char *name; char id[MII_BUS_ID_SIZE]; void *priv; - int (*read)(struct mii_bus *bus, int phy_id, int regnum); - int (*write)(struct mii_bus *bus, int phy_id, int regnum, u16 val); + int (*read)(struct mii_bus *bus, int addr, int regnum); + int (*write)(struct mii_bus *bus, int addr, int regnum, u16 val); int (*reset)(struct mii_bus *bus); /* -- cgit v1.2.3-71-gd317 From bac83c653799d7ea3f6cc4d7396d75adc5e0f778 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 6 Jan 2016 20:11:07 +0100 Subject: mdio: Move mdiobus_read/write operations into mdio.h These are logically MDIO operations, not phy operations, so move them into the mdio header. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/mdio.h | 6 ++++++ include/linux/phy.h | 6 +----- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mdio.h b/include/linux/mdio.h index b42963bc81dd..0d073c23c10d 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -11,6 +11,7 @@ #include +struct mii_bus; static inline bool mdio_phy_id_is_c45(int phy_id) { @@ -173,4 +174,9 @@ static inline u16 ethtool_adv_to_mmd_eee_adv_t(u32 adv) return reg; } +int mdiobus_read(struct mii_bus *bus, int addr, u32 regnum); +int mdiobus_read_nested(struct mii_bus *bus, int addr, u32 regnum); +int mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val); +int mdiobus_write_nested(struct mii_bus *bus, int addr, u32 regnum, u16 val); + #endif /* __LINUX_MDIO_H__ */ diff --git a/include/linux/phy.h b/include/linux/phy.h index 77b5e56e2a92..8ca161a37e8a 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -212,11 +213,6 @@ static inline struct mii_bus *devm_mdiobus_alloc(struct device *dev) void devm_mdiobus_free(struct device *dev, struct mii_bus *bus); struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr); -int mdiobus_read(struct mii_bus *bus, int addr, u32 regnum); -int mdiobus_read_nested(struct mii_bus *bus, int addr, u32 regnum); -int mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val); -int mdiobus_write_nested(struct mii_bus *bus, int addr, u32 regnum, u16 val); - #define PHY_INTERRUPT_DISABLED 0x0 #define PHY_INTERRUPT_ENABLED 0x80000000 -- cgit v1.2.3-71-gd317 From 72ba48be3ec8e70937ad97d4420ef7144617c64b Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 6 Jan 2016 20:11:09 +0100 Subject: phy: Add phydev_err() and phydev_dbg() macros In preparation for moving some of the phy_device structure members, add macros for printing errors and debug information. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/phy/at803x.c | 4 ++-- drivers/net/phy/bcm87xx.c | 5 +++-- drivers/net/phy/micrel.c | 16 +++++++++------- drivers/net/phy/phy.c | 5 +++-- include/linux/phy.h | 6 ++++++ 5 files changed, 23 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index 2d020a3ec0b5..62361f8af375 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -281,8 +281,8 @@ static void at803x_link_change_notify(struct phy_device *phydev) at803x_context_restore(phydev, &context); - dev_dbg(&phydev->dev, "%s(): phy was reset\n", - __func__); + phydev_dbg(phydev, "%s(): phy was reset\n", + __func__); priv->phy_reset = true; } } else { diff --git a/drivers/net/phy/bcm87xx.c b/drivers/net/phy/bcm87xx.c index 1eca20452f03..71b491c7bf96 100644 --- a/drivers/net/phy/bcm87xx.c +++ b/drivers/net/phy/bcm87xx.c @@ -163,8 +163,9 @@ static int bcm87xx_did_interrupt(struct phy_device *phydev) reg = phy_read(phydev, BCM87XX_LASI_STATUS); if (reg < 0) { - dev_err(&phydev->dev, - "Error: Read of BCM87XX_LASI_STATUS failed: %d\n", reg); + phydev_err(phydev, + "Error: Read of BCM87XX_LASI_STATUS failed: %d\n", + reg); return 0; } return (reg & 1) != 0; diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 1a6048a8c29d..bf72365e90bc 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -224,7 +224,7 @@ static int kszphy_setup_led(struct phy_device *phydev, u32 reg, int val) rc = phy_write(phydev, reg, temp); out: if (rc < 0) - dev_err(&phydev->dev, "failed to set led mode\n"); + phydev_err(phydev, "failed to set led mode\n"); return rc; } @@ -243,7 +243,7 @@ static int kszphy_broadcast_disable(struct phy_device *phydev) ret = phy_write(phydev, MII_KSZPHY_OMSO, ret | KSZPHY_OMSO_B_CAST_OFF); out: if (ret) - dev_err(&phydev->dev, "failed to disable broadcast address\n"); + phydev_err(phydev, "failed to disable broadcast address\n"); return ret; } @@ -263,7 +263,7 @@ static int kszphy_nand_tree_disable(struct phy_device *phydev) ret & ~KSZPHY_OMSO_NAND_TREE_ON); out: if (ret) - dev_err(&phydev->dev, "failed to disable NAND tree mode\n"); + phydev_err(phydev, "failed to disable NAND tree mode\n"); return ret; } @@ -288,7 +288,8 @@ static int kszphy_config_init(struct phy_device *phydev) if (priv->rmii_ref_clk_sel) { ret = kszphy_rmii_clk_sel(phydev, priv->rmii_ref_clk_sel_val); if (ret) { - dev_err(&phydev->dev, "failed to set rmii reference clock\n"); + phydev_err(phydev, + "failed to set rmii reference clock\n"); return ret; } } @@ -649,8 +650,8 @@ static int kszphy_probe(struct phy_device *phydev) priv->led_mode = -1; if (priv->led_mode > 3) { - dev_err(&phydev->dev, "invalid led mode: 0x%02x\n", - priv->led_mode); + phydev_err(phydev, "invalid led mode: 0x%02x\n", + priv->led_mode); priv->led_mode = -1; } } else { @@ -672,7 +673,8 @@ static int kszphy_probe(struct phy_device *phydev) } else if (rate > 49500000 && rate < 50500000) { priv->rmii_ref_clk_sel_val = !rmii_ref_clk_sel_25_mhz; } else { - dev_err(&phydev->dev, "Clock rate out of range: %ld\n", rate); + phydev_err(phydev, "Clock rate out of range: %ld\n", + rate); return -EINVAL; } } diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 47cd306dbb3c..9771941cf0ee 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -995,8 +995,9 @@ void phy_state_machine(struct work_struct *work) if (err < 0) phy_error(phydev); - dev_dbg(&phydev->dev, "PHY state change %s -> %s\n", - phy_state_to_str(old_state), 
phy_state_to_str(phydev->state)); + phydev_dbg(phydev, "PHY state change %s -> %s\n", + phy_state_to_str(old_state), + phy_state_to_str(phydev->state)); queue_delayed_work(system_power_efficient_wq, &phydev->state_queue, PHY_STATE_TIME * HZ); diff --git a/include/linux/phy.h b/include/linux/phy.h index 8ca161a37e8a..dbcf9fdd960c 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -777,6 +777,12 @@ static inline int phy_read_status(struct phy_device *phydev) return phydev->drv->read_status(phydev); } +#define phydev_err(_phydev, format, args...) \ + dev_err(&_phydev->dev, format, ##args) + +#define phydev_dbg(_phydev, format, args...) \ + dev_dbg(&_phydev->dev, format, ##args) + int genphy_config_init(struct phy_device *phydev); int genphy_setup_forced(struct phy_device *phydev); int genphy_restart_aneg(struct phy_device *phydev); -- cgit v1.2.3-71-gd317 From 84eff6d194df442bee62c129f2f47efb0dbd0468 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 6 Jan 2016 20:11:10 +0100 Subject: phy: add phydev_name() wrapper Add a phydev_name() function, to help with moving some structure members from phy_device. Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/ethernet/8390/ax88796.c | 2 +- drivers/net/ethernet/adi/bfin_mac.c | 4 ++-- drivers/net/ethernet/agere/et131x.c | 4 ++-- drivers/net/ethernet/amd/au1000_eth.c | 4 ++-- drivers/net/ethernet/broadcom/b44.c | 2 +- drivers/net/ethernet/broadcom/tg3.c | 4 ++-- drivers/net/ethernet/cadence/macb.c | 2 +- drivers/net/ethernet/dnet.c | 6 +++--- drivers/net/ethernet/faraday/ftgmac100.c | 2 +- drivers/net/ethernet/lantiq_etop.c | 4 ++-- drivers/net/ethernet/nxp/lpc_eth.c | 4 ++-- drivers/net/ethernet/rdc/r6040.c | 4 ++-- drivers/net/ethernet/renesas/ravb_main.c | 2 +- drivers/net/ethernet/renesas/sh_eth.c | 2 +- drivers/net/ethernet/samsung/sxgbe/sxgbe_mdio.c | 2 +- drivers/net/ethernet/smsc/smsc911x.c | 2 +- drivers/net/ethernet/smsc/smsc9420.c | 4 ++-- drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 2 +- drivers/net/ethernet/ti/davinci_emac.c | 2 +- drivers/net/ethernet/ti/davinci_mdio.c | 2 +- drivers/net/ethernet/ti/netcp_ethss.c | 4 ++-- drivers/net/ethernet/toshiba/tc35815.c | 4 ++-- drivers/net/phy/bcm7xxx.c | 2 +- drivers/net/phy/phy_device.c | 2 +- drivers/staging/netlogic/xlr_net.c | 6 +++--- include/linux/phy.h | 5 +++++ 26 files changed, 44 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/8390/ax88796.c b/drivers/net/ethernet/8390/ax88796.c index 0443654f0339..90b540a4a561 100644 --- a/drivers/net/ethernet/8390/ax88796.c +++ b/drivers/net/ethernet/8390/ax88796.c @@ -372,7 +372,7 @@ static int ax_mii_probe(struct net_device *dev) ax->phy_dev = phy_dev; netdev_info(dev, "PHY driver [%s] (mii_bus:phy_addr=%s, irq=%d)\n", - phy_dev->drv->name, dev_name(&phy_dev->dev), phy_dev->irq); + phy_dev->drv->name, phydev_name(phy_dev), phy_dev->irq); return 0; } diff --git a/drivers/net/ethernet/adi/bfin_mac.c b/drivers/net/ethernet/adi/bfin_mac.c index e0e95a15cab0..5f8a5182b8dc 100644 --- a/drivers/net/ethernet/adi/bfin_mac.c +++ b/drivers/net/ethernet/adi/bfin_mac.c @@ -419,7 +419,7 @@ static int mii_probe(struct net_device *dev, int phy_mode) return -EINVAL; } - phydev = phy_connect(dev, dev_name(&phydev->dev), + phydev = phy_connect(dev, phydev_name(phydev), &bfin_mac_adjust_link, phy_mode); if (IS_ERR(phydev)) { @@ -446,7 +446,7 @@ static int mii_probe(struct net_device *dev, int phy_mode) pr_info("attached PHY driver [%s] " "(mii_bus:phy_addr=%s, irq=%d, 
mdc_clk=%dHz(mdc_div=%d)@sclk=%dMHz)\n", - phydev->drv->name, dev_name(&phydev->dev), phydev->irq, + phydev->drv->name, phydev_name(phydev), phydev->irq, MDC_CLK, mdc_div, sclk/1000000); return 0; diff --git a/drivers/net/ethernet/agere/et131x.c b/drivers/net/ethernet/agere/et131x.c index e0f3d197e7f2..80b706f0fc97 100644 --- a/drivers/net/ethernet/agere/et131x.c +++ b/drivers/net/ethernet/agere/et131x.c @@ -3265,7 +3265,7 @@ static int et131x_mii_probe(struct net_device *netdev) return -ENODEV; } - phydev = phy_connect(netdev, dev_name(&phydev->dev), + phydev = phy_connect(netdev, phydev_name(phydev), &et131x_adjust_link, PHY_INTERFACE_MODE_MII); if (IS_ERR(phydev)) { @@ -3291,7 +3291,7 @@ static int et131x_mii_probe(struct net_device *netdev) dev_info(&adapter->pdev->dev, "attached PHY driver [%s] (mii_bus:phy_addr=%s)\n", - phydev->drv->name, dev_name(&phydev->dev)); + phydev->drv->name, phydev_name(phydev)); return 0; } diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c index 5330bcb8a944..8a8d6f2a0f6f 100644 --- a/drivers/net/ethernet/amd/au1000_eth.c +++ b/drivers/net/ethernet/amd/au1000_eth.c @@ -558,7 +558,7 @@ static int au1000_mii_probe(struct net_device *dev) /* now we are supposed to have a proper phydev, to attach to... */ BUG_ON(phydev->attached_dev); - phydev = phy_connect(dev, dev_name(&phydev->dev), + phydev = phy_connect(dev, phydev_name(phydev), &au1000_adjust_link, PHY_INTERFACE_MODE_MII); if (IS_ERR(phydev)) { @@ -585,7 +585,7 @@ static int au1000_mii_probe(struct net_device *dev) netdev_info(dev, "attached PHY driver [%s] " "(mii_bus:phy_addr=%s, irq=%d)\n", - phydev->drv->name, dev_name(&phydev->dev), phydev->irq); + phydev->drv->name, phydev_name(phydev), phydev->irq); return 0; } diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index a3b1c07ae0af..928a2210e788 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -2316,7 +2316,7 @@ static int b44_register_phy_one(struct b44 *bp) bp->phy_addr = phydev->addr; dev_info(sdev->dev, "attached PHY driver [%s] (mii_bus:phy_addr=%s)\n", - phydev->drv->name, dev_name(&phydev->dev)); + phydev->drv->name, phydev_name(phydev)); return 0; diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 79789d8e52da..69d84d67f09a 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -2096,7 +2096,7 @@ static int tg3_phy_init(struct tg3 *tp) phydev = tp->mdio_bus->phy_map[tp->phy_addr]; /* Attach the MAC to the PHY. 
*/ - phydev = phy_connect(tp->dev, dev_name(&phydev->dev), + phydev = phy_connect(tp->dev, phydev_name(phydev), tg3_adjust_link, phydev->interface); if (IS_ERR(phydev)) { dev_err(&tp->pdev->dev, "Could not attach to PHY\n"); @@ -17903,7 +17903,7 @@ static int tg3_init_one(struct pci_dev *pdev, phydev = tp->mdio_bus->phy_map[tp->phy_addr]; netdev_info(dev, "attached PHY driver [%s] (mii_bus:phy_addr=%s)\n", - phydev->drv->name, dev_name(&phydev->dev)); + phydev->drv->name, phydev_name(phydev)); } else { char *ethtype; diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index 8b45bc9ac29e..001d60c5521c 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -2951,7 +2951,7 @@ static int macb_probe(struct platform_device *pdev) phydev = bp->phy_dev; netdev_info(dev, "attached PHY driver [%s] (mii_bus:phy_addr=%s, irq=%d)\n", - phydev->drv->name, dev_name(&phydev->dev), phydev->irq); + phydev->drv->name, phydev_name(phydev), phydev->irq); return 0; diff --git a/drivers/net/ethernet/dnet.c b/drivers/net/ethernet/dnet.c index 13d00a38a5bd..136b6010f704 100644 --- a/drivers/net/ethernet/dnet.c +++ b/drivers/net/ethernet/dnet.c @@ -274,11 +274,11 @@ static int dnet_mii_probe(struct net_device *dev) /* attach the mac to the phy */ if (bp->capabilities & DNET_HAS_RMII) { - phydev = phy_connect(dev, dev_name(&phydev->dev), + phydev = phy_connect(dev, phydev_name(phydev), &dnet_handle_link_change, PHY_INTERFACE_MODE_RMII); } else { - phydev = phy_connect(dev, dev_name(&phydev->dev), + phydev = phy_connect(dev, phydev_name(phydev), &dnet_handle_link_change, PHY_INTERFACE_MODE_MII); } @@ -894,7 +894,7 @@ static int dnet_probe(struct platform_device *pdev) phydev = bp->phy_dev; dev_info(&pdev->dev, "attached PHY driver [%s] " "(mii_bus:phy_addr=%s, irq=%d)\n", - phydev->drv->name, dev_name(&phydev->dev), phydev->irq); + phydev->drv->name, phydev_name(phydev), phydev->irq); return 0; diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 6d0c5d5eea6d..c2e2ac6a0313 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -854,7 +854,7 @@ static int ftgmac100_mii_probe(struct ftgmac100 *priv) return -ENODEV; } - phydev = phy_connect(netdev, dev_name(&phydev->dev), + phydev = phy_connect(netdev, phydev_name(phydev), &ftgmac100_adjust_link, PHY_INTERFACE_MODE_GMII); if (IS_ERR(phydev)) { diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c index 581928c068f2..274a3cec84c2 100644 --- a/drivers/net/ethernet/lantiq_etop.c +++ b/drivers/net/ethernet/lantiq_etop.c @@ -390,7 +390,7 @@ ltq_etop_mdio_probe(struct net_device *dev) return -ENODEV; } - phydev = phy_connect(dev, dev_name(&phydev->dev), + phydev = phy_connect(dev, phydev_name(phydev), <q_etop_mdio_link, priv->pldata->mii_mode); if (IS_ERR(phydev)) { @@ -410,7 +410,7 @@ ltq_etop_mdio_probe(struct net_device *dev) priv->phydev = phydev; pr_info("%s: attached PHY [%s] (phy_addr=%s, irq=%d)\n", dev->name, phydev->drv->name, - dev_name(&phydev->dev), phydev->irq); + phydev_name(phydev), phydev->irq); return 0; } diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c index 057665180f13..5801aa197697 100644 --- a/drivers/net/ethernet/nxp/lpc_eth.c +++ b/drivers/net/ethernet/nxp/lpc_eth.c @@ -797,7 +797,7 @@ static int lpc_mii_probe(struct net_device *ndev) netdev_info(ndev, "using MII interface\n"); else netdev_info(ndev, "using RMII 
interface\n"); - phydev = phy_connect(ndev, dev_name(&phydev->dev), + phydev = phy_connect(ndev, phydev_name(phydev), &lpc_handle_link_change, lpc_phy_interface_mode(&pldat->pdev->dev)); @@ -818,7 +818,7 @@ static int lpc_mii_probe(struct net_device *ndev) netdev_info(ndev, "attached PHY driver [%s] (mii_bus:phy_addr=%s, irq=%d)\n", - phydev->drv->name, dev_name(&phydev->dev), phydev->irq); + phydev->drv->name, phydev_name(phydev), phydev->irq); return 0; } diff --git a/drivers/net/ethernet/rdc/r6040.c b/drivers/net/ethernet/rdc/r6040.c index 9a37247cf4b8..86a0887811c7 100644 --- a/drivers/net/ethernet/rdc/r6040.c +++ b/drivers/net/ethernet/rdc/r6040.c @@ -1039,7 +1039,7 @@ static int r6040_mii_probe(struct net_device *dev) return -ENODEV; } - phydev = phy_connect(dev, dev_name(&phydev->dev), &r6040_adjust_link, + phydev = phy_connect(dev, phydev_name(phydev), &r6040_adjust_link, PHY_INTERFACE_MODE_MII); if (IS_ERR(phydev)) { @@ -1063,7 +1063,7 @@ static int r6040_mii_probe(struct net_device *dev) dev_info(&lp->pdev->dev, "attached PHY driver [%s] " "(mii_bus:phy_addr=%s)\n", - phydev->drv->name, dev_name(&phydev->dev)); + phydev->drv->name, phydev_name(phydev)); return 0; } diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 0e1ebb39ab46..2f6c974e4a6d 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -928,7 +928,7 @@ static int ravb_phy_init(struct net_device *ndev) phydev->supported &= ~PHY_10BT_FEATURES; netdev_info(ndev, "attached PHY %d (IRQ %d) to driver %s\n", - phydev->addr, phydev->irq, phydev->drv->name); + phydev->addr, phydev->irq, phydev_name(phydev)); priv->phydev = phydev; diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index baa81535a8fc..e14d28474b70 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -1827,7 +1827,7 @@ static int sh_eth_phy_init(struct net_device *ndev) } netdev_info(ndev, "attached PHY %d (IRQ %d) to driver %s\n", - phydev->addr, phydev->irq, phydev->drv->name); + phydev->addr, phydev->irq, phydev_name(phydev)); mdp->phydev = phydev; diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_mdio.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_mdio.c index 43ccb4a6de15..5b13b8c11bef 100644 --- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_mdio.c +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_mdio.c @@ -216,7 +216,7 @@ int sxgbe_mdio_register(struct net_device *ndev) } netdev_info(ndev, "PHY ID %08x at %d IRQ %s (%s)%s\n", phy->phy_id, phy_addr, irq_str, - dev_name(&phy->dev), act ? " active" : ""); + phydev_name(phy), act ? 
" active" : ""); phy_found = true; } } diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index 219a99b7a631..067346d3209d 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -1033,7 +1033,7 @@ static int smsc911x_mii_probe(struct net_device *dev) netdev_info(dev, "attached PHY driver [%s] (mii_bus:phy_addr=%s, irq=%d)\n", - phydev->drv->name, dev_name(&phydev->dev), phydev->irq); + phydev->drv->name, phydev_name(phydev), phydev->irq); /* mask with MAC supported features */ phydev->supported &= (PHY_BASIC_FEATURES | SUPPORTED_Pause | diff --git a/drivers/net/ethernet/smsc/smsc9420.c b/drivers/net/ethernet/smsc/smsc9420.c index 4a90cdae5444..a02ed6b63064 100644 --- a/drivers/net/ethernet/smsc/smsc9420.c +++ b/drivers/net/ethernet/smsc/smsc9420.c @@ -1167,7 +1167,7 @@ static int smsc9420_mii_probe(struct net_device *dev) netif_info(pd, probe, pd->dev, "PHY addr %d, phy_id 0x%08X\n", phydev->addr, phydev->phy_id); - phydev = phy_connect(dev, dev_name(&phydev->dev), + phydev = phy_connect(dev, phydev_name(phydev), smsc9420_phy_adjust_link, PHY_INTERFACE_MODE_MII); if (IS_ERR(phydev)) { @@ -1176,7 +1176,7 @@ static int smsc9420_mii_probe(struct net_device *dev) } netdev_info(dev, "attached PHY driver [%s] (mii_bus:phy_addr=%s, irq=%d)\n", - phydev->drv->name, dev_name(&phydev->dev), phydev->irq); + phydev->drv->name, phydev_name(phydev), phydev->irq); /* mask with MAC supported features */ phydev->supported &= (PHY_BASIC_FEATURES | SUPPORTED_Pause | diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index 16c85ccd1762..05ba84118f37 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -298,7 +298,7 @@ int stmmac_mdio_register(struct net_device *ndev) } pr_info("%s: PHY ID %08x at %d IRQ %s (%s)%s\n", ndev->name, phydev->phy_id, addr, - irq_str, dev_name(&phydev->dev), + irq_str, phydev_name(phydev), act ? " active" : ""); found = 1; } diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index 33bd3b902304..5a40b0256327 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -1646,7 +1646,7 @@ static int emac_dev_open(struct net_device *ndev) dev_info(emac_dev, "attached PHY driver [%s] " "(mii_bus:phy_addr=%s, id=%x)\n", - priv->phydev->drv->name, dev_name(&priv->phydev->dev), + priv->phydev->drv->name, phydev_name(priv->phydev), priv->phydev->phy_id); } diff --git a/drivers/net/ethernet/ti/davinci_mdio.c b/drivers/net/ethernet/ti/davinci_mdio.c index c00084d689f3..88e8e6055b9f 100644 --- a/drivers/net/ethernet/ti/davinci_mdio.c +++ b/drivers/net/ethernet/ti/davinci_mdio.c @@ -396,7 +396,7 @@ static int davinci_mdio_probe(struct platform_device *pdev) phy = data->bus->phy_map[addr]; if (phy) { dev_info(dev, "phy[%d]: device %s, driver %s\n", - phy->addr, dev_name(&phy->dev), + phy->addr, phydev_name(phy), phy->drv ? 
phy->drv->name : "unknown"); } } diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c index 4e70e7586a09..d543298d6750 100644 --- a/drivers/net/ethernet/ti/netcp_ethss.c +++ b/drivers/net/ethernet/ti/netcp_ethss.c @@ -2178,7 +2178,7 @@ static int gbe_slave_open(struct gbe_intf *gbe_intf) return -ENODEV; } dev_dbg(priv->dev, "phy found: id is: 0x%s\n", - dev_name(&slave->phy->dev)); + phydev_name(slave->phy)); phy_start(slave->phy); phy_read_status(slave->phy); } @@ -2681,7 +2681,7 @@ static void init_secondary_ports(struct gbe_priv *gbe_dev, slave->phy = NULL; } else { dev_dbg(dev, "phy found: id is: 0x%s\n", - dev_name(&slave->phy->dev)); + phydev_name(slave->phy)); phy_start(slave->phy); phy_read_status(slave->phy); } diff --git a/drivers/net/ethernet/toshiba/tc35815.c b/drivers/net/ethernet/toshiba/tc35815.c index 45ac38d29ed8..8df6072ac78d 100644 --- a/drivers/net/ethernet/toshiba/tc35815.c +++ b/drivers/net/ethernet/toshiba/tc35815.c @@ -631,7 +631,7 @@ static int tc_mii_probe(struct net_device *dev) } /* attach the mac to the phy */ - phydev = phy_connect(dev, dev_name(&phydev->dev), + phydev = phy_connect(dev, phydev_name(phydev), &tc_handle_link_change, lp->chiptype == TC35815_TX4939 ? PHY_INTERFACE_MODE_RMII : PHY_INTERFACE_MODE_MII); if (IS_ERR(phydev)) { @@ -640,7 +640,7 @@ static int tc_mii_probe(struct net_device *dev) } printk(KERN_INFO "%s: attached PHY driver [%s] " "(mii_bus:phy_addr=%s, id=%x)\n", - dev->name, phydev->drv->name, dev_name(&phydev->dev), + dev->name, phydev->drv->name, phydev_name(phydev), phydev->phy_id); /* mask with MAC supported features */ diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c index d4083c381cd1..9f4e6eb886af 100644 --- a/drivers/net/phy/bcm7xxx.c +++ b/drivers/net/phy/bcm7xxx.c @@ -170,7 +170,7 @@ static int bcm7xxx_28nm_config_init(struct phy_device *phydev) int ret = 0; pr_info_once("%s: %s PHY revision: 0x%02x, patch: %d\n", - dev_name(&phydev->dev), phydev->drv->name, rev, patch); + phydev_name(phydev), phydev->drv->name, rev, patch); /* Dummy read to a register to workaround an issue upon reset where the * internal inverter may not allow the first MDIO transaction to pass diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 0bfbabad4431..0f179709a289 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -114,7 +114,7 @@ EXPORT_SYMBOL(phy_register_fixup_for_id); */ static int phy_needs_fixup(struct phy_device *phydev, struct phy_fixup *fixup) { - if (strcmp(fixup->bus_id, dev_name(&phydev->dev)) != 0) + if (strcmp(fixup->bus_id, phydev_name(phydev)) != 0) if (strcmp(fixup->bus_id, PHY_ANY_ID) != 0) return 0; diff --git a/drivers/staging/netlogic/xlr_net.c b/drivers/staging/netlogic/xlr_net.c index 8ae01753b011..b939c4b5f229 100644 --- a/drivers/staging/netlogic/xlr_net.c +++ b/drivers/staging/netlogic/xlr_net.c @@ -838,8 +838,8 @@ static int xlr_mii_probe(struct xlr_net_priv *priv) } /* Attach MAC to PHY */ - phydev = phy_connect(priv->ndev, dev_name(&phydev->dev), - &xlr_gmac_link_adjust, priv->nd->phy_interface); + phydev = phy_connect(priv->ndev, phydev_name(phydev), + &xlr_gmac_link_adjust, priv->nd->phy_interface); if (IS_ERR(phydev)) { pr_err("could not attach PHY\n"); @@ -855,7 +855,7 @@ static int xlr_mii_probe(struct xlr_net_priv *priv) phydev->advertising = phydev->supported; pr_info("attached PHY driver [%s] (mii_bus:phy_addr=%s\n", - phydev->drv->name, dev_name(&phydev->dev)); + phydev->drv->name, phydev_name(phydev)); 
return 0; } diff --git a/include/linux/phy.h b/include/linux/phy.h index dbcf9fdd960c..5f5cc3424b9e 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -783,6 +783,11 @@ static inline int phy_read_status(struct phy_device *phydev) #define phydev_dbg(_phydev, format, args...) \ dev_dbg(&_phydev->dev, format, ##args) +static inline const char *phydev_name(const struct phy_device *phydev) +{ + return dev_name(&phydev->dev); +} + int genphy_config_init(struct phy_device *phydev); int genphy_setup_forced(struct phy_device *phydev); int genphy_restart_aneg(struct phy_device *phydev); -- cgit v1.2.3-71-gd317 From 053e7e169229adebbc27fc176c5369398e9f5eba Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 6 Jan 2016 20:11:12 +0100 Subject: phy: phy_{read|write}_mmd_indirect: get addr from phydev The address of the device can be determined from the phydev structure, rather than passing it as a parameter. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/bcm-phy-lib.c | 8 ++++---- drivers/net/phy/dp83867.c | 6 +++--- drivers/net/phy/microchip.c | 5 ++--- drivers/net/phy/phy.c | 36 ++++++++++++++---------------------- include/linux/phy.h | 7 ++----- 5 files changed, 25 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/bcm-phy-lib.c b/drivers/net/phy/bcm-phy-lib.c index ddb377e53633..df0416db0b88 100644 --- a/drivers/net/phy/bcm-phy-lib.c +++ b/drivers/net/phy/bcm-phy-lib.c @@ -184,25 +184,25 @@ int bcm_phy_enable_eee(struct phy_device *phydev) /* Enable EEE at PHY level */ val = phy_read_mmd_indirect(phydev, BRCM_CL45VEN_EEE_CONTROL, - MDIO_MMD_AN, phydev->addr); + MDIO_MMD_AN); if (val < 0) return val; val |= LPI_FEATURE_EN | LPI_FEATURE_EN_DIG1000X; phy_write_mmd_indirect(phydev, BRCM_CL45VEN_EEE_CONTROL, - MDIO_MMD_AN, phydev->addr, (u32)val); + MDIO_MMD_AN, (u32)val); /* Advertise EEE */ val = phy_read_mmd_indirect(phydev, BCM_CL45VEN_EEE_ADV, - MDIO_MMD_AN, phydev->addr); + MDIO_MMD_AN); if (val < 0) return val; val |= (MDIO_AN_EEE_ADV_100TX | MDIO_AN_EEE_ADV_1000T); phy_write_mmd_indirect(phydev, BCM_CL45VEN_EEE_ADV, - MDIO_MMD_AN, phydev->addr, (u32)val); + MDIO_MMD_AN, (u32)val); return 0; } diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c index 4ebf601073d9..e4c0b0c0af02 100644 --- a/drivers/net/phy/dp83867.c +++ b/drivers/net/phy/dp83867.c @@ -160,7 +160,7 @@ static int dp83867_config_init(struct phy_device *phydev) if ((phydev->interface >= PHY_INTERFACE_MODE_RGMII_ID) && (phydev->interface <= PHY_INTERFACE_MODE_RGMII_RXID)) { val = phy_read_mmd_indirect(phydev, DP83867_RGMIICTL, - DP83867_DEVADDR, phydev->addr); + DP83867_DEVADDR); if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID) val |= (DP83867_RGMII_TX_CLK_DELAY_EN | DP83867_RGMII_RX_CLK_DELAY_EN); @@ -172,13 +172,13 @@ static int dp83867_config_init(struct phy_device *phydev) val |= DP83867_RGMII_RX_CLK_DELAY_EN; phy_write_mmd_indirect(phydev, DP83867_RGMIICTL, - DP83867_DEVADDR, phydev->addr, val); + DP83867_DEVADDR, val); delay = (dp83867->rx_id_delay | (dp83867->tx_id_delay << DP83867_RGMII_TX_CLK_DELAY_SHIFT)); phy_write_mmd_indirect(phydev, DP83867_RGMIIDCTL, - DP83867_DEVADDR, phydev->addr, delay); + DP83867_DEVADDR, delay); } return 0; diff --git a/drivers/net/phy/microchip.c b/drivers/net/phy/microchip.c index c0a20ebd083b..99df5bc47424 100644 --- a/drivers/net/phy/microchip.c +++ b/drivers/net/phy/microchip.c @@ -78,10 +78,9 @@ static int lan88xx_probe(struct phy_device *phydev) priv->wolopts = 0; 
/* these values can be used to identify internal PHY */ - priv->chip_id = phy_read_mmd_indirect(phydev, LAN88XX_MMD3_CHIP_ID, - 3, phydev->addr); + priv->chip_id = phy_read_mmd_indirect(phydev, LAN88XX_MMD3_CHIP_ID, 3); priv->chip_rev = phy_read_mmd_indirect(phydev, LAN88XX_MMD3_CHIP_REV, - 3, phydev->addr); + 3); phydev->priv = priv; diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 9771941cf0ee..56c8dd8c0c85 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -1029,7 +1029,6 @@ static inline void mmd_phy_indirect(struct mii_bus *bus, int prtad, int devad, * @phydev: The PHY device bus * @prtad: MMD Address * @devad: MMD DEVAD - * @addr: PHY address on the MII bus * * Description: it reads data from the MMD registers (clause 22 to access to * clause 45) of the specified phy address. @@ -1039,10 +1038,10 @@ static inline void mmd_phy_indirect(struct mii_bus *bus, int prtad, int devad, * 3) Write reg 13 // MMD Data Command for MMD DEVAD * 3) Read reg 14 // Read MMD data */ -int phy_read_mmd_indirect(struct phy_device *phydev, int prtad, - int devad, int addr) +int phy_read_mmd_indirect(struct phy_device *phydev, int prtad, int devad) { struct phy_driver *phydrv = phydev->drv; + int addr = phydev->addr; int value = -1; if (!phydrv->read_mmd_indirect) { @@ -1066,7 +1065,6 @@ EXPORT_SYMBOL(phy_read_mmd_indirect); * @phydev: The PHY device * @prtad: MMD Address * @devad: MMD DEVAD - * @addr: PHY address on the MII bus * @data: data to write in the MMD register * * Description: Write data from the MMD registers of the specified @@ -1078,9 +1076,10 @@ EXPORT_SYMBOL(phy_read_mmd_indirect); * 3) Write reg 14 // Write MMD data */ void phy_write_mmd_indirect(struct phy_device *phydev, int prtad, - int devad, int addr, u32 data) + int devad, u32 data) { struct phy_driver *phydrv = phydev->drv; + int addr = phydev->addr; if (!phydrv->write_mmd_indirect) { struct mii_bus *bus = phydev->bus; @@ -1130,7 +1129,7 @@ int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable) /* First check if the EEE ability is supported */ eee_cap = phy_read_mmd_indirect(phydev, MDIO_PCS_EEE_ABLE, - MDIO_MMD_PCS, phydev->addr); + MDIO_MMD_PCS); if (eee_cap <= 0) goto eee_exit_err; @@ -1142,12 +1141,12 @@ int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable) * the EEE advertising registers. */ eee_lp = phy_read_mmd_indirect(phydev, MDIO_AN_EEE_LPABLE, - MDIO_MMD_AN, phydev->addr); + MDIO_MMD_AN); if (eee_lp <= 0) goto eee_exit_err; eee_adv = phy_read_mmd_indirect(phydev, MDIO_AN_EEE_ADV, - MDIO_MMD_AN, phydev->addr); + MDIO_MMD_AN); if (eee_adv <= 0) goto eee_exit_err; @@ -1161,15 +1160,13 @@ int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable) * clock while it is signaling LPI. 
*/ int val = phy_read_mmd_indirect(phydev, MDIO_CTRL1, - MDIO_MMD_PCS, - phydev->addr); + MDIO_MMD_PCS); if (val < 0) return val; val |= MDIO_PCS_CTRL1_CLKSTOP_EN; phy_write_mmd_indirect(phydev, MDIO_CTRL1, - MDIO_MMD_PCS, phydev->addr, - val); + MDIO_MMD_PCS, val); } return 0; /* EEE supported */ @@ -1188,8 +1185,7 @@ EXPORT_SYMBOL(phy_init_eee); */ int phy_get_eee_err(struct phy_device *phydev) { - return phy_read_mmd_indirect(phydev, MDIO_PCS_EEE_WK_ERR, - MDIO_MMD_PCS, phydev->addr); + return phy_read_mmd_indirect(phydev, MDIO_PCS_EEE_WK_ERR, MDIO_MMD_PCS); } EXPORT_SYMBOL(phy_get_eee_err); @@ -1206,22 +1202,19 @@ int phy_ethtool_get_eee(struct phy_device *phydev, struct ethtool_eee *data) int val; /* Get Supported EEE */ - val = phy_read_mmd_indirect(phydev, MDIO_PCS_EEE_ABLE, - MDIO_MMD_PCS, phydev->addr); + val = phy_read_mmd_indirect(phydev, MDIO_PCS_EEE_ABLE, MDIO_MMD_PCS); if (val < 0) return val; data->supported = mmd_eee_cap_to_ethtool_sup_t(val); /* Get advertisement EEE */ - val = phy_read_mmd_indirect(phydev, MDIO_AN_EEE_ADV, - MDIO_MMD_AN, phydev->addr); + val = phy_read_mmd_indirect(phydev, MDIO_AN_EEE_ADV, MDIO_MMD_AN); if (val < 0) return val; data->advertised = mmd_eee_adv_to_ethtool_adv_t(val); /* Get LP advertisement EEE */ - val = phy_read_mmd_indirect(phydev, MDIO_AN_EEE_LPABLE, - MDIO_MMD_AN, phydev->addr); + val = phy_read_mmd_indirect(phydev, MDIO_AN_EEE_LPABLE, MDIO_MMD_AN); if (val < 0) return val; data->lp_advertised = mmd_eee_adv_to_ethtool_adv_t(val); @@ -1241,8 +1234,7 @@ int phy_ethtool_set_eee(struct phy_device *phydev, struct ethtool_eee *data) { int val = ethtool_adv_to_mmd_eee_adv_t(data->advertised); - phy_write_mmd_indirect(phydev, MDIO_AN_EEE_ADV, MDIO_MMD_AN, - phydev->addr, val); + phy_write_mmd_indirect(phydev, MDIO_AN_EEE_ADV, MDIO_MMD_AN, val); return 0; } diff --git a/include/linux/phy.h b/include/linux/phy.h index 5f5cc3424b9e..08198ce98773 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -629,14 +629,12 @@ static inline int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum) * phy_read_mmd_indirect - reads data from the MMD registers * @phydev: The PHY device bus * @prtad: MMD Address - * @devad: MMD DEVAD * @addr: PHY address on the MII bus * * Description: it reads data from the MMD registers (clause 22 to access to * clause 45) of the specified phy address. */ -int phy_read_mmd_indirect(struct phy_device *phydev, int prtad, - int devad, int addr); +int phy_read_mmd_indirect(struct phy_device *phydev, int prtad, int devad); /** * phy_read - Convenience function for reading a given PHY register @@ -735,14 +733,13 @@ static inline int phy_write_mmd(struct phy_device *phydev, int devad, * @phydev: The PHY device * @prtad: MMD Address * @devad: MMD DEVAD - * @addr: PHY address on the MII bus * @data: data to write in the MMD register * * Description: Write data from the MMD registers of the specified * phy address. */ void phy_write_mmd_indirect(struct phy_device *phydev, int prtad, - int devad, int addr, u32 data); + int devad, u32 data); struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id, bool is_c45, -- cgit v1.2.3-71-gd317 From 2220943a21e26d97d7fd8f83c004b947326b469d Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 6 Jan 2016 20:11:13 +0100 Subject: phy: Centralise print about attached phy Many Ethernet drivers contain the same netdev_info() print statement about the attached phy. Move it into the phy device code. 
Additionally add a varargs function which can be used to append additional information. Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/ethernet/adi/bfin_mac.c | 6 ++---- drivers/net/ethernet/agere/et131x.c | 4 +--- drivers/net/ethernet/amd/au1000_eth.c | 4 +--- drivers/net/ethernet/broadcom/b44.c | 3 +-- drivers/net/ethernet/broadcom/bcm63xx_enet.c | 3 +-- drivers/net/ethernet/broadcom/sb1250-mac.c | 7 +++---- drivers/net/ethernet/broadcom/tg3.c | 10 +++------- drivers/net/ethernet/cadence/macb.c | 3 +-- drivers/net/ethernet/dnet.c | 4 +--- drivers/net/ethernet/freescale/fec_main.c | 4 +--- drivers/net/ethernet/lantiq_etop.c | 4 +--- drivers/net/ethernet/nxp/lpc_eth.c | 5 ++--- drivers/net/ethernet/rdc/r6040.c | 4 +--- drivers/net/ethernet/renesas/ravb_main.c | 3 +-- drivers/net/ethernet/renesas/sh_eth.c | 3 +-- drivers/net/ethernet/smsc/smsc911x.c | 4 +--- drivers/net/ethernet/smsc/smsc9420.c | 9 ++------- drivers/net/ethernet/synopsys/dwc_eth_qos.c | 12 +----------- drivers/net/ethernet/ti/cpsw.c | 4 ++-- drivers/net/ethernet/ti/davinci_emac.c | 5 +---- drivers/net/ethernet/toshiba/tc35815.c | 6 ++---- drivers/net/phy/phy_device.c | 27 +++++++++++++++++++++++++++ drivers/staging/netlogic/xlr_net.c | 3 +-- include/linux/phy.h | 4 ++++ net/dsa/slave.c | 5 ++--- 25 files changed, 64 insertions(+), 82 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/adi/bfin_mac.c b/drivers/net/ethernet/adi/bfin_mac.c index 5f8a5182b8dc..ed5c78cb7239 100644 --- a/drivers/net/ethernet/adi/bfin_mac.c +++ b/drivers/net/ethernet/adi/bfin_mac.c @@ -444,10 +444,8 @@ static int mii_probe(struct net_device *dev, int phy_mode) lp->old_duplex = -1; lp->phydev = phydev; - pr_info("attached PHY driver [%s] " - "(mii_bus:phy_addr=%s, irq=%d, mdc_clk=%dHz(mdc_div=%d)@sclk=%dMHz)\n", - phydev->drv->name, phydev_name(phydev), phydev->irq, - MDC_CLK, mdc_div, sclk/1000000); + phy_attached_print(phydev, "mdc_clk=%dHz(mdc_div=%d)@sclk=%dMHz)\n", + MDC_CLK, mdc_div, sclk / 1000000); return 0; } diff --git a/drivers/net/ethernet/agere/et131x.c b/drivers/net/ethernet/agere/et131x.c index 80b706f0fc97..825da3af806a 100644 --- a/drivers/net/ethernet/agere/et131x.c +++ b/drivers/net/ethernet/agere/et131x.c @@ -3289,9 +3289,7 @@ static int et131x_mii_probe(struct net_device *netdev) phydev->autoneg = AUTONEG_ENABLE; adapter->phydev = phydev; - dev_info(&adapter->pdev->dev, - "attached PHY driver [%s] (mii_bus:phy_addr=%s)\n", - phydev->drv->name, phydev_name(phydev)); + phy_attached_info(phydev); return 0; } diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c index 8a8d6f2a0f6f..114618d357d5 100644 --- a/drivers/net/ethernet/amd/au1000_eth.c +++ b/drivers/net/ethernet/amd/au1000_eth.c @@ -583,9 +583,7 @@ static int au1000_mii_probe(struct net_device *dev) aup->old_duplex = -1; aup->phy_dev = phydev; - netdev_info(dev, "attached PHY driver [%s] " - "(mii_bus:phy_addr=%s, irq=%d)\n", - phydev->drv->name, phydev_name(phydev), phydev->irq); + phy_attached_info(phydev); return 0; } diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index 928a2210e788..e7d9308d6760 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -2315,8 +2315,7 @@ static int b44_register_phy_one(struct b44 *bp) bp->old_link = 0; bp->phy_addr = phydev->addr; - dev_info(sdev->dev, "attached PHY driver [%s] (mii_bus:phy_addr=%s)\n", - phydev->drv->name, phydev_name(phydev)); + phy_attached_info(phydev); 
return 0; diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index a54bafad3538..55f31faa09e6 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -908,8 +908,7 @@ static int bcm_enet_open(struct net_device *dev) else phydev->advertising &= ~SUPPORTED_Pause; - dev_info(kdev, "attached PHY at address %d [%s]\n", - phydev->addr, phydev->drv->name); + phy_attached_info(phydev); priv->old_link = 0; priv->old_duplex = -1; diff --git a/drivers/net/ethernet/broadcom/sb1250-mac.c b/drivers/net/ethernet/broadcom/sb1250-mac.c index f557a2aaec23..2470c6084c67 100644 --- a/drivers/net/ethernet/broadcom/sb1250-mac.c +++ b/drivers/net/ethernet/broadcom/sb1250-mac.c @@ -2388,11 +2388,10 @@ static int sbmac_mii_probe(struct net_device *dev) SUPPORTED_MII | SUPPORTED_Pause | SUPPORTED_Asym_Pause; - phy_dev->advertising = phy_dev->supported; - pr_info("%s: attached PHY driver [%s] (mii_bus:phy_addr=%s, irq=%d)\n", - dev->name, phy_dev->drv->name, - dev_name(&phy_dev->dev), phy_dev->irq); + phy_attached_info(phydev); + + phy_dev->advertising = phy_dev->supported; sc->phy_dev = phy_dev; diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 69d84d67f09a..07c067590caa 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -2128,6 +2128,8 @@ static int tg3_phy_init(struct tg3 *tp) phydev->advertising = phydev->supported; + phy_attached_info(phydev); + return 0; } @@ -17898,13 +17900,7 @@ static int tg3_init_one(struct pci_dev *pdev, tg3_bus_string(tp, str), dev->dev_addr); - if (tp->phy_flags & TG3_PHYFLG_IS_CONNECTED) { - struct phy_device *phydev; - phydev = tp->mdio_bus->phy_map[tp->phy_addr]; - netdev_info(dev, - "attached PHY driver [%s] (mii_bus:phy_addr=%s)\n", - phydev->drv->name, phydev_name(phydev)); - } else { + if (!(tp->phy_flags & TG3_PHYFLG_IS_CONNECTED)) { char *ethtype; if (tp->phy_flags & TG3_PHYFLG_10_100_ONLY) diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index 001d60c5521c..98df33b7a395 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -2950,8 +2950,7 @@ static int macb_probe(struct platform_device *pdev) dev->base_addr, dev->irq, dev->dev_addr); phydev = bp->phy_dev; - netdev_info(dev, "attached PHY driver [%s] (mii_bus:phy_addr=%s, irq=%d)\n", - phydev->drv->name, phydev_name(phydev), phydev->irq); + phy_attached_info(phydev); return 0; diff --git a/drivers/net/ethernet/dnet.c b/drivers/net/ethernet/dnet.c index 0ec367521354..6557460cf028 100644 --- a/drivers/net/ethernet/dnet.c +++ b/drivers/net/ethernet/dnet.c @@ -886,9 +886,7 @@ static int dnet_probe(struct platform_device *pdev) (bp->capabilities & DNET_HAS_GIGABIT) ? "" : "no ", (bp->capabilities & DNET_HAS_DMA) ? 
"" : "no "); phydev = bp->phy_dev; - dev_info(&pdev->dev, "attached PHY driver [%s] " - "(mii_bus:phy_addr=%s, irq=%d)\n", - phydev->drv->name, phydev_name(phydev), phydev->irq); + phy_attached_info(phydev); return 0; diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index d2328fc5da57..ceabe21b3b2c 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1972,9 +1972,7 @@ static int fec_enet_mii_probe(struct net_device *ndev) fep->link = 0; fep->full_duplex = 0; - netdev_info(ndev, "Freescale FEC PHY driver [%s] (mii_bus:phy_addr=%s, irq=%d)\n", - fep->phy_dev->drv->name, dev_name(&fep->phy_dev->dev), - fep->phy_dev->irq); + phy_attached_info(phy_dev); return 0; } diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c index 274a3cec84c2..86238a5eaddf 100644 --- a/drivers/net/ethernet/lantiq_etop.c +++ b/drivers/net/ethernet/lantiq_etop.c @@ -408,9 +408,7 @@ ltq_etop_mdio_probe(struct net_device *dev) phydev->advertising = phydev->supported; priv->phydev = phydev; - pr_info("%s: attached PHY [%s] (phy_addr=%s, irq=%d)\n", - dev->name, phydev->drv->name, - phydev_name(phydev), phydev->irq); + phy_attached_info(phydev); return 0; } diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c index 5801aa197697..024bc3675573 100644 --- a/drivers/net/ethernet/nxp/lpc_eth.c +++ b/drivers/net/ethernet/nxp/lpc_eth.c @@ -816,9 +816,8 @@ static int lpc_mii_probe(struct net_device *ndev) pldat->duplex = -1; pldat->phy_dev = phydev; - netdev_info(ndev, - "attached PHY driver [%s] (mii_bus:phy_addr=%s, irq=%d)\n", - phydev->drv->name, phydev_name(phydev), phydev->irq); + phy_attached_info(phydev); + return 0; } diff --git a/drivers/net/ethernet/rdc/r6040.c b/drivers/net/ethernet/rdc/r6040.c index 86a0887811c7..174dea787caf 100644 --- a/drivers/net/ethernet/rdc/r6040.c +++ b/drivers/net/ethernet/rdc/r6040.c @@ -1061,9 +1061,7 @@ static int r6040_mii_probe(struct net_device *dev) lp->old_link = 0; lp->old_duplex = -1; - dev_info(&lp->pdev->dev, "attached PHY driver [%s] " - "(mii_bus:phy_addr=%s)\n", - phydev->drv->name, phydev_name(phydev)); + phy_attached_info(phydev); return 0; } diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 2f6c974e4a6d..9e20f37a3b6f 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -927,8 +927,7 @@ static int ravb_phy_init(struct net_device *ndev) /* 10BASE is not supported */ phydev->supported &= ~PHY_10BT_FEATURES; - netdev_info(ndev, "attached PHY %d (IRQ %d) to driver %s\n", - phydev->addr, phydev->irq, phydev_name(phydev)); + phy_attached_info(phydev); priv->phydev = phydev; diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index e14d28474b70..94581be64d65 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -1826,8 +1826,7 @@ static int sh_eth_phy_init(struct net_device *ndev) return PTR_ERR(phydev); } - netdev_info(ndev, "attached PHY %d (IRQ %d) to driver %s\n", - phydev->addr, phydev->irq, phydev_name(phydev)); + phy_attached_info(phydev); mdp->phydev = phydev; diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index 067346d3209d..139b99b04099 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -1031,9 +1031,7 @@ static int smsc911x_mii_probe(struct 
net_device *dev) return ret; } - netdev_info(dev, - "attached PHY driver [%s] (mii_bus:phy_addr=%s, irq=%d)\n", - phydev->drv->name, phydev_name(phydev), phydev->irq); + phy_attached_info(phydev); /* mask with MAC supported features */ phydev->supported &= (PHY_BASIC_FEATURES | SUPPORTED_Pause | diff --git a/drivers/net/ethernet/smsc/smsc9420.c b/drivers/net/ethernet/smsc/smsc9420.c index a02ed6b63064..fa8893a804f7 100644 --- a/drivers/net/ethernet/smsc/smsc9420.c +++ b/drivers/net/ethernet/smsc/smsc9420.c @@ -1163,10 +1163,6 @@ static int smsc9420_mii_probe(struct net_device *dev) return -ENODEV; } - phydev = pd->mii_bus->phy_map[1]; - netif_info(pd, probe, pd->dev, "PHY addr %d, phy_id 0x%08X\n", - phydev->addr, phydev->phy_id); - phydev = phy_connect(dev, phydev_name(phydev), smsc9420_phy_adjust_link, PHY_INTERFACE_MODE_MII); @@ -1175,14 +1171,13 @@ static int smsc9420_mii_probe(struct net_device *dev) return PTR_ERR(phydev); } - netdev_info(dev, "attached PHY driver [%s] (mii_bus:phy_addr=%s, irq=%d)\n", - phydev->drv->name, phydev_name(phydev), phydev->irq); - /* mask with MAC supported features */ phydev->supported &= (PHY_BASIC_FEATURES | SUPPORTED_Pause | SUPPORTED_Asym_Pause); phydev->advertising = phydev->supported; + phy_attached_info(phydev); + pd->phy_dev = phydev; pd->last_duplex = -1; pd->last_carrier = -1; diff --git a/drivers/net/ethernet/synopsys/dwc_eth_qos.c b/drivers/net/ethernet/synopsys/dwc_eth_qos.c index 9066d7a8483c..b25ee370254a 100644 --- a/drivers/net/ethernet/synopsys/dwc_eth_qos.c +++ b/drivers/net/ethernet/synopsys/dwc_eth_qos.c @@ -972,9 +972,7 @@ static int dwceqos_mii_probe(struct net_device *ndev) } if (netif_msg_probe(lp)) - netdev_dbg(lp->ndev, - "phydev %p, phydev->phy_id 0xa%x, phydev->addr 0x%x\n", - phydev, phydev->phy_id, phydev->addr); + phy_attached_info(phydev); phydev->supported &= PHY_GBIT_FEATURES; @@ -983,14 +981,6 @@ static int dwceqos_mii_probe(struct net_device *ndev) lp->duplex = DUPLEX_UNKNOWN; lp->phy_dev = phydev; - if (netif_msg_probe(lp)) { - netdev_dbg(lp->ndev, "phy_addr 0x%x, phy_id 0x%08x\n", - lp->phy_dev->addr, lp->phy_dev->phy_id); - - netdev_dbg(lp->ndev, "attach [%s] phy driver\n", - lp->phy_dev->drv->name); - } - return 0; } diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 34ce7dce8c9d..49544c0fa6a7 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1159,8 +1159,8 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv) slave->data->phy_id, slave->slave_num); slave->phy = NULL; } else { - dev_info(priv->dev, "phy found : id is : 0x%x\n", - slave->phy->phy_id); + phy_attached_info(slave->phy); + phy_start(slave->phy); /* Configure GMII_SEL register */ diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index 5a40b0256327..5d9abedd6b75 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -1644,10 +1644,7 @@ static int emac_dev_open(struct net_device *ndev) priv->speed = 0; priv->duplex = ~0; - dev_info(emac_dev, "attached PHY driver [%s] " - "(mii_bus:phy_addr=%s, id=%x)\n", - priv->phydev->drv->name, phydev_name(priv->phydev), - priv->phydev->phy_id); + phy_attached_info(priv->phydev); } if (!priv->phydev) { diff --git a/drivers/net/ethernet/toshiba/tc35815.c b/drivers/net/ethernet/toshiba/tc35815.c index 8df6072ac78d..8fd5e0ba718c 100644 --- a/drivers/net/ethernet/toshiba/tc35815.c +++ b/drivers/net/ethernet/toshiba/tc35815.c @@ -638,10 +638,8 
@@ static int tc_mii_probe(struct net_device *dev) printk(KERN_ERR "%s: Could not attach to PHY\n", dev->name); return PTR_ERR(phydev); } - printk(KERN_INFO "%s: attached PHY driver [%s] " - "(mii_bus:phy_addr=%s, id=%x)\n", - dev->name, phydev->drv->name, phydev_name(phydev), - phydev->phy_id); + + phy_attached_info(phydev); /* mask with MAC supported features */ phydev->supported &= PHY_BASIC_FEATURES; diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 0f179709a289..68fe5738daef 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -607,6 +607,33 @@ int phy_init_hw(struct phy_device *phydev) } EXPORT_SYMBOL(phy_init_hw); +void phy_attached_info(struct phy_device *phydev) +{ + phy_attached_print(phydev, NULL); +} +EXPORT_SYMBOL(phy_attached_info); + +#define ATTACHED_FMT "attached PHY driver [%s] (mii_bus:phy_addr=%s, irq=%d)" +void phy_attached_print(struct phy_device *phydev, const char *fmt, ...) +{ + if (!fmt) { + dev_info(&phydev->dev, ATTACHED_FMT "\n", + phydev->drv->name, phydev_name(phydev), + phydev->irq); + } else { + va_list ap; + + dev_info(&phydev->dev, ATTACHED_FMT, + phydev->drv->name, phydev_name(phydev), + phydev->irq); + + va_start(ap, fmt); + vprintk(fmt, ap); + va_end(ap); + } +} +EXPORT_SYMBOL(phy_attached_print); + /** * phy_attach_direct - attach a network device to a given PHY device pointer * @dev: network device to attach diff --git a/drivers/staging/netlogic/xlr_net.c b/drivers/staging/netlogic/xlr_net.c index b939c4b5f229..cbc25b7e70a2 100644 --- a/drivers/staging/netlogic/xlr_net.c +++ b/drivers/staging/netlogic/xlr_net.c @@ -854,8 +854,7 @@ static int xlr_mii_probe(struct xlr_net_priv *priv) | ADVERTISED_MII); phydev->advertising = phydev->supported; - pr_info("attached PHY driver [%s] (mii_bus:phy_addr=%s\n", - phydev->drv->name, phydev_name(phydev)); + phy_attached_info(phydev); return 0; } diff --git a/include/linux/phy.h b/include/linux/phy.h index 08198ce98773..ecbf6382ba29 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -16,6 +16,7 @@ #ifndef __PHY_H #define __PHY_H +#include #include #include #include @@ -785,6 +786,9 @@ static inline const char *phydev_name(const struct phy_device *phydev) return dev_name(&phydev->dev); } +void phy_attached_print(struct phy_device *phydev, const char *fmt, ...) + __printf(2, 3); +void phy_attached_info(struct phy_device *phydev); int genphy_config_init(struct phy_device *phydev); int genphy_setup_forced(struct phy_device *phydev); int genphy_restart_aneg(struct phy_device *phydev); diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 1e9e9424a33d..5f45e68b52dc 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1080,11 +1080,10 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p, netdev_err(slave_dev, "failed to connect to port %d: %d\n", p->port, ret); return ret; } - } else { - netdev_info(slave_dev, "attached PHY at address %d [%s]\n", - p->phy->addr, p->phy->drv->name); } + phy_attached_info(p->phy); + return 0; } -- cgit v1.2.3-71-gd317 From e7f4dc3536a40097f95103ddf98dd55b3a980f5b Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 6 Jan 2016 20:11:15 +0100 Subject: mdio: Move allocation of interrupts into core Have mdio_alloc() create the array of interrupt numbers, and initialize it to POLLING. This is what most MDIO drivers want, so allowing code to be removed from the drivers. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- arch/powerpc/platforms/82xx/ep8248e.c | 10 +--------- arch/powerpc/platforms/pasemi/gpio_mdio.c | 3 --- drivers/net/ethernet/8390/ax88796.c | 15 ++------------- drivers/net/ethernet/adi/bfin_mac.c | 11 +---------- drivers/net/ethernet/aeroflex/greth.c | 5 ----- drivers/net/ethernet/aeroflex/greth.h | 1 - drivers/net/ethernet/agere/et131x.c | 13 +------------ drivers/net/ethernet/altera/altera_tse_main.c | 13 +------------ drivers/net/ethernet/amd/au1000_eth.c | 7 ------- drivers/net/ethernet/broadcom/b44.c | 14 +------------- drivers/net/ethernet/broadcom/bcm63xx_enet.c | 9 --------- drivers/net/ethernet/broadcom/bgmac.c | 15 ++------------- drivers/net/ethernet/broadcom/genet/bcmmii.c | 10 ---------- drivers/net/ethernet/broadcom/sb1250-mac.c | 4 ---- drivers/net/ethernet/broadcom/tg3.c | 4 ---- drivers/net/ethernet/broadcom/tg3.h | 1 - drivers/net/ethernet/cadence/macb.c | 14 +------------- drivers/net/ethernet/dnet.c | 12 +----------- drivers/net/ethernet/ethoc.c | 14 +------------- drivers/net/ethernet/faraday/ftgmac100.c | 6 ------ drivers/net/ethernet/freescale/fec_main.c | 16 ++-------------- drivers/net/ethernet/freescale/fec_mpc52xx_phy.c | 4 ---- drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c | 10 +--------- drivers/net/ethernet/freescale/fs_enet/mii-fec.c | 10 +--------- drivers/net/ethernet/freescale/fsl_pq_mdio.c | 2 -- drivers/net/ethernet/hisilicon/hns_mdio.c | 5 ----- drivers/net/ethernet/lantiq_etop.c | 14 +------------- drivers/net/ethernet/marvell/mvmdio.c | 10 +--------- drivers/net/ethernet/nxp/lpc_eth.c | 13 +------------ drivers/net/ethernet/rdc/r6040.c | 14 +------------- drivers/net/ethernet/renesas/sh_eth.c | 12 +----------- drivers/net/ethernet/smsc/smsc911x.c | 6 ++---- drivers/net/ethernet/smsc/smsc9420.c | 6 +----- drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 14 ++++---------- drivers/net/ethernet/synopsys/dwc_eth_qos.c | 15 ++------------- drivers/net/ethernet/ti/cpmac.c | 3 --- drivers/net/ethernet/toshiba/tc35815.c | 14 +------------- drivers/net/ethernet/xilinx/ll_temac.h | 1 - drivers/net/ethernet/xilinx/ll_temac_mdio.c | 2 -- drivers/net/ethernet/xilinx/xilinx_axienet.h | 2 -- drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c | 2 -- drivers/net/ethernet/xilinx/xilinx_emaclite.c | 4 ---- drivers/net/phy/fixed_phy.c | 4 +--- drivers/net/phy/mdio-bcm-unimac.c | 11 +---------- drivers/net/phy/mdio-gpio.c | 2 +- drivers/net/phy/mdio-moxart.c | 7 ------- drivers/net/phy/mdio-mux.c | 3 +-- drivers/net/phy/mdio-octeon.c | 2 -- drivers/net/phy/mdio-sun4i.c | 12 +----------- drivers/net/phy/mdio_bus.c | 5 +++++ drivers/net/usb/ax88172a.c | 14 ++------------ drivers/net/usb/lan78xx.c | 11 +---------- drivers/of/of_mdio.c | 7 +------ include/linux/phy.h | 6 +++--- 54 files changed, 48 insertions(+), 396 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/platforms/82xx/ep8248e.c b/arch/powerpc/platforms/82xx/ep8248e.c index a0cb8bd41958..6781bda117be 100644 --- a/arch/powerpc/platforms/82xx/ep8248e.c +++ b/arch/powerpc/platforms/82xx/ep8248e.c @@ -131,23 +131,15 @@ static int ep8248e_mdio_probe(struct platform_device *ofdev) if (!bus) return -ENOMEM; - bus->irq = kmalloc(sizeof(int) * PHY_MAX_ADDR, GFP_KERNEL); - if (bus->irq == NULL) { - ret = -ENOMEM; - goto err_free_bus; - } - bus->name = "ep8248e-mdio-bitbang"; bus->parent = &ofdev->dev; snprintf(bus->id, MII_BUS_ID_SIZE, "%x", res.start); ret = of_mdiobus_register(bus, ofdev->dev.of_node); if (ret) - goto err_free_irq; + goto err_free_bus; return 0; -err_free_irq: - 
kfree(bus->irq); err_free_bus: free_mdio_bitbang(bus); return ret; diff --git a/arch/powerpc/platforms/pasemi/gpio_mdio.c b/arch/powerpc/platforms/pasemi/gpio_mdio.c index ae3f47b25b18..ddf635000c6b 100644 --- a/arch/powerpc/platforms/pasemi/gpio_mdio.c +++ b/arch/powerpc/platforms/pasemi/gpio_mdio.c @@ -41,7 +41,6 @@ static void __iomem *gpio_regs; struct gpio_priv { int mdc_pin; int mdio_pin; - int mdio_irqs[PHY_MAX_ADDR]; }; #define MDC_PIN(bus) (((struct gpio_priv *)bus->priv)->mdc_pin) @@ -245,8 +244,6 @@ static int gpio_mdio_probe(struct platform_device *ofdev) snprintf(new_bus->id, MII_BUS_ID_SIZE, "%x", *prop); new_bus->priv = priv; - new_bus->irq = priv->mdio_irqs; - prop = of_get_property(np, "mdc-pin", NULL); priv->mdc_pin = *prop; diff --git a/drivers/net/ethernet/8390/ax88796.c b/drivers/net/ethernet/8390/ax88796.c index 90b540a4a561..c89b9aeeceb6 100644 --- a/drivers/net/ethernet/8390/ax88796.c +++ b/drivers/net/ethernet/8390/ax88796.c @@ -627,7 +627,7 @@ static int ax_mii_init(struct net_device *dev) struct platform_device *pdev = to_platform_device(dev->dev.parent); struct ei_device *ei_local = netdev_priv(dev); struct ax_device *ax = to_ax_dev(dev); - int err, i; + int err; ax->bb_ctrl.ops = &bb_ops; ax->addr_memr = ei_local->mem + AX_MEMR; @@ -642,23 +642,12 @@ static int ax_mii_init(struct net_device *dev) snprintf(ax->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x", pdev->name, pdev->id); - ax->mii_bus->irq = kmalloc(sizeof(int) * PHY_MAX_ADDR, GFP_KERNEL); - if (!ax->mii_bus->irq) { - err = -ENOMEM; - goto out_free_mdio_bitbang; - } - - for (i = 0; i < PHY_MAX_ADDR; i++) - ax->mii_bus->irq[i] = PHY_POLL; - err = mdiobus_register(ax->mii_bus); if (err) - goto out_free_irq; + goto out_free_mdio_bitbang; return 0; - out_free_irq: - kfree(ax->mii_bus->irq); out_free_mdio_bitbang: free_mdio_bitbang(ax->mii_bus); out: diff --git a/drivers/net/ethernet/adi/bfin_mac.c b/drivers/net/ethernet/adi/bfin_mac.c index ed5c78cb7239..62862744c870 100644 --- a/drivers/net/ethernet/adi/bfin_mac.c +++ b/drivers/net/ethernet/adi/bfin_mac.c @@ -1838,12 +1838,6 @@ static int bfin_mii_bus_probe(struct platform_device *pdev) snprintf(miibus->id, MII_BUS_ID_SIZE, "%s-%x", pdev->name, pdev->id); - miibus->irq = kmalloc(sizeof(int)*PHY_MAX_ADDR, GFP_KERNEL); - if (!miibus->irq) - goto out_err_irq_alloc; - - for (i = rc; i < PHY_MAX_ADDR; ++i) - miibus->irq[i] = PHY_POLL; rc = clamp(mii_bus_pd->phydev_number, 0, PHY_MAX_ADDR); if (rc != mii_bus_pd->phydev_number) @@ -1862,14 +1856,12 @@ static int bfin_mii_bus_probe(struct platform_device *pdev) rc = mdiobus_register(miibus); if (rc) { dev_err(&pdev->dev, "Cannot register MDIO bus!\n"); - goto out_err_mdiobus_register; + goto out_err_alloc; } platform_set_drvdata(pdev, miibus); return 0; -out_err_mdiobus_register: - kfree(miibus->irq); out_err_irq_alloc: mdiobus_free(miibus); out_err_alloc: @@ -1885,7 +1877,6 @@ static int bfin_mii_bus_remove(struct platform_device *pdev) dev_get_platdata(&pdev->dev); mdiobus_unregister(miibus); - kfree(miibus->irq); mdiobus_free(miibus); peripheral_free_list(mii_bus_pd->mac_peripherals); diff --git a/drivers/net/ethernet/aeroflex/greth.c b/drivers/net/ethernet/aeroflex/greth.c index 20bf55dbd76f..b873531c5575 100644 --- a/drivers/net/ethernet/aeroflex/greth.c +++ b/drivers/net/ethernet/aeroflex/greth.c @@ -1337,11 +1337,6 @@ static int greth_mdio_init(struct greth_private *greth) greth->mdio->write = greth_mdio_write; greth->mdio->priv = greth; - greth->mdio->irq = greth->mdio_irqs; - - for (phy = 0; phy < PHY_MAX_ADDR; 
phy++) - greth->mdio->irq[phy] = PHY_POLL; - ret = mdiobus_register(greth->mdio); if (ret) { goto error; diff --git a/drivers/net/ethernet/aeroflex/greth.h b/drivers/net/ethernet/aeroflex/greth.h index ae16ac94daf8..92dd918e4a83 100644 --- a/drivers/net/ethernet/aeroflex/greth.h +++ b/drivers/net/ethernet/aeroflex/greth.h @@ -125,7 +125,6 @@ struct greth_private { struct phy_device *phy; struct mii_bus *mdio; - int mdio_irqs[PHY_MAX_ADDR]; unsigned int link; unsigned int speed; unsigned int duplex; diff --git a/drivers/net/ethernet/agere/et131x.c b/drivers/net/ethernet/agere/et131x.c index 825da3af806a..f29d45eea1d9 100644 --- a/drivers/net/ethernet/agere/et131x.c +++ b/drivers/net/ethernet/agere/et131x.c @@ -3325,7 +3325,6 @@ static void et131x_pci_remove(struct pci_dev *pdev) netif_napi_del(&adapter->napi); phy_disconnect(adapter->phydev); mdiobus_unregister(adapter->mii_bus); - kfree(adapter->mii_bus->irq); mdiobus_free(adapter->mii_bus); et131x_adapter_memory_free(adapter); @@ -3944,7 +3943,6 @@ static int et131x_pci_setup(struct pci_dev *pdev, struct net_device *netdev; struct et131x_adapter *adapter; int rc; - int ii; rc = pci_enable_device(pdev); if (rc < 0) { @@ -4034,18 +4032,11 @@ static int et131x_pci_setup(struct pci_dev *pdev, adapter->mii_bus->priv = netdev; adapter->mii_bus->read = et131x_mdio_read; adapter->mii_bus->write = et131x_mdio_write; - adapter->mii_bus->irq = kmalloc_array(PHY_MAX_ADDR, sizeof(int), - GFP_KERNEL); - if (!adapter->mii_bus->irq) - goto err_mdio_free; - - for (ii = 0; ii < PHY_MAX_ADDR; ii++) - adapter->mii_bus->irq[ii] = PHY_POLL; rc = mdiobus_register(adapter->mii_bus); if (rc < 0) { dev_err(&pdev->dev, "failed to register MII bus\n"); - goto err_mdio_free_irq; + goto err_mdio_free; } rc = et131x_mii_probe(netdev); @@ -4085,8 +4076,6 @@ err_phy_disconnect: phy_disconnect(adapter->phydev); err_mdio_unregister: mdiobus_unregister(adapter->mii_bus); -err_mdio_free_irq: - kfree(adapter->mii_bus->irq); err_mdio_free: mdiobus_free(adapter->mii_bus); err_mem_free: diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c index fe644823ceaf..10d51e8aefe0 100644 --- a/drivers/net/ethernet/altera/altera_tse_main.c +++ b/drivers/net/ethernet/altera/altera_tse_main.c @@ -131,7 +131,6 @@ static int altera_tse_mdio_create(struct net_device *dev, unsigned int id) { struct altera_tse_private *priv = netdev_priv(dev); int ret; - int i; struct device_node *mdio_node = NULL; struct mii_bus *mdio = NULL; struct device_node *child_node = NULL; @@ -161,14 +160,6 @@ static int altera_tse_mdio_create(struct net_device *dev, unsigned int id) mdio->write = &altera_tse_mdio_write; snprintf(mdio->id, MII_BUS_ID_SIZE, "%s-%u", mdio->name, id); - mdio->irq = kcalloc(PHY_MAX_ADDR, sizeof(int), GFP_KERNEL); - if (mdio->irq == NULL) { - ret = -ENOMEM; - goto out_free_mdio; - } - for (i = 0; i < PHY_MAX_ADDR; i++) - mdio->irq[i] = PHY_POLL; - mdio->priv = dev; mdio->parent = priv->device; @@ -176,7 +167,7 @@ static int altera_tse_mdio_create(struct net_device *dev, unsigned int id) if (ret != 0) { netdev_err(dev, "Cannot register MDIO bus %s\n", mdio->id); - goto out_free_mdio_irq; + goto out_free_mdio; } if (netif_msg_drv(priv)) @@ -184,8 +175,6 @@ static int altera_tse_mdio_create(struct net_device *dev, unsigned int id) priv->mdio = mdio; return 0; -out_free_mdio_irq: - kfree(mdio->irq); out_free_mdio: mdiobus_free(mdio); mdio = NULL; diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c index 
114618d357d5..982b581d3484 100644 --- a/drivers/net/ethernet/amd/au1000_eth.c +++ b/drivers/net/ethernet/amd/au1000_eth.c @@ -1291,14 +1291,7 @@ static int au1000_probe(struct platform_device *pdev) aup->mii_bus->name = "au1000_eth_mii"; snprintf(aup->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x", pdev->name, aup->mac_id); - aup->mii_bus->irq = kmalloc(sizeof(int)*PHY_MAX_ADDR, GFP_KERNEL); - if (aup->mii_bus->irq == NULL) { - err = -ENOMEM; - goto err_out; - } - for (i = 0; i < PHY_MAX_ADDR; ++i) - aup->mii_bus->irq[i] = PHY_POLL; /* if known, set corresponding PHY IRQs */ if (aup->phy_static_config) if (aup->phy_irq && aup->phy_busid == aup->mac_id) diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index e7d9308d6760..4d08bc02c7a8 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -2263,21 +2263,13 @@ static int b44_register_phy_one(struct b44 *bp) mii_bus->parent = sdev->dev; mii_bus->phy_mask = ~(1 << bp->phy_addr); snprintf(mii_bus->id, MII_BUS_ID_SIZE, "%x", instance); - mii_bus->irq = kmalloc(sizeof(int) * PHY_MAX_ADDR, GFP_KERNEL); - if (!mii_bus->irq) { - dev_err(sdev->dev, "mii_bus irq allocation failed\n"); - err = -ENOMEM; - goto err_out_mdiobus; - } - - memset(mii_bus->irq, PHY_POLL, sizeof(int) * PHY_MAX_ADDR); bp->mii_bus = mii_bus; err = mdiobus_register(mii_bus); if (err) { dev_err(sdev->dev, "failed to register MII bus\n"); - goto err_out_mdiobus_irq; + goto err_out_mdiobus; } if (!bp->mii_bus->phy_map[bp->phy_addr] && @@ -2322,9 +2314,6 @@ static int b44_register_phy_one(struct b44 *bp) err_out_mdiobus_unregister: mdiobus_unregister(mii_bus); -err_out_mdiobus_irq: - kfree(mii_bus->irq); - err_out_mdiobus: mdiobus_free(mii_bus); @@ -2338,7 +2327,6 @@ static void b44_unregister_phy_one(struct b44 *bp) phy_disconnect(bp->phydev); mdiobus_unregister(mii_bus); - kfree(mii_bus->irq); mdiobus_free(mii_bus); } diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index 55f31faa09e6..87c6b5bdd616 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -1848,17 +1848,8 @@ static int bcm_enet_probe(struct platform_device *pdev) * if a slave is not present on hw */ bus->phy_mask = ~(1 << priv->phy_id); - bus->irq = devm_kzalloc(&pdev->dev, sizeof(int) * PHY_MAX_ADDR, - GFP_KERNEL); - if (!bus->irq) { - ret = -ENOMEM; - goto out_free_mdio; - } - if (priv->has_phy_interrupt) bus->irq[priv->phy_id] = priv->phy_interrupt; - else - bus->irq[priv->phy_id] = PHY_POLL; ret = mdiobus_register(bus); if (ret) { diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index 28f7610b03fe..c7798d360512 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -1471,7 +1471,7 @@ static int bgmac_mii_register(struct bgmac *bgmac) struct mii_bus *mii_bus; struct phy_device *phy_dev; char bus_id[MII_BUS_ID_SIZE + 3]; - int i, err = 0; + int err = 0; if (ci->id == BCMA_CHIP_ID_BCM4707 || ci->id == BCMA_CHIP_ID_BCM53018) @@ -1490,18 +1490,10 @@ static int bgmac_mii_register(struct bgmac *bgmac) mii_bus->parent = &bgmac->core->dev; mii_bus->phy_mask = ~(1 << bgmac->phyaddr); - mii_bus->irq = kmalloc_array(PHY_MAX_ADDR, sizeof(int), GFP_KERNEL); - if (!mii_bus->irq) { - err = -ENOMEM; - goto err_free_bus; - } - for (i = 0; i < PHY_MAX_ADDR; i++) - mii_bus->irq[i] = PHY_POLL; - err = mdiobus_register(mii_bus); if (err) { bgmac_err(bgmac, "Registration of 
mii bus failed\n"); - goto err_free_irq; + goto err_free_bus; } bgmac->mii_bus = mii_bus; @@ -1522,8 +1514,6 @@ static int bgmac_mii_register(struct bgmac *bgmac) err_unregister_bus: mdiobus_unregister(mii_bus); -err_free_irq: - kfree(mii_bus->irq); err_free_bus: mdiobus_free(mii_bus); return err; @@ -1534,7 +1524,6 @@ static void bgmac_mii_unregister(struct bgmac *bgmac) struct mii_bus *mii_bus = bgmac->mii_bus; mdiobus_unregister(mii_bus); - kfree(mii_bus->irq); mdiobus_free(mii_bus); } diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index 8bdfe53754ba..4523acd8c1c2 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -402,8 +402,6 @@ int bcmgenet_mii_probe(struct net_device *dev) */ if (priv->internal_phy) priv->mii_bus->irq[phydev->addr] = PHY_IGNORE_INTERRUPT; - else - priv->mii_bus->irq[phydev->addr] = PHY_POLL; return 0; } @@ -477,12 +475,6 @@ static int bcmgenet_mii_alloc(struct bcmgenet_priv *priv) snprintf(bus->id, MII_BUS_ID_SIZE, "%s-%d", priv->pdev->name, priv->pdev->id); - bus->irq = kcalloc(PHY_MAX_ADDR, sizeof(int), GFP_KERNEL); - if (!bus->irq) { - mdiobus_free(priv->mii_bus); - return -ENOMEM; - } - return 0; } @@ -648,7 +640,6 @@ int bcmgenet_mii_init(struct net_device *dev) out: of_node_put(priv->phy_dn); mdiobus_unregister(priv->mii_bus); - kfree(priv->mii_bus->irq); mdiobus_free(priv->mii_bus); return ret; } @@ -659,6 +650,5 @@ void bcmgenet_mii_exit(struct net_device *dev) of_node_put(priv->phy_dn); mdiobus_unregister(priv->mii_bus); - kfree(priv->mii_bus->irq); mdiobus_free(priv->mii_bus); } diff --git a/drivers/net/ethernet/broadcom/sb1250-mac.c b/drivers/net/ethernet/broadcom/sb1250-mac.c index 2470c6084c67..68a363708d27 100644 --- a/drivers/net/ethernet/broadcom/sb1250-mac.c +++ b/drivers/net/ethernet/broadcom/sb1250-mac.c @@ -238,7 +238,6 @@ struct sbmac_softc { struct napi_struct napi; struct phy_device *phy_dev; /* the associated PHY device */ struct mii_bus *mii_bus; /* the MII bus */ - int phy_irq[PHY_MAX_ADDR]; spinlock_t sbm_lock; /* spin lock */ int sbm_devflags; /* current device flags */ @@ -2250,9 +2249,6 @@ static int sbmac_init(struct platform_device *pldev, long long base) sc->mii_bus->priv = sc; sc->mii_bus->read = sbmac_mii_read; sc->mii_bus->write = sbmac_mii_write; - sc->mii_bus->irq = sc->phy_irq; - for (i = 0; i < PHY_MAX_ADDR; ++i) - sc->mii_bus->irq[i] = SBMAC_PHY_INT; sc->mii_bus->parent = &pldev->dev; /* diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 07c067590caa..04e7d0d0e5b1 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -1538,10 +1538,6 @@ static int tg3_mdio_init(struct tg3 *tp) tp->mdio_bus->read = &tg3_mdio_read; tp->mdio_bus->write = &tg3_mdio_write; tp->mdio_bus->phy_mask = ~(1 << tp->phy_addr); - tp->mdio_bus->irq = &tp->mdio_irq[0]; - - for (i = 0; i < PHY_MAX_ADDR; i++) - tp->mdio_bus->irq[i] = PHY_POLL; /* The bus registration will look for all the PHYs on the mdio bus. 
* Unfortunately, it does not ensure the PHY is powered up before diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h index 31c9f8295953..3b5e98ecba00 100644 --- a/drivers/net/ethernet/broadcom/tg3.h +++ b/drivers/net/ethernet/broadcom/tg3.h @@ -3254,7 +3254,6 @@ struct tg3 { int pcie_readrq; struct mii_bus *mdio_bus; - int mdio_irq[PHY_MAX_ADDR]; int old_link; u8 phy_addr; diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index 98df33b7a395..eb1397484eef 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -441,12 +441,6 @@ static int macb_mii_init(struct macb *bp) bp->mii_bus->parent = &bp->dev->dev; pdata = dev_get_platdata(&bp->pdev->dev); - bp->mii_bus->irq = kmalloc(sizeof(int)*PHY_MAX_ADDR, GFP_KERNEL); - if (!bp->mii_bus->irq) { - err = -ENOMEM; - goto err_out_free_mdiobus; - } - dev_set_drvdata(&bp->dev->dev, bp->mii_bus); np = bp->pdev->dev.of_node; @@ -471,9 +465,6 @@ static int macb_mii_init(struct macb *bp) goto err_out_unregister_bus; } } else { - for (i = 0; i < PHY_MAX_ADDR; i++) - bp->mii_bus->irq[i] = PHY_POLL; - if (pdata) bp->mii_bus->phy_mask = pdata->phy_mask; @@ -481,7 +472,7 @@ static int macb_mii_init(struct macb *bp) } if (err) - goto err_out_free_mdio_irq; + goto err_out_free_mdiobus; err = macb_mii_probe(bp->dev); if (err) @@ -491,8 +482,6 @@ static int macb_mii_init(struct macb *bp) err_out_unregister_bus: mdiobus_unregister(bp->mii_bus); -err_out_free_mdio_irq: - kfree(bp->mii_bus->irq); err_out_free_mdiobus: mdiobus_free(bp->mii_bus); err_out: @@ -2980,7 +2969,6 @@ static int macb_remove(struct platform_device *pdev) if (bp->phy_dev) phy_disconnect(bp->phy_dev); mdiobus_unregister(bp->mii_bus); - kfree(bp->mii_bus->irq); mdiobus_free(bp->mii_bus); /* Shutdown the PHY if there is a GPIO reset */ diff --git a/drivers/net/ethernet/dnet.c b/drivers/net/ethernet/dnet.c index 6557460cf028..b69a9eacc531 100644 --- a/drivers/net/ethernet/dnet.c +++ b/drivers/net/ethernet/dnet.c @@ -302,7 +302,7 @@ static int dnet_mii_probe(struct net_device *dev) static int dnet_mii_init(struct dnet *bp) { - int err, i; + int err; bp->mii_bus = mdiobus_alloc(); if (bp->mii_bus == NULL) @@ -317,16 +317,6 @@ static int dnet_mii_init(struct dnet *bp) bp->mii_bus->priv = bp; - bp->mii_bus->irq = devm_kmalloc(&bp->pdev->dev, - sizeof(int) * PHY_MAX_ADDR, GFP_KERNEL); - if (!bp->mii_bus->irq) { - err = -ENOMEM; - goto err_out; - } - - for (i = 0; i < PHY_MAX_ADDR; i++) - bp->mii_bus->irq[i] = PHY_POLL; - if (mdiobus_register(bp->mii_bus)) { err = -ENXIO; goto err_out; diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c index ff665493ca97..c028b299ab3f 100644 --- a/drivers/net/ethernet/ethoc.c +++ b/drivers/net/ethernet/ethoc.c @@ -1015,7 +1015,6 @@ static int ethoc_probe(struct platform_device *pdev) struct resource *mmio = NULL; struct resource *mem = NULL; struct ethoc *priv = NULL; - unsigned int phy; int num_bd; int ret = 0; bool random_mac = false; @@ -1206,19 +1205,10 @@ static int ethoc_probe(struct platform_device *pdev) priv->mdio->write = ethoc_mdio_write; priv->mdio->priv = priv; - priv->mdio->irq = kmalloc(sizeof(int) * PHY_MAX_ADDR, GFP_KERNEL); - if (!priv->mdio->irq) { - ret = -ENOMEM; - goto free_mdio; - } - - for (phy = 0; phy < PHY_MAX_ADDR; phy++) - priv->mdio->irq[phy] = PHY_POLL; - ret = mdiobus_register(priv->mdio); if (ret) { dev_err(&netdev->dev, "failed to register MDIO bus\n"); - goto free_mdio; + goto free; } ret = 
ethoc_mdio_probe(netdev); @@ -1250,8 +1240,6 @@ error2: netif_napi_del(&priv->napi); error: mdiobus_unregister(priv->mdio); -free_mdio: - kfree(priv->mdio->irq); mdiobus_free(priv->mdio); free: if (priv->clk) diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index c2e2ac6a0313..8f3f2cf0dcbf 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -71,7 +71,6 @@ struct ftgmac100 { struct napi_struct napi; struct mii_bus *mii_bus; - int phy_irq[PHY_MAX_ADDR]; struct phy_device *phydev; int old_speed; }; @@ -1188,7 +1187,6 @@ static int ftgmac100_probe(struct platform_device *pdev) struct net_device *netdev; struct ftgmac100 *priv; int err; - int i; if (!pdev) return -ENODEV; @@ -1257,10 +1255,6 @@ static int ftgmac100_probe(struct platform_device *pdev) priv->mii_bus->priv = netdev; priv->mii_bus->read = ftgmac100_mdiobus_read; priv->mii_bus->write = ftgmac100_mdiobus_write; - priv->mii_bus->irq = priv->phy_irq; - - for (i = 0; i < PHY_MAX_ADDR; i++) - priv->mii_bus->irq[i] = PHY_POLL; err = mdiobus_register(priv->mii_bus); if (err) { diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index ceabe21b3b2c..da255fb4f1d5 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1983,7 +1983,7 @@ static int fec_enet_mii_init(struct platform_device *pdev) struct net_device *ndev = platform_get_drvdata(pdev); struct fec_enet_private *fep = netdev_priv(ndev); struct device_node *node; - int err = -ENXIO, i; + int err = -ENXIO; u32 mii_speed, holdtime; /* @@ -2065,15 +2065,6 @@ static int fec_enet_mii_init(struct platform_device *pdev) fep->mii_bus->priv = fep; fep->mii_bus->parent = &pdev->dev; - fep->mii_bus->irq = kmalloc(sizeof(int) * PHY_MAX_ADDR, GFP_KERNEL); - if (!fep->mii_bus->irq) { - err = -ENOMEM; - goto err_out_free_mdiobus; - } - - for (i = 0; i < PHY_MAX_ADDR; i++) - fep->mii_bus->irq[i] = PHY_POLL; - node = of_get_child_by_name(pdev->dev.of_node, "mdio"); if (node) { err = of_mdiobus_register(fep->mii_bus, node); @@ -2083,7 +2074,7 @@ static int fec_enet_mii_init(struct platform_device *pdev) } if (err) - goto err_out_free_mdio_irq; + goto err_out_free_mdiobus; mii_cnt++; @@ -2093,8 +2084,6 @@ static int fec_enet_mii_init(struct platform_device *pdev) return 0; -err_out_free_mdio_irq: - kfree(fep->mii_bus->irq); err_out_free_mdiobus: mdiobus_free(fep->mii_bus); err_out: @@ -2105,7 +2094,6 @@ static void fec_enet_mii_remove(struct fec_enet_private *fep) { if (--mii_cnt == 0) { mdiobus_unregister(fep->mii_bus); - kfree(fep->mii_bus->irq); mdiobus_free(fep->mii_bus); } } diff --git a/drivers/net/ethernet/freescale/fec_mpc52xx_phy.c b/drivers/net/ethernet/freescale/fec_mpc52xx_phy.c index 1e647beaf989..b5497e308302 100644 --- a/drivers/net/ethernet/freescale/fec_mpc52xx_phy.c +++ b/drivers/net/ethernet/freescale/fec_mpc52xx_phy.c @@ -22,7 +22,6 @@ struct mpc52xx_fec_mdio_priv { struct mpc52xx_fec __iomem *regs; - int mdio_irqs[PHY_MAX_ADDR]; }; static int mpc52xx_fec_mdio_transfer(struct mii_bus *bus, int phy_id, @@ -83,9 +82,6 @@ static int mpc52xx_fec_mdio_probe(struct platform_device *of) bus->read = mpc52xx_fec_mdio_read; bus->write = mpc52xx_fec_mdio_write; - /* setup irqs */ - bus->irq = priv->mdio_irqs; - /* setup registers */ err = of_address_to_resource(np, 0, &res); if (err) diff --git a/drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c 
b/drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c index 68a428de0bc0..1f015edcca22 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c +++ b/drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c @@ -172,23 +172,16 @@ static int fs_enet_mdio_probe(struct platform_device *ofdev) goto out_free_bus; new_bus->phy_mask = ~0; - new_bus->irq = kmalloc(sizeof(int) * PHY_MAX_ADDR, GFP_KERNEL); - if (!new_bus->irq) { - ret = -ENOMEM; - goto out_unmap_regs; - } new_bus->parent = &ofdev->dev; platform_set_drvdata(ofdev, new_bus); ret = of_mdiobus_register(new_bus, ofdev->dev.of_node); if (ret) - goto out_free_irqs; + goto out_unmap_regs; return 0; -out_free_irqs: - kfree(new_bus->irq); out_unmap_regs: iounmap(bitbang->dir); out_free_bus: @@ -205,7 +198,6 @@ static int fs_enet_mdio_remove(struct platform_device *ofdev) struct bb_info *bitbang = bus->priv; mdiobus_unregister(bus); - kfree(bus->irq); free_mdio_bitbang(bus); iounmap(bitbang->dir); kfree(bitbang); diff --git a/drivers/net/ethernet/freescale/fs_enet/mii-fec.c b/drivers/net/ethernet/freescale/fs_enet/mii-fec.c index 2be383e6d258..a89267b94352 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mii-fec.c +++ b/drivers/net/ethernet/freescale/fs_enet/mii-fec.c @@ -166,23 +166,16 @@ static int fs_enet_mdio_probe(struct platform_device *ofdev) clrsetbits_be32(&fec->fecp->fec_mii_speed, 0x7E, fec->mii_speed); new_bus->phy_mask = ~0; - new_bus->irq = kmalloc(sizeof(int) * PHY_MAX_ADDR, GFP_KERNEL); - if (!new_bus->irq) { - ret = -ENOMEM; - goto out_unmap_regs; - } new_bus->parent = &ofdev->dev; platform_set_drvdata(ofdev, new_bus); ret = of_mdiobus_register(new_bus, ofdev->dev.of_node); if (ret) - goto out_free_irqs; + goto out_unmap_regs; return 0; -out_free_irqs: - kfree(new_bus->irq); out_unmap_regs: iounmap(fec->fecp); out_res: @@ -200,7 +193,6 @@ static int fs_enet_mdio_remove(struct platform_device *ofdev) struct fec_info *fec = bus->priv; mdiobus_unregister(bus); - kfree(bus->irq); iounmap(fec->fecp); kfree(fec); mdiobus_free(bus); diff --git a/drivers/net/ethernet/freescale/fsl_pq_mdio.c b/drivers/net/ethernet/freescale/fsl_pq_mdio.c index 40071dad1c57..622005abf859 100644 --- a/drivers/net/ethernet/freescale/fsl_pq_mdio.c +++ b/drivers/net/ethernet/freescale/fsl_pq_mdio.c @@ -69,7 +69,6 @@ struct fsl_pq_mdio { struct fsl_pq_mdio_priv { void __iomem *map; struct fsl_pq_mii __iomem *regs; - int irqs[PHY_MAX_ADDR]; }; /* @@ -401,7 +400,6 @@ static int fsl_pq_mdio_probe(struct platform_device *pdev) new_bus->read = &fsl_pq_mdio_read; new_bus->write = &fsl_pq_mdio_write; new_bus->reset = &fsl_pq_mdio_reset; - new_bus->irq = priv->irqs; err = of_address_to_resource(np, 0, &res); if (err < 0) { diff --git a/drivers/net/ethernet/hisilicon/hns_mdio.c b/drivers/net/ethernet/hisilicon/hns_mdio.c index 37491c85bc42..58c96c412fe8 100644 --- a/drivers/net/ethernet/hisilicon/hns_mdio.c +++ b/drivers/net/ethernet/hisilicon/hns_mdio.c @@ -463,11 +463,6 @@ static int hns_mdio_probe(struct platform_device *pdev) dev_warn(&pdev->dev, "no syscon hisilicon,peri-c-subctrl\n"); mdio_dev->subctrl_vbase = NULL; } - new_bus->irq = devm_kcalloc(&pdev->dev, PHY_MAX_ADDR, - sizeof(int), GFP_KERNEL); - if (!new_bus->irq) - return -ENOMEM; - new_bus->parent = &pdev->dev; platform_set_drvdata(pdev, new_bus); diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c index 86238a5eaddf..fb61f7f96bb4 100644 --- a/drivers/net/ethernet/lantiq_etop.c +++ b/drivers/net/ethernet/lantiq_etop.c @@ -433,18 +433,9 @@ 
ltq_etop_mdio_init(struct net_device *dev) priv->mii_bus->name = "ltq_mii"; snprintf(priv->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x", priv->pdev->name, priv->pdev->id); - priv->mii_bus->irq = kmalloc(sizeof(int) * PHY_MAX_ADDR, GFP_KERNEL); - if (!priv->mii_bus->irq) { - err = -ENOMEM; - goto err_out_free_mdiobus; - } - - for (i = 0; i < PHY_MAX_ADDR; ++i) - priv->mii_bus->irq[i] = PHY_POLL; - if (mdiobus_register(priv->mii_bus)) { err = -ENXIO; - goto err_out_free_mdio_irq; + goto err_out_free_mdiobus; } if (ltq_etop_mdio_probe(dev)) { @@ -455,8 +446,6 @@ ltq_etop_mdio_init(struct net_device *dev) err_out_unregister_bus: mdiobus_unregister(priv->mii_bus); -err_out_free_mdio_irq: - kfree(priv->mii_bus->irq); err_out_free_mdiobus: mdiobus_free(priv->mii_bus); err_out: @@ -470,7 +459,6 @@ ltq_etop_mdio_cleanup(struct net_device *dev) phy_disconnect(priv->phydev); mdiobus_unregister(priv->mii_bus); - kfree(priv->mii_bus->irq); mdiobus_free(priv->mii_bus); } diff --git a/drivers/net/ethernet/marvell/mvmdio.c b/drivers/net/ethernet/marvell/mvmdio.c index fc2fb25343f4..8982c882af1b 100644 --- a/drivers/net/ethernet/marvell/mvmdio.c +++ b/drivers/net/ethernet/marvell/mvmdio.c @@ -187,7 +187,7 @@ static int orion_mdio_probe(struct platform_device *pdev) struct resource *r; struct mii_bus *bus; struct orion_mdio_dev *dev; - int i, ret; + int ret; r = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!r) { @@ -207,14 +207,6 @@ static int orion_mdio_probe(struct platform_device *pdev) dev_name(&pdev->dev)); bus->parent = &pdev->dev; - bus->irq = devm_kmalloc_array(&pdev->dev, PHY_MAX_ADDR, sizeof(int), - GFP_KERNEL); - if (!bus->irq) - return -ENOMEM; - - for (i = 0; i < PHY_MAX_ADDR; i++) - bus->irq[i] = PHY_POLL; - dev = bus->priv; dev->regs = devm_ioremap(&pdev->dev, r->start, resource_size(r)); if (!dev->regs) { diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c index 024bc3675573..a9ce37f9bb2e 100644 --- a/drivers/net/ethernet/nxp/lpc_eth.c +++ b/drivers/net/ethernet/nxp/lpc_eth.c @@ -850,19 +850,10 @@ static int lpc_mii_init(struct netdata_local *pldat) pldat->mii_bus->priv = pldat; pldat->mii_bus->parent = &pldat->pdev->dev; - pldat->mii_bus->irq = kmalloc(sizeof(int) * PHY_MAX_ADDR, GFP_KERNEL); - if (!pldat->mii_bus->irq) { - err = -ENOMEM; - goto err_out_1; - } - - for (i = 0; i < PHY_MAX_ADDR; i++) - pldat->mii_bus->irq[i] = PHY_POLL; - platform_set_drvdata(pldat->pdev, pldat->mii_bus); if (mdiobus_register(pldat->mii_bus)) - goto err_out_free_mdio_irq; + goto err_out_unregister_bus; if (lpc_mii_probe(pldat->ndev) != 0) goto err_out_unregister_bus; @@ -871,8 +862,6 @@ static int lpc_mii_init(struct netdata_local *pldat) err_out_unregister_bus: mdiobus_unregister(pldat->mii_bus); -err_out_free_mdio_irq: - kfree(pldat->mii_bus->irq); err_out_1: mdiobus_free(pldat->mii_bus); err_out: diff --git a/drivers/net/ethernet/rdc/r6040.c b/drivers/net/ethernet/rdc/r6040.c index 174dea787caf..6b541e57c96a 100644 --- a/drivers/net/ethernet/rdc/r6040.c +++ b/drivers/net/ethernet/rdc/r6040.c @@ -1075,7 +1075,6 @@ static int r6040_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) static int card_idx = -1; int bar = 0; u16 *adrp; - int i; pr_info("%s\n", version); @@ -1187,19 +1186,11 @@ static int r6040_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) lp->mii_bus->name = "r6040_eth_mii"; snprintf(lp->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x", dev_name(&pdev->dev), card_idx); - lp->mii_bus->irq = kmalloc_array(PHY_MAX_ADDR, sizeof(int), GFP_KERNEL); - 
if (!lp->mii_bus->irq) { - err = -ENOMEM; - goto err_out_mdio; - } - - for (i = 0; i < PHY_MAX_ADDR; i++) - lp->mii_bus->irq[i] = PHY_POLL; err = mdiobus_register(lp->mii_bus); if (err) { dev_err(&pdev->dev, "failed to register MII bus\n"); - goto err_out_mdio_irq; + goto err_out_mdio; } err = r6040_mii_probe(dev); @@ -1218,8 +1209,6 @@ static int r6040_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) err_out_mdio_unregister: mdiobus_unregister(lp->mii_bus); -err_out_mdio_irq: - kfree(lp->mii_bus->irq); err_out_mdio: mdiobus_free(lp->mii_bus); err_out_unmap: @@ -1242,7 +1231,6 @@ static void r6040_remove_one(struct pci_dev *pdev) unregister_netdev(dev); mdiobus_unregister(lp->mii_bus); - kfree(lp->mii_bus->irq); mdiobus_free(lp->mii_bus); netif_napi_del(&lp->napi); pci_iounmap(pdev, lp->base); diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 94581be64d65..c5ec57ce3ddc 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -2859,7 +2859,7 @@ static int sh_mdio_release(struct sh_eth_private *mdp) static int sh_mdio_init(struct sh_eth_private *mdp, struct sh_eth_plat_data *pd) { - int ret, i; + int ret; struct bb_info *bitbang; struct platform_device *pdev = mdp->pdev; struct device *dev = &mdp->pdev->dev; @@ -2885,20 +2885,10 @@ static int sh_mdio_init(struct sh_eth_private *mdp, snprintf(mdp->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x", pdev->name, pdev->id); - /* PHY IRQ */ - mdp->mii_bus->irq = devm_kmalloc_array(dev, PHY_MAX_ADDR, sizeof(int), - GFP_KERNEL); - if (!mdp->mii_bus->irq) { - ret = -ENOMEM; - goto out_free_bus; - } - /* register MDIO bus */ if (dev->of_node) { ret = of_mdiobus_register(mdp->mii_bus, dev->of_node); } else { - for (i = 0; i < PHY_MAX_ADDR; i++) - mdp->mii_bus->irq[i] = PHY_POLL; if (pd->phy_irq > 0) mdp->mii_bus->irq[pd->phy] = pd->phy_irq; diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index 139b99b04099..c74e78dd989a 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -1059,7 +1059,7 @@ static int smsc911x_mii_init(struct platform_device *pdev, struct net_device *dev) { struct smsc911x_data *pdata = netdev_priv(dev); - int err = -ENXIO, i; + int err = -ENXIO; pdata->mii_bus = mdiobus_alloc(); if (!pdata->mii_bus) { @@ -1073,9 +1073,7 @@ static int smsc911x_mii_init(struct platform_device *pdev, pdata->mii_bus->priv = pdata; pdata->mii_bus->read = smsc911x_mii_read; pdata->mii_bus->write = smsc911x_mii_write; - pdata->mii_bus->irq = pdata->phy_irq; - for (i = 0; i < PHY_MAX_ADDR; ++i) - pdata->mii_bus->irq[i] = PHY_POLL; + memcpy(pdata->mii_bus->irq, pdata->phy_irq, sizeof(pdata->mii_bus)); pdata->mii_bus->parent = &pdev->dev; diff --git a/drivers/net/ethernet/smsc/smsc9420.c b/drivers/net/ethernet/smsc/smsc9420.c index fa8893a804f7..59bf4c353d50 100644 --- a/drivers/net/ethernet/smsc/smsc9420.c +++ b/drivers/net/ethernet/smsc/smsc9420.c @@ -78,7 +78,6 @@ struct smsc9420_pdata { struct phy_device *phy_dev; struct mii_bus *mii_bus; - int phy_irq[PHY_MAX_ADDR]; int last_duplex; int last_carrier; }; @@ -1188,7 +1187,7 @@ static int smsc9420_mii_probe(struct net_device *dev) static int smsc9420_mii_init(struct net_device *dev) { struct smsc9420_pdata *pd = netdev_priv(dev); - int err = -ENXIO, i; + int err = -ENXIO; pd->mii_bus = mdiobus_alloc(); if (!pd->mii_bus) { @@ -1201,9 +1200,6 @@ static int smsc9420_mii_init(struct net_device *dev) pd->mii_bus->priv = pd; pd->mii_bus->read 
= smsc9420_mii_read; pd->mii_bus->write = smsc9420_mii_write; - pd->mii_bus->irq = pd->phy_irq; - for (i = 0; i < PHY_MAX_ADDR; ++i) - pd->mii_bus->irq[i] = PHY_POLL; /* Mask all PHYs except ID 1 (internal) */ pd->mii_bus->phy_mask = ~(1 << 1); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index 05ba84118f37..f0990eb9460f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -196,7 +196,6 @@ int stmmac_mdio_register(struct net_device *ndev) { int err = 0; struct mii_bus *new_bus; - int *irqlist; struct stmmac_priv *priv = netdev_priv(ndev); struct stmmac_mdio_bus_data *mdio_bus_data = priv->plat->mdio_bus_data; int addr, found; @@ -227,13 +226,8 @@ int stmmac_mdio_register(struct net_device *ndev) if (new_bus == NULL) return -ENOMEM; - if (mdio_bus_data->irqs) { - irqlist = mdio_bus_data->irqs; - } else { - for (addr = 0; addr < PHY_MAX_ADDR; addr++) - priv->mii_irq[addr] = PHY_POLL; - irqlist = priv->mii_irq; - } + if (mdio_bus_data->irqs) + memcpy(new_bus->irq, mdio_bus_data, sizeof(new_bus->irq)); #ifdef CONFIG_OF if (priv->device->of_node) @@ -247,7 +241,6 @@ int stmmac_mdio_register(struct net_device *ndev) snprintf(new_bus->id, MII_BUS_ID_SIZE, "%s-%x", new_bus->name, priv->plat->bus_id); new_bus->priv = ndev; - new_bus->irq = irqlist; new_bus->phy_mask = mdio_bus_data->phy_mask; new_bus->parent = priv->device; @@ -271,7 +264,8 @@ int stmmac_mdio_register(struct net_device *ndev) */ if ((mdio_bus_data->irqs == NULL) && (mdio_bus_data->probed_phy_irq > 0)) { - irqlist[addr] = mdio_bus_data->probed_phy_irq; + new_bus->irq[addr] = + mdio_bus_data->probed_phy_irq; phydev->irq = mdio_bus_data->probed_phy_irq; } diff --git a/drivers/net/ethernet/synopsys/dwc_eth_qos.c b/drivers/net/ethernet/synopsys/dwc_eth_qos.c index b25ee370254a..6dbb681912f2 100644 --- a/drivers/net/ethernet/synopsys/dwc_eth_qos.c +++ b/drivers/net/ethernet/synopsys/dwc_eth_qos.c @@ -1220,7 +1220,7 @@ static void dwceqos_enable_mmc_interrupt(struct net_local *lp) static int dwceqos_mii_init(struct net_local *lp) { - int ret = -ENXIO, i; + int ret = -ENXIO; struct resource res; struct device_node *mdionode; @@ -1241,24 +1241,14 @@ static int dwceqos_mii_init(struct net_local *lp) lp->mii_bus->priv = lp; lp->mii_bus->parent = &lp->ndev->dev; - lp->mii_bus->irq = kmalloc(sizeof(int) * PHY_MAX_ADDR, GFP_KERNEL); - if (!lp->mii_bus->irq) { - ret = -ENOMEM; - goto err_out_free_mdiobus; - } - - for (i = 0; i < PHY_MAX_ADDR; i++) - lp->mii_bus->irq[i] = PHY_POLL; of_address_to_resource(lp->pdev->dev.of_node, 0, &res); snprintf(lp->mii_bus->id, MII_BUS_ID_SIZE, "%.8llx", (unsigned long long)res.start); if (of_mdiobus_register(lp->mii_bus, mdionode)) - goto err_out_free_mdio_irq; + goto err_out_free_mdiobus; return 0; -err_out_free_mdio_irq: - kfree(lp->mii_bus->irq); err_out_free_mdiobus: mdiobus_free(lp->mii_bus); err_out: @@ -2977,7 +2967,6 @@ static int dwceqos_remove(struct platform_device *pdev) if (lp->phy_dev) phy_disconnect(lp->phy_dev); mdiobus_unregister(lp->mii_bus); - kfree(lp->mii_bus->irq); mdiobus_free(lp->mii_bus); unregister_netdev(ndev); diff --git a/drivers/net/ethernet/ti/cpmac.c b/drivers/net/ethernet/ti/cpmac.c index 77d26fe286c0..0b483301767d 100644 --- a/drivers/net/ethernet/ti/cpmac.c +++ b/drivers/net/ethernet/ti/cpmac.c @@ -316,8 +316,6 @@ static int cpmac_mdio_reset(struct mii_bus *bus) return 0; } -static int mii_irqs[PHY_MAX_ADDR] = { PHY_POLL, }; - static struct 
mii_bus *cpmac_mii; static void cpmac_set_multicast_list(struct net_device *dev) @@ -1226,7 +1224,6 @@ int cpmac_init(void) cpmac_mii->read = cpmac_mdio_read; cpmac_mii->write = cpmac_mdio_write; cpmac_mii->reset = cpmac_mdio_reset; - cpmac_mii->irq = mii_irqs; cpmac_mii->priv = ioremap(AR7_REGS_MDIO, 256); diff --git a/drivers/net/ethernet/toshiba/tc35815.c b/drivers/net/ethernet/toshiba/tc35815.c index 8fd5e0ba718c..fed5e3dfbc8f 100644 --- a/drivers/net/ethernet/toshiba/tc35815.c +++ b/drivers/net/ethernet/toshiba/tc35815.c @@ -682,18 +682,9 @@ static int tc_mii_init(struct net_device *dev) (lp->pci_dev->bus->number << 8) | lp->pci_dev->devfn); lp->mii_bus->priv = dev; lp->mii_bus->parent = &lp->pci_dev->dev; - lp->mii_bus->irq = kmalloc(sizeof(int) * PHY_MAX_ADDR, GFP_KERNEL); - if (!lp->mii_bus->irq) { - err = -ENOMEM; - goto err_out_free_mii_bus; - } - - for (i = 0; i < PHY_MAX_ADDR; i++) - lp->mii_bus->irq[i] = PHY_POLL; - err = mdiobus_register(lp->mii_bus); if (err) - goto err_out_free_mdio_irq; + goto err_out_free_mii_bus; err = tc_mii_probe(dev); if (err) goto err_out_unregister_bus; @@ -701,8 +692,6 @@ static int tc_mii_init(struct net_device *dev) err_out_unregister_bus: mdiobus_unregister(lp->mii_bus); -err_out_free_mdio_irq: - kfree(lp->mii_bus->irq); err_out_free_mii_bus: mdiobus_free(lp->mii_bus); err_out: @@ -880,7 +869,6 @@ static void tc35815_remove_one(struct pci_dev *pdev) phy_disconnect(lp->phy_dev); mdiobus_unregister(lp->mii_bus); - kfree(lp->mii_bus->irq); mdiobus_free(lp->mii_bus); unregister_netdev(dev); free_netdev(dev); diff --git a/drivers/net/ethernet/xilinx/ll_temac.h b/drivers/net/ethernet/xilinx/ll_temac.h index 522abe2ff25a..902457e43628 100644 --- a/drivers/net/ethernet/xilinx/ll_temac.h +++ b/drivers/net/ethernet/xilinx/ll_temac.h @@ -337,7 +337,6 @@ struct temac_local { /* MDIO bus data */ struct mii_bus *mii_bus; /* MII bus reference */ - int mdio_irqs[PHY_MAX_ADDR]; /* IRQs table for MDIO bus */ /* IO registers, dma functions and IRQs */ void __iomem *regs; diff --git a/drivers/net/ethernet/xilinx/ll_temac_mdio.c b/drivers/net/ethernet/xilinx/ll_temac_mdio.c index 415de1eaf641..7714aff78b7d 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_mdio.c +++ b/drivers/net/ethernet/xilinx/ll_temac_mdio.c @@ -92,7 +92,6 @@ int temac_mdio_setup(struct temac_local *lp, struct device_node *np) bus->read = temac_mdio_read; bus->write = temac_mdio_write; bus->parent = lp->dev; - bus->irq = lp->mdio_irqs; /* preallocated IRQ table */ lp->mii_bus = bus; @@ -114,7 +113,6 @@ int temac_mdio_setup(struct temac_local *lp, struct device_node *np) void temac_mdio_teardown(struct temac_local *lp) { mdiobus_unregister(lp->mii_bus); - kfree(lp->mii_bus->irq); mdiobus_free(lp->mii_bus); lp->mii_bus = NULL; } diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h index 7cb9abac95c8..9ead4e269409 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet.h +++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h @@ -385,7 +385,6 @@ struct axidma_bd { * @phy_dev: Pointer to PHY device structure attached to the axienet_local * @phy_node: Pointer to device node structure * @mii_bus: Pointer to MII bus structure - * @mdio_irqs: IRQs table for MDIO bus required in mii_bus structure * @regs: Base address for the axienet_local device address space * @dma_regs: Base address for the axidma device address space * @dma_err_tasklet: Tasklet structure to process Axi DMA errors @@ -426,7 +425,6 @@ struct axienet_local { /* MDIO bus data */ struct 
mii_bus *mii_bus; /* MII bus reference */ - int mdio_irqs[PHY_MAX_ADDR]; /* IRQs table for MDIO bus */ /* IO registers, dma functions and IRQs */ void __iomem *regs; diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c b/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c index 507bbb0355c2..63307ea97846 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c @@ -212,7 +212,6 @@ issue: bus->read = axienet_mdio_read; bus->write = axienet_mdio_write; bus->parent = lp->dev; - bus->irq = lp->mdio_irqs; /* preallocated IRQ table */ lp->mii_bus = bus; ret = of_mdiobus_register(bus, np1); @@ -232,7 +231,6 @@ issue: void axienet_mdio_teardown(struct axienet_local *lp) { mdiobus_unregister(lp->mii_bus); - kfree(lp->mii_bus->irq); mdiobus_free(lp->mii_bus); lp->mii_bus = NULL; } diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c index cf468c87ce57..d1a0167dff84 100644 --- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c +++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c @@ -114,7 +114,6 @@ * @phy_dev: pointer to the PHY device * @phy_node: pointer to the PHY device node * @mii_bus: pointer to the MII bus - * @mdio_irqs: IRQs table for MDIO bus * @last_link: last link status * @has_mdio: indicates whether MDIO is included in the HW */ @@ -135,7 +134,6 @@ struct net_local { struct device_node *phy_node; struct mii_bus *mii_bus; - int mdio_irqs[PHY_MAX_ADDR]; int last_link; bool has_mdio; @@ -852,7 +850,6 @@ static int xemaclite_mdio_setup(struct net_local *lp, struct device *dev) bus->read = xemaclite_mdio_read; bus->write = xemaclite_mdio_write; bus->parent = dev; - bus->irq = lp->mdio_irqs; /* preallocated IRQ table */ lp->mii_bus = bus; @@ -1196,7 +1193,6 @@ static int xemaclite_of_remove(struct platform_device *of_dev) /* Un-register the mii_bus, if configured */ if (lp->has_mdio) { mdiobus_unregister(lp->mii_bus); - kfree(lp->mii_bus->irq); mdiobus_free(lp->mii_bus); lp->mii_bus = NULL; } diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c index e23bf5b90e17..0a1cde6803b0 100644 --- a/drivers/net/phy/fixed_phy.c +++ b/drivers/net/phy/fixed_phy.c @@ -27,7 +27,6 @@ #define MII_REGS_NUM 29 struct fixed_mdio_bus { - int irqs[PHY_MAX_ADDR]; struct mii_bus *mii_bus; struct list_head phys; }; @@ -256,7 +255,7 @@ int fixed_phy_add(unsigned int irq, int phy_addr, memset(fp->regs, 0xFF, sizeof(fp->regs[0]) * MII_REGS_NUM); - fmb->irqs[phy_addr] = irq; + fmb->mii_bus->irq[phy_addr] = irq; fp->addr = phy_addr; fp->status = *status; @@ -395,7 +394,6 @@ static int __init fixed_mdio_bus_init(void) fmb->mii_bus->parent = &pdev->dev; fmb->mii_bus->read = &fixed_mdio_read; fmb->mii_bus->write = &fixed_mdio_write; - fmb->mii_bus->irq = fmb->irqs; ret = mdiobus_register(fmb->mii_bus); if (ret) diff --git a/drivers/net/phy/mdio-bcm-unimac.c b/drivers/net/phy/mdio-bcm-unimac.c index 4bde5e728fe0..8c73b2e771dd 100644 --- a/drivers/net/phy/mdio-bcm-unimac.c +++ b/drivers/net/phy/mdio-bcm-unimac.c @@ -200,16 +200,10 @@ static int unimac_mdio_probe(struct platform_device *pdev) bus->reset = unimac_mdio_reset; snprintf(bus->id, MII_BUS_ID_SIZE, "%s", pdev->name); - bus->irq = kcalloc(PHY_MAX_ADDR, sizeof(int), GFP_KERNEL); - if (!bus->irq) { - ret = -ENOMEM; - goto out_mdio_free; - } - ret = of_mdiobus_register(bus, np); if (ret) { dev_err(&pdev->dev, "MDIO bus registration failed\n"); - goto out_mdio_irq; + goto out_mdio_free; } platform_set_drvdata(pdev, priv); @@ -218,8 +212,6 
@@ static int unimac_mdio_probe(struct platform_device *pdev) return 0; -out_mdio_irq: - kfree(bus->irq); out_mdio_free: mdiobus_free(bus); return ret; @@ -230,7 +222,6 @@ static int unimac_mdio_remove(struct platform_device *pdev) struct unimac_mdio_priv *priv = platform_get_drvdata(pdev); mdiobus_unregister(priv->mii_bus); - kfree(priv->mii_bus->irq); mdiobus_free(priv->mii_bus); return 0; diff --git a/drivers/net/phy/mdio-gpio.c b/drivers/net/phy/mdio-gpio.c index 95f51d7267b3..27ab63064f95 100644 --- a/drivers/net/phy/mdio-gpio.c +++ b/drivers/net/phy/mdio-gpio.c @@ -159,7 +159,7 @@ static struct mii_bus *mdio_gpio_bus_init(struct device *dev, new_bus->phy_mask = pdata->phy_mask; new_bus->phy_ignore_ta_mask = pdata->phy_ignore_ta_mask; - new_bus->irq = pdata->irqs; + memcpy(new_bus->irq, pdata->irqs, sizeof(new_bus->irq)); new_bus->parent = dev; if (new_bus->phy_mask == ~0) diff --git a/drivers/net/phy/mdio-moxart.c b/drivers/net/phy/mdio-moxart.c index f1fc51f655d9..5bb56d126693 100644 --- a/drivers/net/phy/mdio-moxart.c +++ b/drivers/net/phy/mdio-moxart.c @@ -130,13 +130,6 @@ static int moxart_mdio_probe(struct platform_device *pdev) snprintf(bus->id, MII_BUS_ID_SIZE, "%s-%d-mii", pdev->name, pdev->id); bus->parent = &pdev->dev; - bus->irq = devm_kzalloc(&pdev->dev, sizeof(int) * PHY_MAX_ADDR, - GFP_KERNEL); - if (!bus->irq) { - ret = -ENOMEM; - goto err_out_free_mdiobus; - } - /* Setting PHY_IGNORE_INTERRUPT here even if it has no effect, * of_mdiobus_register() sets these PHY_POLL. * Ideally, the interrupt from MAC controller could be used to diff --git a/drivers/net/phy/mdio-mux.c b/drivers/net/phy/mdio-mux.c index 7f8e7662e28c..308ade0eb1b6 100644 --- a/drivers/net/phy/mdio-mux.c +++ b/drivers/net/phy/mdio-mux.c @@ -34,7 +34,6 @@ struct mdio_mux_child_bus { struct mdio_mux_parent_bus *parent; struct mdio_mux_child_bus *next; int bus_number; - int phy_irq[PHY_MAX_ADDR]; }; /* @@ -157,7 +156,7 @@ int mdio_mux_init(struct device *dev, break; } cb->mii_bus->priv = cb; - cb->mii_bus->irq = cb->phy_irq; + cb->mii_bus->name = "mdio_mux"; snprintf(cb->mii_bus->id, MII_BUS_ID_SIZE, "%x.%x", pb->parent_id, v); diff --git a/drivers/net/phy/mdio-octeon.c b/drivers/net/phy/mdio-octeon.c index 0d5da1312dd3..47d4f2f263d1 100644 --- a/drivers/net/phy/mdio-octeon.c +++ b/drivers/net/phy/mdio-octeon.c @@ -113,7 +113,6 @@ struct octeon_mdiobus { resource_size_t mdio_phys; resource_size_t regsize; enum octeon_mdiobus_mode mode; - int phy_irq[PHY_MAX_ADDR]; }; #ifdef CONFIG_CAVIUM_OCTEON_SOC @@ -306,7 +305,6 @@ static int octeon_mdiobus_probe(struct platform_device *pdev) oct_mdio_writeq(smi_en.u64, bus->register_base + SMI_EN); bus->mii_bus->priv = bus; - bus->mii_bus->irq = bus->phy_irq; bus->mii_bus->name = "mdio-octeon"; snprintf(bus->mii_bus->id, MII_BUS_ID_SIZE, "%llx", bus->register_base); bus->mii_bus->parent = &pdev->dev; diff --git a/drivers/net/phy/mdio-sun4i.c b/drivers/net/phy/mdio-sun4i.c index 15bc7f9ea224..f70522c35163 100644 --- a/drivers/net/phy/mdio-sun4i.c +++ b/drivers/net/phy/mdio-sun4i.c @@ -96,7 +96,7 @@ static int sun4i_mdio_probe(struct platform_device *pdev) struct mii_bus *bus; struct sun4i_mdio_data *data; struct resource *res; - int ret, i; + int ret; bus = mdiobus_alloc_size(sizeof(*data)); if (!bus) @@ -108,16 +108,6 @@ static int sun4i_mdio_probe(struct platform_device *pdev) snprintf(bus->id, MII_BUS_ID_SIZE, "%s-mii", dev_name(&pdev->dev)); bus->parent = &pdev->dev; - bus->irq = devm_kzalloc(&pdev->dev, sizeof(int) * PHY_MAX_ADDR, - GFP_KERNEL); - if (!bus->irq) { - 
ret = -ENOMEM; - goto err_out_free_mdiobus; - } - - for (i = 0; i < PHY_MAX_ADDR; i++) - bus->irq[i] = PHY_POLL; - data = bus->priv; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); data->membase = devm_ioremap_resource(&pdev->dev, res); diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 88cb4592b6fb..05381d0f559c 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -51,6 +51,7 @@ struct mii_bus *mdiobus_alloc_size(size_t size) struct mii_bus *bus; size_t aligned_size = ALIGN(sizeof(*bus), NETDEV_ALIGN); size_t alloc_size; + int i; /* If we alloc extra space, it should be aligned */ if (size) @@ -65,6 +66,10 @@ struct mii_bus *mdiobus_alloc_size(size_t size) bus->priv = (void *)bus + aligned_size; } + /* Initialise the interrupts to polling */ + for (i = 0; i < PHY_MAX_ADDR; i++) + bus->irq[i] = PHY_POLL; + return bus; } EXPORT_SYMBOL(mdiobus_alloc_size); diff --git a/drivers/net/usb/ax88172a.c b/drivers/net/usb/ax88172a.c index 5f18fcb8dcc7..224e7d82de6d 100644 --- a/drivers/net/usb/ax88172a.c +++ b/drivers/net/usb/ax88172a.c @@ -98,7 +98,7 @@ static void ax88172a_status(struct usbnet *dev, struct urb *urb) static int ax88172a_init_mdio(struct usbnet *dev) { struct ax88172a_private *priv = dev->driver_priv; - int ret, i; + int ret; priv->mdio = mdiobus_alloc(); if (!priv->mdio) { @@ -114,25 +114,15 @@ static int ax88172a_init_mdio(struct usbnet *dev) snprintf(priv->mdio->id, MII_BUS_ID_SIZE, "usb-%03d:%03d", dev->udev->bus->busnum, dev->udev->devnum); - priv->mdio->irq = kzalloc(sizeof(int) * PHY_MAX_ADDR, GFP_KERNEL); - if (!priv->mdio->irq) { - ret = -ENOMEM; - goto mfree; - } - for (i = 0; i < PHY_MAX_ADDR; i++) - priv->mdio->irq[i] = PHY_POLL; - ret = mdiobus_register(priv->mdio); if (ret) { netdev_err(dev->net, "Could not register MDIO bus\n"); - goto ifree; + goto mfree; } netdev_info(dev->net, "registered mdio bus %s\n", priv->mdio->id); return 0; -ifree: - kfree(priv->mdio->irq); mfree: mdiobus_free(priv->mdio); return ret; diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 226668ead0d8..1662b7b144a8 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -1458,12 +1458,6 @@ static int lan78xx_mdio_init(struct lan78xx_net *dev) snprintf(dev->mdiobus->id, MII_BUS_ID_SIZE, "usb-%03d:%03d", dev->udev->bus->busnum, dev->udev->devnum); - dev->mdiobus->irq = kzalloc(sizeof(int) * PHY_MAX_ADDR, GFP_KERNEL); - if (!dev->mdiobus->irq) { - ret = -ENOMEM; - goto exit1; - } - /* handle our own interrupt */ for (i = 0; i < PHY_MAX_ADDR; i++) dev->mdiobus->irq[i] = PHY_IGNORE_INTERRUPT; @@ -1479,13 +1473,11 @@ static int lan78xx_mdio_init(struct lan78xx_net *dev) ret = mdiobus_register(dev->mdiobus); if (ret) { netdev_err(dev->net, "can't register MDIO bus\n"); - goto exit2; + goto exit1; } netdev_dbg(dev->net, "registered mdiobus bus %s\n", dev->mdiobus->id); return 0; -exit2: - kfree(dev->mdiobus->irq); exit1: mdiobus_free(dev->mdiobus); return ret; @@ -1494,7 +1486,6 @@ exit1: static void lan78xx_remove_mdio(struct lan78xx_net *dev) { mdiobus_unregister(dev->mdiobus); - kfree(dev->mdiobus->irq); mdiobus_free(dev->mdiobus); } diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c index a87a868fed64..2f88ff4654da 100644 --- a/drivers/of/of_mdio.c +++ b/drivers/of/of_mdio.c @@ -127,17 +127,12 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) struct device_node *child; const __be32 *paddr; bool scanphys = false; - int addr, rc, i; + int addr, rc; /* Mask out all PHYs from auto 
probing. Instead the PHYs listed in * the device tree are populated after the bus has been registered */ mdio->phy_mask = ~0; - /* Clear all the IRQ properties */ - if (mdio->irq) - for (i=0; iirq[i] = PHY_POLL; - mdio->dev.of_node = np; /* Register the MDIO bus */ diff --git a/include/linux/phy.h b/include/linux/phy.h index ecbf6382ba29..a5473c9e19de 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -189,10 +189,10 @@ struct mii_bus { u32 phy_ignore_ta_mask; /* - * Pointer to an array of interrupts, each PHY's - * interrupt at the index matching its address + * An array of interrupts, each PHY's interrupt at the index + * matching its address */ - int *irq; + int irq[PHY_MAX_ADDR]; }; #define to_mii_bus(d) container_of(d, struct mii_bus, dev) -- cgit v1.2.3-71-gd317 From e5a03bfd873c29eb786655ef2e95e53ed242b404 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 6 Jan 2016 20:11:16 +0100 Subject: phy: Add an mdio_device structure Not all devices attached to an MDIO bus are phys. So add an mdio_device structure to represent the generic parts of an mdio device, and place this structure into the phy_device. Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/ethernet/agere/et131x.c | 30 ++++++------ drivers/net/ethernet/altera/altera_tse_main.c | 2 +- drivers/net/ethernet/broadcom/b44.c | 2 +- drivers/net/ethernet/broadcom/genet/bcmmii.c | 2 +- drivers/net/ethernet/broadcom/sb1250-mac.c | 4 +- drivers/net/ethernet/freescale/fman/fman_dtsec.c | 6 +-- drivers/net/ethernet/freescale/fman/fman_memac.c | 6 +-- drivers/net/ethernet/freescale/fs_enet/mac-fec.c | 2 +- drivers/net/ethernet/freescale/gianfar.c | 4 +- drivers/net/ethernet/freescale/ucc_geth.c | 4 +- drivers/net/ethernet/hisilicon/hns/hns_ethtool.c | 2 +- drivers/net/ethernet/marvell/mv643xx_eth.c | 2 +- drivers/net/ethernet/marvell/mvneta.c | 2 +- drivers/net/ethernet/smsc/smsc911x.c | 11 +++-- drivers/net/ethernet/smsc/smsc9420.c | 3 +- drivers/net/ethernet/ti/cpsw.c | 3 +- drivers/net/ethernet/ti/davinci_mdio.c | 2 +- drivers/net/ethernet/xilinx/xilinx_emaclite.c | 2 +- drivers/net/phy/at803x.c | 2 +- drivers/net/phy/bcm87xx.c | 4 +- drivers/net/phy/dp83640.c | 22 +++++---- drivers/net/phy/dp83867.c | 4 +- drivers/net/phy/fixed_phy.c | 10 ++-- drivers/net/phy/icplus.c | 18 +++---- drivers/net/phy/marvell.c | 7 +-- drivers/net/phy/mdio_bus.c | 12 ++--- drivers/net/phy/micrel.c | 12 ++--- drivers/net/phy/microchip.c | 4 +- drivers/net/phy/phy.c | 25 +++++----- drivers/net/phy/phy_device.c | 62 ++++++++++++------------ drivers/net/phy/smsc.c | 2 +- drivers/of/of_mdio.c | 6 +-- include/linux/mdio.h | 9 ++++ include/linux/phy.h | 26 +++++----- net/dsa/dsa.c | 2 +- 35 files changed, 165 insertions(+), 151 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/agere/et131x.c b/drivers/net/ethernet/agere/et131x.c index f29d45eea1d9..3f3bcbea15bd 100644 --- a/drivers/net/ethernet/agere/et131x.c +++ b/drivers/net/ethernet/agere/et131x.c @@ -1235,7 +1235,7 @@ static int et131x_mii_read(struct et131x_adapter *adapter, u8 reg, u16 *value) if (!phydev) return -EIO; - return et131x_phy_mii_read(adapter, phydev->addr, reg, value); + return et131x_phy_mii_read(adapter, phydev->mdio.addr, reg, value); } static int et131x_mii_write(struct et131x_adapter *adapter, u8 addr, u8 reg, @@ -1462,7 +1462,7 @@ static void et1310_phy_power_switch(struct et131x_adapter *adapter, bool down) data &= ~BMCR_PDOWN; if (down) data |= BMCR_PDOWN; - et131x_mii_write(adapter, phydev->addr, MII_BMCR, data); + 
et131x_mii_write(adapter, phydev->mdio.addr, MII_BMCR, data); } /* et131x_xcvr_init - Init the phy if we are setting it into force mode */ @@ -1490,7 +1490,7 @@ static void et131x_xcvr_init(struct et131x_adapter *adapter) else lcr2 |= (LED_VAL_LINKON << LED_TXRX_SHIFT); - et131x_mii_write(adapter, phydev->addr, PHY_LED_2, lcr2); + et131x_mii_write(adapter, phydev->mdio.addr, PHY_LED_2, lcr2); } } @@ -3192,14 +3192,14 @@ static void et131x_adjust_link(struct net_device *netdev) et131x_mii_read(adapter, PHY_MPHY_CONTROL_REG, ®ister18); - et131x_mii_write(adapter, phydev->addr, + et131x_mii_write(adapter, phydev->mdio.addr, PHY_MPHY_CONTROL_REG, register18 | 0x4); - et131x_mii_write(adapter, phydev->addr, PHY_INDEX_REG, - register18 | 0x8402); - et131x_mii_write(adapter, phydev->addr, PHY_DATA_REG, - register18 | 511); - et131x_mii_write(adapter, phydev->addr, + et131x_mii_write(adapter, phydev->mdio.addr, + PHY_INDEX_REG, register18 | 0x8402); + et131x_mii_write(adapter, phydev->mdio.addr, + PHY_DATA_REG, register18 | 511); + et131x_mii_write(adapter, phydev->mdio.addr, PHY_MPHY_CONTROL_REG, register18); } @@ -3212,8 +3212,8 @@ static void et131x_adjust_link(struct net_device *netdev) et131x_mii_read(adapter, PHY_CONFIG, ®); reg &= ~ET_PHY_CONFIG_TX_FIFO_DEPTH; reg |= ET_PHY_CONFIG_FIFO_DEPTH_32; - et131x_mii_write(adapter, phydev->addr, PHY_CONFIG, - reg); + et131x_mii_write(adapter, phydev->mdio.addr, + PHY_CONFIG, reg); } et131x_set_rx_dma_timer(adapter); @@ -3226,14 +3226,14 @@ static void et131x_adjust_link(struct net_device *netdev) et131x_mii_read(adapter, PHY_MPHY_CONTROL_REG, ®ister18); - et131x_mii_write(adapter, phydev->addr, + et131x_mii_write(adapter, phydev->mdio.addr, PHY_MPHY_CONTROL_REG, register18 | 0x4); - et131x_mii_write(adapter, phydev->addr, + et131x_mii_write(adapter, phydev->mdio.addr, PHY_INDEX_REG, register18 | 0x8402); - et131x_mii_write(adapter, phydev->addr, + et131x_mii_write(adapter, phydev->mdio.addr, PHY_DATA_REG, register18 | 511); - et131x_mii_write(adapter, phydev->addr, + et131x_mii_write(adapter, phydev->mdio.addr, PHY_MPHY_CONTROL_REG, register18); } diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c index 10d51e8aefe0..17472851674f 100644 --- a/drivers/net/ethernet/altera/altera_tse_main.c +++ b/drivers/net/ethernet/altera/altera_tse_main.c @@ -844,7 +844,7 @@ static int init_phy(struct net_device *dev) } netdev_dbg(dev, "attached to PHY %d UID 0x%08x Link = %d\n", - phydev->addr, phydev->phy_id, phydev->link); + phydev->mdio.addr, phydev->phy_id, phydev->link); priv->phydev = phydev; return 0; diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index 4d08bc02c7a8..843a4a5864fc 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -2305,7 +2305,7 @@ static int b44_register_phy_one(struct b44 *bp) bp->phydev = phydev; bp->old_link = 0; - bp->phy_addr = phydev->addr; + bp->phy_addr = phydev->mdio.addr; phy_attached_info(phydev); diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index 4523acd8c1c2..633b59db813a 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -401,7 +401,7 @@ int bcmgenet_mii_probe(struct net_device *dev) * Ethernet MAC ISRs */ if (priv->internal_phy) - priv->mii_bus->irq[phydev->addr] = PHY_IGNORE_INTERRUPT; + priv->mii_bus->irq[phydev->mdio.addr] = PHY_IGNORE_INTERRUPT; return 0; } diff 
--git a/drivers/net/ethernet/broadcom/sb1250-mac.c b/drivers/net/ethernet/broadcom/sb1250-mac.c index 68a363708d27..768c18da510c 100644 --- a/drivers/net/ethernet/broadcom/sb1250-mac.c +++ b/drivers/net/ethernet/broadcom/sb1250-mac.c @@ -2366,8 +2366,8 @@ static int sbmac_mii_probe(struct net_device *dev) return -ENXIO; } - phy_dev = phy_connect(dev, dev_name(&phy_dev->dev), &sbmac_mii_poll, - PHY_INTERFACE_MODE_GMII); + phy_dev = phy_connect(dev, dev_name(&phy_dev->mdio.dev), + &sbmac_mii_poll, PHY_INTERFACE_MODE_GMII); if (IS_ERR(phy_dev)) { printk(KERN_ERR "%s: could not attach to PHY\n", dev->name); return PTR_ERR(phy_dev); diff --git a/drivers/net/ethernet/freescale/fman/fman_dtsec.c b/drivers/net/ethernet/freescale/fman/fman_dtsec.c index 587f9b40cfaa..6b1261c0b1c2 100644 --- a/drivers/net/ethernet/freescale/fman/fman_dtsec.c +++ b/drivers/net/ethernet/freescale/fman/fman_dtsec.c @@ -1295,7 +1295,7 @@ int dtsec_init(struct fman_mac *dtsec) err = init(dtsec->regs, dtsec_drv_param, dtsec->phy_if, dtsec->max_speed, (u8 *)eth_addr, dtsec->exceptions, - dtsec->tbiphy->addr); + dtsec->tbiphy->mdio.addr); if (err) { free_init_resources(dtsec); pr_err("DTSEC version doesn't support this i/f mode\n"); @@ -1434,11 +1434,11 @@ struct fman_mac *dtsec_config(struct fman_mac_params *params) dtsec->tbiphy = of_phy_find_device(params->internal_phy_node); if (!dtsec->tbiphy) { pr_err("of_phy_find_device (TBI PHY) failed\n"); - put_device(&dtsec->tbiphy->dev); + put_device(&dtsec->tbiphy->mdio.dev); goto err_dtsec_drv_param; } - put_device(&dtsec->tbiphy->dev); + put_device(&dtsec->tbiphy->mdio.dev); /* Save FMan revision */ fman_get_revision(dtsec->fm, &dtsec->fm_rev_info); diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.c b/drivers/net/ethernet/freescale/fman/fman_memac.c index 58bb72071c14..45e98fd8b79e 100644 --- a/drivers/net/ethernet/freescale/fman/fman_memac.c +++ b/drivers/net/ethernet/freescale/fman/fman_memac.c @@ -1054,15 +1054,15 @@ int memac_init(struct fman_mac *memac) * register address space and access each one of 4 * ports inside QSGMII. */ - phy_addr = memac->pcsphy->addr; + phy_addr = memac->pcsphy->mdio.addr; qsmgii_phy_addr = (u8)((phy_addr << 2) | i); - memac->pcsphy->addr = qsmgii_phy_addr; + memac->pcsphy->mdio.addr = qsmgii_phy_addr; if (memac->basex_if) setup_sgmii_internal_phy_base_x(memac); else setup_sgmii_internal_phy(memac, fixed_link); - memac->pcsphy->addr = phy_addr; + memac->pcsphy->mdio.addr = phy_addr; } } diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-fec.c b/drivers/net/ethernet/freescale/fs_enet/mac-fec.c index 016743e355de..c158d409f6af 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mac-fec.c +++ b/drivers/net/ethernet/freescale/fs_enet/mac-fec.c @@ -254,7 +254,7 @@ static void restart(struct net_device *dev) int r; u32 addrhi, addrlo; - struct mii_bus* mii = fep->phydev->bus; + struct mii_bus *mii = fep->phydev->mdio.bus; struct fec_info* fec_inf = mii->priv; r = whack_reset(fep->fec.fecp); diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 4e394f75261e..2aa7b401cc3b 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -1834,7 +1834,7 @@ static void gfar_configure_serdes(struct net_device *dev) * several seconds for it to come back. 
*/ if (phy_read(tbiphy, MII_BMSR) & BMSR_LSTATUS) { - put_device(&tbiphy->dev); + put_device(&tbiphy->mdio.dev); return; } @@ -1849,7 +1849,7 @@ static void gfar_configure_serdes(struct net_device *dev) BMCR_ANENABLE | BMCR_ANRESTART | BMCR_FULLDPLX | BMCR_SPEED1000); - put_device(&tbiphy->dev); + put_device(&tbiphy->mdio.dev); } static int __gfar_is_rx_idle(struct gfar_private *priv) diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index 650f7888e32b..0e7f24ec3239 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -1385,7 +1385,7 @@ static int adjust_enet_interface(struct ucc_geth_private *ugeth) value &= ~0x1000; /* Turn off autonegotiation */ phy_write(tbiphy, ENET_TBI_MII_CR, value); - put_device(&tbiphy->dev); + put_device(&tbiphy->mdio.dev); } init_check_frame_length_mode(ug_info->lengthCheckRx, &ug_regs->maccfg2); @@ -1705,7 +1705,7 @@ static void uec_configure_serdes(struct net_device *dev) * several seconds for it to come back. */ if (phy_read(tbiphy, ENET_TBI_MII_SR) & TBISR_LSTATUS) { - put_device(&tbiphy->dev); + put_device(&tbiphy->mdio.dev); return; } diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c index 4eddbeb19307..3df22840fcd1 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c @@ -71,7 +71,7 @@ static void hns_get_mdix_mode(struct net_device *net_dev, struct hns_nic_priv *priv = netdev_priv(net_dev); struct phy_device *phy_dev = priv->phy; - if (!phy_dev || !phy_dev->bus) { + if (!phy_dev || !phy_dev->mdio.bus) { cmd->eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID; cmd->eth_tp_mdix = ETH_TP_MDI_INVALID; return; diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index 4eba2ed53052..a0c03834a2f7 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -3133,7 +3133,7 @@ static int mv643xx_eth_probe(struct platform_device *pdev) if (!mp->phy) err = -ENODEV; else - phy_addr_set(mp, mp->phy->addr); + phy_addr_set(mp, mp->phy->mdio.addr); } else if (pd->phy_addr != MV643XX_ETH_PHY_NONE) { mp->phy = phy_scan(mp, pd->phy_addr); diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 15b1f6bbd92d..fabc8df40392 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -3714,7 +3714,7 @@ static int mvneta_probe(struct platform_device *pdev) mvneta_fixed_link_update(pp, phy); - put_device(&phy->dev); + put_device(&phy->mdio.dev); } return 0; diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index c74e78dd989a..8af25563f627 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -864,8 +864,8 @@ static int smsc911x_phy_loopbacktest(struct net_device *dev) for (i = 0; i < 10; i++) { /* Set PHY to 10/FD, no ANEG, and loopback mode */ - smsc911x_mii_write(phy_dev->bus, phy_dev->addr, MII_BMCR, - BMCR_LOOPBACK | BMCR_FULLDPLX); + smsc911x_mii_write(phy_dev->mdio.bus, phy_dev->mdio.addr, + MII_BMCR, BMCR_LOOPBACK | BMCR_FULLDPLX); /* Enable MAC tx/rx, FD */ spin_lock_irqsave(&pdata->mac_lock, flags); @@ -893,7 +893,7 @@ static int smsc911x_phy_loopbacktest(struct net_device *dev) spin_unlock_irqrestore(&pdata->mac_lock, flags); /* Cancel PHY loopback mode */ - smsc911x_mii_write(phy_dev->bus, phy_dev->addr, 
MII_BMCR, 0); + smsc911x_mii_write(phy_dev->mdio.bus, phy_dev->mdio.addr, MII_BMCR, 0); smsc911x_reg_write(pdata, TX_CFG, 0); smsc911x_reg_write(pdata, RX_CFG, 0); @@ -1021,7 +1021,7 @@ static int smsc911x_mii_probe(struct net_device *dev) } SMSC_TRACE(pdata, probe, "PHY: addr %d, phy_id 0x%08X", - phydev->addr, phydev->phy_id); + phydev->mdio.addr, phydev->phy_id); ret = phy_connect_direct(dev, phydev, &smsc911x_phy_adjust_link, pdata->config.phy_interface); @@ -1988,7 +1988,8 @@ smsc911x_ethtool_getregs(struct net_device *dev, struct ethtool_regs *regs, } for (i = 0; i <= 31; i++) - data[j++] = smsc911x_mii_read(phy_dev->bus, phy_dev->addr, i); + data[j++] = smsc911x_mii_read(phy_dev->mdio.bus, + phy_dev->mdio.addr, i); } static void smsc911x_eeprom_enable_access(struct smsc911x_data *pdata) diff --git a/drivers/net/ethernet/smsc/smsc9420.c b/drivers/net/ethernet/smsc/smsc9420.c index 59bf4c353d50..53355c323f54 100644 --- a/drivers/net/ethernet/smsc/smsc9420.c +++ b/drivers/net/ethernet/smsc/smsc9420.c @@ -315,7 +315,8 @@ smsc9420_ethtool_getregs(struct net_device *dev, struct ethtool_regs *regs, return; for (i = 0; i <= 31; i++) - data[j++] = smsc9420_mii_read(phy_dev->bus, phy_dev->addr, i); + data[j++] = smsc9420_mii_read(phy_dev->mdio.bus, + phy_dev->mdio.addr, i); } static void smsc9420_eeprom_enable_access(struct smsc9420_pdata *pd) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 49544c0fa6a7..42fdfd4d9d4f 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -2050,7 +2050,8 @@ static int cpsw_probe_dt(struct cpsw_priv *priv, if (!phy_dev) return -ENODEV; snprintf(slave_data->phy_id, sizeof(slave_data->phy_id), - PHY_ID_FMT, phy_dev->bus->id, phy_dev->addr); + PHY_ID_FMT, phy_dev->mdio.bus->id, + phy_dev->mdio.addr); } else if (parp) { u32 phyid; struct device_node *mdio_node; diff --git a/drivers/net/ethernet/ti/davinci_mdio.c b/drivers/net/ethernet/ti/davinci_mdio.c index 88e8e6055b9f..78299c1592c1 100644 --- a/drivers/net/ethernet/ti/davinci_mdio.c +++ b/drivers/net/ethernet/ti/davinci_mdio.c @@ -396,7 +396,7 @@ static int davinci_mdio_probe(struct platform_device *pdev) phy = data->bus->phy_map[addr]; if (phy) { dev_info(dev, "phy[%d]: device %s, driver %s\n", - phy->addr, phydev_name(phy), + phy->mdio.addr, phydev_name(phy), phy->drv ? 
phy->drv->name : "unknown"); } } diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c index d1a0167dff84..e324b3092380 100644 --- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c +++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c @@ -827,7 +827,7 @@ static int xemaclite_mdio_setup(struct net_local *lp, struct device *dev) dev_info(dev, "MDIO of the phy is not registered yet\n"); else - put_device(&phydev->dev); + put_device(&phydev->mdio.dev); return 0; } diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index 62361f8af375..b76ac09a554f 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -190,7 +190,7 @@ static int at803x_resume(struct phy_device *phydev) static int at803x_probe(struct phy_device *phydev) { - struct device *dev = &phydev->dev; + struct device *dev = &phydev->mdio.dev; struct at803x_priv *priv; struct gpio_desc *gpiod_reset; diff --git a/drivers/net/phy/bcm87xx.c b/drivers/net/phy/bcm87xx.c index 71b491c7bf96..e536e30d1643 100644 --- a/drivers/net/phy/bcm87xx.c +++ b/drivers/net/phy/bcm87xx.c @@ -40,10 +40,10 @@ static int bcm87xx_of_reg_init(struct phy_device *phydev) const __be32 *paddr_end; int len, ret; - if (!phydev->dev.of_node) + if (!phydev->mdio.dev.of_node) return 0; - paddr = of_get_property(phydev->dev.of_node, + paddr = of_get_property(phydev->mdio.dev.of_node, "broadcom,c45-reg-init", &len); if (!paddr) return 0; diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index 47b711739ba9..39da6fc6a85e 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -220,9 +220,10 @@ static void rx_timestamp_work(struct work_struct *work); #define BROADCAST_ADDR 31 -static inline int broadcast_write(struct mii_bus *bus, u32 regnum, u16 val) +static inline int broadcast_write(struct phy_device *phydev, u32 regnum, + u16 val) { - return mdiobus_write(bus, BROADCAST_ADDR, regnum, val); + return mdiobus_write(phydev->mdio.bus, BROADCAST_ADDR, regnum, val); } /* Caller must hold extreg_lock. 
*/ @@ -232,7 +233,7 @@ static int ext_read(struct phy_device *phydev, int page, u32 regnum) int val; if (dp83640->clock->page != page) { - broadcast_write(phydev->bus, PAGESEL, page); + broadcast_write(phydev, PAGESEL, page); dp83640->clock->page = page; } val = phy_read(phydev, regnum); @@ -247,11 +248,11 @@ static void ext_write(int broadcast, struct phy_device *phydev, struct dp83640_private *dp83640 = phydev->priv; if (dp83640->clock->page != page) { - broadcast_write(phydev->bus, PAGESEL, page); + broadcast_write(phydev, PAGESEL, page); dp83640->clock->page = page; } if (broadcast) - broadcast_write(phydev->bus, regnum, val); + broadcast_write(phydev, regnum, val); else phy_write(phydev, regnum, val); } @@ -1039,7 +1040,7 @@ static int choose_this_phy(struct dp83640_clock *clock, if (chosen_phy == -1 && !clock->chosen) return 1; - if (chosen_phy == phydev->addr) + if (chosen_phy == phydev->mdio.addr) return 1; return 0; @@ -1103,10 +1104,10 @@ static int dp83640_probe(struct phy_device *phydev) struct dp83640_private *dp83640; int err = -ENOMEM, i; - if (phydev->addr == BROADCAST_ADDR) + if (phydev->mdio.addr == BROADCAST_ADDR) return 0; - clock = dp83640_clock_get_bus(phydev->bus); + clock = dp83640_clock_get_bus(phydev->mdio.bus); if (!clock) goto no_clock; @@ -1132,7 +1133,8 @@ static int dp83640_probe(struct phy_device *phydev) if (choose_this_phy(clock, phydev)) { clock->chosen = dp83640; - clock->ptp_clock = ptp_clock_register(&clock->caps, &phydev->dev); + clock->ptp_clock = ptp_clock_register(&clock->caps, + &phydev->mdio.dev); if (IS_ERR(clock->ptp_clock)) { err = PTR_ERR(clock->ptp_clock); goto no_register; @@ -1158,7 +1160,7 @@ static void dp83640_remove(struct phy_device *phydev) struct list_head *this, *next; struct dp83640_private *tmp, *dp83640 = phydev->priv; - if (phydev->addr == BROADCAST_ADDR) + if (phydev->mdio.addr == BROADCAST_ADDR) return; enable_status_frames(phydev, false); diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c index e4c0b0c0af02..74e4521bd2d3 100644 --- a/drivers/net/phy/dp83867.c +++ b/drivers/net/phy/dp83867.c @@ -103,7 +103,7 @@ static int dp83867_config_intr(struct phy_device *phydev) static int dp83867_of_init(struct phy_device *phydev) { struct dp83867_private *dp83867 = phydev->priv; - struct device *dev = &phydev->dev; + struct device *dev = &phydev->mdio.dev; struct device_node *of_node = dev->of_node; int ret; @@ -137,7 +137,7 @@ static int dp83867_config_init(struct phy_device *phydev) u16 val, delay; if (!phydev->priv) { - dp83867 = devm_kzalloc(&phydev->dev, sizeof(*dp83867), + dp83867 = devm_kzalloc(&phydev->mdio.dev, sizeof(*dp83867), GFP_KERNEL); if (!dp83867) return -ENOMEM; diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c index 0a1cde6803b0..ab9c473d75ea 100644 --- a/drivers/net/phy/fixed_phy.c +++ b/drivers/net/phy/fixed_phy.c @@ -197,11 +197,11 @@ int fixed_phy_set_link_update(struct phy_device *phydev, struct fixed_mdio_bus *fmb = &platform_fmb; struct fixed_phy *fp; - if (!phydev || !phydev->bus) + if (!phydev || !phydev->mdio.bus) return -EINVAL; list_for_each_entry(fp, &fmb->phys, node) { - if (fp->addr == phydev->addr) { + if (fp->addr == phydev->mdio.addr) { fp->link_update = link_update; fp->phydev = phydev; return 0; @@ -219,11 +219,11 @@ int fixed_phy_update_state(struct phy_device *phydev, struct fixed_mdio_bus *fmb = &platform_fmb; struct fixed_phy *fp; - if (!phydev || phydev->bus != fmb->mii_bus) + if (!phydev || phydev->mdio.bus != fmb->mii_bus) return -EINVAL; 
list_for_each_entry(fp, &fmb->phys, node) { - if (fp->addr == phydev->addr) { + if (fp->addr == phydev->mdio.addr) { #define _UPD(x) if (changed->x) \ fp->status.x = status->x _UPD(link); @@ -344,7 +344,7 @@ struct phy_device *fixed_phy_register(unsigned int irq, } of_node_get(np); - phy->dev.of_node = np; + phy->mdio.dev.of_node = np; phy->is_pseudo_fixed_link = true; switch (status->speed) { diff --git a/drivers/net/phy/icplus.c b/drivers/net/phy/icplus.c index 0dbc445a5fa0..c12170d07b62 100644 --- a/drivers/net/phy/icplus.c +++ b/drivers/net/phy/icplus.c @@ -53,43 +53,43 @@ static int ip175c_config_init(struct phy_device *phydev) if (full_reset_performed == 0) { /* master reset */ - err = mdiobus_write(phydev->bus, 30, 0, 0x175c); + err = mdiobus_write(phydev->mdio.bus, 30, 0, 0x175c); if (err < 0) return err; /* ensure no bus delays overlap reset period */ - err = mdiobus_read(phydev->bus, 30, 0); + err = mdiobus_read(phydev->mdio.bus, 30, 0); /* data sheet specifies reset period is 2 msec */ mdelay(2); /* enable IP175C mode */ - err = mdiobus_write(phydev->bus, 29, 31, 0x175c); + err = mdiobus_write(phydev->mdio.bus, 29, 31, 0x175c); if (err < 0) return err; /* Set MII0 speed and duplex (in PHY mode) */ - err = mdiobus_write(phydev->bus, 29, 22, 0x420); + err = mdiobus_write(phydev->mdio.bus, 29, 22, 0x420); if (err < 0) return err; /* reset switch ports */ for (i = 0; i < 5; i++) { - err = mdiobus_write(phydev->bus, i, + err = mdiobus_write(phydev->mdio.bus, i, MII_BMCR, BMCR_RESET); if (err < 0) return err; } for (i = 0; i < 5; i++) - err = mdiobus_read(phydev->bus, i, MII_BMCR); + err = mdiobus_read(phydev->mdio.bus, i, MII_BMCR); mdelay(2); full_reset_performed = 1; } - if (phydev->addr != 4) { + if (phydev->mdio.addr != 4) { phydev->state = PHY_RUNNING; phydev->speed = SPEED_100; phydev->duplex = DUPLEX_FULL; @@ -184,7 +184,7 @@ static int ip101a_g_config_init(struct phy_device *phydev) static int ip175c_read_status(struct phy_device *phydev) { - if (phydev->addr == 4) /* WAN port */ + if (phydev->mdio.addr == 4) /* WAN port */ genphy_read_status(phydev); else /* Don't need to read status for switch ports */ @@ -195,7 +195,7 @@ static int ip175c_read_status(struct phy_device *phydev) static int ip175c_config_aneg(struct phy_device *phydev) { - if (phydev->addr == 4) /* WAN port */ + if (phydev->mdio.addr == 4) /* WAN port */ genphy_config_aneg(phydev); return 0; diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 50b5eac75854..f96c93c9819a 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -300,10 +300,11 @@ static int marvell_of_reg_init(struct phy_device *phydev) const __be32 *paddr; int len, i, saved_page, current_page, page_changed, ret; - if (!phydev->dev.of_node) + if (!phydev->mdio.dev.of_node) return 0; - paddr = of_get_property(phydev->dev.of_node, "marvell,reg-init", &len); + paddr = of_get_property(phydev->mdio.dev.of_node, + "marvell,reg-init", &len); if (!paddr || len < (4 * sizeof(*paddr))) return 0; @@ -1060,7 +1061,7 @@ static int marvell_probe(struct phy_device *phydev) { struct marvell_priv *priv; - priv = devm_kzalloc(&phydev->dev, sizeof(*priv), GFP_KERNEL); + priv = devm_kzalloc(&phydev->mdio.dev, sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 05381d0f559c..e5b1ccde835b 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -200,16 +200,16 @@ EXPORT_SYMBOL(of_mdio_find_bus); * the phy. 
This allows auto-probed pyh devices to be supplied with information * passed in via DT. */ -static void of_mdiobus_link_phydev(struct mii_bus *mdio, +static void of_mdiobus_link_phydev(struct mii_bus *bus, struct phy_device *phydev) { - struct device *dev = &phydev->dev; + struct device *dev = &phydev->mdio.dev; struct device_node *child; - if (dev->of_node || !mdio->dev.of_node) + if (dev->of_node || !bus->dev.of_node) return; - for_each_available_child_of_node(mdio->dev.of_node, child) { + for_each_available_child_of_node(bus->dev.of_node, child) { int addr; int ret; @@ -227,7 +227,7 @@ static void of_mdiobus_link_phydev(struct mii_bus *mdio, continue; } - if (addr == phydev->addr) { + if (addr == phydev->mdio.addr) { dev->of_node = child; return; } @@ -522,7 +522,7 @@ static int mdio_bus_match(struct device *dev, struct device_driver *drv) static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) { - struct device_driver *drv = phydev->dev.driver; + struct device_driver *drv = phydev->mdio.dev.driver; struct phy_driver *phydrv = to_phy_driver(drv); struct net_device *netdev = phydev->attached_dev; diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index bf72365e90bc..b51505be1fa9 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -350,7 +350,7 @@ static int ksz9021_load_values_from_of(struct phy_device *phydev, static int ksz9021_config_init(struct phy_device *phydev) { - const struct device *dev = &phydev->dev; + const struct device *dev = &phydev->mdio.dev; const struct device_node *of_node = dev->of_node; const struct device *dev_walker; @@ -358,7 +358,7 @@ static int ksz9021_config_init(struct phy_device *phydev) * properties in the MAC node. Walk up the tree of devices to * find a device with an OF node. 
*/ - dev_walker = &phydev->dev; + dev_walker = &phydev->mdio.dev; do { of_node = dev_walker->of_node; dev_walker = dev_walker->parent; @@ -471,7 +471,7 @@ static int ksz9031_center_flp_timing(struct phy_device *phydev) static int ksz9031_config_init(struct phy_device *phydev) { - const struct device *dev = &phydev->dev; + const struct device *dev = &phydev->mdio.dev; const struct device_node *of_node = dev->of_node; static const char *clk_skews[2] = {"rxc-skew-ps", "txc-skew-ps"}; static const char *rx_data_skews[4] = { @@ -630,12 +630,12 @@ static void kszphy_get_stats(struct phy_device *phydev, static int kszphy_probe(struct phy_device *phydev) { const struct kszphy_type *type = phydev->drv->driver_data; - const struct device_node *np = phydev->dev.of_node; + const struct device_node *np = phydev->mdio.dev.of_node; struct kszphy_priv *priv; struct clk *clk; int ret; - priv = devm_kzalloc(&phydev->dev, sizeof(*priv), GFP_KERNEL); + priv = devm_kzalloc(&phydev->mdio.dev, sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; @@ -658,7 +658,7 @@ static int kszphy_probe(struct phy_device *phydev) priv->led_mode = -1; } - clk = devm_clk_get(&phydev->dev, "rmii-ref"); + clk = devm_clk_get(&phydev->mdio.dev, "rmii-ref"); /* NOTE: clk may be NULL if building without CONFIG_HAVE_CLK */ if (!IS_ERR_OR_NULL(clk)) { unsigned long rate = clk_get_rate(clk); diff --git a/drivers/net/phy/microchip.c b/drivers/net/phy/microchip.c index 99df5bc47424..5e34b49be0b3 100644 --- a/drivers/net/phy/microchip.c +++ b/drivers/net/phy/microchip.c @@ -68,7 +68,7 @@ int lan88xx_suspend(struct phy_device *phydev) static int lan88xx_probe(struct phy_device *phydev) { - struct device *dev = &phydev->dev; + struct device *dev = &phydev->mdio.dev; struct lan88xx_priv *priv; priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); @@ -89,7 +89,7 @@ static int lan88xx_probe(struct phy_device *phydev) static void lan88xx_remove(struct phy_device *phydev) { - struct device *dev = &phydev->dev; + struct device *dev = &phydev->mdio.dev; struct lan88xx_priv *priv = phydev->priv; if (priv) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 56c8dd8c0c85..8763bb20988a 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -319,7 +319,7 @@ int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd) { u32 speed = ethtool_cmd_speed(cmd); - if (cmd->phy_address != phydev->addr) + if (cmd->phy_address != phydev->mdio.addr) return -EINVAL; /* We make sure that we don't pass unsupported values in to the PHY */ @@ -375,7 +375,7 @@ int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd) cmd->port = PORT_BNC; else cmd->port = PORT_MII; - cmd->phy_address = phydev->addr; + cmd->phy_address = phydev->mdio.addr; cmd->transceiver = phy_is_internal(phydev) ? 
XCVR_INTERNAL : XCVR_EXTERNAL; cmd->autoneg = phydev->autoneg; @@ -403,16 +403,17 @@ int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd) switch (cmd) { case SIOCGMIIPHY: - mii_data->phy_id = phydev->addr; + mii_data->phy_id = phydev->mdio.addr; /* fall through */ case SIOCGMIIREG: - mii_data->val_out = mdiobus_read(phydev->bus, mii_data->phy_id, + mii_data->val_out = mdiobus_read(phydev->mdio.bus, + mii_data->phy_id, mii_data->reg_num); return 0; case SIOCSMIIREG: - if (mii_data->phy_id == phydev->addr) { + if (mii_data->phy_id == phydev->mdio.addr) { switch (mii_data->reg_num) { case MII_BMCR: if ((val & (BMCR_RESET | BMCR_ANENABLE)) == 0) { @@ -445,10 +446,10 @@ int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd) } } - mdiobus_write(phydev->bus, mii_data->phy_id, + mdiobus_write(phydev->mdio.bus, mii_data->phy_id, mii_data->reg_num, val); - if (mii_data->phy_id == phydev->addr && + if (mii_data->phy_id == phydev->mdio.addr && mii_data->reg_num == MII_BMCR && val & BMCR_RESET) return phy_init_hw(phydev); @@ -643,7 +644,7 @@ int phy_start_interrupts(struct phy_device *phydev) if (request_irq(phydev->irq, phy_interrupt, 0, "phy_interrupt", phydev) < 0) { pr_warn("%s: Can't get IRQ %d (PHY)\n", - phydev->bus->name, phydev->irq); + phydev->mdio.bus->name, phydev->irq); phydev->irq = PHY_POLL; return 0; } @@ -1041,11 +1042,11 @@ static inline void mmd_phy_indirect(struct mii_bus *bus, int prtad, int devad, int phy_read_mmd_indirect(struct phy_device *phydev, int prtad, int devad) { struct phy_driver *phydrv = phydev->drv; - int addr = phydev->addr; + int addr = phydev->mdio.addr; int value = -1; if (!phydrv->read_mmd_indirect) { - struct mii_bus *bus = phydev->bus; + struct mii_bus *bus = phydev->mdio.bus; mutex_lock(&bus->mdio_lock); mmd_phy_indirect(bus, prtad, devad, addr); @@ -1079,10 +1080,10 @@ void phy_write_mmd_indirect(struct phy_device *phydev, int prtad, int devad, u32 data) { struct phy_driver *phydrv = phydev->drv; - int addr = phydev->addr; + int addr = phydev->mdio.addr; if (!phydrv->write_mmd_indirect) { - struct mii_bus *bus = phydev->bus; + struct mii_bus *bus = phydev->mdio.bus; mutex_lock(&bus->mdio_lock); mmd_phy_indirect(bus, prtad, devad, addr); diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 68fe5738daef..01e5d52dc37c 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -43,7 +43,7 @@ MODULE_LICENSE("GPL"); void phy_device_free(struct phy_device *phydev) { - put_device(&phydev->dev); + put_device(&phydev->mdio.dev); } EXPORT_SYMBOL(phy_device_free); @@ -65,7 +65,7 @@ static DEFINE_MUTEX(phy_fixup_lock); /** * phy_register_fixup - creates a new phy_fixup and adds it to the list - * @bus_id: A string which matches phydev->dev.bus_id (or PHY_ANY_ID) + * @bus_id: A string which matches phydev->mdio.dev.bus_id (or PHY_ANY_ID) * @phy_uid: Used to match against phydev->phy_id (the UID of the PHY) * It can also be PHY_ANY_UID * @phy_uid_mask: Applied to phydev->phy_id and fixup->phy_uid before @@ -153,13 +153,19 @@ struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id, struct phy_c45_device_ids *c45_ids) { struct phy_device *dev; + struct mdio_device *mdiodev; /* We allocate the device, and initialize the default values */ dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return ERR_PTR(-ENOMEM); - dev->dev.release = phy_device_release; + mdiodev = &dev->mdio; + mdiodev->dev.release = phy_device_release; + mdiodev->dev.parent = &bus->dev; + 
mdiodev->dev.bus = &mdio_bus_type; + mdiodev->bus = bus; + mdiodev->addr = addr; dev->speed = 0; dev->duplex = -1; @@ -171,15 +177,11 @@ struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id, dev->autoneg = AUTONEG_ENABLE; dev->is_c45 = is_c45; - dev->addr = addr; dev->phy_id = phy_id; if (c45_ids) dev->c45_ids = *c45_ids; - dev->bus = bus; - dev->dev.parent = &bus->dev; - dev->dev.bus = &mdio_bus_type; dev->irq = bus->irq ? bus->irq[addr] : PHY_POLL; - dev_set_name(&dev->dev, PHY_ID_FMT, bus->id, addr); + dev_set_name(&mdiodev->dev, PHY_ID_FMT, bus->id, addr); dev->state = PHY_DOWN; @@ -199,7 +201,7 @@ struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id, */ request_module(MDIO_MODULE_PREFIX MDIO_ID_FMT, MDIO_ID_ARGS(phy_id)); - device_initialize(&dev->dev); + device_initialize(&mdiodev->dev); return dev; } @@ -382,27 +384,27 @@ int phy_device_register(struct phy_device *phydev) int err; /* Don't register a phy if one is already registered at this address */ - if (phydev->bus->phy_map[phydev->addr]) + if (phydev->mdio.bus->phy_map[phydev->mdio.addr]) return -EINVAL; - phydev->bus->phy_map[phydev->addr] = phydev; + phydev->mdio.bus->phy_map[phydev->mdio.addr] = phydev; /* Run all of the fixups for this PHY */ err = phy_scan_fixups(phydev); if (err) { - pr_err("PHY %d failed to initialize\n", phydev->addr); + pr_err("PHY %d failed to initialize\n", phydev->mdio.addr); goto out; } - err = device_add(&phydev->dev); + err = device_add(&phydev->mdio.dev); if (err) { - pr_err("PHY %d failed to add\n", phydev->addr); + pr_err("PHY %d failed to add\n", phydev->mdio.addr); goto out; } return 0; out: - phydev->bus->phy_map[phydev->addr] = NULL; + phydev->mdio.bus->phy_map[phydev->mdio.addr] = NULL; return err; } EXPORT_SYMBOL(phy_device_register); @@ -417,10 +419,10 @@ EXPORT_SYMBOL(phy_device_register); */ void phy_device_remove(struct phy_device *phydev) { - struct mii_bus *bus = phydev->bus; - int addr = phydev->addr; + struct mii_bus *bus = phydev->mdio.bus; + int addr = phydev->mdio.addr; - device_del(&phydev->dev); + device_del(&phydev->mdio.dev); bus->phy_map[addr] = NULL; } EXPORT_SYMBOL(phy_device_remove); @@ -617,13 +619,13 @@ EXPORT_SYMBOL(phy_attached_info); void phy_attached_print(struct phy_device *phydev, const char *fmt, ...) { if (!fmt) { - dev_info(&phydev->dev, ATTACHED_FMT "\n", + dev_info(&phydev->mdio.dev, ATTACHED_FMT "\n", phydev->drv->name, phydev_name(phydev), phydev->irq); } else { va_list ap; - dev_info(&phydev->dev, ATTACHED_FMT, + dev_info(&phydev->mdio.dev, ATTACHED_FMT, phydev->drv->name, phydev_name(phydev), phydev->irq); @@ -652,8 +654,8 @@ EXPORT_SYMBOL(phy_attached_print); int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, u32 flags, phy_interface_t interface) { - struct mii_bus *bus = phydev->bus; - struct device *d = &phydev->dev; + struct mii_bus *bus = phydev->mdio.bus; + struct device *d = &phydev->mdio.dev; int err; if (!try_module_get(bus->owner)) { @@ -771,8 +773,8 @@ void phy_detach(struct phy_device *phydev) * real driver could be loaded */ for (i = 0; i < ARRAY_SIZE(genphy_driver); i++) { - if (phydev->dev.driver == &genphy_driver[i].driver) { - device_release_driver(&phydev->dev); + if (phydev->mdio.dev.driver == &genphy_driver[i].driver) { + device_release_driver(&phydev->mdio.dev); break; } } @@ -781,16 +783,16 @@ void phy_detach(struct phy_device *phydev) * The phydev might go away on the put_device() below, so avoid * a use-after-free bug by reading the underlying bus first. 
*/ - bus = phydev->bus; + bus = phydev->mdio.bus; - put_device(&phydev->dev); + put_device(&phydev->mdio.dev); module_put(bus->owner); } EXPORT_SYMBOL(phy_detach); int phy_suspend(struct phy_device *phydev) { - struct phy_driver *phydrv = to_phy_driver(phydev->dev.driver); + struct phy_driver *phydrv = to_phy_driver(phydev->mdio.dev.driver); struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL }; int ret = 0; @@ -813,7 +815,7 @@ EXPORT_SYMBOL(phy_suspend); int phy_resume(struct phy_device *phydev) { - struct phy_driver *phydrv = to_phy_driver(phydev->dev.driver); + struct phy_driver *phydrv = to_phy_driver(phydev->mdio.dev.driver); int ret = 0; if (phydrv->resume) @@ -1330,7 +1332,7 @@ EXPORT_SYMBOL(phy_set_max_speed); static void of_set_phy_supported(struct phy_device *phydev) { - struct device_node *node = phydev->dev.of_node; + struct device_node *node = phydev->mdio.dev.of_node; u32 max_speed; if (!IS_ENABLED(CONFIG_OF_MDIO)) @@ -1354,7 +1356,7 @@ static void of_set_phy_supported(struct phy_device *phydev) static int phy_probe(struct device *dev) { struct phy_device *phydev = to_phy_device(dev); - struct device_driver *drv = phydev->dev.driver; + struct device_driver *drv = phydev->mdio.dev.driver; struct phy_driver *phydrv = to_phy_driver(drv); int err = 0; diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c index dc2da8770918..18c981b95910 100644 --- a/drivers/net/phy/smsc.c +++ b/drivers/net/phy/smsc.c @@ -44,7 +44,7 @@ static int smsc_phy_ack_interrupt(struct phy_device *phydev) static int smsc_phy_config_init(struct phy_device *phydev) { int __maybe_unused len; - struct device *dev __maybe_unused = &phydev->dev; + struct device *dev __maybe_unused = &phydev->mdio.dev; struct device_node *of_node __maybe_unused = dev->of_node; int rc = phy_read(phydev, MII_LAN83C185_CTRL_STATUS); int enable_energy = 1; diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c index 2f88ff4654da..bc9d76329435 100644 --- a/drivers/of/of_mdio.c +++ b/drivers/of/of_mdio.c @@ -75,7 +75,7 @@ static int of_mdiobus_register_phy(struct mii_bus *mdio, struct device_node *chi /* Associate the OF node with the device structure so it * can be looked up later */ of_node_get(child); - phy->dev.of_node = child; + phy->mdio.dev.of_node = child; /* All data is now stored in the phy struct; * register it */ @@ -233,7 +233,7 @@ struct phy_device *of_phy_connect(struct net_device *dev, ret = phy_connect_direct(dev, phy, hndlr, iface); /* refcount is held by phy_connect_direct() on success */ - put_device(&phy->dev); + put_device(&phy->mdio.dev); return ret ? NULL : phy; } @@ -263,7 +263,7 @@ struct phy_device *of_phy_attach(struct net_device *dev, ret = phy_attach_direct(dev, phy, flags, iface); /* refcount is held by phy_attach_direct() on success */ - put_device(&phy->dev); + put_device(&phy->mdio.dev); return ret ? 
NULL : phy; } diff --git a/include/linux/mdio.h b/include/linux/mdio.h index 0d073c23c10d..94f9f1491cde 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -13,6 +13,15 @@ struct mii_bus; +struct mdio_device { + struct device dev; + + struct mii_bus *bus; + /* Bus address of the MDIO device (0-31) */ + int addr; +}; +#define to_mdio_device(d) container_of(d, struct mdio_device, dev) + static inline bool mdio_phy_id_is_c45(int phy_id) { return (phy_id & MDIO_PHY_ID_C45) && !(phy_id & ~MDIO_PHY_ID_C45_MASK); diff --git a/include/linux/phy.h b/include/linux/phy.h index a5473c9e19de..239a0c2bc49d 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -358,14 +358,12 @@ struct phy_c45_device_ids { * handling, as well as handling shifts in PHY hardware state */ struct phy_device { + struct mdio_device mdio; + /* Information about the PHY type */ /* And management functions */ struct phy_driver *drv; - struct mii_bus *bus; - - struct device dev; - u32 phy_id; struct phy_c45_device_ids c45_ids; @@ -381,9 +379,6 @@ struct phy_device { phy_interface_t interface; - /* Bus address of the PHY (0-31) */ - int addr; - /* * forced speed & duplex (no autoneg) * partner speed & duplex & pause (autoneg) @@ -432,7 +427,8 @@ struct phy_device { void (*adjust_link)(struct net_device *dev); }; -#define to_phy_device(d) container_of(d, struct phy_device, dev) +#define to_phy_device(d) container_of(to_mdio_device(d), \ + struct phy_device, mdio) /* struct phy_driver: Driver structure for a particular PHY type * @@ -622,7 +618,7 @@ static inline int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum) if (!phydev->is_c45) return -EOPNOTSUPP; - return mdiobus_read(phydev->bus, phydev->addr, + return mdiobus_read(phydev->mdio.bus, phydev->mdio.addr, MII_ADDR_C45 | (devad << 16) | (regnum & 0xffff)); } @@ -648,7 +644,7 @@ int phy_read_mmd_indirect(struct phy_device *phydev, int prtad, int devad); */ static inline int phy_read(struct phy_device *phydev, u32 regnum) { - return mdiobus_read(phydev->bus, phydev->addr, regnum); + return mdiobus_read(phydev->mdio.bus, phydev->mdio.addr, regnum); } /** @@ -663,7 +659,7 @@ static inline int phy_read(struct phy_device *phydev, u32 regnum) */ static inline int phy_write(struct phy_device *phydev, u32 regnum, u16 val) { - return mdiobus_write(phydev->bus, phydev->addr, regnum, val); + return mdiobus_write(phydev->mdio.bus, phydev->mdio.addr, regnum, val); } /** @@ -726,7 +722,7 @@ static inline int phy_write_mmd(struct phy_device *phydev, int devad, regnum = MII_ADDR_C45 | ((devad & 0x1f) << 16) | (regnum & 0xffff); - return mdiobus_write(phydev->bus, phydev->addr, regnum, val); + return mdiobus_write(phydev->mdio.bus, phydev->mdio.addr, regnum, val); } /** @@ -776,14 +772,14 @@ static inline int phy_read_status(struct phy_device *phydev) } #define phydev_err(_phydev, format, args...) \ - dev_err(&_phydev->dev, format, ##args) + dev_err(&_phydev->mdio.dev, format, ##args) #define phydev_dbg(_phydev, format, args...) \ - dev_dbg(&_phydev->dev, format, ##args) + dev_dbg(&_phydev->mdio.dev, format, ##args); static inline const char *phydev_name(const struct phy_device *phydev) { - return dev_name(&phydev->dev); + return dev_name(&phydev->mdio.dev); } void phy_attached_print(struct phy_device *phydev, const char *fmt, ...) 
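The hunks above fold struct phy_device's bus pointer, address and struct device into an embedded struct mdio_device, so every phydev->addr / phydev->bus / &phydev->dev access becomes phydev->mdio.addr / phydev->mdio.bus / &phydev->mdio.dev, and to_phy_device() is now layered on top of to_mdio_device(). The stand-alone sketch below (userspace C, simplified field names, not part of the patch) only illustrates the container_of() embedding pattern this conversion relies on:

#include <stddef.h>
#include <stdio.h>

/* simplified container_of(): no type checking, enough for the sketch */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct device { const char *name; };	/* stand-in for struct device */

/* mirrors the new struct mdio_device: generic bus bookkeeping only */
struct mdio_device {
	struct device dev;
	int addr;
};

/* mirrors struct phy_device: the mdio_device is embedded as "mdio" */
struct phy_device {
	struct mdio_device mdio;
	unsigned int phy_id;
};

#define to_mdio_device(d) container_of(d, struct mdio_device, dev)
#define to_phy_device(d) \
	container_of(to_mdio_device(d), struct phy_device, mdio)

int main(void)
{
	struct phy_device phy = {
		.mdio = { .dev = { "phy0" }, .addr = 3 },
		.phy_id = 0x001cc915,
	};
	/* what the driver core hands back is the generic struct device */
	struct device *d = &phy.mdio.dev;

	/* recover the phy_device by going through the embedded mdio_device */
	struct phy_device *p = to_phy_device(d);
	printf("%s: addr=%d phy_id=0x%08x\n",
	       p->mdio.dev.name, p->mdio.addr, p->phy_id);
	return 0;
}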
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 208d1b257194..fa4daba8db55 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -439,7 +439,7 @@ static void dsa_switch_destroy(struct dsa_switch *ds) if (of_phy_is_fixed_link(port_dn)) { phydev = of_phy_find_device(port_dn); if (phydev) { - int addr = phydev->addr; + int addr = phydev->mdio.addr; phy_device_free(phydev); of_node_put(port_dn); -- cgit v1.2.3-71-gd317 From 7f854420fbfe9d49afe2ffb1df052cfe8e215541 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 6 Jan 2016 20:11:18 +0100 Subject: phy: Add API for {un}registering an mdio device to a bus. Rather than have drivers directly manipulate the mii_bus structure, provide and API for registering and unregistering devices on an MDIO bus, and performing lookups. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/au1000_eth.c | 9 +++-- drivers/net/ethernet/broadcom/b44.c | 2 +- drivers/net/ethernet/broadcom/genet/bcmmii.c | 2 +- drivers/net/ethernet/broadcom/tg3.c | 30 +++++++-------- drivers/net/ethernet/ethoc.c | 4 +- drivers/net/ethernet/faraday/ftgmac100.c | 2 +- drivers/net/ethernet/freescale/fec_main.c | 7 +--- drivers/net/ethernet/samsung/sxgbe/sxgbe_mdio.c | 2 +- drivers/net/ethernet/smsc/smsc9420.c | 3 +- drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 2 +- drivers/net/ethernet/ti/davinci_mdio.c | 2 +- drivers/net/phy/mdio_bus.c | 46 ++++++++++++++++++++++- drivers/net/phy/phy_device.c | 21 +++++------ drivers/of/of_mdio.c | 2 +- include/linux/mdio.h | 8 ++++ include/linux/phy.h | 2 +- net/dsa/slave.c | 3 +- 17 files changed, 98 insertions(+), 49 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c index 982b581d3484..c8640418fc37 100644 --- a/drivers/net/ethernet/amd/au1000_eth.c +++ b/drivers/net/ethernet/amd/au1000_eth.c @@ -502,7 +502,7 @@ static int au1000_mii_probe(struct net_device *dev) BUG_ON(aup->mac_id < 0 || aup->mac_id > 1); if (aup->phy_addr) - phydev = aup->mii_bus->phy_map[aup->phy_addr]; + phydev = mdiobus_get_phy(aup->mii_bus, aup->phy_addr); else netdev_info(dev, "using PHY-less setup\n"); return 0; @@ -512,8 +512,8 @@ static int au1000_mii_probe(struct net_device *dev) * on the current MAC's MII bus */ for (phy_addr = 0; phy_addr < PHY_MAX_ADDR; phy_addr++) - if (aup->mii_bus->phy_map[phy_addr]) { - phydev = aup->mii_bus->phy_map[phy_addr]; + if (mdiobus_get_phy(aup->mii_bus, aup->phy_addr)) { + phydev = mdiobus_get_phy(aup->mii_bus, aup->phy_addr); if (!aup->phy_search_highest_addr) /* break out with first one found */ break; @@ -531,7 +531,8 @@ static int au1000_mii_probe(struct net_device *dev) */ for (phy_addr = 0; phy_addr < PHY_MAX_ADDR; phy_addr++) { struct phy_device *const tmp_phydev = - aup->mii_bus->phy_map[phy_addr]; + mdiobus_get_phy(aup->mii_bus, + phy_addr); if (aup->mac_id == 1) break; diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index 843a4a5864fc..74f0a37c4eb6 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -2272,7 +2272,7 @@ static int b44_register_phy_one(struct b44 *bp) goto err_out_mdiobus; } - if (!bp->mii_bus->phy_map[bp->phy_addr] && + if (!mdiobus_is_registered_device(bp->mii_bus, bp->phy_addr) && (sprom->boardflags_lo & (B44_BOARDFLAG_ROBO | B44_BOARDFLAG_ADM))) { dev_info(sdev->dev, diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index 
633b59db813a..0d775964b060 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -573,7 +573,7 @@ static int bcmgenet_mii_pd_init(struct bcmgenet_priv *priv) } if (pd->phy_address >= 0 && pd->phy_address < PHY_MAX_ADDR) - phydev = mdio->phy_map[pd->phy_address]; + phydev = mdiobus_get_phy(mdio, pd->phy_address); else phydev = phy_find_first(mdio); diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 04e7d0d0e5b1..9293675df7ba 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -1406,7 +1406,7 @@ static void tg3_mdio_config_5785(struct tg3 *tp) u32 val; struct phy_device *phydev; - phydev = tp->mdio_bus->phy_map[tp->phy_addr]; + phydev = mdiobus_get_phy(tp->mdio_bus, tp->phy_addr); switch (phydev->drv->phy_id & phydev->drv->phy_id_mask) { case PHY_ID_BCM50610: case PHY_ID_BCM50610M: @@ -1554,7 +1554,7 @@ static int tg3_mdio_init(struct tg3 *tp) return i; } - phydev = tp->mdio_bus->phy_map[tp->phy_addr]; + phydev = mdiobus_get_phy(tp->mdio_bus, tp->phy_addr); if (!phydev || !phydev->drv) { dev_warn(&tp->pdev->dev, "No PHY devices\n"); @@ -1964,7 +1964,7 @@ static void tg3_setup_flow_control(struct tg3 *tp, u32 lcladv, u32 rmtadv) u32 old_tx_mode = tp->tx_mode; if (tg3_flag(tp, USE_PHYLIB)) - autoneg = tp->mdio_bus->phy_map[tp->phy_addr]->autoneg; + autoneg = mdiobus_get_phy(tp->mdio_bus, tp->phy_addr)->autoneg; else autoneg = tp->link_config.autoneg; @@ -2000,7 +2000,7 @@ static void tg3_adjust_link(struct net_device *dev) u8 oldflowctrl, linkmesg = 0; u32 mac_mode, lcl_adv, rmt_adv; struct tg3 *tp = netdev_priv(dev); - struct phy_device *phydev = tp->mdio_bus->phy_map[tp->phy_addr]; + struct phy_device *phydev = mdiobus_get_phy(tp->mdio_bus, tp->phy_addr); spin_lock_bh(&tp->lock); @@ -2089,7 +2089,7 @@ static int tg3_phy_init(struct tg3 *tp) /* Bring the PHY back to a known state. */ tg3_bmcr_reset(tp); - phydev = tp->mdio_bus->phy_map[tp->phy_addr]; + phydev = mdiobus_get_phy(tp->mdio_bus, tp->phy_addr); /* Attach the MAC to the PHY. 
*/ phydev = phy_connect(tp->dev, phydev_name(phydev), @@ -2116,7 +2116,7 @@ static int tg3_phy_init(struct tg3 *tp) SUPPORTED_Asym_Pause); break; default: - phy_disconnect(tp->mdio_bus->phy_map[tp->phy_addr]); + phy_disconnect(mdiobus_get_phy(tp->mdio_bus, tp->phy_addr)); return -EINVAL; } @@ -2136,7 +2136,7 @@ static void tg3_phy_start(struct tg3 *tp) if (!(tp->phy_flags & TG3_PHYFLG_IS_CONNECTED)) return; - phydev = tp->mdio_bus->phy_map[tp->phy_addr]; + phydev = mdiobus_get_phy(tp->mdio_bus, tp->phy_addr); if (tp->phy_flags & TG3_PHYFLG_IS_LOW_POWER) { tp->phy_flags &= ~TG3_PHYFLG_IS_LOW_POWER; @@ -2156,13 +2156,13 @@ static void tg3_phy_stop(struct tg3 *tp) if (!(tp->phy_flags & TG3_PHYFLG_IS_CONNECTED)) return; - phy_stop(tp->mdio_bus->phy_map[tp->phy_addr]); + phy_stop(mdiobus_get_phy(tp->mdio_bus, tp->phy_addr)); } static void tg3_phy_fini(struct tg3 *tp) { if (tp->phy_flags & TG3_PHYFLG_IS_CONNECTED) { - phy_disconnect(tp->mdio_bus->phy_map[tp->phy_addr]); + phy_disconnect(mdiobus_get_phy(tp->mdio_bus, tp->phy_addr)); tp->phy_flags &= ~TG3_PHYFLG_IS_CONNECTED; } } @@ -4046,7 +4046,7 @@ static int tg3_power_down_prepare(struct tg3 *tp) struct phy_device *phydev; u32 phyid, advertising; - phydev = tp->mdio_bus->phy_map[tp->phy_addr]; + phydev = mdiobus_get_phy(tp->mdio_bus, tp->phy_addr); tp->phy_flags |= TG3_PHYFLG_IS_LOW_POWER; @@ -12074,7 +12074,7 @@ static int tg3_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) struct phy_device *phydev; if (!(tp->phy_flags & TG3_PHYFLG_IS_CONNECTED)) return -EAGAIN; - phydev = tp->mdio_bus->phy_map[tp->phy_addr]; + phydev = mdiobus_get_phy(tp->mdio_bus, tp->phy_addr); return phy_ethtool_gset(phydev, cmd); } @@ -12141,7 +12141,7 @@ static int tg3_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) struct phy_device *phydev; if (!(tp->phy_flags & TG3_PHYFLG_IS_CONNECTED)) return -EAGAIN; - phydev = tp->mdio_bus->phy_map[tp->phy_addr]; + phydev = mdiobus_get_phy(tp->mdio_bus, tp->phy_addr); return phy_ethtool_sset(phydev, cmd); } @@ -12296,7 +12296,7 @@ static int tg3_nway_reset(struct net_device *dev) if (tg3_flag(tp, USE_PHYLIB)) { if (!(tp->phy_flags & TG3_PHYFLG_IS_CONNECTED)) return -EAGAIN; - r = phy_start_aneg(tp->mdio_bus->phy_map[tp->phy_addr]); + r = phy_start_aneg(mdiobus_get_phy(tp->mdio_bus, tp->phy_addr)); } else { u32 bmcr; @@ -12414,7 +12414,7 @@ static int tg3_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam u32 newadv; struct phy_device *phydev; - phydev = tp->mdio_bus->phy_map[tp->phy_addr]; + phydev = mdiobus_get_phy(tp->mdio_bus, tp->phy_addr); if (!(phydev->supported & SUPPORTED_Pause) || (!(phydev->supported & SUPPORTED_Asym_Pause) && @@ -13924,7 +13924,7 @@ static int tg3_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) struct phy_device *phydev; if (!(tp->phy_flags & TG3_PHYFLG_IS_CONNECTED)) return -EAGAIN; - phydev = tp->mdio_bus->phy_map[tp->phy_addr]; + phydev = mdiobus_get_phy(tp->mdio_bus, tp->phy_addr); return phy_mii_ioctl(phydev, ifr, cmd); } diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c index c028b299ab3f..62fa136554ac 100644 --- a/drivers/net/ethernet/ethoc.c +++ b/drivers/net/ethernet/ethoc.c @@ -678,7 +678,7 @@ static int ethoc_mdio_probe(struct net_device *dev) int err; if (priv->phy_id != -1) - phy = priv->mdio->phy_map[priv->phy_id]; + phy = mdiobus_get_phy(priv->mdio, priv->phy_id); else phy = phy_find_first(priv->mdio); @@ -766,7 +766,7 @@ static int ethoc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (mdio->phy_id 
>= PHY_MAX_ADDR) return -ERANGE; - phy = priv->mdio->phy_map[mdio->phy_id]; + phy = mdiobus_get_phy(priv->mdio, mdio->phy_id); if (!phy) return -ENODEV; } else { diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 8f3f2cf0dcbf..bb116ad646f6 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -839,7 +839,7 @@ static int ftgmac100_mii_probe(struct ftgmac100 *priv) /* search for connect PHY device */ for (i = 0; i < PHY_MAX_ADDR; i++) { - struct phy_device *tmp = priv->mii_bus->phy_map[i]; + struct phy_device *tmp = mdiobus_get_phy(priv->mii_bus, i); if (tmp) { phydev = tmp; diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index da255fb4f1d5..502da6f48f95 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include #include @@ -1926,11 +1927,7 @@ static int fec_enet_mii_probe(struct net_device *ndev) } else { /* check for attached phy */ for (phy_id = 0; (phy_id < PHY_MAX_ADDR); phy_id++) { - if ((fep->mii_bus->phy_mask & (1 << phy_id))) - continue; - if (fep->mii_bus->phy_map[phy_id] == NULL) - continue; - if (fep->mii_bus->phy_map[phy_id]->phy_id == 0) + if (!mdiobus_is_registered_device(fep->mii_bus, phy_id)) continue; if (dev_id--) continue; diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_mdio.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_mdio.c index 5b13b8c11bef..467ff7033606 100644 --- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_mdio.c +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_mdio.c @@ -180,7 +180,7 @@ int sxgbe_mdio_register(struct net_device *ndev) } for (phy_addr = 0; phy_addr < PHY_MAX_ADDR; phy_addr++) { - struct phy_device *phy = mdio_bus->phy_map[phy_addr]; + struct phy_device *phy = mdiobus_get_phy(mdio_bus, phy_addr); if (phy) { char irq_num[4]; diff --git a/drivers/net/ethernet/smsc/smsc9420.c b/drivers/net/ethernet/smsc/smsc9420.c index 53355c323f54..8594b9e8b28b 100644 --- a/drivers/net/ethernet/smsc/smsc9420.c +++ b/drivers/net/ethernet/smsc/smsc9420.c @@ -1158,7 +1158,8 @@ static int smsc9420_mii_probe(struct net_device *dev) BUG_ON(pd->phy_dev); /* Device only supports internal PHY at address 1 */ - if (!pd->mii_bus->phy_map[1]) { + phydev = mdiobus_get_phy(pd->mii_bus, 1); + if (!phydev) { netdev_err(dev, "no PHY found at address 1\n"); return -ENODEV; } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index f0990eb9460f..bff28595b427 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -252,7 +252,7 @@ int stmmac_mdio_register(struct net_device *ndev) found = 0; for (addr = 0; addr < PHY_MAX_ADDR; addr++) { - struct phy_device *phydev = new_bus->phy_map[addr]; + struct phy_device *phydev = mdiobus_get_phy(new_bus, addr); if (phydev) { int act = 0; char irq_num[4]; diff --git a/drivers/net/ethernet/ti/davinci_mdio.c b/drivers/net/ethernet/ti/davinci_mdio.c index 78299c1592c1..4e7c9b9b042a 100644 --- a/drivers/net/ethernet/ti/davinci_mdio.c +++ b/drivers/net/ethernet/ti/davinci_mdio.c @@ -393,7 +393,7 @@ static int davinci_mdio_probe(struct platform_device *pdev) /* scan and dump the bus */ for (addr = 0; addr < PHY_MAX_ADDR; addr++) { - phy = data->bus->phy_map[addr]; + phy = mdiobus_get_phy(data->bus, addr); if (phy) { dev_info(dev, "phy[%d]: device %s, driver 
%s\n", phy->mdio.addr, phydev_name(phy), diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index e5b1ccde835b..f28f89e109ba 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -38,6 +38,48 @@ #include +int mdiobus_register_device(struct mdio_device *mdiodev) +{ + if (mdiodev->bus->mdio_map[mdiodev->addr]) + return -EBUSY; + + mdiodev->bus->mdio_map[mdiodev->addr] = mdiodev; + + return 0; +} +EXPORT_SYMBOL(mdiobus_register_device); + +int mdiobus_unregister_device(struct mdio_device *mdiodev) +{ + if (mdiodev->bus->mdio_map[mdiodev->addr] != mdiodev) + return -EINVAL; + + mdiodev->bus->mdio_map[mdiodev->addr] = NULL; + + return 0; +} +EXPORT_SYMBOL(mdiobus_unregister_device); + +struct phy_device *mdiobus_get_phy(struct mii_bus *bus, int addr) +{ + struct mdio_device *mdiodev = bus->mdio_map[addr]; + + if (!mdiodev) + return NULL; + + if (!(mdiodev->flags & MDIO_DEVICE_FLAG_PHY)) + return NULL; + + return container_of(mdiodev, struct phy_device, mdio); +} +EXPORT_SYMBOL(mdiobus_get_phy); + +bool mdiobus_is_registered_device(struct mii_bus *bus, int addr) +{ + return bus->mdio_map[addr]; +} +EXPORT_SYMBOL(mdiobus_is_registered_device); + /** * mdiobus_alloc_size - allocate a mii_bus structure * @size: extra amount of memory to allocate for private storage. @@ -299,7 +341,7 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner) error: while (--i >= 0) { - struct phy_device *phydev = bus->phy_map[i]; + struct phy_device *phydev = mdiobus_get_phy(bus, i); if (phydev) { phy_device_remove(phydev); phy_device_free(phydev); @@ -318,7 +360,7 @@ void mdiobus_unregister(struct mii_bus *bus) bus->state = MDIOBUS_UNREGISTERED; for (i = 0; i < PHY_MAX_ADDR; i++) { - struct phy_device *phydev = bus->phy_map[i]; + struct phy_device *phydev = mdiobus_get_phy(bus, i); if (phydev) { phy_device_remove(phydev); phy_device_free(phydev); diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 01e5d52dc37c..e0d5dbb96700 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -166,6 +166,7 @@ struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id, mdiodev->dev.bus = &mdio_bus_type; mdiodev->bus = bus; mdiodev->addr = addr; + mdiodev->flags = MDIO_DEVICE_FLAG_PHY; dev->speed = 0; dev->duplex = -1; @@ -383,10 +384,9 @@ int phy_device_register(struct phy_device *phydev) { int err; - /* Don't register a phy if one is already registered at this address */ - if (phydev->mdio.bus->phy_map[phydev->mdio.addr]) - return -EINVAL; - phydev->mdio.bus->phy_map[phydev->mdio.addr] = phydev; + err = mdiobus_register_device(&phydev->mdio); + if (err) + return err; /* Run all of the fixups for this PHY */ err = phy_scan_fixups(phydev); @@ -404,7 +404,7 @@ int phy_device_register(struct phy_device *phydev) return 0; out: - phydev->mdio.bus->phy_map[phydev->mdio.addr] = NULL; + mdiobus_unregister_device(&phydev->mdio); return err; } EXPORT_SYMBOL(phy_device_register); @@ -419,11 +419,8 @@ EXPORT_SYMBOL(phy_device_register); */ void phy_device_remove(struct phy_device *phydev) { - struct mii_bus *bus = phydev->mdio.bus; - int addr = phydev->mdio.addr; - device_del(&phydev->mdio.dev); - bus->phy_map[addr] = NULL; + mdiobus_unregister_device(&phydev->mdio); } EXPORT_SYMBOL(phy_device_remove); @@ -433,11 +430,13 @@ EXPORT_SYMBOL(phy_device_remove); */ struct phy_device *phy_find_first(struct mii_bus *bus) { + struct phy_device *phydev; int addr; for (addr = 0; addr < PHY_MAX_ADDR; addr++) { - if 
(bus->phy_map[addr]) - return bus->phy_map[addr]; + phydev = mdiobus_get_phy(bus, addr); + if (phydev) + return phydev; } return NULL; } diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c index c0292051392e..6febe2df76f9 100644 --- a/drivers/of/of_mdio.c +++ b/drivers/of/of_mdio.c @@ -193,7 +193,7 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) for (addr = 0; addr < PHY_MAX_ADDR; addr++) { /* skip already registered PHYs */ - if (mdio->phy_map[addr]) + if (mdiobus_is_registered_device(mdio, addr)) continue; /* be noisy to encourage people to set reg property */ diff --git a/include/linux/mdio.h b/include/linux/mdio.h index 94f9f1491cde..8cd9579e18ea 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -19,9 +19,12 @@ struct mdio_device { struct mii_bus *bus; /* Bus address of the MDIO device (0-31) */ int addr; + int flags; }; #define to_mdio_device(d) container_of(d, struct mdio_device, dev) +#define MDIO_DEVICE_FLAG_PHY 1 + static inline bool mdio_phy_id_is_c45(int phy_id) { return (phy_id & MDIO_PHY_ID_C45) && !(phy_id & ~MDIO_PHY_ID_C45_MASK); @@ -188,4 +191,9 @@ int mdiobus_read_nested(struct mii_bus *bus, int addr, u32 regnum); int mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val); int mdiobus_write_nested(struct mii_bus *bus, int addr, u32 regnum, u16 val); +int mdiobus_register_device(struct mdio_device *mdiodev); +int mdiobus_unregister_device(struct mdio_device *mdiodev); +bool mdiobus_is_registered_device(struct mii_bus *bus, int addr); +struct phy_device *mdiobus_get_phy(struct mii_bus *bus, int addr); + #endif /* __LINUX_MDIO_H__ */ diff --git a/include/linux/phy.h b/include/linux/phy.h index 239a0c2bc49d..2d7beef20825 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -180,7 +180,7 @@ struct mii_bus { struct device dev; /* list of all PHYs on bus */ - struct phy_device *phy_map[PHY_MAX_ADDR]; + struct mdio_device *mdio_map[PHY_MAX_ADDR]; /* PHY addresses to be ignored when probing */ u32 phy_mask; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 5f45e68b52dc..2771713714f1 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -997,7 +998,7 @@ static int dsa_slave_phy_connect(struct dsa_slave_priv *p, { struct dsa_switch *ds = p->parent; - p->phy = ds->slave_mii_bus->phy_map[addr]; + p->phy = mdiobus_get_phy(ds->slave_mii_bus, addr); if (!p->phy) { netdev_err(slave_dev, "no phy at %d\n", addr); return -ENODEV; -- cgit v1.2.3-71-gd317 From bc87922ff59d364a33e9bce0febdef21a7fbd2af Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 6 Jan 2016 20:11:21 +0100 Subject: phy: Move PHY PM operations into phy_device The MDIO PM operations are really PHY device PM operations. So move them into phy_device. This will be needed when we support devices on the mdio bus which are not PHYs. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/phy/mdio_bus.c | 83 ++++---------------------------- drivers/net/phy/phy_device.c | 110 +++++++++++++++++++++++++++++++++++++++++++ include/linux/mdio.h | 2 +- 3 files changed, 121 insertions(+), 74 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index e6dddb086265..65ff8199bd09 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -561,95 +561,32 @@ static int mdio_bus_match(struct device *dev, struct device_driver *drv) } #ifdef CONFIG_PM - -static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) -{ - struct device_driver *drv = phydev->mdio.dev.driver; - struct phy_driver *phydrv = to_phy_driver(drv); - struct net_device *netdev = phydev->attached_dev; - - if (!drv || !phydrv->suspend) - return false; - - /* PHY not attached? May suspend if the PHY has not already been - * suspended as part of a prior call to phy_disconnect() -> - * phy_detach() -> phy_suspend() because the parent netdev might be the - * MDIO bus driver and clock gated at this point. - */ - if (!netdev) - return !phydev->suspended; - - /* Don't suspend PHY if the attched netdev parent may wakeup. - * The parent may point to a PCI device, as in tg3 driver. - */ - if (netdev->dev.parent && device_may_wakeup(netdev->dev.parent)) - return false; - - /* Also don't suspend PHY if the netdev itself may wakeup. This - * is the case for devices w/o underlaying pwr. mgmt. aware bus, - * e.g. SoC devices. - */ - if (device_may_wakeup(&netdev->dev)) - return false; - - return true; -} - static int mdio_bus_suspend(struct device *dev) { - struct phy_device *phydev = to_phy_device(dev); + struct mdio_device *mdio = to_mdio_device(dev); - /* We must stop the state machine manually, otherwise it stops out of - * control, possibly with the phydev->lock held. Upon resume, netdev - * may call phy routines that try to grab the same lock, and that may - * lead to a deadlock. - */ - if (phydev->attached_dev && phydev->adjust_link) - phy_stop_machine(phydev); - - if (!mdio_bus_phy_may_suspend(phydev)) - return 0; + if (mdio->pm_ops && mdio->pm_ops->suspend) + return mdio->pm_ops->suspend(dev); - return phy_suspend(phydev); + return 0; } static int mdio_bus_resume(struct device *dev) { - struct phy_device *phydev = to_phy_device(dev); - int ret; - - if (!mdio_bus_phy_may_suspend(phydev)) - goto no_resume; + struct mdio_device *mdio = to_mdio_device(dev); - ret = phy_resume(phydev); - if (ret < 0) - return ret; - -no_resume: - if (phydev->attached_dev && phydev->adjust_link) - phy_start_machine(phydev); + if (mdio->pm_ops && mdio->pm_ops->resume) + return mdio->pm_ops->resume(dev); return 0; } static int mdio_bus_restore(struct device *dev) { - struct phy_device *phydev = to_phy_device(dev); - struct net_device *netdev = phydev->attached_dev; - int ret; - - if (!netdev) - return 0; - - ret = phy_init_hw(phydev); - if (ret < 0) - return ret; - - /* The PHY needs to renegotiate. 
*/ - phydev->link = 0; - phydev->state = PHY_UP; + struct mdio_device *mdio = to_mdio_device(dev); - phy_start_machine(phydev); + if (mdio->pm_ops && mdio->pm_ops->restore) + return mdio->pm_ops->restore(dev); return 0; } diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 7a5222daff93..eb0b0ed32662 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -63,6 +63,115 @@ static struct phy_driver genphy_driver[GENPHY_DRV_MAX]; static LIST_HEAD(phy_fixup_list); static DEFINE_MUTEX(phy_fixup_lock); +#ifdef CONFIG_PM +static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) +{ + struct device_driver *drv = phydev->mdio.dev.driver; + struct phy_driver *phydrv = to_phy_driver(drv); + struct net_device *netdev = phydev->attached_dev; + + if (!drv || !phydrv->suspend) + return false; + + /* PHY not attached? May suspend if the PHY has not already been + * suspended as part of a prior call to phy_disconnect() -> + * phy_detach() -> phy_suspend() because the parent netdev might be the + * MDIO bus driver and clock gated at this point. + */ + if (!netdev) + return !phydev->suspended; + + /* Don't suspend PHY if the attached netdev parent may wakeup. + * The parent may point to a PCI device, as in tg3 driver. + */ + if (netdev->dev.parent && device_may_wakeup(netdev->dev.parent)) + return false; + + /* Also don't suspend PHY if the netdev itself may wakeup. This + * is the case for devices w/o underlaying pwr. mgmt. aware bus, + * e.g. SoC devices. + */ + if (device_may_wakeup(&netdev->dev)) + return false; + + return true; +} + +static int mdio_bus_phy_suspend(struct device *dev) +{ + struct phy_device *phydev = to_phy_device(dev); + + /* We must stop the state machine manually, otherwise it stops out of + * control, possibly with the phydev->lock held. Upon resume, netdev + * may call phy routines that try to grab the same lock, and that may + * lead to a deadlock. + */ + if (phydev->attached_dev && phydev->adjust_link) + phy_stop_machine(phydev); + + if (!mdio_bus_phy_may_suspend(phydev)) + return 0; + + return phy_suspend(phydev); +} + +static int mdio_bus_phy_resume(struct device *dev) +{ + struct phy_device *phydev = to_phy_device(dev); + int ret; + + if (!mdio_bus_phy_may_suspend(phydev)) + goto no_resume; + + ret = phy_resume(phydev); + if (ret < 0) + return ret; + +no_resume: + if (phydev->attached_dev && phydev->adjust_link) + phy_start_machine(phydev); + + return 0; +} + +static int mdio_bus_phy_restore(struct device *dev) +{ + struct phy_device *phydev = to_phy_device(dev); + struct net_device *netdev = phydev->attached_dev; + int ret; + + if (!netdev) + return 0; + + ret = phy_init_hw(phydev); + if (ret < 0) + return ret; + + /* The PHY needs to renegotiate. 
*/ + phydev->link = 0; + phydev->state = PHY_UP; + + phy_start_machine(phydev); + + return 0; +} + +static const struct dev_pm_ops mdio_bus_phy_pm_ops = { + .suspend = mdio_bus_phy_suspend, + .resume = mdio_bus_phy_resume, + .freeze = mdio_bus_phy_suspend, + .thaw = mdio_bus_phy_resume, + .restore = mdio_bus_phy_restore, +}; + +#define MDIO_BUS_PHY_PM_OPS (&mdio_bus_phy_pm_ops) + +#else + +#define MDIO_BUS_PHY_PM_OPS NULL + +#endif /* CONFIG_PM */ + /** * phy_register_fixup - creates a new phy_fixup and adds it to the list * @bus_id: A string which matches phydev->mdio.dev.bus_id (or PHY_ANY_ID) @@ -165,6 +274,7 @@ struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id, mdiodev->dev.parent = &bus->dev; mdiodev->dev.bus = &mdio_bus_type; mdiodev->bus = bus; + mdiodev->pm_ops = MDIO_BUS_PHY_PM_OPS; mdiodev->addr = addr; mdiodev->flags = MDIO_DEVICE_FLAG_PHY; diff --git a/include/linux/mdio.h b/include/linux/mdio.h index 8cd9579e18ea..9f844d372ed5 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -15,7 +15,7 @@ struct mii_bus; struct mdio_device { struct device dev; - + const struct dev_pm_ops *pm_ops; struct mii_bus *bus; /* Bus address of the MDIO device (0-31) */ int addr; -- cgit v1.2.3-71-gd317 From be01da72b1b832b89fbdf59ae6f1b60e53ca2987 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 6 Jan 2016 20:11:22 +0100 Subject: phy: Centralize setting driver module owner Rather than have each driver set the driver owner field, do it once in the core code. This will also help with later changes, when the device structure will move. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/amd.c | 1 - drivers/net/phy/aquantia.c | 4 ---- drivers/net/phy/at803x.c | 9 --------- drivers/net/phy/bcm63xx.c | 2 -- drivers/net/phy/bcm7xxx.c | 6 ------ drivers/net/phy/bcm87xx.c | 2 -- drivers/net/phy/broadcom.c | 12 ------------ drivers/net/phy/cicada.c | 2 -- drivers/net/phy/davicom.c | 4 ---- drivers/net/phy/dp83640.c | 3 +-- drivers/net/phy/dp83848.c | 2 -- drivers/net/phy/dp83867.c | 2 -- drivers/net/phy/et1011c.c | 1 - drivers/net/phy/icplus.c | 3 --- drivers/net/phy/lxt.c | 4 ---- drivers/net/phy/marvell.c | 13 ------------- drivers/net/phy/micrel.c | 13 ------------- drivers/net/phy/microchip.c | 2 -- drivers/net/phy/national.c | 1 - drivers/net/phy/phy_device.c | 13 +++++++------ drivers/net/phy/qsemi.c | 1 - drivers/net/phy/realtek.c | 5 ----- drivers/net/phy/smsc.c | 10 ---------- drivers/net/phy/ste10Xp.c | 2 -- drivers/net/phy/teranetics.c | 1 - drivers/net/phy/vitesse.c | 8 -------- include/linux/phy.h | 7 ++++--- 27 files changed, 12 insertions(+), 121 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/amd.c b/drivers/net/phy/amd.c index 65a488f82eb8..18141c022b13 100644 --- a/drivers/net/phy/amd.c +++ b/drivers/net/phy/amd.c @@ -72,7 +72,6 @@ static struct phy_driver am79c_driver[] = { { .read_status = genphy_read_status, .ack_interrupt = am79c_ack_interrupt, .config_intr = am79c_config_intr, - .driver = { .owner = THIS_MODULE,}, } }; module_phy_driver(am79c_driver); diff --git a/drivers/net/phy/aquantia.c b/drivers/net/phy/aquantia.c index f1936b7a7af6..09b0b0aa8d68 100644 --- a/drivers/net/phy/aquantia.c +++ b/drivers/net/phy/aquantia.c @@ -128,7 +128,6 @@ static struct phy_driver aquantia_driver[] = { .config_intr = aquantia_config_intr, .ack_interrupt = aquantia_ack_interrupt, .read_status = aquantia_read_status, - .driver = { .owner = THIS_MODULE,}, }, { .phy_id = 
PHY_ID_AQ2104, @@ -141,7 +140,6 @@ static struct phy_driver aquantia_driver[] = { .config_intr = aquantia_config_intr, .ack_interrupt = aquantia_ack_interrupt, .read_status = aquantia_read_status, - .driver = { .owner = THIS_MODULE,}, }, { .phy_id = PHY_ID_AQR105, @@ -154,7 +152,6 @@ static struct phy_driver aquantia_driver[] = { .config_intr = aquantia_config_intr, .ack_interrupt = aquantia_ack_interrupt, .read_status = aquantia_read_status, - .driver = { .owner = THIS_MODULE,}, }, { .phy_id = PHY_ID_AQR405, @@ -167,7 +164,6 @@ static struct phy_driver aquantia_driver[] = { .config_intr = aquantia_config_intr, .ack_interrupt = aquantia_ack_interrupt, .read_status = aquantia_read_status, - .driver = { .owner = THIS_MODULE,}, }, }; diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index b76ac09a554f..8a8f6fb2880d 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -310,9 +310,6 @@ static struct phy_driver at803x_driver[] = { .read_status = genphy_read_status, .ack_interrupt = at803x_ack_interrupt, .config_intr = at803x_config_intr, - .driver = { - .owner = THIS_MODULE, - }, }, { /* ATHEROS 8030 */ .phy_id = ATH8030_PHY_ID, @@ -331,9 +328,6 @@ static struct phy_driver at803x_driver[] = { .read_status = genphy_read_status, .ack_interrupt = at803x_ack_interrupt, .config_intr = at803x_config_intr, - .driver = { - .owner = THIS_MODULE, - }, }, { /* ATHEROS 8031 */ .phy_id = ATH8031_PHY_ID, @@ -352,9 +346,6 @@ static struct phy_driver at803x_driver[] = { .read_status = genphy_read_status, .ack_interrupt = &at803x_ack_interrupt, .config_intr = &at803x_config_intr, - .driver = { - .owner = THIS_MODULE, - }, } }; module_phy_driver(at803x_driver); diff --git a/drivers/net/phy/bcm63xx.c b/drivers/net/phy/bcm63xx.c index 86b28052bf06..e741bf614c4e 100644 --- a/drivers/net/phy/bcm63xx.c +++ b/drivers/net/phy/bcm63xx.c @@ -56,7 +56,6 @@ static struct phy_driver bcm63xx_driver[] = { .read_status = genphy_read_status, .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, - .driver = { .owner = THIS_MODULE }, }, { /* same phy as above, with just a different OUI */ .phy_id = 0x002bdc00, @@ -69,7 +68,6 @@ static struct phy_driver bcm63xx_driver[] = { .read_status = genphy_read_status, .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, - .driver = { .owner = THIS_MODULE }, } }; module_phy_driver(bcm63xx_driver); diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c index 9f4e6eb886af..bf241a3ec5e5 100644 --- a/drivers/net/phy/bcm7xxx.c +++ b/drivers/net/phy/bcm7xxx.c @@ -324,7 +324,6 @@ static int bcm7xxx_dummy_config_init(struct phy_device *phydev) .config_aneg = genphy_config_aneg, \ .read_status = genphy_read_status, \ .resume = bcm7xxx_28nm_resume, \ - .driver = { .owner = THIS_MODULE }, \ } static struct phy_driver bcm7xxx_driver[] = { @@ -346,7 +345,6 @@ static struct phy_driver bcm7xxx_driver[] = { .read_status = genphy_read_status, .suspend = bcm7xxx_suspend, .resume = bcm7xxx_config_init, - .driver = { .owner = THIS_MODULE }, }, { .phy_id = PHY_ID_BCM7429, .phy_id_mask = 0xfffffff0, @@ -359,7 +357,6 @@ static struct phy_driver bcm7xxx_driver[] = { .read_status = genphy_read_status, .suspend = bcm7xxx_suspend, .resume = bcm7xxx_config_init, - .driver = { .owner = THIS_MODULE }, }, { .phy_id = PHY_ID_BCM7435, .phy_id_mask = 0xfffffff0, @@ -372,7 +369,6 @@ static struct phy_driver bcm7xxx_driver[] = { .read_status = genphy_read_status, .suspend = bcm7xxx_suspend, .resume = bcm7xxx_config_init, - .driver = { .owner = 
THIS_MODULE }, }, { .phy_id = PHY_BCM_OUI_4, .phy_id_mask = 0xffff0000, @@ -385,7 +381,6 @@ static struct phy_driver bcm7xxx_driver[] = { .read_status = genphy_read_status, .suspend = bcm7xxx_suspend, .resume = bcm7xxx_config_init, - .driver = { .owner = THIS_MODULE }, }, { .phy_id = PHY_BCM_OUI_5, .phy_id_mask = 0xffffff00, @@ -398,7 +393,6 @@ static struct phy_driver bcm7xxx_driver[] = { .read_status = genphy_read_status, .suspend = bcm7xxx_suspend, .resume = bcm7xxx_config_init, - .driver = { .owner = THIS_MODULE }, } }; static struct mdio_device_id __maybe_unused bcm7xxx_tbl[] = { diff --git a/drivers/net/phy/bcm87xx.c b/drivers/net/phy/bcm87xx.c index e536e30d1643..f7ebdcff53e4 100644 --- a/drivers/net/phy/bcm87xx.c +++ b/drivers/net/phy/bcm87xx.c @@ -201,7 +201,6 @@ static struct phy_driver bcm87xx_driver[] = { .config_intr = bcm87xx_config_intr, .did_interrupt = bcm87xx_did_interrupt, .match_phy_device = bcm8706_match_phy_device, - .driver = { .owner = THIS_MODULE }, }, { .phy_id = PHY_ID_BCM8727, .phy_id_mask = 0xffffffff, @@ -214,7 +213,6 @@ static struct phy_driver bcm87xx_driver[] = { .config_intr = bcm87xx_config_intr, .did_interrupt = bcm87xx_did_interrupt, .match_phy_device = bcm8727_match_phy_device, - .driver = { .owner = THIS_MODULE }, } }; module_phy_driver(bcm87xx_driver); diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 3ce5d9514623..870327efccf7 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -460,7 +460,6 @@ static struct phy_driver broadcom_drivers[] = { .read_status = genphy_read_status, .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, - .driver = { .owner = THIS_MODULE }, }, { .phy_id = PHY_ID_BCM5421, .phy_id_mask = 0xfffffff0, @@ -473,7 +472,6 @@ static struct phy_driver broadcom_drivers[] = { .read_status = genphy_read_status, .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, - .driver = { .owner = THIS_MODULE }, }, { .phy_id = PHY_ID_BCM5461, .phy_id_mask = 0xfffffff0, @@ -486,7 +484,6 @@ static struct phy_driver broadcom_drivers[] = { .read_status = genphy_read_status, .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, - .driver = { .owner = THIS_MODULE }, }, { .phy_id = PHY_ID_BCM54616S, .phy_id_mask = 0xfffffff0, @@ -499,7 +496,6 @@ static struct phy_driver broadcom_drivers[] = { .read_status = genphy_read_status, .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, - .driver = { .owner = THIS_MODULE }, }, { .phy_id = PHY_ID_BCM5464, .phy_id_mask = 0xfffffff0, @@ -512,7 +508,6 @@ static struct phy_driver broadcom_drivers[] = { .read_status = genphy_read_status, .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, - .driver = { .owner = THIS_MODULE }, }, { .phy_id = PHY_ID_BCM5481, .phy_id_mask = 0xfffffff0, @@ -525,7 +520,6 @@ static struct phy_driver broadcom_drivers[] = { .read_status = genphy_read_status, .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, - .driver = { .owner = THIS_MODULE }, }, { .phy_id = PHY_ID_BCM5482, .phy_id_mask = 0xfffffff0, @@ -538,7 +532,6 @@ static struct phy_driver broadcom_drivers[] = { .read_status = bcm5482_read_status, .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, - .driver = { .owner = THIS_MODULE }, }, { .phy_id = PHY_ID_BCM50610, .phy_id_mask = 0xfffffff0, @@ -551,7 +544,6 @@ static struct phy_driver broadcom_drivers[] = { .read_status = genphy_read_status, .ack_interrupt = bcm_phy_ack_intr, .config_intr = 
bcm_phy_config_intr, - .driver = { .owner = THIS_MODULE }, }, { .phy_id = PHY_ID_BCM50610M, .phy_id_mask = 0xfffffff0, @@ -564,7 +556,6 @@ static struct phy_driver broadcom_drivers[] = { .read_status = genphy_read_status, .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, - .driver = { .owner = THIS_MODULE }, }, { .phy_id = PHY_ID_BCM57780, .phy_id_mask = 0xfffffff0, @@ -577,7 +568,6 @@ static struct phy_driver broadcom_drivers[] = { .read_status = genphy_read_status, .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, - .driver = { .owner = THIS_MODULE }, }, { .phy_id = PHY_ID_BCMAC131, .phy_id_mask = 0xfffffff0, @@ -590,7 +580,6 @@ static struct phy_driver broadcom_drivers[] = { .read_status = genphy_read_status, .ack_interrupt = brcm_fet_ack_interrupt, .config_intr = brcm_fet_config_intr, - .driver = { .owner = THIS_MODULE }, }, { .phy_id = PHY_ID_BCM5241, .phy_id_mask = 0xfffffff0, @@ -603,7 +592,6 @@ static struct phy_driver broadcom_drivers[] = { .read_status = genphy_read_status, .ack_interrupt = brcm_fet_ack_interrupt, .config_intr = brcm_fet_config_intr, - .driver = { .owner = THIS_MODULE }, } }; module_phy_driver(broadcom_drivers); diff --git a/drivers/net/phy/cicada.c b/drivers/net/phy/cicada.c index 27f5464899d4..d339c1afea77 100644 --- a/drivers/net/phy/cicada.c +++ b/drivers/net/phy/cicada.c @@ -114,7 +114,6 @@ static struct phy_driver cis820x_driver[] = { .read_status = &genphy_read_status, .ack_interrupt = &cis820x_ack_interrupt, .config_intr = &cis820x_config_intr, - .driver = { .owner = THIS_MODULE,}, }, { .phy_id = 0x000fc440, .name = "Cicada Cis8204", @@ -126,7 +125,6 @@ static struct phy_driver cis820x_driver[] = { .read_status = &genphy_read_status, .ack_interrupt = &cis820x_ack_interrupt, .config_intr = &cis820x_config_intr, - .driver = { .owner = THIS_MODULE,}, } }; module_phy_driver(cis820x_driver); diff --git a/drivers/net/phy/davicom.c b/drivers/net/phy/davicom.c index 2a328703b4ae..36e3e2033eca 100644 --- a/drivers/net/phy/davicom.c +++ b/drivers/net/phy/davicom.c @@ -156,7 +156,6 @@ static struct phy_driver dm91xx_driver[] = { .read_status = genphy_read_status, .ack_interrupt = dm9161_ack_interrupt, .config_intr = dm9161_config_intr, - .driver = { .owner = THIS_MODULE,}, }, { .phy_id = 0x0181b8b0, .name = "Davicom DM9161B/C", @@ -168,7 +167,6 @@ static struct phy_driver dm91xx_driver[] = { .read_status = genphy_read_status, .ack_interrupt = dm9161_ack_interrupt, .config_intr = dm9161_config_intr, - .driver = { .owner = THIS_MODULE,}, }, { .phy_id = 0x0181b8a0, .name = "Davicom DM9161A", @@ -180,7 +178,6 @@ static struct phy_driver dm91xx_driver[] = { .read_status = genphy_read_status, .ack_interrupt = dm9161_ack_interrupt, .config_intr = dm9161_config_intr, - .driver = { .owner = THIS_MODULE,}, }, { .phy_id = 0x00181b80, .name = "Davicom DM9131", @@ -191,7 +188,6 @@ static struct phy_driver dm91xx_driver[] = { .read_status = genphy_read_status, .ack_interrupt = dm9161_ack_interrupt, .config_intr = dm9161_config_intr, - .driver = { .owner = THIS_MODULE,}, } }; module_phy_driver(dm91xx_driver); diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index 39da6fc6a85e..180f69952779 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -1492,12 +1492,11 @@ static struct phy_driver dp83640_driver = { .hwtstamp = dp83640_hwtstamp, .rxtstamp = dp83640_rxtstamp, .txtstamp = dp83640_txtstamp, - .driver = {.owner = THIS_MODULE,} }; static int __init dp83640_init(void) { - return 
phy_driver_register(&dp83640_driver); + return phy_driver_register(&dp83640_driver, THIS_MODULE); } static void __exit dp83640_exit(void) diff --git a/drivers/net/phy/dp83848.c b/drivers/net/phy/dp83848.c index 5ce9bef54468..5e14e629c597 100644 --- a/drivers/net/phy/dp83848.c +++ b/drivers/net/phy/dp83848.c @@ -88,8 +88,6 @@ static struct phy_driver dp83848_driver[] = { /* IRQ related */ .ack_interrupt = dp83848_ack_interrupt, .config_intr = dp83848_config_intr, - - .driver = { .owner = THIS_MODULE, }, }, }; module_phy_driver(dp83848_driver); diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c index 74e4521bd2d3..2afa61b51d41 100644 --- a/drivers/net/phy/dp83867.c +++ b/drivers/net/phy/dp83867.c @@ -214,8 +214,6 @@ static struct phy_driver dp83867_driver[] = { .read_status = genphy_read_status, .suspend = genphy_suspend, .resume = genphy_resume, - - .driver = {.owner = THIS_MODULE,} }, }; module_phy_driver(dp83867_driver); diff --git a/drivers/net/phy/et1011c.c b/drivers/net/phy/et1011c.c index a907743816a8..a9a4edfa23c8 100644 --- a/drivers/net/phy/et1011c.c +++ b/drivers/net/phy/et1011c.c @@ -95,7 +95,6 @@ static struct phy_driver et1011c_driver[] = { { .flags = PHY_POLL, .config_aneg = et1011c_config_aneg, .read_status = et1011c_read_status, - .driver = { .owner = THIS_MODULE,}, } }; module_phy_driver(et1011c_driver); diff --git a/drivers/net/phy/icplus.c b/drivers/net/phy/icplus.c index c12170d07b62..e5f251b91578 100644 --- a/drivers/net/phy/icplus.c +++ b/drivers/net/phy/icplus.c @@ -221,7 +221,6 @@ static struct phy_driver icplus_driver[] = { .read_status = &ip175c_read_status, .suspend = genphy_suspend, .resume = genphy_resume, - .driver = { .owner = THIS_MODULE,}, }, { .phy_id = 0x02430d90, .name = "ICPlus IP1001", @@ -233,7 +232,6 @@ static struct phy_driver icplus_driver[] = { .read_status = &genphy_read_status, .suspend = genphy_suspend, .resume = genphy_resume, - .driver = { .owner = THIS_MODULE,}, }, { .phy_id = 0x02430c54, .name = "ICPlus IP101A/G", @@ -247,7 +245,6 @@ static struct phy_driver icplus_driver[] = { .read_status = &genphy_read_status, .suspend = genphy_suspend, .resume = genphy_resume, - .driver = { .owner = THIS_MODULE,}, } }; module_phy_driver(icplus_driver); diff --git a/drivers/net/phy/lxt.c b/drivers/net/phy/lxt.c index a3a5a703635b..f6078376ef50 100644 --- a/drivers/net/phy/lxt.c +++ b/drivers/net/phy/lxt.c @@ -278,7 +278,6 @@ static struct phy_driver lxt97x_driver[] = { .read_status = genphy_read_status, .ack_interrupt = lxt970_ack_interrupt, .config_intr = lxt970_config_intr, - .driver = { .owner = THIS_MODULE,}, }, { .phy_id = 0x001378e0, .name = "LXT971", @@ -289,7 +288,6 @@ static struct phy_driver lxt97x_driver[] = { .read_status = genphy_read_status, .ack_interrupt = lxt971_ack_interrupt, .config_intr = lxt971_config_intr, - .driver = { .owner = THIS_MODULE,}, }, { .phy_id = 0x00137a10, .name = "LXT973-A2", @@ -299,7 +297,6 @@ static struct phy_driver lxt97x_driver[] = { .probe = lxt973_probe, .config_aneg = lxt973_config_aneg, .read_status = lxt973a2_read_status, - .driver = { .owner = THIS_MODULE,}, }, { .phy_id = 0x00137a10, .name = "LXT973", @@ -309,7 +306,6 @@ static struct phy_driver lxt97x_driver[] = { .probe = lxt973_probe, .config_aneg = lxt973_config_aneg, .read_status = genphy_read_status, - .driver = { .owner = THIS_MODULE,}, } }; module_phy_driver(lxt97x_driver); diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index f96c93c9819a..e3eb96443c97 100644 --- a/drivers/net/phy/marvell.c +++ 
b/drivers/net/phy/marvell.c @@ -1087,7 +1087,6 @@ static struct phy_driver marvell_drivers[] = { .get_sset_count = marvell_get_sset_count, .get_strings = marvell_get_strings, .get_stats = marvell_get_stats, - .driver = { .owner = THIS_MODULE }, }, { .phy_id = MARVELL_PHY_ID_88E1112, @@ -1106,7 +1105,6 @@ static struct phy_driver marvell_drivers[] = { .get_sset_count = marvell_get_sset_count, .get_strings = marvell_get_strings, .get_stats = marvell_get_stats, - .driver = { .owner = THIS_MODULE }, }, { .phy_id = MARVELL_PHY_ID_88E1111, @@ -1125,7 +1123,6 @@ static struct phy_driver marvell_drivers[] = { .get_sset_count = marvell_get_sset_count, .get_strings = marvell_get_strings, .get_stats = marvell_get_stats, - .driver = { .owner = THIS_MODULE }, }, { .phy_id = MARVELL_PHY_ID_88E1118, @@ -1144,7 +1141,6 @@ static struct phy_driver marvell_drivers[] = { .get_sset_count = marvell_get_sset_count, .get_strings = marvell_get_strings, .get_stats = marvell_get_stats, - .driver = {.owner = THIS_MODULE,}, }, { .phy_id = MARVELL_PHY_ID_88E1121R, @@ -1163,7 +1159,6 @@ static struct phy_driver marvell_drivers[] = { .get_sset_count = marvell_get_sset_count, .get_strings = marvell_get_strings, .get_stats = marvell_get_stats, - .driver = { .owner = THIS_MODULE }, }, { .phy_id = MARVELL_PHY_ID_88E1318S, @@ -1184,7 +1179,6 @@ static struct phy_driver marvell_drivers[] = { .get_sset_count = marvell_get_sset_count, .get_strings = marvell_get_strings, .get_stats = marvell_get_stats, - .driver = { .owner = THIS_MODULE }, }, { .phy_id = MARVELL_PHY_ID_88E1145, @@ -1203,7 +1197,6 @@ static struct phy_driver marvell_drivers[] = { .get_sset_count = marvell_get_sset_count, .get_strings = marvell_get_strings, .get_stats = marvell_get_stats, - .driver = { .owner = THIS_MODULE }, }, { .phy_id = MARVELL_PHY_ID_88E1149R, @@ -1222,7 +1215,6 @@ static struct phy_driver marvell_drivers[] = { .get_sset_count = marvell_get_sset_count, .get_strings = marvell_get_strings, .get_stats = marvell_get_stats, - .driver = { .owner = THIS_MODULE }, }, { .phy_id = MARVELL_PHY_ID_88E1240, @@ -1241,7 +1233,6 @@ static struct phy_driver marvell_drivers[] = { .get_sset_count = marvell_get_sset_count, .get_strings = marvell_get_strings, .get_stats = marvell_get_stats, - .driver = { .owner = THIS_MODULE }, }, { .phy_id = MARVELL_PHY_ID_88E1116R, @@ -1260,7 +1251,6 @@ static struct phy_driver marvell_drivers[] = { .get_sset_count = marvell_get_sset_count, .get_strings = marvell_get_strings, .get_stats = marvell_get_stats, - .driver = { .owner = THIS_MODULE }, }, { .phy_id = MARVELL_PHY_ID_88E1510, @@ -1279,7 +1269,6 @@ static struct phy_driver marvell_drivers[] = { .get_sset_count = marvell_get_sset_count, .get_strings = marvell_get_strings, .get_stats = marvell_get_stats, - .driver = { .owner = THIS_MODULE }, }, { .phy_id = MARVELL_PHY_ID_88E1540, @@ -1298,7 +1287,6 @@ static struct phy_driver marvell_drivers[] = { .get_sset_count = marvell_get_sset_count, .get_strings = marvell_get_strings, .get_stats = marvell_get_stats, - .driver = { .owner = THIS_MODULE }, }, { .phy_id = MARVELL_PHY_ID_88E3016, @@ -1319,7 +1307,6 @@ static struct phy_driver marvell_drivers[] = { .get_sset_count = marvell_get_sset_count, .get_strings = marvell_get_strings, .get_stats = marvell_get_stats, - .driver = { .owner = THIS_MODULE }, }, }; diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index b51505be1fa9..0dbc6496b6f6 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -706,7 +706,6 @@ static struct phy_driver ksphy_driver[] = 
{ .get_stats = kszphy_get_stats, .suspend = genphy_suspend, .resume = genphy_resume, - .driver = { .owner = THIS_MODULE,}, }, { .phy_id = PHY_ID_KSZ8021, .phy_id_mask = 0x00ffffff, @@ -726,7 +725,6 @@ static struct phy_driver ksphy_driver[] = { .get_stats = kszphy_get_stats, .suspend = genphy_suspend, .resume = genphy_resume, - .driver = { .owner = THIS_MODULE,}, }, { .phy_id = PHY_ID_KSZ8031, .phy_id_mask = 0x00ffffff, @@ -746,7 +744,6 @@ static struct phy_driver ksphy_driver[] = { .get_stats = kszphy_get_stats, .suspend = genphy_suspend, .resume = genphy_resume, - .driver = { .owner = THIS_MODULE,}, }, { .phy_id = PHY_ID_KSZ8041, .phy_id_mask = 0x00fffff0, @@ -766,7 +763,6 @@ static struct phy_driver ksphy_driver[] = { .get_stats = kszphy_get_stats, .suspend = genphy_suspend, .resume = genphy_resume, - .driver = { .owner = THIS_MODULE,}, }, { .phy_id = PHY_ID_KSZ8041RNLI, .phy_id_mask = 0x00fffff0, @@ -786,7 +782,6 @@ static struct phy_driver ksphy_driver[] = { .get_stats = kszphy_get_stats, .suspend = genphy_suspend, .resume = genphy_resume, - .driver = { .owner = THIS_MODULE,}, }, { .phy_id = PHY_ID_KSZ8051, .phy_id_mask = 0x00fffff0, @@ -806,7 +801,6 @@ static struct phy_driver ksphy_driver[] = { .get_stats = kszphy_get_stats, .suspend = genphy_suspend, .resume = genphy_resume, - .driver = { .owner = THIS_MODULE,}, }, { .phy_id = PHY_ID_KSZ8001, .name = "Micrel KSZ8001 or KS8721", @@ -825,7 +819,6 @@ static struct phy_driver ksphy_driver[] = { .get_stats = kszphy_get_stats, .suspend = genphy_suspend, .resume = genphy_resume, - .driver = { .owner = THIS_MODULE,}, }, { .phy_id = PHY_ID_KSZ8081, .name = "Micrel KSZ8081 or KSZ8091", @@ -844,7 +837,6 @@ static struct phy_driver ksphy_driver[] = { .get_stats = kszphy_get_stats, .suspend = genphy_suspend, .resume = genphy_resume, - .driver = { .owner = THIS_MODULE,}, }, { .phy_id = PHY_ID_KSZ8061, .name = "Micrel KSZ8061", @@ -861,7 +853,6 @@ static struct phy_driver ksphy_driver[] = { .get_stats = kszphy_get_stats, .suspend = genphy_suspend, .resume = genphy_resume, - .driver = { .owner = THIS_MODULE,}, }, { .phy_id = PHY_ID_KSZ9021, .phy_id_mask = 0x000ffffe, @@ -881,7 +872,6 @@ static struct phy_driver ksphy_driver[] = { .resume = genphy_resume, .read_mmd_indirect = ksz9021_rd_mmd_phyreg, .write_mmd_indirect = ksz9021_wr_mmd_phyreg, - .driver = { .owner = THIS_MODULE, }, }, { .phy_id = PHY_ID_KSZ9031, .phy_id_mask = 0x00fffff0, @@ -899,7 +889,6 @@ static struct phy_driver ksphy_driver[] = { .get_stats = kszphy_get_stats, .suspend = genphy_suspend, .resume = genphy_resume, - .driver = { .owner = THIS_MODULE, }, }, { .phy_id = PHY_ID_KSZ8873MLL, .phy_id_mask = 0x00fffff0, @@ -914,7 +903,6 @@ static struct phy_driver ksphy_driver[] = { .get_stats = kszphy_get_stats, .suspend = genphy_suspend, .resume = genphy_resume, - .driver = { .owner = THIS_MODULE, }, }, { .phy_id = PHY_ID_KSZ886X, .phy_id_mask = 0x00fffff0, @@ -929,7 +917,6 @@ static struct phy_driver ksphy_driver[] = { .get_stats = kszphy_get_stats, .suspend = genphy_suspend, .resume = genphy_resume, - .driver = { .owner = THIS_MODULE, }, } }; module_phy_driver(ksphy_driver); diff --git a/drivers/net/phy/microchip.c b/drivers/net/phy/microchip.c index 5e34b49be0b3..15f820648f82 100644 --- a/drivers/net/phy/microchip.c +++ b/drivers/net/phy/microchip.c @@ -129,8 +129,6 @@ static struct phy_driver microchip_phy_driver[] = { .suspend = lan88xx_suspend, .resume = genphy_resume, .set_wol = lan88xx_set_wol, - - .driver = { .owner = THIS_MODULE, } } }; module_phy_driver(microchip_phy_driver); 
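As the removals above show, individual PHY drivers no longer carry a .driver = { .owner = THIS_MODULE } member; the owner is passed to phy_driver_register()/phy_drivers_register() and stored on the driver by the core. A minimal sketch of a driver after this change (hypothetical phy_id and name, and the init/exit pair spelled out roughly as module_phy_driver() now generates it):

#include <linux/module.h>
#include <linux/phy.h>

/* hypothetical single-entry driver array: note there is no .driver member */
static struct phy_driver example_phy_driver[] = { {
	.phy_id		= 0x00112233,		/* made-up ID, illustration only */
	.phy_id_mask	= 0xfffffff0,
	.name		= "Example PHY",
	.features	= PHY_BASIC_FEATURES,
	.config_aneg	= genphy_config_aneg,
	.read_status	= genphy_read_status,
} };

/* module_phy_driver(example_phy_driver) expands to roughly this: */
static int __init example_phy_init(void)
{
	/* the module owner is now supplied here and set by the PHY core */
	return phy_drivers_register(example_phy_driver,
				    ARRAY_SIZE(example_phy_driver),
				    THIS_MODULE);
}
module_init(example_phy_init);

static void __exit example_phy_exit(void)
{
	phy_drivers_unregister(example_phy_driver,
			       ARRAY_SIZE(example_phy_driver));
}
module_exit(example_phy_exit);

MODULE_LICENSE("GPL");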
diff --git a/drivers/net/phy/national.c b/drivers/net/phy/national.c index 0a7b9c7f09a2..2a1b490bc587 100644 --- a/drivers/net/phy/national.c +++ b/drivers/net/phy/national.c @@ -140,7 +140,6 @@ static struct phy_driver dp83865_driver[] = { { .read_status = genphy_read_status, .ack_interrupt = ns_ack_interrupt, .config_intr = ns_config_intr, - .driver = {.owner = THIS_MODULE,} } }; module_phy_driver(dp83865_driver); diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index eb0b0ed32662..a1b833cd4183 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1564,8 +1564,9 @@ static int phy_remove(struct device *dev) /** * phy_driver_register - register a phy_driver with the PHY layer * @new_driver: new phy_driver to register + * @owner: module owning this PHY */ -int phy_driver_register(struct phy_driver *new_driver) +int phy_driver_register(struct phy_driver *new_driver, struct module *owner) { int retval; @@ -1573,6 +1574,7 @@ int phy_driver_register(struct phy_driver *new_driver) new_driver->driver.bus = &mdio_bus_type; new_driver->driver.probe = phy_probe; new_driver->driver.remove = phy_remove; + new_driver->driver.owner = owner; retval = driver_register(&new_driver->driver); if (retval) { @@ -1588,12 +1590,13 @@ int phy_driver_register(struct phy_driver *new_driver) } EXPORT_SYMBOL(phy_driver_register); -int phy_drivers_register(struct phy_driver *new_driver, int n) +int phy_drivers_register(struct phy_driver *new_driver, int n, + struct module *owner) { int i, ret = 0; for (i = 0; i < n; i++) { - ret = phy_driver_register(new_driver + i); + ret = phy_driver_register(new_driver + i, owner); if (ret) { while (i-- > 0) phy_driver_unregister(new_driver + i); @@ -1634,7 +1637,6 @@ static struct phy_driver genphy_driver[] = { .read_status = genphy_read_status, .suspend = genphy_suspend, .resume = genphy_resume, - .driver = { .owner = THIS_MODULE, }, }, { .phy_id = 0xffffffff, .phy_id_mask = 0xffffffff, @@ -1646,7 +1648,6 @@ static struct phy_driver genphy_driver[] = { .read_status = gen10g_read_status, .suspend = gen10g_suspend, .resume = gen10g_resume, - .driver = {.owner = THIS_MODULE, }, } }; static int __init phy_init(void) @@ -1658,7 +1659,7 @@ static int __init phy_init(void) return rc; rc = phy_drivers_register(genphy_driver, - ARRAY_SIZE(genphy_driver)); + ARRAY_SIZE(genphy_driver), THIS_MODULE); if (rc) mdio_bus_exit(); diff --git a/drivers/net/phy/qsemi.c b/drivers/net/phy/qsemi.c index be4c6f7c3645..d470db89e8dd 100644 --- a/drivers/net/phy/qsemi.c +++ b/drivers/net/phy/qsemi.c @@ -122,7 +122,6 @@ static struct phy_driver qs6612_driver[] = { { .read_status = genphy_read_status, .ack_interrupt = qs6612_ack_interrupt, .config_intr = qs6612_config_intr, - .driver = { .owner = THIS_MODULE,}, } }; module_phy_driver(qs6612_driver); diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index 43ab691362d4..aadd6e9f54ad 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -124,7 +124,6 @@ static struct phy_driver realtek_drvs[] = { .flags = PHY_HAS_INTERRUPT, .config_aneg = &genphy_config_aneg, .read_status = &genphy_read_status, - .driver = { .owner = THIS_MODULE,}, }, { .phy_id = 0x001cc912, .name = "RTL8211B Gigabit Ethernet", @@ -135,7 +134,6 @@ static struct phy_driver realtek_drvs[] = { .read_status = &genphy_read_status, .ack_interrupt = &rtl821x_ack_interrupt, .config_intr = &rtl8211b_config_intr, - .driver = { .owner = THIS_MODULE,}, }, { .phy_id = 0x001cc914, .name = "RTL8211DN Gigabit Ethernet", @@ 
-148,7 +146,6 @@ static struct phy_driver realtek_drvs[] = { .config_intr = rtl8211e_config_intr, .suspend = genphy_suspend, .resume = genphy_resume, - .driver = { .owner = THIS_MODULE,}, }, { .phy_id = 0x001cc915, .name = "RTL8211E Gigabit Ethernet", @@ -161,7 +158,6 @@ static struct phy_driver realtek_drvs[] = { .config_intr = &rtl8211e_config_intr, .suspend = genphy_suspend, .resume = genphy_resume, - .driver = { .owner = THIS_MODULE,}, }, { .phy_id = 0x001cc916, .name = "RTL8211F Gigabit Ethernet", @@ -175,7 +171,6 @@ static struct phy_driver realtek_drvs[] = { .config_intr = &rtl8211f_config_intr, .suspend = genphy_suspend, .resume = genphy_resume, - .driver = { .owner = THIS_MODULE }, }, }; diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c index 18c981b95910..ddb06135c21d 100644 --- a/drivers/net/phy/smsc.c +++ b/drivers/net/phy/smsc.c @@ -171,8 +171,6 @@ static struct phy_driver smsc_phy_driver[] = { .suspend = genphy_suspend, .resume = genphy_resume, - - .driver = { .owner = THIS_MODULE, } }, { .phy_id = 0x0007c0b0, /* OUI=0x00800f, Model#=0x0b */ .phy_id_mask = 0xfffffff0, @@ -194,8 +192,6 @@ static struct phy_driver smsc_phy_driver[] = { .suspend = genphy_suspend, .resume = genphy_resume, - - .driver = { .owner = THIS_MODULE, } }, { .phy_id = 0x0007c0c0, /* OUI=0x00800f, Model#=0x0c */ .phy_id_mask = 0xfffffff0, @@ -217,8 +213,6 @@ static struct phy_driver smsc_phy_driver[] = { .suspend = genphy_suspend, .resume = genphy_resume, - - .driver = { .owner = THIS_MODULE, } }, { .phy_id = 0x0007c0d0, /* OUI=0x00800f, Model#=0x0d */ .phy_id_mask = 0xfffffff0, @@ -239,8 +233,6 @@ static struct phy_driver smsc_phy_driver[] = { .suspend = genphy_suspend, .resume = genphy_resume, - - .driver = { .owner = THIS_MODULE, } }, { .phy_id = 0x0007c0f0, /* OUI=0x00800f, Model#=0x0f */ .phy_id_mask = 0xfffffff0, @@ -262,8 +254,6 @@ static struct phy_driver smsc_phy_driver[] = { .suspend = genphy_suspend, .resume = genphy_resume, - - .driver = { .owner = THIS_MODULE, } } }; module_phy_driver(smsc_phy_driver); diff --git a/drivers/net/phy/ste10Xp.c b/drivers/net/phy/ste10Xp.c index 3fc199b773e6..d00cfb64529e 100644 --- a/drivers/net/phy/ste10Xp.c +++ b/drivers/net/phy/ste10Xp.c @@ -95,7 +95,6 @@ static struct phy_driver ste10xp_pdriver[] = { .config_intr = ste10Xp_config_intr, .suspend = genphy_suspend, .resume = genphy_resume, - .driver = {.owner = THIS_MODULE,} }, { .phy_id = STE100P_PHY_ID, .phy_id_mask = 0xffffffff, @@ -109,7 +108,6 @@ static struct phy_driver ste10xp_pdriver[] = { .config_intr = ste10Xp_config_intr, .suspend = genphy_suspend, .resume = genphy_resume, - .driver = {.owner = THIS_MODULE,} } }; module_phy_driver(ste10xp_pdriver); diff --git a/drivers/net/phy/teranetics.c b/drivers/net/phy/teranetics.c index 07463fcca212..fb2cef764e9a 100644 --- a/drivers/net/phy/teranetics.c +++ b/drivers/net/phy/teranetics.c @@ -108,7 +108,6 @@ static struct phy_driver teranetics_driver[] = { .config_aneg = teranetics_config_aneg, .read_status = teranetics_read_status, .match_phy_device = teranetics_match_phy_device, - .driver = { .owner = THIS_MODULE,}, }, }; diff --git a/drivers/net/phy/vitesse.c b/drivers/net/phy/vitesse.c index dd295dbaa074..2e37eb337d48 100644 --- a/drivers/net/phy/vitesse.c +++ b/drivers/net/phy/vitesse.c @@ -236,7 +236,6 @@ static struct phy_driver vsc82xx_driver[] = { .read_status = &genphy_read_status, .ack_interrupt = &vsc824x_ack_interrupt, .config_intr = &vsc82xx_config_intr, - .driver = { .owner = THIS_MODULE,}, }, { .phy_id = PHY_ID_VSC8244, .name = "Vitesse 
VSC8244", @@ -248,7 +247,6 @@ static struct phy_driver vsc82xx_driver[] = { .read_status = &genphy_read_status, .ack_interrupt = &vsc824x_ack_interrupt, .config_intr = &vsc82xx_config_intr, - .driver = { .owner = THIS_MODULE,}, }, { .phy_id = PHY_ID_VSC8514, .name = "Vitesse VSC8514", @@ -260,7 +258,6 @@ static struct phy_driver vsc82xx_driver[] = { .read_status = &genphy_read_status, .ack_interrupt = &vsc824x_ack_interrupt, .config_intr = &vsc82xx_config_intr, - .driver = { .owner = THIS_MODULE,}, }, { .phy_id = PHY_ID_VSC8574, .name = "Vitesse VSC8574", @@ -272,7 +269,6 @@ static struct phy_driver vsc82xx_driver[] = { .read_status = &genphy_read_status, .ack_interrupt = &vsc824x_ack_interrupt, .config_intr = &vsc82xx_config_intr, - .driver = { .owner = THIS_MODULE,}, }, { .phy_id = PHY_ID_VSC8601, .name = "Vitesse VSC8601", @@ -284,7 +280,6 @@ static struct phy_driver vsc82xx_driver[] = { .read_status = &genphy_read_status, .ack_interrupt = &vsc824x_ack_interrupt, .config_intr = &vsc82xx_config_intr, - .driver = { .owner = THIS_MODULE,}, }, { .phy_id = PHY_ID_VSC8662, .name = "Vitesse VSC8662", @@ -296,7 +291,6 @@ static struct phy_driver vsc82xx_driver[] = { .read_status = &genphy_read_status, .ack_interrupt = &vsc824x_ack_interrupt, .config_intr = &vsc82xx_config_intr, - .driver = { .owner = THIS_MODULE,}, }, { /* Vitesse 8221 */ .phy_id = PHY_ID_VSC8221, @@ -309,7 +303,6 @@ static struct phy_driver vsc82xx_driver[] = { .read_status = &genphy_read_status, .ack_interrupt = &vsc824x_ack_interrupt, .config_intr = &vsc82xx_config_intr, - .driver = { .owner = THIS_MODULE,}, }, { /* Vitesse 8211 */ .phy_id = PHY_ID_VSC8211, @@ -322,7 +315,6 @@ static struct phy_driver vsc82xx_driver[] = { .read_status = &genphy_read_status, .ack_interrupt = &vsc824x_ack_interrupt, .config_intr = &vsc82xx_config_intr, - .driver = { .owner = THIS_MODULE,}, } }; module_phy_driver(vsc82xx_driver); diff --git a/include/linux/phy.h b/include/linux/phy.h index 2d7beef20825..49e4418822b3 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -797,8 +797,9 @@ int genphy_resume(struct phy_device *phydev); int genphy_soft_reset(struct phy_device *phydev); void phy_driver_unregister(struct phy_driver *drv); void phy_drivers_unregister(struct phy_driver *drv, int n); -int phy_driver_register(struct phy_driver *new_driver); -int phy_drivers_register(struct phy_driver *new_driver, int n); +int phy_driver_register(struct phy_driver *new_driver, struct module *owner); +int phy_drivers_register(struct phy_driver *new_driver, int n, + struct module *owner); void phy_state_machine(struct work_struct *work); void phy_change(struct work_struct *work); void phy_mac_interrupt(struct phy_device *phydev, int new_link); @@ -843,7 +844,7 @@ extern struct bus_type mdio_bus_type; #define phy_module_driver(__phy_drivers, __count) \ static int __init phy_module_init(void) \ { \ - return phy_drivers_register(__phy_drivers, __count); \ + return phy_drivers_register(__phy_drivers, __count, THIS_MODULE); \ } \ module_init(phy_module_init); \ static void __exit phy_module_exit(void) \ -- cgit v1.2.3-71-gd317 From e76a4957c5ee5cf69cea89d450c29c536e77ce9e Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 6 Jan 2016 20:11:23 +0100 Subject: phy: Move phy specific bus match into phy_device Matching a driver to a device has both generic parts, and parts which are specific to PHY devices. Move the PHY specific parts into phy_device. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/phy/mdio_bus.c | 38 ++++++++++++-------------------------- drivers/net/phy/phy_device.c | 28 ++++++++++++++++++++++++++++ include/linux/mdio.h | 1 + 3 files changed, 41 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 65ff8199bd09..bd523b2c6331 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -523,41 +523,27 @@ int mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val) EXPORT_SYMBOL(mdiobus_write); /** - * mdio_bus_match - determine if given PHY driver supports the given PHY device - * @dev: target PHY device - * @drv: given PHY driver + * mdio_bus_match - determine if given MDIO driver supports the given + * MDIO device + * @dev: target MDIO device + * @drv: given MDIO driver * - * Description: Given a PHY device, and a PHY driver, return 1 if - * the driver supports the device. Otherwise, return 0. + * Description: Given a MDIO device, and a MDIO driver, return 1 if + * the driver supports the device. Otherwise, return 0. This may + * require calling the devices own match function, since different classes + * of MDIO devices have different match criteria. */ static int mdio_bus_match(struct device *dev, struct device_driver *drv) { - struct phy_device *phydev = to_phy_device(dev); - struct phy_driver *phydrv = to_phy_driver(drv); - const int num_ids = ARRAY_SIZE(phydev->c45_ids.device_ids); - int i; + struct mdio_device *mdio = to_mdio_device(dev); if (of_driver_match_device(dev, drv)) return 1; - if (phydrv->match_phy_device) - return phydrv->match_phy_device(phydev); + if (mdio->bus_match) + return mdio->bus_match(dev, drv); - if (phydev->is_c45) { - for (i = 1; i < num_ids; i++) { - if (!(phydev->c45_ids.devices_in_package & (1 << i))) - continue; - - if ((phydrv->phy_id & phydrv->phy_id_mask) == - (phydev->c45_ids.device_ids[i] & - phydrv->phy_id_mask)) - return 1; - } - return 0; - } else { - return (phydrv->phy_id & phydrv->phy_id_mask) == - (phydev->phy_id & phydrv->phy_id_mask); - } + return 0; } #ifdef CONFIG_PM diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index a1b833cd4183..78628428ee28 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -257,6 +257,33 @@ static int phy_scan_fixups(struct phy_device *phydev) return 0; } +static int phy_bus_match(struct device *dev, struct device_driver *drv) +{ + struct phy_device *phydev = to_phy_device(dev); + struct phy_driver *phydrv = to_phy_driver(drv); + const int num_ids = ARRAY_SIZE(phydev->c45_ids.device_ids); + int i; + + if (phydrv->match_phy_device) + return phydrv->match_phy_device(phydev); + + if (phydev->is_c45) { + for (i = 1; i < num_ids; i++) { + if (!(phydev->c45_ids.devices_in_package & (1 << i))) + continue; + + if ((phydrv->phy_id & phydrv->phy_id_mask) == + (phydev->c45_ids.device_ids[i] & + phydrv->phy_id_mask)) + return 1; + } + return 0; + } else { + return (phydrv->phy_id & phydrv->phy_id_mask) == + (phydev->phy_id & phydrv->phy_id_mask); + } +} + struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id, bool is_c45, struct phy_c45_device_ids *c45_ids) @@ -275,6 +302,7 @@ struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id, mdiodev->dev.bus = &mdio_bus_type; mdiodev->bus = bus; mdiodev->pm_ops = MDIO_BUS_PHY_PM_OPS; + mdiodev->bus_match = phy_bus_match; mdiodev->addr = addr; mdiodev->flags = MDIO_DEVICE_FLAG_PHY; diff --git a/include/linux/mdio.h b/include/linux/mdio.h 
index 9f844d372ed5..0690359e55a5 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -17,6 +17,7 @@ struct mdio_device { struct device dev; const struct dev_pm_ops *pm_ops; struct mii_bus *bus; + int (*bus_match)(struct device *dev, struct device_driver *drv); /* Bus address of the MDIO device (0-31) */ int addr; int flags; -- cgit v1.2.3-71-gd317 From a9049e0c513c4521dbfaa302af8ed08b3366b41f Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 6 Jan 2016 20:11:26 +0100 Subject: mdio: Add support for mdio drivers. Not all devices on an MDIO bus are PHYs. Meaning not all MDIO drivers are PHY drivers. Add support for generic MDIO drivers. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/Makefile | 2 +- drivers/net/phy/mdio_bus.c | 13 +++- drivers/net/phy/mdio_device.c | 169 ++++++++++++++++++++++++++++++++++++++++++ drivers/net/phy/phy_device.c | 27 ++++--- drivers/of/of_mdio.c | 33 +++++++++ include/linux/mdio.h | 50 +++++++++++++ include/linux/phy.h | 9 ++- 7 files changed, 286 insertions(+), 17 deletions(-) create mode 100644 drivers/net/phy/mdio_device.c (limited to 'include/linux') diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index f31a4e25cf15..680e88f9915a 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -1,6 +1,6 @@ # Makefile for Linux PHY drivers -libphy-objs := phy.o phy_device.o mdio_bus.o +libphy-objs := phy.o phy_device.o mdio_bus.o mdio_device.o obj-$(CONFIG_PHYLIB) += libphy.o obj-$(CONFIG_AQUANTIA_PHY) += aquantia.o diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 6f9ca51446db..0573cfd2116f 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -357,16 +357,25 @@ EXPORT_SYMBOL(__mdiobus_register); void mdiobus_unregister(struct mii_bus *bus) { + struct mdio_device *mdiodev; + struct phy_device *phydev; int i; BUG_ON(bus->state != MDIOBUS_REGISTERED); bus->state = MDIOBUS_UNREGISTERED; for (i = 0; i < PHY_MAX_ADDR; i++) { - struct phy_device *phydev = mdiobus_get_phy(bus, i); - if (phydev) { + mdiodev = bus->mdio_map[i]; + if (!mdiodev) + continue; + + if (!(mdiodev->flags & MDIO_DEVICE_FLAG_PHY)) { + phydev = container_of(mdiodev, struct phy_device, mdio); phy_device_remove(phydev); phy_device_free(phydev); + } else { + mdio_device_remove(mdiodev); + mdio_device_free(mdiodev); } } device_del(&bus->dev); diff --git a/drivers/net/phy/mdio_device.c b/drivers/net/phy/mdio_device.c new file mode 100644 index 000000000000..64e3777c85b4 --- /dev/null +++ b/drivers/net/phy/mdio_device.c @@ -0,0 +1,169 @@ +/* Framework for MDIO devices, other than PHYs. + * + * Copyright (c) 2016 Andrew Lunn + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ * + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +void mdio_device_free(struct mdio_device *mdiodev) +{ + put_device(&mdiodev->dev); +} +EXPORT_SYMBOL(mdio_device_free); + +static void mdio_device_release(struct device *dev) +{ + kfree(to_mdio_device(dev)); +} + +struct mdio_device *mdio_device_create(struct mii_bus *bus, int addr) +{ + struct mdio_device *mdiodev; + + /* We allocate the device, and initialize the default values */ + mdiodev = kzalloc(sizeof(*mdiodev), GFP_KERNEL); + if (!mdiodev) + return ERR_PTR(-ENOMEM); + + mdiodev->dev.release = mdio_device_release; + mdiodev->dev.parent = &bus->dev; + mdiodev->dev.bus = &mdio_bus_type; + mdiodev->bus = bus; + mdiodev->addr = addr; + + dev_set_name(&mdiodev->dev, PHY_ID_FMT, bus->id, addr); + + device_initialize(&mdiodev->dev); + + return mdiodev; +} +EXPORT_SYMBOL(mdio_device_create); + +/** + * mdio_device_register - Register the mdio device on the MDIO bus + * @mdiodev: mdio_device structure to be added to the MDIO bus + */ +int mdio_device_register(struct mdio_device *mdiodev) +{ + int err; + + dev_info(&mdiodev->dev, "mdio_device_register\n"); + + err = mdiobus_register_device(mdiodev); + if (err) + return err; + + err = device_add(&mdiodev->dev); + if (err) { + pr_err("MDIO %d failed to add\n", mdiodev->addr); + goto out; + } + + return 0; + + out: + mdiobus_unregister_device(mdiodev); + return err; +} +EXPORT_SYMBOL(mdio_device_register); + +/** + * mdio_device_remove - Remove a previously registered mdio device from the + * MDIO bus + * @mdiodev: mdio_device structure to remove + * + * This doesn't free the mdio_device itself, it merely reverses the effects + * of mdio_device_register(). Use mdio_device_free() to free the device + * after calling this function. + */ +void mdio_device_remove(struct mdio_device *mdiodev) +{ + device_del(&mdiodev->dev); + mdiobus_unregister_device(mdiodev); +} +EXPORT_SYMBOL(mdio_device_remove); + +/** + * mdio_probe - probe an MDIO device + * @dev: device to probe + * + * Description: Take care of setting up the mdio_device structure + * and calling the driver to probe the device. 
+ */ +static int mdio_probe(struct device *dev) +{ + struct mdio_device *mdiodev = to_mdio_device(dev); + struct device_driver *drv = mdiodev->dev.driver; + struct mdio_driver *mdiodrv = to_mdio_driver(drv); + int err = 0; + + if (mdiodrv->probe) + err = mdiodrv->probe(mdiodev); + + return err; +} + +static int mdio_remove(struct device *dev) +{ + struct mdio_device *mdiodev = to_mdio_device(dev); + struct device_driver *drv = mdiodev->dev.driver; + struct mdio_driver *mdiodrv = to_mdio_driver(drv); + + if (mdiodrv->remove) + mdiodrv->remove(mdiodev); + + return 0; +} + +/** + * mdio_driver_register - register an mdio_driver with the MDIO layer + * @new_driver: new mdio_driver to register + */ +int mdio_driver_register(struct mdio_driver *drv) +{ + struct mdio_driver_common *mdiodrv = &drv->mdiodrv; + int retval; + + pr_info("mdio_driver_register: %s\n", mdiodrv->driver.name); + + mdiodrv->driver.bus = &mdio_bus_type; + mdiodrv->driver.probe = mdio_probe; + mdiodrv->driver.remove = mdio_remove; + + retval = driver_register(&mdiodrv->driver); + if (retval) { + pr_err("%s: Error %d in registering driver\n", + mdiodrv->driver.name, retval); + + return retval; + } + + return 0; +} +EXPORT_SYMBOL(mdio_driver_register); + +void mdio_driver_unregister(struct mdio_driver *drv) +{ + struct mdio_driver_common *mdiodrv = &drv->mdiodrv; + + driver_unregister(&mdiodrv->driver); +} +EXPORT_SYMBOL(mdio_driver_unregister); diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 78628428ee28..af6cb6556cf9 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -264,6 +264,9 @@ static int phy_bus_match(struct device *dev, struct device_driver *drv) const int num_ids = ARRAY_SIZE(phydev->c45_ids.device_ids); int i; + if (!(phydrv->mdiodrv.flags & MDIO_DEVICE_IS_PHY)) + return 0; + if (phydrv->match_phy_device) return phydrv->match_phy_device(phydev); @@ -851,9 +854,11 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, */ if (!d->driver) { if (phydev->is_c45) - d->driver = &genphy_driver[GENPHY_DRV_10G].driver; + d->driver = + &genphy_driver[GENPHY_DRV_10G].mdiodrv.driver; else - d->driver = &genphy_driver[GENPHY_DRV_1G].driver; + d->driver = + &genphy_driver[GENPHY_DRV_1G].mdiodrv.driver; err = d->driver->probe(d); if (err >= 0) @@ -954,7 +959,8 @@ void phy_detach(struct phy_device *phydev) * real driver could be loaded */ for (i = 0; i < ARRAY_SIZE(genphy_driver); i++) { - if (phydev->mdio.dev.driver == &genphy_driver[i].driver) { + if (phydev->mdio.dev.driver == + &genphy_driver[i].mdiodrv.driver) { device_release_driver(&phydev->mdio.dev); break; } @@ -1598,13 +1604,14 @@ int phy_driver_register(struct phy_driver *new_driver, struct module *owner) { int retval; - new_driver->driver.name = new_driver->name; - new_driver->driver.bus = &mdio_bus_type; - new_driver->driver.probe = phy_probe; - new_driver->driver.remove = phy_remove; - new_driver->driver.owner = owner; + new_driver->mdiodrv.flags |= MDIO_DEVICE_IS_PHY; + new_driver->mdiodrv.driver.name = new_driver->name; + new_driver->mdiodrv.driver.bus = &mdio_bus_type; + new_driver->mdiodrv.driver.probe = phy_probe; + new_driver->mdiodrv.driver.remove = phy_remove; + new_driver->mdiodrv.driver.owner = owner; - retval = driver_register(&new_driver->driver); + retval = driver_register(&new_driver->mdiodrv.driver); if (retval) { pr_err("%s: Error %d in registering driver\n", new_driver->name, retval); @@ -1637,7 +1644,7 @@ EXPORT_SYMBOL(phy_drivers_register); void 
phy_driver_unregister(struct phy_driver *drv) { - driver_unregister(&drv->driver); + driver_unregister(&drv->mdiodrv.driver); } EXPORT_SYMBOL(phy_driver_unregister); diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c index 6febe2df76f9..c0a8f84d92db 100644 --- a/drivers/of/of_mdio.c +++ b/drivers/of/of_mdio.c @@ -92,6 +92,37 @@ static int of_mdiobus_register_phy(struct mii_bus *mdio, struct device_node *chi return 0; } +static int of_mdiobus_register_device(struct mii_bus *mdio, + struct device_node *child, + u32 addr) +{ + struct mdio_device *mdiodev; + int rc; + + mdiodev = mdio_device_create(mdio, addr); + if (!mdiodev || IS_ERR(mdiodev)) + return 1; + + /* Associate the OF node with the device structure so it + * can be looked up later. + */ + of_node_get(child); + mdiodev->dev.of_node = child; + + /* All data is now stored in the mdiodev struct; register it. */ + rc = mdio_device_register(mdiodev); + if (rc) { + mdio_device_free(mdiodev); + of_node_put(child); + return 1; + } + + dev_dbg(&mdio->dev, "registered mdio device %s at address %i\n", + child->name, addr); + + return 0; +} + int of_mdio_parse_addr(struct device *dev, const struct device_node *np) { u32 addr; @@ -179,6 +210,8 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) if (of_mdiobus_child_is_phy(child)) of_mdiobus_register_phy(mdio, child, addr); + else + of_mdiobus_register_device(mdio, child, addr); } if (!scanphys) diff --git a/include/linux/mdio.h b/include/linux/mdio.h index 0690359e55a5..75f7fad0af4f 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -15,6 +15,7 @@ struct mii_bus; struct mdio_device { struct device dev; + const struct dev_pm_ops *pm_ops; struct mii_bus *bus; int (*bus_match)(struct device *dev, struct device_driver *drv); @@ -24,7 +25,37 @@ struct mdio_device { }; #define to_mdio_device(d) container_of(d, struct mdio_device, dev) +/* struct mdio_driver_common: Common to all MDIO drivers */ +struct mdio_driver_common { + struct device_driver driver; + int flags; +}; #define MDIO_DEVICE_FLAG_PHY 1 +#define to_mdio_common_driver(d) \ + container_of(d, struct mdio_driver_common, driver) + +/* struct mdio_driver: Generic MDIO driver */ +struct mdio_driver { + struct mdio_driver_common mdiodrv; + + /* + * Called during discovery. Used to set + * up device-specific structures, if any + */ + int (*probe)(struct mdio_device *mdiodev); + + /* Clears up any memory if needed */ + void (*remove)(struct mdio_device *mdiodev); +}; +#define to_mdio_driver(d) \ + container_of(to_mdio_common_driver(d), struct mdio_driver, mdiodrv) + +void mdio_device_free(struct mdio_device *mdiodev); +struct mdio_device *mdio_device_create(struct mii_bus *bus, int addr); +int mdio_device_register(struct mdio_device *mdiodev); +void mdio_device_remove(struct mdio_device *mdiodev); +int mdio_driver_register(struct mdio_driver *drv); +void mdio_driver_unregister(struct mdio_driver *drv); static inline bool mdio_phy_id_is_c45(int phy_id) { @@ -197,4 +228,23 @@ int mdiobus_unregister_device(struct mdio_device *mdiodev); bool mdiobus_is_registered_device(struct mii_bus *bus, int addr); struct phy_device *mdiobus_get_phy(struct mii_bus *bus, int addr); +/** + * module_mdio_driver() - Helper macro for registering mdio drivers + * + * Helper macro for MDIO drivers which do not do anything special in module + * init/exit. Each module may only use this macro once, and calling it + * replaces module_init() and module_exit(). 
+ */ +#define mdio_module_driver(_mdio_driver) \ +static int __init mdio_module_init(void) \ +{ \ + return mdio_driver_register(&_mdio_driver); \ +} \ +module_init(mdio_module_init); \ +static void __exit mdio_module_exit(void) \ +{ \ + mdio_driver_unregister(&_mdio_driver); \ +} \ +module_exit(mdio_module_exit) + #endif /* __LINUX_MDIO_H__ */ diff --git a/include/linux/phy.h b/include/linux/phy.h index 49e4418822b3..d6f3641e7933 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -60,6 +60,7 @@ #define PHY_HAS_INTERRUPT 0x00000001 #define PHY_HAS_MAGICANEG 0x00000002 #define PHY_IS_INTERNAL 0x00000004 +#define MDIO_DEVICE_IS_PHY 0x80000000 /* Interface Mode definitions */ typedef enum { @@ -432,6 +433,7 @@ struct phy_device { /* struct phy_driver: Driver structure for a particular PHY type * + * driver_data: static driver data * phy_id: The result of reading the UID registers of this PHY * type, and ANDing them with the phy_id_mask. This driver * only works for PHYs with IDs which match this field @@ -441,7 +443,6 @@ struct phy_device { * by this PHY * flags: A bitfield defining certain other features this PHY * supports (like interrupts) - * driver_data: static driver data * * The drivers must implement config_aneg and read_status. All * other functions are optional. Note that none of these @@ -452,6 +453,7 @@ struct phy_device { * supported in the driver). */ struct phy_driver { + struct mdio_driver_common mdiodrv; u32 phy_id; char *name; unsigned int phy_id_mask; @@ -587,10 +589,9 @@ struct phy_driver { void (*get_strings)(struct phy_device *dev, u8 *data); void (*get_stats)(struct phy_device *dev, struct ethtool_stats *stats, u64 *data); - - struct device_driver driver; }; -#define to_phy_driver(d) container_of(d, struct phy_driver, driver) +#define to_phy_driver(d) container_of(to_mdio_common_driver(d), \ + struct phy_driver, mdiodrv) #define PHY_ANY_ID "MATCH ANY PHY" #define PHY_ANY_UID 0xffffffff -- cgit v1.2.3-71-gd317 From 711fdba37a3dd7ee487e28767f9f0e67144cbf80 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 6 Jan 2016 20:11:27 +0100 Subject: mdio: Abstract device_remove() and device_free() Make device_free and device_remove operations in the mdio device structure, so the core code does not need to differentiate between phy devices and generic mdio devices. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/phy/mdio_bus.c | 23 +++++++++-------------- drivers/net/phy/mdio_device.c | 2 ++ drivers/net/phy/phy_device.c | 18 ++++++++++++++++++ include/linux/mdio.h | 4 ++++ 4 files changed, 33 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 0573cfd2116f..0be7b3d65f0f 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -299,6 +299,7 @@ static inline void of_mdiobus_link_mdiodev(struct mii_bus *mdio, */ int __mdiobus_register(struct mii_bus *bus, struct module *owner) { + struct mdio_device *mdiodev; int i, err; if (NULL == bus || NULL == bus->name || @@ -344,11 +345,12 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner) error: while (--i >= 0) { - struct phy_device *phydev = mdiobus_get_phy(bus, i); - if (phydev) { - phy_device_remove(phydev); - phy_device_free(phydev); - } + mdiodev = bus->mdio_map[i]; + if (!mdiodev) + continue; + + mdiodev->device_remove(mdiodev); + mdiodev->device_free(mdiodev); } device_del(&bus->dev); return err; @@ -358,7 +360,6 @@ EXPORT_SYMBOL(__mdiobus_register); void mdiobus_unregister(struct mii_bus *bus) { struct mdio_device *mdiodev; - struct phy_device *phydev; int i; BUG_ON(bus->state != MDIOBUS_REGISTERED); @@ -369,14 +370,8 @@ void mdiobus_unregister(struct mii_bus *bus) if (!mdiodev) continue; - if (!(mdiodev->flags & MDIO_DEVICE_FLAG_PHY)) { - phydev = container_of(mdiodev, struct phy_device, mdio); - phy_device_remove(phydev); - phy_device_free(phydev); - } else { - mdio_device_remove(mdiodev); - mdio_device_free(mdiodev); - } + mdiodev->device_remove(mdiodev); + mdiodev->device_free(mdiodev); } device_del(&bus->dev); } diff --git a/drivers/net/phy/mdio_device.c b/drivers/net/phy/mdio_device.c index 64e3777c85b4..9c88e6749b9a 100644 --- a/drivers/net/phy/mdio_device.c +++ b/drivers/net/phy/mdio_device.c @@ -46,6 +46,8 @@ struct mdio_device *mdio_device_create(struct mii_bus *bus, int addr) mdiodev->dev.release = mdio_device_release; mdiodev->dev.parent = &bus->dev; mdiodev->dev.bus = &mdio_bus_type; + mdiodev->device_free = mdio_device_free; + mdiodev->device_remove = mdio_device_remove; mdiodev->bus = bus; mdiodev->addr = addr; diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index af6cb6556cf9..319300627c0b 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -47,11 +47,27 @@ void phy_device_free(struct phy_device *phydev) } EXPORT_SYMBOL(phy_device_free); +static void phy_mdio_device_free(struct mdio_device *mdiodev) +{ + struct phy_device *phydev; + + phydev = container_of(mdiodev, struct phy_device, mdio); + phy_device_free(phydev); +} + static void phy_device_release(struct device *dev) { kfree(to_phy_device(dev)); } +static void phy_mdio_device_remove(struct mdio_device *mdiodev) +{ + struct phy_device *phydev; + + phydev = container_of(mdiodev, struct phy_device, mdio); + phy_device_remove(phydev); +} + enum genphy_driver { GENPHY_DRV_1G, GENPHY_DRV_10G, @@ -308,6 +324,8 @@ struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id, mdiodev->bus_match = phy_bus_match; mdiodev->addr = addr; mdiodev->flags = MDIO_DEVICE_FLAG_PHY; + mdiodev->device_free = phy_mdio_device_free; + mdiodev->device_remove = phy_mdio_device_remove; dev->speed = 0; dev->duplex = -1; diff --git a/include/linux/mdio.h b/include/linux/mdio.h index 75f7fad0af4f..5bfd99d1a40a 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -18,7 +18,11 @@ struct 
mdio_device { const struct dev_pm_ops *pm_ops; struct mii_bus *bus; + int (*bus_match)(struct device *dev, struct device_driver *drv); + void (*device_free)(struct mdio_device *mdiodev); + void (*device_remove)(struct mdio_device *mdiodev); + /* Bus address of the MDIO device (0-31) */ int addr; int flags; -- cgit v1.2.3-71-gd317 From 01dd194c387af5b3c4c1f6459d30f596565e466c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 6 Jan 2016 22:32:16 +0100 Subject: bpf: cleanup bpf_prog_run_{save,clear}_cb helpers Move the details behind the cb[] access into a small helper to decouple and make them generic for bpf_prog_run_save_cb()/bpf_prog_run_clear_cb() that was introduced via commit ff936a04e5f2 ("bpf: fix cb access in socket filter programs"). Also add a comment to better clarify what is done in bpf_skb_cb(). Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 39 +++++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index f5b5891ed1ba..43aa1f8855c7 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -350,25 +350,43 @@ struct sk_filter { #define BPF_PROG_RUN(filter, ctx) (*filter->bpf_func)(ctx, filter->insnsi) +#define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN + +static inline u8 *bpf_skb_cb(struct sk_buff *skb) +{ + /* eBPF programs may read/write skb->cb[] area to transfer meta + * data between tail calls. Since this also needs to work with + * tc, that scratch memory is mapped to qdisc_skb_cb's data area. + * + * In some socket filter cases, the cb unfortunately needs to be + * saved/restored so that protocol specific skb->cb[] data won't + * be lost. In any case, due to unpriviledged eBPF programs + * attached to sockets, we need to clear the bpf_skb_cb() area + * to not leak previous contents to user space. 
+ */ + BUILD_BUG_ON(FIELD_SIZEOF(struct __sk_buff, cb) != BPF_SKB_CB_LEN); + BUILD_BUG_ON(FIELD_SIZEOF(struct __sk_buff, cb) != + FIELD_SIZEOF(struct qdisc_skb_cb, data)); + + return qdisc_skb_cb(skb)->data; +} + static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog, struct sk_buff *skb) { - u8 *cb_data = qdisc_skb_cb(skb)->data; - u8 saved_cb[QDISC_CB_PRIV_LEN]; + u8 *cb_data = bpf_skb_cb(skb); + u8 cb_saved[BPF_SKB_CB_LEN]; u32 res; - BUILD_BUG_ON(FIELD_SIZEOF(struct __sk_buff, cb) != - QDISC_CB_PRIV_LEN); - if (unlikely(prog->cb_access)) { - memcpy(saved_cb, cb_data, sizeof(saved_cb)); - memset(cb_data, 0, sizeof(saved_cb)); + memcpy(cb_saved, cb_data, sizeof(cb_saved)); + memset(cb_data, 0, sizeof(cb_saved)); } res = BPF_PROG_RUN(prog, skb); if (unlikely(prog->cb_access)) - memcpy(cb_data, saved_cb, sizeof(saved_cb)); + memcpy(cb_data, cb_saved, sizeof(cb_saved)); return res; } @@ -376,10 +394,11 @@ static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog, static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog, struct sk_buff *skb) { - u8 *cb_data = qdisc_skb_cb(skb)->data; + u8 *cb_data = bpf_skb_cb(skb); if (unlikely(prog->cb_access)) - memset(cb_data, 0, QDISC_CB_PRIV_LEN); + memset(cb_data, 0, BPF_SKB_CB_LEN); + return BPF_PROG_RUN(prog, skb); } -- cgit v1.2.3-71-gd317 From f8ffad69c9f8b8dfb0b633425d4ef4d2493ba61a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 7 Jan 2016 15:50:23 +0100 Subject: bpf: add skb_postpush_rcsum and fix dev_forward_skb occasions Add a small helper skb_postpush_rcsum() and fix up redirect locations that need CHECKSUM_COMPLETE fixups on ingress. dev_forward_skb() expects a proper csum that covers also Ethernet header, f.e. since 2c26d34bbcc0 ("net/core: Handle csum for CHECKSUM_COMPLETE VXLAN forwarding"), we also do skb_postpull_rcsum() after pulling Ethernet header off via eth_type_trans(). When using eBPF in a netns setup f.e. with vxlan in collect metadata mode, I can trigger the following csum issue with an IPv6 setup: [ 505.144065] dummy1: hw csum failure [...] [ 505.144108] Call Trace: [ 505.144112] [] dump_stack+0x44/0x5c [ 505.144134] [] netdev_rx_csum_fault+0x3a/0x40 [ 505.144142] [] __skb_checksum_complete+0xcf/0xe0 [ 505.144149] [] nf_ip6_checksum+0xb2/0x120 [ 505.144161] [] icmpv6_error+0x17e/0x328 [nf_conntrack_ipv6] [ 505.144170] [] ? ip6t_do_table+0x2fa/0x645 [ip6_tables] [ 505.144177] [] ? ipv6_get_l4proto+0x65/0xd0 [nf_conntrack_ipv6] [ 505.144189] [] nf_conntrack_in+0xc2/0x5a0 [nf_conntrack] [ 505.144196] [] ipv6_conntrack_in+0x1c/0x20 [nf_conntrack_ipv6] [ 505.144204] [] nf_iterate+0x5d/0x70 [ 505.144210] [] nf_hook_slow+0x66/0xc0 [ 505.144218] [] ipv6_rcv+0x3f2/0x4f0 [ 505.144225] [] ? ip6_make_skb+0x1b0/0x1b0 [ 505.144232] [] __netif_receive_skb_core+0x36b/0x9a0 [ 505.144239] [] ? __netif_receive_skb+0x18/0x60 [ 505.144245] [] __netif_receive_skb+0x18/0x60 [ 505.144252] [] process_backlog+0x9f/0x140 [ 505.144259] [] net_rx_action+0x145/0x320 [...] What happens is that on ingress, we push Ethernet header back in, either from cls_bpf or right before skb_do_redirect(), but without updating csum. The "hw csum failure" can be fixed by using the new skb_postpush_rcsum() helper for the dev_forward_skb() case to correct the csum diff again. Thanks to Hannes Frederic Sowa for the csum_partial() idea! 
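To make the symmetry explicit: skb_postpull_rcsum() keeps a CHECKSUM_COMPLETE value valid after bytes are pulled off the front of an skb, and the new helper is its inverse for bytes pushed back in. A minimal sketch of the caller pattern follows; the function name is hypothetical and it assumes the skb_postpush_rcsum() helper added by the diff below, so treat it as an illustration rather than a call site from this patch.

/* Illustrative sketch only, not part of the patch. */
#include <linux/skbuff.h>
#include <linux/netdevice.h>

static int example_forward_at_ingress(struct sk_buff *skb, struct net_device *to)
{
	/* On receive, eth_type_trans() pulled the Ethernet header and the
	 * caller did skb_postpull_rcsum() over it, so skb->csum no longer
	 * covers those bytes.
	 */
	skb_push(skb, skb->mac_len);

	/* Fold the re-pushed header back into skb->csum so that a
	 * CHECKSUM_COMPLETE value covering the full frame reaches
	 * dev_forward_skb().
	 */
	skb_postpush_rcsum(skb, skb_mac_header(skb), skb->mac_len);

	return dev_forward_skb(to, skb);
}
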
Fixes: 3896d655f4d4 ("bpf: introduce bpf_clone_redirect() helper") Fixes: 27b29f63058d ("bpf: add bpf_redirect() helper") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/skbuff.h | 17 +++++++++++++++++ net/core/filter.c | 17 +++++++++++++---- 2 files changed, 30 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6b6bd42d6134..07f9ccd28654 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2805,6 +2805,23 @@ static inline void skb_postpull_rcsum(struct sk_buff *skb, unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len); +static inline void skb_postpush_rcsum(struct sk_buff *skb, + const void *start, unsigned int len) +{ + /* For performing the reverse operation to skb_postpull_rcsum(), + * we can instead of ... + * + * skb->csum = csum_add(skb->csum, csum_partial(start, len, 0)); + * + * ... just use this equivalent version here to save a few + * instructions. Feeding csum of 0 in csum_partial() and later + * on adding skb->csum is equivalent to feed skb->csum in the + * first place. + */ + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->csum = csum_partial(start, len, skb->csum); +} + /** * pskb_trim_rcsum - trim received skb and update checksum * @skb: buffer to trim diff --git a/net/core/filter.c b/net/core/filter.c index 35e6fed28709..0db92b5e2cbf 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1368,8 +1368,9 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) /* skb_store_bits cannot return -EFAULT here */ skb_store_bits(skb, offset, ptr, len); - if (BPF_RECOMPUTE_CSUM(flags) && skb->ip_summed == CHECKSUM_COMPLETE) - skb->csum = csum_add(skb->csum, csum_partial(ptr, len, 0)); + if (BPF_RECOMPUTE_CSUM(flags)) + skb_postpush_rcsum(skb, ptr, len); + return 0; } @@ -1525,8 +1526,12 @@ static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5) if (unlikely(!skb2)) return -ENOMEM; - if (BPF_IS_REDIRECT_INGRESS(flags)) + if (BPF_IS_REDIRECT_INGRESS(flags)) { + if (skb_at_tc_ingress(skb2)) + skb_postpush_rcsum(skb2, skb_mac_header(skb2), + skb2->mac_len); return dev_forward_skb(dev, skb2); + } skb2->dev = dev; skb_sender_cpu_clear(skb2); @@ -1569,8 +1574,12 @@ int skb_do_redirect(struct sk_buff *skb) return -EINVAL; } - if (BPF_IS_REDIRECT_INGRESS(ri->flags)) + if (BPF_IS_REDIRECT_INGRESS(ri->flags)) { + if (skb_at_tc_ingress(skb)) + skb_postpush_rcsum(skb, skb_mac_header(skb), + skb->mac_len); return dev_forward_skb(dev, skb); + } skb->dev = dev; skb_sender_cpu_clear(skb); -- cgit v1.2.3-71-gd317 From 1f211a1b929c804100e138c5d3d656992cfd5622 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 7 Jan 2016 22:29:47 +0100 Subject: net, sched: add clsact qdisc This work adds a generalization of the ingress qdisc as a qdisc holding only classifiers. The clsact qdisc works on ingress, but also on egress. In both cases, it's execution happens without taking the qdisc lock, and the main difference for the egress part compared to prior version of [1] is that this can be applied with _any_ underlying real egress qdisc (also classless ones). Besides solving the use-case of [1], that is, allowing for more programmability on assigning skb->priority for the mqprio case that is supported by most popular 10G+ NICs, it also opens up a lot more flexibility for other tc applications. 
The main work on classification can already be done at clsact egress time if the use-case allows and state stored for later retrieval f.e. again in skb->priority with major/minors (which is checked by most classful qdiscs before consulting tc_classify()) and/or in other skb fields like skb->tc_index for some light-weight post-processing to get to the eventual classid in case of a classful qdisc. Another use case is that the clsact egress part allows to have a central egress counterpart to the ingress classifiers, so that classifiers can easily share state (e.g. in cls_bpf via eBPF maps) for ingress and egress. Currently, default setups like mq + pfifo_fast would require for this to use, for example, prio qdisc instead (to get a tc_classify() run) and to duplicate the egress classifier for each queue. With clsact, it allows for leaving the setup as is, it can additionally assign skb->priority to put the skb in one of pfifo_fast's bands and it can share state with maps. Moreover, we can access the skb's dst entry (f.e. to retrieve tclassid) w/o the need to perform a skb_dst_force() to hold on to it any longer. In lwt case, we can also use this facility to setup dst metadata via cls_bpf (bpf_skb_set_tunnel_key()) without needing a real egress qdisc just for that (case of IFF_NO_QUEUE devices, for example). The realization can be done without any changes to the scheduler core framework. All it takes is that we have two a-priori defined minors/child classes, where we can mux between ingress and egress classifier list (dev->ingress_cl_list and dev->egress_cl_list, latter stored close to dev->_tx to avoid extra cacheline miss for moderate loads). The egress part is a bit similar modelled to handle_ing() and patched to a noop in case the functionality is not used. Both handlers are now called sch_handle_ingress() and sch_handle_egress(), code sharing among the two doesn't seem practical as there are various minor differences in both paths, so that making them conditional in a single handler would rather slow things down. Full compatibility to ingress qdisc is provided as well. Since both piggyback on TC_H_CLSACT, only one of them (ingress/clsact) can exist per netdevice, and thus ingress qdisc specific behaviour can be retained for user space. This means, either a user does 'tc qdisc add dev foo ingress' and configures ingress qdisc as usual, or the 'tc qdisc add dev foo clsact' alternative, where both, ingress and egress classifier can be configured as in the below example. ingress qdisc supports attaching classifier to any minor number whereas clsact has two fixed minors for muxing between the lists, therefore to not break user space setups, they are better done as two separate qdiscs. I decided to extend the sch_ingress module with clsact functionality so that commonly used code can be reused, the module is being aliased with sch_clsact so that it can be auto-loaded properly. Alternative would have been to add a flag when initializing ingress to alter its behaviour plus aliasing to a different name (as it's more than just ingress). However, the first would end up, based on the flag, choosing the new/old behaviour by calling different function implementations to handle each anyway, the latter would require to register ingress qdisc once again under different alias. So, this really begs to provide a minimal, cleaner approach to have Qdisc_ops and Qdisc_class_ops by its own that share callbacks used by both. 
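As a companion to the example below, here is a minimal, hypothetical sketch of what the bar.o object loaded there could contain. The map, section and function names are assumptions of this sketch (the map layout follows iproute2's bpf_elf.h), showing the two points made above: assigning skb->priority on egress, and keeping shared state in one map used from both the ingress and egress hooks.

/* Illustrative sketch only; bar.o contents are not part of this patch. */
#include <stdint.h>
#include <linux/bpf.h>
#include <linux/pkt_cls.h>

#ifndef __section
# define __section(x) __attribute__((section(x), used))
#endif

static void *(*bpf_map_lookup_elem)(void *map, void *key) =
	(void *) BPF_FUNC_map_lookup_elem;

/* iproute2/tc ELF map layout (assumed, per bpf_elf.h) */
struct bpf_elf_map {
	uint32_t type;
	uint32_t size_key;
	uint32_t size_value;
	uint32_t max_elem;
	uint32_t flags;
	uint32_t id;
	uint32_t pinning;
};

/* One byte counter per direction, used by both hooks: 0 = ingress, 1 = egress */
struct bpf_elf_map __section("maps") count_map = {
	.type		= BPF_MAP_TYPE_ARRAY,
	.size_key	= sizeof(uint32_t),
	.size_value	= sizeof(uint64_t),
	.max_elem	= 2,
	.pinning	= 2,	/* PIN_GLOBAL_NS, assuming the loader pins maps */
};

static __attribute__((always_inline)) inline void count(uint32_t dir, uint64_t bytes)
{
	uint64_t *val = bpf_map_lookup_elem(&count_map, &dir);

	if (val)
		__sync_fetch_and_add(val, bytes);
}

__section("ingress")
int cls_ingress(struct __sk_buff *skb)
{
	count(0, skb->len);
	return TC_ACT_OK;
}

__section("egress")
int cls_egress(struct __sk_buff *skb)
{
	count(1, skb->len);
	/* classid-style value (1:3 here, chosen arbitrarily); a classful
	 * root qdisc checks skb->priority before consulting tc_classify(),
	 * as described above
	 */
	skb->priority = 0x10003;
	return TC_ACT_OK;
}

char __license[] __section("license") = "GPL";

Assuming the loader supports map pinning, the two tc filter invocations in the example below then attach programs that operate on the same map instance, which is the ingress/egress state sharing mentioned above.
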
Example, adding qdisc: # tc qdisc add dev foo clsact # tc qdisc show dev foo qdisc mq 0: root qdisc pfifo_fast 0: parent :1 bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1 qdisc pfifo_fast 0: parent :2 bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1 qdisc pfifo_fast 0: parent :3 bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1 qdisc pfifo_fast 0: parent :4 bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1 qdisc clsact ffff: parent ffff:fff1 Adding filters (deleting, etc works analogous by specifying ingress/egress): # tc filter add dev foo ingress bpf da obj bar.o sec ingress # tc filter add dev foo egress bpf da obj bar.o sec egress # tc filter show dev foo ingress filter protocol all pref 49152 bpf filter protocol all pref 49152 bpf handle 0x1 bar.o:[ingress] direct-action # tc filter show dev foo egress filter protocol all pref 49152 bpf filter protocol all pref 49152 bpf handle 0x1 bar.o:[egress] direct-action A 'tc filter show dev foo' or 'tc filter show dev foo parent ffff:' will show an empty list for clsact. Either using the parent names (ingress/egress) or specifying the full major/minor will then show the related filter lists. Prior work on a mqprio prequeue() facility [1] was done mainly by John Fastabend. [1] http://patchwork.ozlabs.org/patch/512949/ Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 +- include/linux/rtnetlink.h | 5 +++ include/uapi/linux/pkt_sched.h | 4 ++ net/Kconfig | 3 ++ net/core/dev.c | 82 +++++++++++++++++++++++++++++++++++---- net/sched/Kconfig | 14 +++++-- net/sched/cls_bpf.c | 2 +- net/sched/sch_ingress.c | 88 +++++++++++++++++++++++++++++++++++++++++- 8 files changed, 186 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8d8e5ca951b4..2285596e7045 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1739,7 +1739,9 @@ struct net_device { #ifdef CONFIG_XPS struct xps_dev_maps __rcu *xps_maps; #endif - +#ifdef CONFIG_NET_CLS_ACT + struct tcf_proto __rcu *egress_cl_list; +#endif #ifdef CONFIG_NET_SWITCHDEV u32 offload_fwd_mark; #endif diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 4be5048b1fbe..c006cc900c44 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -84,6 +84,11 @@ void net_inc_ingress_queue(void); void net_dec_ingress_queue(void); #endif +#ifdef CONFIG_NET_EGRESS +void net_inc_egress_queue(void); +void net_dec_egress_queue(void); +#endif + extern void rtnetlink_init(void); extern void __rtnl_unlock(void); diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 8d2530daca9f..8cb18b44968e 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -72,6 +72,10 @@ struct tc_estimator { #define TC_H_UNSPEC (0U) #define TC_H_ROOT (0xFFFFFFFFU) #define TC_H_INGRESS (0xFFFFFFF1U) +#define TC_H_CLSACT TC_H_INGRESS + +#define TC_H_MIN_INGRESS 0xFFF2U +#define TC_H_MIN_EGRESS 0xFFF3U /* Need to corrospond to iproute2 tc/tc_core.h "enum link_layer" */ enum tc_link_layer { diff --git a/net/Kconfig b/net/Kconfig index 11f8c22af34d..174354618f8a 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -48,6 +48,9 @@ config COMPAT_NETLINK_MESSAGES config NET_INGRESS bool +config NET_EGRESS + bool + menu "Networking options" source "net/packet/Kconfig" diff --git a/net/core/dev.c b/net/core/dev.c index 914b4a24c654..0ca95d5d7af0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1676,6 +1676,22 @@ void 
net_dec_ingress_queue(void) EXPORT_SYMBOL_GPL(net_dec_ingress_queue); #endif +#ifdef CONFIG_NET_EGRESS +static struct static_key egress_needed __read_mostly; + +void net_inc_egress_queue(void) +{ + static_key_slow_inc(&egress_needed); +} +EXPORT_SYMBOL_GPL(net_inc_egress_queue); + +void net_dec_egress_queue(void) +{ + static_key_slow_dec(&egress_needed); +} +EXPORT_SYMBOL_GPL(net_dec_egress_queue); +#endif + static struct static_key netstamp_needed __read_mostly; #ifdef HAVE_JUMP_LABEL /* We are not allowed to call static_key_slow_dec() from irq context @@ -3007,7 +3023,6 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, bool contended; int rc; - qdisc_pkt_len_init(skb); qdisc_calculate_pkt_len(skb, q); /* * Heuristic to force contended enqueues to serialize on a @@ -3100,6 +3115,49 @@ int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(dev_loopback_xmit); +#ifdef CONFIG_NET_EGRESS +static struct sk_buff * +sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) +{ + struct tcf_proto *cl = rcu_dereference_bh(dev->egress_cl_list); + struct tcf_result cl_res; + + if (!cl) + return skb; + + /* skb->tc_verd and qdisc_skb_cb(skb)->pkt_len were already set + * earlier by the caller. + */ + qdisc_bstats_cpu_update(cl->q, skb); + + switch (tc_classify(skb, cl, &cl_res, false)) { + case TC_ACT_OK: + case TC_ACT_RECLASSIFY: + skb->tc_index = TC_H_MIN(cl_res.classid); + break; + case TC_ACT_SHOT: + qdisc_qstats_cpu_drop(cl->q); + *ret = NET_XMIT_DROP; + goto drop; + case TC_ACT_STOLEN: + case TC_ACT_QUEUED: + *ret = NET_XMIT_SUCCESS; +drop: + kfree_skb(skb); + return NULL; + case TC_ACT_REDIRECT: + /* No need to push/pop skb's mac_header here on egress! */ + skb_do_redirect(skb); + *ret = NET_XMIT_SUCCESS; + return NULL; + default: + break; + } + + return skb; +} +#endif /* CONFIG_NET_EGRESS */ + static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) { #ifdef CONFIG_XPS @@ -3226,6 +3284,17 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) skb_update_prio(skb); + qdisc_pkt_len_init(skb); +#ifdef CONFIG_NET_CLS_ACT + skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS); +# ifdef CONFIG_NET_EGRESS + if (static_key_false(&egress_needed)) { + skb = sch_handle_egress(skb, &rc, dev); + if (!skb) + goto out; + } +# endif +#endif /* If device/qdisc don't need skb->dst, release it right now while * its hot in this cpu cache. 
*/ @@ -3247,9 +3316,6 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) txq = netdev_pick_tx(dev, skb, accel_priv); q = rcu_dereference_bh(txq->qdisc); -#ifdef CONFIG_NET_CLS_ACT - skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS); -#endif trace_net_dev_queue(skb); if (q->enqueue) { rc = __dev_xmit_skb(skb, q, dev, txq); @@ -3806,9 +3872,9 @@ int (*br_fdb_test_addr_hook)(struct net_device *dev, EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook); #endif -static inline struct sk_buff *handle_ing(struct sk_buff *skb, - struct packet_type **pt_prev, - int *ret, struct net_device *orig_dev) +static inline struct sk_buff * +sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, + struct net_device *orig_dev) { #ifdef CONFIG_NET_CLS_ACT struct tcf_proto *cl = rcu_dereference_bh(skb->dev->ingress_cl_list); @@ -4002,7 +4068,7 @@ another_round: skip_taps: #ifdef CONFIG_NET_INGRESS if (static_key_false(&ingress_needed)) { - skb = handle_ing(skb, &pt_prev, &ret, orig_dev); + skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev); if (!skb) goto out; diff --git a/net/sched/Kconfig b/net/sched/Kconfig index daa33432b716..82830824fb1f 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -310,15 +310,21 @@ config NET_SCH_PIE If unsure, say N. config NET_SCH_INGRESS - tristate "Ingress Qdisc" + tristate "Ingress/classifier-action Qdisc" depends on NET_CLS_ACT select NET_INGRESS + select NET_EGRESS ---help--- - Say Y here if you want to use classifiers for incoming packets. + Say Y here if you want to use classifiers for incoming and/or outgoing + packets. This qdisc doesn't do anything else besides running classifiers, + which can also have actions attached to them. In case of outgoing packets, + classifiers that this qdisc holds are executed in the transmit path + before real enqueuing to an egress qdisc happens. + If unsure, say Y. - To compile this code as a module, choose M here: the - module will be called sch_ingress. + To compile this code as a module, choose M here: the module will be + called sch_ingress with alias of sch_clsact. 
config NET_SCH_PLUG tristate "Plug network traffic until release (PLUG)" diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index b3c8bb4aeef5..8dc84300ee79 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -291,7 +291,7 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog, prog->bpf_name = name; prog->filter = fp; - if (fp->dst_needed) + if (fp->dst_needed && !(tp->q->flags & TCQ_F_INGRESS)) netif_keep_dst(qdisc_dev(tp->q)); return 0; diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index e7c648fa9dc3..10adbc617905 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -1,4 +1,5 @@ -/* net/sched/sch_ingress.c - Ingress qdisc +/* net/sched/sch_ingress.c - Ingress and clsact qdisc + * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -98,17 +99,100 @@ static struct Qdisc_ops ingress_qdisc_ops __read_mostly = { .owner = THIS_MODULE, }; +static unsigned long clsact_get(struct Qdisc *sch, u32 classid) +{ + switch (TC_H_MIN(classid)) { + case TC_H_MIN(TC_H_MIN_INGRESS): + case TC_H_MIN(TC_H_MIN_EGRESS): + return TC_H_MIN(classid); + default: + return 0; + } +} + +static unsigned long clsact_bind_filter(struct Qdisc *sch, + unsigned long parent, u32 classid) +{ + return clsact_get(sch, classid); +} + +static struct tcf_proto __rcu **clsact_find_tcf(struct Qdisc *sch, + unsigned long cl) +{ + struct net_device *dev = qdisc_dev(sch); + + switch (cl) { + case TC_H_MIN(TC_H_MIN_INGRESS): + return &dev->ingress_cl_list; + case TC_H_MIN(TC_H_MIN_EGRESS): + return &dev->egress_cl_list; + default: + return NULL; + } +} + +static int clsact_init(struct Qdisc *sch, struct nlattr *opt) +{ + net_inc_ingress_queue(); + net_inc_egress_queue(); + + sch->flags |= TCQ_F_CPUSTATS; + + return 0; +} + +static void clsact_destroy(struct Qdisc *sch) +{ + struct net_device *dev = qdisc_dev(sch); + + tcf_destroy_chain(&dev->ingress_cl_list); + tcf_destroy_chain(&dev->egress_cl_list); + + net_dec_ingress_queue(); + net_dec_egress_queue(); +} + +static const struct Qdisc_class_ops clsact_class_ops = { + .leaf = ingress_leaf, + .get = clsact_get, + .put = ingress_put, + .walk = ingress_walk, + .tcf_chain = clsact_find_tcf, + .bind_tcf = clsact_bind_filter, + .unbind_tcf = ingress_put, +}; + +static struct Qdisc_ops clsact_qdisc_ops __read_mostly = { + .cl_ops = &clsact_class_ops, + .id = "clsact", + .init = clsact_init, + .destroy = clsact_destroy, + .dump = ingress_dump, + .owner = THIS_MODULE, +}; + static int __init ingress_module_init(void) { - return register_qdisc(&ingress_qdisc_ops); + int ret; + + ret = register_qdisc(&ingress_qdisc_ops); + if (!ret) { + ret = register_qdisc(&clsact_qdisc_ops); + if (ret) + unregister_qdisc(&ingress_qdisc_ops); + } + + return ret; } static void __exit ingress_module_exit(void) { unregister_qdisc(&ingress_qdisc_ops); + unregister_qdisc(&clsact_qdisc_ops); } module_init(ingress_module_init); module_exit(ingress_module_exit); +MODULE_ALIAS("sch_clsact"); MODULE_LICENSE("GPL"); -- cgit v1.2.3-71-gd317 From 712f4aad406bb1ed67f3f98d04c044191f0ff593 Mon Sep 17 00:00:00 2001 From: willy tarreau Date: Sun, 10 Jan 2016 07:54:56 +0100 Subject: unix: properly account for FDs passed over unix sockets It is possible for a process to allocate and accumulate far more FDs than the process' limit by sending them over a unix socket then closing them to keep the process' fd count low. 
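For reference, the mechanism in question is SCM_RIGHTS descriptor passing over an AF_UNIX socket. A minimal userspace sketch (hypothetical helper, not part of the patch) of putting a descriptor "in flight" while dropping the local reference:

/* Illustrative sketch only, not part of the patch. */
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <unistd.h>

static int send_fd(int unix_sock, int fd)
{
	char dummy = '*';
	struct iovec iov = { .iov_base = &dummy, .iov_len = 1 };
	union {			/* aligned control buffer, see cmsg(3) */
		char buf[CMSG_SPACE(sizeof(int))];
		struct cmsghdr align;
	} u = { 0 };
	struct msghdr msg = {
		.msg_iov	= &iov,
		.msg_iovlen	= 1,
		.msg_control	= u.buf,
		.msg_controllen	= sizeof(u.buf),
	};
	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);

	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type  = SCM_RIGHTS;
	cmsg->cmsg_len   = CMSG_LEN(sizeof(int));
	memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));

	if (sendmsg(unix_sock, &msg, 0) < 0)
		return -1;

	/* The sender's fd can now be closed: the open file stays alive,
	 * "in flight", inside the queued skb until the receiver reads it.
	 */
	close(fd);
	return 0;
}
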
This change addresses this problem by keeping track of the number of FDs in flight per user and preventing non-privileged processes from having more FDs in flight than their configured FD limit. Reported-by: socketpair@gmail.com Reported-by: Tetsuo Handa Mitigates: CVE-2013-4312 (Linux 2.0+) Suggested-by: Linus Torvalds Acked-by: Hannes Frederic Sowa Signed-off-by: Willy Tarreau Signed-off-by: David S. Miller --- include/linux/sched.h | 1 + net/unix/af_unix.c | 24 ++++++++++++++++++++---- net/unix/garbage.c | 13 ++++++++----- 3 files changed, 29 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index edad7a43edea..fbf25f19b3b5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -830,6 +830,7 @@ struct user_struct { unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */ #endif unsigned long locked_shm; /* How many pages of mlocked shm ? */ + unsigned long unix_inflight; /* How many files in flight in unix sockets */ #ifdef CONFIG_KEYS struct key *uid_keyring; /* UID specific keyring */ diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index ef05cd9403d4..e3f85bc8b135 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1513,6 +1513,21 @@ static void unix_destruct_scm(struct sk_buff *skb) sock_wfree(skb); } +/* + * The "user->unix_inflight" variable is protected by the garbage + * collection lock, and we just read it locklessly here. If you go + * over the limit, there might be a tiny race in actually noticing + * it across threads. Tough. + */ +static inline bool too_many_unix_fds(struct task_struct *p) +{ + struct user_struct *user = current_user(); + + if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE))) + return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN); + return false; +} + #define MAX_RECURSION_LEVEL 4 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) @@ -1521,6 +1536,9 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) unsigned char max_level = 0; int unix_sock_count = 0; + if (too_many_unix_fds(current)) + return -ETOOMANYREFS; + for (i = scm->fp->count - 1; i >= 0; i--) { struct sock *sk = unix_get_socket(scm->fp->fp[i]); @@ -1542,10 +1560,8 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) if (!UNIXCB(skb).fp) return -ENOMEM; - if (unix_sock_count) { - for (i = scm->fp->count - 1; i >= 0; i--) - unix_inflight(scm->fp->fp[i]); - } + for (i = scm->fp->count - 1; i >= 0; i--) + unix_inflight(scm->fp->fp[i]); return max_level; } diff --git a/net/unix/garbage.c b/net/unix/garbage.c index a73a226f2d33..8fcdc2283af5 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -120,11 +120,11 @@ void unix_inflight(struct file *fp) { struct sock *s = unix_get_socket(fp); + spin_lock(&unix_gc_lock); + if (s) { struct unix_sock *u = unix_sk(s); - spin_lock(&unix_gc_lock); - if (atomic_long_inc_return(&u->inflight) == 1) { BUG_ON(!list_empty(&u->link)); list_add_tail(&u->link, &gc_inflight_list); @@ -132,25 +132,28 @@ void unix_inflight(struct file *fp) BUG_ON(list_empty(&u->link)); } unix_tot_inflight++; - spin_unlock(&unix_gc_lock); } + fp->f_cred->user->unix_inflight++; + spin_unlock(&unix_gc_lock); } void unix_notinflight(struct file *fp) { struct sock *s = unix_get_socket(fp); + spin_lock(&unix_gc_lock); + if (s) { struct unix_sock *u = unix_sk(s); - spin_lock(&unix_gc_lock); BUG_ON(list_empty(&u->link)); if (atomic_long_dec_and_test(&u->inflight)) list_del_init(&u->link); 
unix_tot_inflight--; - spin_unlock(&unix_gc_lock); } + fp->f_cred->user->unix_inflight--; + spin_unlock(&unix_gc_lock); } static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *), -- cgit v1.2.3-71-gd317 From f0d22d1874730530a2ac304fd0888cb8a6864527 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Mon, 11 Jan 2016 10:25:57 +0200 Subject: net/mlx5_core: Introduce flow steering autogrouped flow table When user add rule to autogrouped flow table, we search for flow group with the same match criteria, if we don't find such group then we create new flow group with the required match criteria and insert the rule to this group. We divide the flow table into required_groups + 1, in order to reserve a part of the flow table for rules which don't match any existing group. Signed-off-by: Maor Gottlieb Signed-off-by: Moni Shoua Signed-off-by: Matan Barak Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 166 +++++++++++++++++++--- drivers/net/ethernet/mellanox/mlx5/core/fs_core.h | 5 + include/linux/mlx5/fs.h | 6 + 3 files changed, 158 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index f7d62fe595f6..7d24bbba58ba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -85,6 +85,12 @@ static struct init_tree_node { } }; +enum fs_i_mutex_lock_class { + FS_MUTEX_GRANDPARENT, + FS_MUTEX_PARENT, + FS_MUTEX_CHILD +}; + static void del_rule(struct fs_node *node); static void del_flow_table(struct fs_node *node); static void del_flow_group(struct fs_node *node); @@ -119,10 +125,11 @@ static void tree_get_node(struct fs_node *node) atomic_inc(&node->refcount); } -static void nested_lock_ref_node(struct fs_node *node) +static void nested_lock_ref_node(struct fs_node *node, + enum fs_i_mutex_lock_class class) { if (node) { - mutex_lock_nested(&node->lock, SINGLE_DEPTH_NESTING); + mutex_lock_nested(&node->lock, class); atomic_inc(&node->refcount); } } @@ -481,9 +488,7 @@ struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns, list_add_tail(&ft->node.list, &fs_prio->node.children); fs_prio->num_ft++; unlock_ref_node(&fs_prio->node); - return ft; - free_ft: kfree(ft); unlock_prio: @@ -491,8 +496,32 @@ unlock_prio: return ERR_PTR(err); } -struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft, - u32 *fg_in) +struct mlx5_flow_table *mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns, + int prio, + int num_flow_table_entries, + int max_num_groups) +{ + struct mlx5_flow_table *ft; + + if (max_num_groups > num_flow_table_entries) + return ERR_PTR(-EINVAL); + + ft = mlx5_create_flow_table(ns, prio, num_flow_table_entries); + if (IS_ERR(ft)) + return ft; + + ft->autogroup.active = true; + ft->autogroup.required_groups = max_num_groups; + + return ft; +} + +/* Flow table should be locked */ +static struct mlx5_flow_group *create_flow_group_common(struct mlx5_flow_table *ft, + u32 *fg_in, + struct list_head + *prev_fg, + bool is_auto_fg) { struct mlx5_flow_group *fg; struct mlx5_core_dev *dev = get_dev(&ft->node); @@ -505,18 +534,33 @@ struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft, if (IS_ERR(fg)) return fg; - lock_ref_node(&ft->node); err = mlx5_cmd_create_flow_group(dev, ft, fg_in, &fg->id); if (err) { kfree(fg); - unlock_ref_node(&ft->node); return ERR_PTR(err); } + + if 
(ft->autogroup.active) + ft->autogroup.num_groups++; /* Add node to tree */ - tree_init_node(&fg->node, 1, del_flow_group); + tree_init_node(&fg->node, !is_auto_fg, del_flow_group); tree_add_node(&fg->node, &ft->node); /* Add node to group list */ list_add(&fg->node.list, ft->node.children.prev); + + return fg; +} + +struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft, + u32 *fg_in) +{ + struct mlx5_flow_group *fg; + + if (ft->autogroup.active) + return ERR_PTR(-EPERM); + + lock_ref_node(&ft->node); + fg = create_flow_group_common(ft, fg_in, &ft->node.children, false); unlock_ref_node(&ft->node); return fg; @@ -614,7 +658,63 @@ static struct fs_fte *create_fte(struct mlx5_flow_group *fg, return fte; } -/* Assuming parent fg(flow table) is locked */ +static struct mlx5_flow_group *create_autogroup(struct mlx5_flow_table *ft, + u8 match_criteria_enable, + u32 *match_criteria) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct list_head *prev = &ft->node.children; + unsigned int candidate_index = 0; + struct mlx5_flow_group *fg; + void *match_criteria_addr; + unsigned int group_size = 0; + u32 *in; + + if (!ft->autogroup.active) + return ERR_PTR(-ENOENT); + + in = mlx5_vzalloc(inlen); + if (!in) + return ERR_PTR(-ENOMEM); + + if (ft->autogroup.num_groups < ft->autogroup.required_groups) + /* We save place for flow groups in addition to max types */ + group_size = ft->max_fte / (ft->autogroup.required_groups + 1); + + /* ft->max_fte == ft->autogroup.max_types */ + if (group_size == 0) + group_size = 1; + + /* sorted by start_index */ + fs_for_each_fg(fg, ft) { + if (candidate_index + group_size > fg->start_index) + candidate_index = fg->start_index + fg->max_ftes; + else + break; + prev = &fg->node.list; + } + + if (candidate_index + group_size > ft->max_fte) { + fg = ERR_PTR(-ENOSPC); + goto out; + } + + MLX5_SET(create_flow_group_in, in, match_criteria_enable, + match_criteria_enable); + MLX5_SET(create_flow_group_in, in, start_flow_index, candidate_index); + MLX5_SET(create_flow_group_in, in, end_flow_index, candidate_index + + group_size - 1); + match_criteria_addr = MLX5_ADDR_OF(create_flow_group_in, + in, match_criteria); + memcpy(match_criteria_addr, match_criteria, + MLX5_ST_SZ_BYTES(fte_match_param)); + + fg = create_flow_group_common(ft, in, prev, true); +out: + kvfree(in); + return fg; +} + static struct mlx5_flow_rule *add_rule_fg(struct mlx5_flow_group *fg, u32 *match_value, u8 action, @@ -626,9 +726,9 @@ static struct mlx5_flow_rule *add_rule_fg(struct mlx5_flow_group *fg, struct mlx5_flow_table *ft; struct list_head *prev; - lock_ref_node(&fg->node); + nested_lock_ref_node(&fg->node, FS_MUTEX_PARENT); fs_for_each_fte(fte, fg) { - nested_lock_ref_node(&fte->node); + nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD); if (compare_match_value(&fg->mask, match_value, &fte->val) && action == fte->action && flow_tag == fte->flow_tag) { rule = add_rule_fte(fte, fg, dest); @@ -669,6 +769,33 @@ unlock_fg: return rule; } +static struct mlx5_flow_rule *add_rule_to_auto_fg(struct mlx5_flow_table *ft, + u8 match_criteria_enable, + u32 *match_criteria, + u32 *match_value, + u8 action, + u32 flow_tag, + struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_rule *rule; + struct mlx5_flow_group *g; + + g = create_autogroup(ft, match_criteria_enable, match_criteria); + if (IS_ERR(g)) + return (void *)g; + + rule = add_rule_fg(g, match_value, + action, flow_tag, dest); + if (IS_ERR(rule)) { + /* Remove assumes refcount > 0 and autogroup creates a group + * 
with a refcount = 0. + */ + tree_get_node(&g->node); + tree_remove_node(&g->node); + } + return rule; +} + struct mlx5_flow_rule * mlx5_add_flow_rule(struct mlx5_flow_table *ft, u8 match_criteria_enable, @@ -679,23 +806,24 @@ mlx5_add_flow_rule(struct mlx5_flow_table *ft, struct mlx5_flow_destination *dest) { struct mlx5_flow_group *g; - struct mlx5_flow_rule *rule = ERR_PTR(-EINVAL); + struct mlx5_flow_rule *rule; - tree_get_node(&ft->node); - lock_ref_node(&ft->node); + nested_lock_ref_node(&ft->node, FS_MUTEX_GRANDPARENT); fs_for_each_fg(g, ft) if (compare_match_criteria(g->mask.match_criteria_enable, match_criteria_enable, g->mask.match_criteria, match_criteria)) { - unlock_ref_node(&ft->node); rule = add_rule_fg(g, match_value, action, flow_tag, dest); - goto put; + if (!IS_ERR(rule) || PTR_ERR(rule) != -ENOSPC) + goto unlock; } + + rule = add_rule_to_auto_fg(ft, match_criteria_enable, match_criteria, + match_value, action, flow_tag, dest); +unlock: unlock_ref_node(&ft->node); -put: - tree_put_node(&ft->node); return rule; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 4ebb97fd5544..0f98257c0d31 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -77,6 +77,11 @@ struct mlx5_flow_table { unsigned int max_fte; unsigned int level; enum fs_flow_table_type type; + struct { + bool active; + unsigned int required_groups; + unsigned int num_groups; + } autogroup; }; /* Type of children is mlx5_flow_rule */ diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index bc7ad019afde..06ac6e8fccfa 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -61,6 +61,12 @@ struct mlx5_flow_namespace * mlx5_get_flow_namespace(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type); +struct mlx5_flow_table * +mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns, + int prio, + int num_flow_table_entries, + int max_num_groups); + struct mlx5_flow_table * mlx5_create_flow_table(struct mlx5_flow_namespace *ns, int prio, -- cgit v1.2.3-71-gd317 From 2cc43b494a6c30ec0e554ea91ce763c97069e8cc Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Mon, 11 Jan 2016 10:25:59 +0200 Subject: net/mlx5_core: Managing root flow table The root Flow Table for each Flow Table Type is defined, by default, as the Flow Table with level 0. In order not to use an empty flow tables and introduce new hops, but still preserve space for flow-tables that have a priority greater(lower number) than the current flow table, we introduce this new set root flow table command. This command tells the HW to start matching packets from the assigned root flow table. This command is used when we create new flow table with level lower than the current lowest flow table or it is the first flow table. Signed-off-by: Maor Gottlieb Signed-off-by: Moni Shoua Signed-off-by: Matan Barak Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 18 +++++ drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h | 2 + drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 97 ++++++++++++++++++++--- drivers/net/ethernet/mellanox/mlx5/core/fs_core.h | 6 ++ include/linux/mlx5/mlx5_ifc.h | 31 +++++++- 5 files changed, 144 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 5096f4f336bd..d8b1195fba3d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -38,6 +38,24 @@ #include "fs_cmd.h" #include "mlx5_core.h" +int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft) +{ + u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)]; + u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(set_flow_table_root_in, in, opcode, + MLX5_CMD_OP_SET_FLOW_TABLE_ROOT); + MLX5_SET(set_flow_table_root_in, in, table_type, ft->type); + MLX5_SET(set_flow_table_root_in, in, table_id, ft->id); + + memset(out, 0, sizeof(out)); + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, + sizeof(out)); +} + int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, enum fs_flow_table_type type, unsigned int level, unsigned int log_size, unsigned int *table_id) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h index f39304ede186..70d18ec145c2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h @@ -62,4 +62,6 @@ int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, unsigned int index); +int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft); #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index c5a96e6abe0d..64bdb54041d1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -510,6 +510,29 @@ static struct mlx5_flow_table *find_prev_chained_ft(struct fs_prio *prio) return find_closest_ft(prio, true); } +static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio + *prio) +{ + struct mlx5_flow_root_namespace *root = find_root(&prio->node); + int min_level = INT_MAX; + int err; + + if (root->root_ft) + min_level = root->root_ft->level; + + if (ft->level >= min_level) + return 0; + + err = mlx5_cmd_update_root_ft(root->dev, ft); + if (err) + mlx5_core_warn(root->dev, "Update root flow table of id=%u failed\n", + ft->id); + else + root->root_ft = ft; + + return err; +} + struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns, int prio, int max_fte) @@ -526,14 +549,15 @@ struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns, return ERR_PTR(-ENODEV); } + mutex_lock(&root->chain_lock); fs_prio = find_prio(ns, prio); - if (!fs_prio) - return ERR_PTR(-EINVAL); - - lock_ref_node(&fs_prio->node); + if (!fs_prio) { + err = -EINVAL; + goto unlock_root; + } if (fs_prio->num_ft == fs_prio->max_ft) { err = -ENOSPC; - goto unlock_prio; + goto unlock_root; } ft = alloc_flow_table(find_next_free_level(fs_prio), @@ -541,7 +565,7 @@ struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns, root->table_type); if (!ft) { err = -ENOMEM; - goto unlock_prio; + goto unlock_root; } 
tree_init_node(&ft->node, 1, del_flow_table); @@ -551,15 +575,25 @@ struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns, if (err) goto free_ft; + if (MLX5_CAP_FLOWTABLE(root->dev, + flow_table_properties_nic_receive.modify_root)) { + err = update_root_ft_create(ft, fs_prio); + if (err) + goto destroy_ft; + } + lock_ref_node(&fs_prio->node); tree_add_node(&ft->node, &fs_prio->node); list_add_tail(&ft->node.list, &fs_prio->node.children); fs_prio->num_ft++; unlock_ref_node(&fs_prio->node); + mutex_unlock(&root->chain_lock); return ft; +destroy_ft: + mlx5_cmd_destroy_flow_table(root->dev, ft); free_ft: kfree(ft); -unlock_prio: - unlock_ref_node(&fs_prio->node); +unlock_root: + mutex_unlock(&root->chain_lock); return ERR_PTR(err); } @@ -899,13 +933,57 @@ void mlx5_del_flow_rule(struct mlx5_flow_rule *rule) tree_remove_node(&rule->node); } +/* Assuming prio->node.children(flow tables) is sorted by level */ +static struct mlx5_flow_table *find_next_ft(struct mlx5_flow_table *ft) +{ + struct fs_prio *prio; + + fs_get_obj(prio, ft->node.parent); + + if (!list_is_last(&ft->node.list, &prio->node.children)) + return list_next_entry(ft, node.list); + return find_next_chained_ft(prio); +} + +static int update_root_ft_destroy(struct mlx5_flow_table *ft) +{ + struct mlx5_flow_root_namespace *root = find_root(&ft->node); + struct mlx5_flow_table *new_root_ft = NULL; + + if (root->root_ft != ft) + return 0; + + new_root_ft = find_next_ft(ft); + if (new_root_ft) { + int err = mlx5_cmd_update_root_ft(root->dev, new_root_ft); + + if (err) { + mlx5_core_warn(root->dev, "Update root flow table of id=%u failed\n", + ft->id); + return err; + } + root->root_ft = new_root_ft; + } + return 0; +} + int mlx5_destroy_flow_table(struct mlx5_flow_table *ft) { + struct mlx5_flow_root_namespace *root = find_root(&ft->node); + int err = 0; + + mutex_lock(&root->chain_lock); + err = update_root_ft_destroy(ft); + if (err) { + mutex_unlock(&root->chain_lock); + return err; + } if (tree_remove_node(&ft->node)) mlx5_core_warn(get_dev(&ft->node), "Flow table %d wasn't destroyed, refcount > 1\n", ft->id); + mutex_unlock(&root->chain_lock); - return 0; + return err; } void mlx5_destroy_flow_group(struct mlx5_flow_group *fg) @@ -1072,6 +1150,7 @@ static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_core_dev *dev ns = &root_ns->ns; fs_init_namespace(ns); + mutex_init(&root_ns->chain_lock); tree_init_node(&ns->node, 1, NULL); tree_add_node(&ns->node, NULL); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 0f98257c0d31..1a2e08bad529 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -129,6 +129,9 @@ struct mlx5_flow_root_namespace { struct mlx5_flow_namespace ns; enum fs_flow_table_type table_type; struct mlx5_core_dev *dev; + struct mlx5_flow_table *root_ft; + /* Should be held when chaining flow tables */ + struct mutex chain_lock; }; int mlx5_init_fs(struct mlx5_core_dev *dev); @@ -148,6 +151,9 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev); #define fs_for_each_prio(pos, ns) \ fs_list_for_each_entry(pos, &(ns)->node.children) +#define fs_for_each_ft(pos, prio) \ + fs_list_for_each_entry(pos, &(prio)->node.children) + #define fs_for_each_fg(pos, ft) \ fs_list_for_each_entry(pos, &(ft)->node.children) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 1780a85a8797..323e713c44ba 100644 --- a/include/linux/mlx5/mlx5_ifc.h 
+++ b/include/linux/mlx5/mlx5_ifc.h @@ -185,6 +185,7 @@ enum { MLX5_CMD_OP_MODIFY_RQT = 0x917, MLX5_CMD_OP_DESTROY_RQT = 0x918, MLX5_CMD_OP_QUERY_RQT = 0x919, + MLX5_CMD_OP_SET_FLOW_TABLE_ROOT = 0x92f, MLX5_CMD_OP_CREATE_FLOW_TABLE = 0x930, MLX5_CMD_OP_DESTROY_FLOW_TABLE = 0x931, MLX5_CMD_OP_QUERY_FLOW_TABLE = 0x932, @@ -258,7 +259,8 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 ft_support[0x1]; u8 reserved_0[0x2]; u8 flow_modify_en[0x1]; - u8 reserved_1[0x1c]; + u8 modify_root[0x1]; + u8 reserved_1[0x1b]; u8 reserved_2[0x2]; u8 log_max_ft_size[0x6]; @@ -6946,4 +6948,31 @@ union mlx5_ifc_uplink_pci_interface_document_bits { u8 reserved_0[0x20060]; }; +struct mlx5_ifc_set_flow_table_root_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_set_flow_table_root_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + u8 table_type[0x8]; + u8 reserved_3[0x18]; + + u8 reserved_4[0x8]; + u8 table_id[0x18]; + + u8 reserved_5[0x140]; +}; + #endif /* MLX5_IFC_H */ -- cgit v1.2.3-71-gd317 From 34a40e689393a6b13673ab395a9a4d063d249fe9 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Mon, 11 Jan 2016 10:26:00 +0200 Subject: net/mlx5_core: Introduce modify flow table command Introduce the modify flow table command. This command is used when we want to change the next flow table of an existing flow table. The next flow table is defined as the table we search (in order to find a match), if we couldn't find a match in any of the flow table entries in the current flow table. Signed-off-by: Maor Gottlieb Signed-off-by: Moni Shoua Signed-off-by: Matan Barak Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 27 ++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h | 4 ++ include/linux/mlx5/mlx5_ifc.h | 56 ++++++++++++++++++++++-- 3 files changed, 83 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index d8b1195fba3d..2b5562553f2d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -101,6 +101,33 @@ int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev, sizeof(out)); } +int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct mlx5_flow_table *next_ft) +{ + u32 in[MLX5_ST_SZ_DW(modify_flow_table_in)]; + u32 out[MLX5_ST_SZ_DW(modify_flow_table_out)]; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(modify_flow_table_in, in, opcode, + MLX5_CMD_OP_MODIFY_FLOW_TABLE); + MLX5_SET(modify_flow_table_in, in, table_type, ft->type); + MLX5_SET(modify_flow_table_in, in, table_id, ft->id); + MLX5_SET(modify_flow_table_in, in, modify_field_select, + MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID); + if (next_ft) { + MLX5_SET(modify_flow_table_in, in, table_miss_mode, 1); + MLX5_SET(modify_flow_table_in, in, table_miss_id, next_ft->id); + } else { + MLX5_SET(modify_flow_table_in, in, table_miss_mode, 0); + } + + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, + sizeof(out)); +} + int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, u32 *in, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h index 70d18ec145c2..1ae9b685c783 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h @@ -40,6 +40,10 @@ int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft); +int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct mlx5_flow_table *next_ft); + int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, u32 *in, unsigned int *group_id); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 323e713c44ba..7f166955d4c9 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -194,7 +194,8 @@ enum { MLX5_CMD_OP_QUERY_FLOW_GROUP = 0x935, MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY = 0x936, MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY = 0x937, - MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY = 0x938 + MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY = 0x938, + MLX5_CMD_OP_MODIFY_FLOW_TABLE = 0x93c }; struct mlx5_ifc_flow_table_fields_supported_bits { @@ -260,7 +261,9 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 reserved_0[0x2]; u8 flow_modify_en[0x1]; u8 modify_root[0x1]; - u8 reserved_1[0x1b]; + u8 identified_miss_table_mode[0x1]; + u8 flow_table_modify[0x1]; + u8 reserved_1[0x19]; u8 reserved_2[0x2]; u8 log_max_ft_size[0x6]; @@ -5669,12 +5672,16 @@ struct mlx5_ifc_create_flow_table_in_bits { u8 reserved_4[0x20]; - u8 reserved_5[0x8]; + u8 reserved_5[0x4]; + u8 table_miss_mode[0x4]; u8 level[0x8]; u8 reserved_6[0x8]; u8 log_size[0x8]; - u8 reserved_7[0x120]; + u8 reserved_7[0x8]; + u8 table_miss_id[0x18]; + + u8 reserved_8[0x100]; }; struct mlx5_ifc_create_flow_group_out_bits { @@ -6975,4 +6982,45 @@ struct mlx5_ifc_set_flow_table_root_in_bits { u8 reserved_5[0x140]; }; +enum { + MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID = 0x1, +}; + +struct mlx5_ifc_modify_flow_table_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_modify_flow_table_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x20]; + + u8 reserved_3[0x10]; + u8 modify_field_select[0x10]; + + u8 table_type[0x8]; + u8 reserved_4[0x18]; + + u8 reserved_5[0x8]; + u8 table_id[0x18]; + + u8 reserved_6[0x4]; + u8 table_miss_mode[0x4]; + u8 reserved_7[0x18]; + + u8 reserved_8[0x8]; + u8 table_miss_id[0x18]; + + u8 reserved_9[0x100]; +}; + #endif /* MLX5_IFC_H */ -- cgit v1.2.3-71-gd317 From 4cbdd30ed5c8bc5cf40813b025b4fb57b376a592 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Mon, 11 Jan 2016 10:26:04 +0200 Subject: net/mlx5_core: Enable flow steering support for the IB driver When the driver is loaded, we create flow steering namespace for kernel bypass with nine priorities and another namespace for leftovers(in order to catch packets that weren't matched). Verbs applications will use these priorities. we found nine as a number that balances the requirements from the user and retains performance. The bypass namespace is used by verbs applications that want to bypass the kernel networking stack. The leftovers namespace is used by verbs applications and the sniffer in order to catch packets that weren't handled by any preceding rules. Signed-off-by: Maor Gottlieb Signed-off-by: Moni Shoua Signed-off-by: Matan Barak Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 55 +++++++++++++++++++---- include/linux/mlx5/device.h | 2 + include/linux/mlx5/fs.h | 2 + 3 files changed, 51 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 96e287a326ae..757725bf48a8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -40,18 +40,19 @@ #define INIT_TREE_NODE_ARRAY_SIZE(...) (sizeof((struct init_tree_node[]){__VA_ARGS__}) /\ sizeof(struct init_tree_node)) -#define ADD_PRIO(min_level_val, max_ft_val, caps_val,\ +#define ADD_PRIO(num_prios_val, min_level_val, max_ft_val, caps_val,\ ...) {.type = FS_TYPE_PRIO,\ .min_ft_level = min_level_val,\ .max_ft = max_ft_val,\ + .num_leaf_prios = num_prios_val,\ .caps = caps_val,\ .children = (struct init_tree_node[]) {__VA_ARGS__},\ .ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \ } -#define ADD_FT_PRIO(max_ft_val, ...)\ - ADD_PRIO(0, max_ft_val, {},\ - __VA_ARGS__)\ +#define ADD_MULTIPLE_PRIO(num_prios_val, max_ft_val, ...)\ + ADD_PRIO(num_prios_val, 0, max_ft_val, {},\ + __VA_ARGS__)\ #define ADD_NS(...) {.type = FS_TYPE_NAMESPACE,\ .children = (struct init_tree_node[]) {__VA_ARGS__},\ @@ -66,7 +67,14 @@ #define FS_REQUIRED_CAPS(...) {.arr_sz = INIT_CAPS_ARRAY_SIZE(__VA_ARGS__), \ .caps = (long[]) {__VA_ARGS__} } +#define LEFTOVERS_MAX_FT 1 +#define LEFTOVERS_NUM_PRIOS 1 +#define BY_PASS_PRIO_MAX_FT 1 +#define BY_PASS_MIN_LEVEL (KENREL_MIN_LEVEL + MLX5_BY_PASS_NUM_PRIOS +\ + LEFTOVERS_MAX_FT) + #define KERNEL_MAX_FT 2 +#define KERNEL_NUM_PRIOS 1 #define KENREL_MIN_LEVEL 2 struct node_caps { @@ -79,14 +87,27 @@ static struct init_tree_node { int ar_size; struct node_caps caps; int min_ft_level; + int num_leaf_prios; int prio; int max_ft; } root_fs = { .type = FS_TYPE_NAMESPACE, - .ar_size = 1, + .ar_size = 3, .children = (struct init_tree_node[]) { - ADD_PRIO(KENREL_MIN_LEVEL, 0, {}, - ADD_NS(ADD_FT_PRIO(KERNEL_MAX_FT))), + ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, + FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en), + FS_CAP(flow_table_properties_nic_receive.modify_root), + FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode), + FS_CAP(flow_table_properties_nic_receive.flow_table_modify)), + ADD_NS(ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS, BY_PASS_PRIO_MAX_FT))), + ADD_PRIO(0, KENREL_MIN_LEVEL, 0, {}, + ADD_NS(ADD_MULTIPLE_PRIO(KERNEL_NUM_PRIOS, KERNEL_MAX_FT))), + ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, + FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en), + FS_CAP(flow_table_properties_nic_receive.modify_root), + FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode), + FS_CAP(flow_table_properties_nic_receive.flow_table_modify)), + ADD_NS(ADD_MULTIPLE_PRIO(LEFTOVERS_NUM_PRIOS, LEFTOVERS_MAX_FT))), } }; @@ -1098,8 +1119,10 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, return NULL; switch (type) { + case MLX5_FLOW_NAMESPACE_BYPASS: case MLX5_FLOW_NAMESPACE_KERNEL: - prio = 0; + case MLX5_FLOW_NAMESPACE_LEFTOVERS: + prio = type; break; case MLX5_FLOW_NAMESPACE_FDB: if (dev->priv.fdb_root_ns) @@ -1164,6 +1187,20 @@ static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio) return ns; } +static int create_leaf_prios(struct mlx5_flow_namespace *ns, struct init_tree_node + *prio_metadata) +{ + struct fs_prio *fs_prio; + int i; + + for (i = 0; i < 
prio_metadata->num_leaf_prios; i++) { + fs_prio = fs_create_prio(ns, i, prio_metadata->max_ft); + if (IS_ERR(fs_prio)) + return PTR_ERR(fs_prio); + } + return 0; +} + #define FLOW_TABLE_BIT_SZ 1 #define GET_FLOW_TABLE_CAP(dev, offset) \ ((be32_to_cpu(*((__be32 *)(dev->hca_caps_cur[MLX5_CAP_FLOW_TABLE]) + \ @@ -1201,6 +1238,8 @@ static int init_root_tree_recursive(struct mlx5_core_dev *dev, return 0; fs_get_obj(fs_ns, fs_parent_node); + if (init_node->num_leaf_prios) + return create_leaf_prios(fs_ns, init_node); fs_prio = fs_create_prio(fs_ns, index, init_node->max_ft); if (IS_ERR(fs_prio)) return PTR_ERR(fs_prio); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index df2f79ef3cac..7be845e30689 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1258,4 +1258,6 @@ static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz) return MLX5_MIN_PKEY_TABLE_SIZE << pkey_sz; } +#define MLX5_BY_PASS_NUM_PRIOS 9 + #endif /* MLX5_DEVICE_H */ diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 06ac6e8fccfa..a94341271e3f 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -39,7 +39,9 @@ #define MLX5_FS_DEFAULT_FLOW_TAG 0x0 enum mlx5_flow_namespace_type { + MLX5_FLOW_NAMESPACE_BYPASS, MLX5_FLOW_NAMESPACE_KERNEL, + MLX5_FLOW_NAMESPACE_LEFTOVERS, MLX5_FLOW_NAMESPACE_FDB, }; -- cgit v1.2.3-71-gd317 From b4d1f032d75b2efb73304e8c12faa7149ad700c7 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Mon, 11 Jan 2016 10:26:05 +0200 Subject: net/mlx5_core: Make ipv4/ipv6 location more clear Change the mlx5 firmware interface header to make it more clear which bytes should be used by IPv4 or IPv6 addresses. Signed-off-by: Maor Gottlieb Signed-off-by: Moni Shoua Signed-off-by: Matan Barak Signed-off-by: David S. Miller --- include/linux/mlx5/mlx5_ifc.h | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 7f166955d4c9..68d73f82e009 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -298,6 +298,22 @@ struct mlx5_ifc_odp_per_transport_service_cap_bits { u8 reserved_1[0x1a]; }; +struct mlx5_ifc_ipv4_layout_bits { + u8 reserved_0[0x60]; + + u8 ipv4[0x20]; +}; + +struct mlx5_ifc_ipv6_layout_bits { + u8 ipv6[16][0x8]; +}; + +union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits { + struct mlx5_ifc_ipv6_layout_bits ipv6_layout; + struct mlx5_ifc_ipv4_layout_bits ipv4_layout; + u8 reserved_0[0x80]; +}; + struct mlx5_ifc_fte_match_set_lyr_2_4_bits { u8 smac_47_16[0x20]; @@ -328,9 +344,9 @@ struct mlx5_ifc_fte_match_set_lyr_2_4_bits { u8 udp_sport[0x10]; u8 udp_dport[0x10]; - u8 src_ip[4][0x20]; + union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits src_ipv4_src_ipv6; - u8 dst_ip[4][0x20]; + union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits dst_ipv4_dst_ipv6; }; struct mlx5_ifc_fte_match_set_misc_bits { -- cgit v1.2.3-71-gd317 From 038d2ef87572757861a177b19f9d489def2c48b8 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Mon, 11 Jan 2016 10:26:07 +0200 Subject: IB/mlx5: Add flow steering support Adding flow steering support by creating a flow-table per priority (if rules exist in the priority). mlx5_ib uses autogrouping and thus only creates the required destinations. Also includes adding of these flow steering utilities 1. Parsing verbs flow attributes hardware steering specs. 2. Check if flow is multicast - this is required in order to decide to which flow table will we add the steering rule. 3. 
Set outer headers in flow match criteria to zeros. Signed-off-by: Maor Gottlieb Signed-off-by: Moni Shoua Signed-off-by: Matan Barak Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx5/main.c | 463 +++++++++++++++++++++++++++++++++++ drivers/infiniband/hw/mlx5/mlx5_ib.h | 45 +++- include/linux/mlx5/fs.h | 10 + 3 files changed, 517 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 7e97cb55a6bf..b0ec175cc6ba 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -43,6 +43,9 @@ #include #include #include +#include +#include +#include #include "user.h" #include "mlx5_ib.h" @@ -835,6 +838,457 @@ static int mlx5_ib_dealloc_pd(struct ib_pd *pd) return 0; } +static bool outer_header_zero(u32 *match_criteria) +{ + int size = MLX5_ST_SZ_BYTES(fte_match_param); + char *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_criteria, + outer_headers); + + return outer_headers_c[0] == 0 && !memcmp(outer_headers_c, + outer_headers_c + 1, + size - 1); +} + +static int parse_flow_attr(u32 *match_c, u32 *match_v, + union ib_flow_spec *ib_spec) +{ + void *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_c, + outer_headers); + void *outer_headers_v = MLX5_ADDR_OF(fte_match_param, match_v, + outer_headers); + switch (ib_spec->type) { + case IB_FLOW_SPEC_ETH: + if (ib_spec->size != sizeof(ib_spec->eth)) + return -EINVAL; + + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + dmac_47_16), + ib_spec->eth.mask.dst_mac); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, + dmac_47_16), + ib_spec->eth.val.dst_mac); + + if (ib_spec->eth.mask.vlan_tag) { + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + vlan_tag, 1); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + vlan_tag, 1); + + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + first_vid, ntohs(ib_spec->eth.mask.vlan_tag)); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + first_vid, ntohs(ib_spec->eth.val.vlan_tag)); + + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + first_cfi, + ntohs(ib_spec->eth.mask.vlan_tag) >> 12); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + first_cfi, + ntohs(ib_spec->eth.val.vlan_tag) >> 12); + + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + first_prio, + ntohs(ib_spec->eth.mask.vlan_tag) >> 13); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + first_prio, + ntohs(ib_spec->eth.val.vlan_tag) >> 13); + } + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + ethertype, ntohs(ib_spec->eth.mask.ether_type)); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + ethertype, ntohs(ib_spec->eth.val.ether_type)); + break; + case IB_FLOW_SPEC_IPV4: + if (ib_spec->size != sizeof(ib_spec->ipv4)) + return -EINVAL; + + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + ethertype, 0xffff); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + ethertype, ETH_P_IP); + + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + src_ipv4_src_ipv6.ipv4_layout.ipv4), + &ib_spec->ipv4.mask.src_ip, + sizeof(ib_spec->ipv4.mask.src_ip)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, + src_ipv4_src_ipv6.ipv4_layout.ipv4), + &ib_spec->ipv4.val.src_ip, + sizeof(ib_spec->ipv4.val.src_ip)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &ib_spec->ipv4.mask.dst_ip, + sizeof(ib_spec->ipv4.mask.dst_ip)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, + 
dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &ib_spec->ipv4.val.dst_ip, + sizeof(ib_spec->ipv4.val.dst_ip)); + break; + case IB_FLOW_SPEC_TCP: + if (ib_spec->size != sizeof(ib_spec->tcp_udp)) + return -EINVAL; + + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol, + 0xff); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol, + IPPROTO_TCP); + + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_sport, + ntohs(ib_spec->tcp_udp.mask.src_port)); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_sport, + ntohs(ib_spec->tcp_udp.val.src_port)); + + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_dport, + ntohs(ib_spec->tcp_udp.mask.dst_port)); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_dport, + ntohs(ib_spec->tcp_udp.val.dst_port)); + break; + case IB_FLOW_SPEC_UDP: + if (ib_spec->size != sizeof(ib_spec->tcp_udp)) + return -EINVAL; + + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol, + 0xff); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol, + IPPROTO_UDP); + + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_sport, + ntohs(ib_spec->tcp_udp.mask.src_port)); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_sport, + ntohs(ib_spec->tcp_udp.val.src_port)); + + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_dport, + ntohs(ib_spec->tcp_udp.mask.dst_port)); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_dport, + ntohs(ib_spec->tcp_udp.val.dst_port)); + break; + default: + return -EINVAL; + } + + return 0; +} + +/* If a flow could catch both multicast and unicast packets, + * it won't fall into the multicast flow steering table and this rule + * could steal other multicast packets. + */ +static bool flow_is_multicast_only(struct ib_flow_attr *ib_attr) +{ + struct ib_flow_spec_eth *eth_spec; + + if (ib_attr->type != IB_FLOW_ATTR_NORMAL || + ib_attr->size < sizeof(struct ib_flow_attr) + + sizeof(struct ib_flow_spec_eth) || + ib_attr->num_of_specs < 1) + return false; + + eth_spec = (struct ib_flow_spec_eth *)(ib_attr + 1); + if (eth_spec->type != IB_FLOW_SPEC_ETH || + eth_spec->size != sizeof(*eth_spec)) + return false; + + return is_multicast_ether_addr(eth_spec->mask.dst_mac) && + is_multicast_ether_addr(eth_spec->val.dst_mac); +} + +static bool is_valid_attr(struct ib_flow_attr *flow_attr) +{ + union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1); + bool has_ipv4_spec = false; + bool eth_type_ipv4 = true; + unsigned int spec_index; + + /* Validate that ethertype is correct */ + for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { + if (ib_spec->type == IB_FLOW_SPEC_ETH && + ib_spec->eth.mask.ether_type) { + if (!((ib_spec->eth.mask.ether_type == htons(0xffff)) && + ib_spec->eth.val.ether_type == htons(ETH_P_IP))) + eth_type_ipv4 = false; + } else if (ib_spec->type == IB_FLOW_SPEC_IPV4) { + has_ipv4_spec = true; + } + ib_spec = (void *)ib_spec + ib_spec->size; + } + return !has_ipv4_spec || eth_type_ipv4; +} + +static void put_flow_table(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *prio, bool ft_added) +{ + prio->refcount -= !!ft_added; + if (!prio->refcount) { + mlx5_destroy_flow_table(prio->flow_table); + prio->flow_table = NULL; + } +} + +static int mlx5_ib_destroy_flow(struct ib_flow *flow_id) +{ + struct mlx5_ib_dev *dev = to_mdev(flow_id->qp->device); + struct mlx5_ib_flow_handler *handler = container_of(flow_id, + struct mlx5_ib_flow_handler, + ibflow); + struct mlx5_ib_flow_handler *iter, *tmp; + + mutex_lock(&dev->flow_db.lock); + + 
list_for_each_entry_safe(iter, tmp, &handler->list, list) { + mlx5_del_flow_rule(iter->rule); + list_del(&iter->list); + kfree(iter); + } + + mlx5_del_flow_rule(handler->rule); + put_flow_table(dev, &dev->flow_db.prios[handler->prio], true); + mutex_unlock(&dev->flow_db.lock); + + kfree(handler); + + return 0; +} + +#define MLX5_FS_MAX_TYPES 10 +#define MLX5_FS_MAX_ENTRIES 32000UL +static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, + struct ib_flow_attr *flow_attr) +{ + struct mlx5_flow_namespace *ns = NULL; + struct mlx5_ib_flow_prio *prio; + struct mlx5_flow_table *ft; + int num_entries; + int num_groups; + int priority; + int err = 0; + + if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { + if (flow_is_multicast_only(flow_attr)) + priority = MLX5_IB_FLOW_MCAST_PRIO; + else + priority = flow_attr->priority; + ns = mlx5_get_flow_namespace(dev->mdev, + MLX5_FLOW_NAMESPACE_BYPASS); + num_entries = MLX5_FS_MAX_ENTRIES; + num_groups = MLX5_FS_MAX_TYPES; + prio = &dev->flow_db.prios[priority]; + } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || + flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { + ns = mlx5_get_flow_namespace(dev->mdev, + MLX5_FLOW_NAMESPACE_LEFTOVERS); + build_leftovers_ft_param(&priority, + &num_entries, + &num_groups); + prio = &dev->flow_db.prios[MLX5_IB_FLOW_LEFTOVERS_PRIO]; + } + + if (!ns) + return ERR_PTR(-ENOTSUPP); + + ft = prio->flow_table; + if (!ft) { + ft = mlx5_create_auto_grouped_flow_table(ns, priority, + num_entries, + num_groups); + + if (!IS_ERR(ft)) { + prio->refcount = 0; + prio->flow_table = ft; + } else { + err = PTR_ERR(ft); + } + } + + return err ? ERR_PTR(err) : prio; +} + +static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *ft_prio, + struct ib_flow_attr *flow_attr, + struct mlx5_flow_destination *dst) +{ + struct mlx5_flow_table *ft = ft_prio->flow_table; + struct mlx5_ib_flow_handler *handler; + void *ib_flow = flow_attr + 1; + u8 match_criteria_enable = 0; + unsigned int spec_index; + u32 *match_c; + u32 *match_v; + int err = 0; + + if (!is_valid_attr(flow_attr)) + return ERR_PTR(-EINVAL); + + match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + handler = kzalloc(sizeof(*handler), GFP_KERNEL); + if (!handler || !match_c || !match_v) { + err = -ENOMEM; + goto free; + } + + INIT_LIST_HEAD(&handler->list); + + for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { + err = parse_flow_attr(match_c, match_v, ib_flow); + if (err < 0) + goto free; + + ib_flow += ((union ib_flow_spec *)ib_flow)->size; + } + + /* Outer header support only */ + match_criteria_enable = (!outer_header_zero(match_c)) << 0; + handler->rule = mlx5_add_flow_rule(ft, match_criteria_enable, + match_c, match_v, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, + dst); + + if (IS_ERR(handler->rule)) { + err = PTR_ERR(handler->rule); + goto free; + } + + handler->prio = ft_prio - dev->flow_db.prios; + + ft_prio->flow_table = ft; +free: + if (err) + kfree(handler); + kfree(match_c); + kfree(match_v); + return err ? 
ERR_PTR(err) : handler; +} + +enum { + LEFTOVERS_MC, + LEFTOVERS_UC, +}; + +static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *ft_prio, + struct ib_flow_attr *flow_attr, + struct mlx5_flow_destination *dst) +{ + struct mlx5_ib_flow_handler *handler_ucast = NULL; + struct mlx5_ib_flow_handler *handler = NULL; + + static struct { + struct ib_flow_attr flow_attr; + struct ib_flow_spec_eth eth_flow; + } leftovers_specs[] = { + [LEFTOVERS_MC] = { + .flow_attr = { + .num_of_specs = 1, + .size = sizeof(leftovers_specs[0]) + }, + .eth_flow = { + .type = IB_FLOW_SPEC_ETH, + .size = sizeof(struct ib_flow_spec_eth), + .mask = {.dst_mac = {0x1} }, + .val = {.dst_mac = {0x1} } + } + }, + [LEFTOVERS_UC] = { + .flow_attr = { + .num_of_specs = 1, + .size = sizeof(leftovers_specs[0]) + }, + .eth_flow = { + .type = IB_FLOW_SPEC_ETH, + .size = sizeof(struct ib_flow_spec_eth), + .mask = {.dst_mac = {0x1} }, + .val = {.dst_mac = {} } + } + } + }; + + handler = create_flow_rule(dev, ft_prio, + &leftovers_specs[LEFTOVERS_MC].flow_attr, + dst); + if (!IS_ERR(handler) && + flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) { + handler_ucast = create_flow_rule(dev, ft_prio, + &leftovers_specs[LEFTOVERS_UC].flow_attr, + dst); + if (IS_ERR(handler_ucast)) { + kfree(handler); + handler = handler_ucast; + } else { + list_add(&handler_ucast->list, &handler->list); + } + } + + return handler; +} + +static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, + struct ib_flow_attr *flow_attr, + int domain) +{ + struct mlx5_ib_dev *dev = to_mdev(qp->device); + struct mlx5_ib_flow_handler *handler = NULL; + struct mlx5_flow_destination *dst = NULL; + struct mlx5_ib_flow_prio *ft_prio; + int err; + + if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) + return ERR_PTR(-ENOSPC); + + if (domain != IB_FLOW_DOMAIN_USER || + flow_attr->port > MLX5_CAP_GEN(dev->mdev, num_ports) || + flow_attr->flags) + return ERR_PTR(-EINVAL); + + dst = kzalloc(sizeof(*dst), GFP_KERNEL); + if (!dst) + return ERR_PTR(-ENOMEM); + + mutex_lock(&dev->flow_db.lock); + + ft_prio = get_flow_table(dev, flow_attr); + if (IS_ERR(ft_prio)) { + err = PTR_ERR(ft_prio); + goto unlock; + } + + dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dst->tir_num = to_mqp(qp)->raw_packet_qp.rq.tirn; + + if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { + handler = create_flow_rule(dev, ft_prio, flow_attr, + dst); + } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || + flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { + handler = create_leftovers_rule(dev, ft_prio, flow_attr, + dst); + } else { + err = -EINVAL; + goto destroy_ft; + } + + if (IS_ERR(handler)) { + err = PTR_ERR(handler); + handler = NULL; + goto destroy_ft; + } + + ft_prio->refcount++; + mutex_unlock(&dev->flow_db.lock); + kfree(dst); + + return &handler->ibflow; + +destroy_ft: + put_flow_table(dev, ft_prio, false); +unlock: + mutex_unlock(&dev->flow_db.lock); + kfree(dst); + kfree(handler); + return ERR_PTR(err); +} + static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { struct mlx5_ib_dev *dev = to_mdev(ibqp->device); @@ -1439,10 +1893,19 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); } + if (mlx5_ib_port_link_layer(&dev->ib_dev) == + IB_LINK_LAYER_ETHERNET) { + dev->ib_dev.create_flow = mlx5_ib_create_flow; + dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow; + dev->ib_dev.uverbs_ex_cmd_mask |= + (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | + (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); 
+ } err = init_node_data(dev); if (err) goto err_dealloc; + mutex_init(&dev->flow_db.lock); mutex_init(&dev->cap_mask_mutex); err = create_dev_resources(&dev->devr); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 633347260b79..1474cccd1e0f 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -105,6 +105,36 @@ struct mlx5_ib_pd { u32 pdn; }; +#define MLX5_IB_FLOW_MCAST_PRIO (MLX5_BY_PASS_NUM_PRIOS - 1) +#define MLX5_IB_FLOW_LAST_PRIO (MLX5_IB_FLOW_MCAST_PRIO - 1) +#if (MLX5_IB_FLOW_LAST_PRIO <= 0) +#error "Invalid number of bypass priorities" +#endif +#define MLX5_IB_FLOW_LEFTOVERS_PRIO (MLX5_IB_FLOW_MCAST_PRIO + 1) + +#define MLX5_IB_NUM_FLOW_FT (MLX5_IB_FLOW_LEFTOVERS_PRIO + 1) +struct mlx5_ib_flow_prio { + struct mlx5_flow_table *flow_table; + unsigned int refcount; +}; + +struct mlx5_ib_flow_handler { + struct list_head list; + struct ib_flow ibflow; + unsigned int prio; + struct mlx5_flow_rule *rule; +}; + +struct mlx5_ib_flow_db { + struct mlx5_ib_flow_prio prios[MLX5_IB_NUM_FLOW_FT]; + /* Protect flow steering bypass flow tables + * when add/del flow rules. + * only single add/removal of flow steering rule could be done + * simultaneously. + */ + struct mutex lock; +}; + /* Use macros here so that don't have to duplicate * enum ib_send_flags and enum ib_qp_type for low-level driver */ @@ -171,9 +201,21 @@ struct mlx5_ib_pfault { struct mlx5_pagefault mpfault; }; +struct mlx5_ib_rq { + u32 tirn; +}; + +struct mlx5_ib_raw_packet_qp { + struct mlx5_ib_rq rq; +}; + struct mlx5_ib_qp { struct ib_qp ibqp; - struct mlx5_core_qp mqp; + union { + struct mlx5_core_qp mqp; + struct mlx5_ib_raw_packet_qp raw_packet_qp; + }; + struct mlx5_buf buf; struct mlx5_db db; @@ -431,6 +473,7 @@ struct mlx5_ib_dev { */ struct srcu_struct mr_srcu; #endif + struct mlx5_ib_flow_db flow_db; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index a94341271e3f..8230caa3fb6e 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -38,6 +38,16 @@ #define MLX5_FS_DEFAULT_FLOW_TAG 0x0 +#define LEFTOVERS_RULE_NUM 2 +static inline void build_leftovers_ft_param(int *priority, + int *n_ent, + int *n_grp) +{ + *priority = 0; /* Priority of leftovers_prio-0 */ + *n_ent = LEFTOVERS_RULE_NUM; + *n_grp = LEFTOVERS_RULE_NUM; +} + enum mlx5_flow_namespace_type { MLX5_FLOW_NAMESPACE_BYPASS, MLX5_FLOW_NAMESPACE_KERNEL, -- cgit v1.2.3-71-gd317 From b6a0e72ad3cffabaf30b856deb58fbe64a0f36a8 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 11 Jan 2016 10:19:10 -0800 Subject: net: Fix typo in netdev_intersect_features Obviously need to 'or in NETIF_F_IP_CSUM and NETIF_F_IPV6_CSUM. Fixes: c8cd0989bd151f ("net: Eliminate NETIF_F_GEN_CSUM and NETIF_F_V[46]_CSUM") Reported-by: Jack Morgenstein Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2285596e7045..5ac140dcb789 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3873,9 +3873,9 @@ static inline netdev_features_t netdev_intersect_features(netdev_features_t f1, { if ((f1 ^ f2) & NETIF_F_HW_CSUM) { if (f1 & NETIF_F_HW_CSUM) - f1 |= (NETIF_F_IP_CSUM|NETIF_F_IP_CSUM); + f1 |= (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); else - f2 |= (NETIF_F_IP_CSUM|NETIF_F_IP_CSUM); + f2 |= (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); } return f1 & f2; -- cgit v1.2.3-71-gd317
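
To illustrate how the flow steering pieces added above fit together, below is a minimal, hypothetical consumer of the new auto-grouped API, loosely modeled on the mlx5_ib get_flow_table()/create_flow_rule() path in the patches: it looks up the bypass namespace, creates an auto-grouped table, and adds a rule forwarding TCP traffic to a TIR. The function name, priority (0), table size (1024), group count (4) and TIR number (0x10) are illustrative assumptions, not values taken from any commit; error handling is abbreviated.

/*
 * Usage sketch only. Assumes <linux/mlx5/driver.h>, <linux/mlx5/fs.h>,
 * <linux/mlx5/mlx5_ifc.h>, <linux/slab.h> and <linux/in.h>.
 */
static struct mlx5_flow_rule *example_steer_tcp_to_tir(struct mlx5_core_dev *mdev)
{
	struct mlx5_flow_destination dest;
	struct mlx5_flow_namespace *ns;
	struct mlx5_flow_table *ft;
	struct mlx5_flow_rule *rule;
	void *headers_c, *headers_v;
	u32 *match_c, *match_v;

	ns = mlx5_get_flow_namespace(mdev, MLX5_FLOW_NAMESPACE_BYPASS);
	if (!ns)
		return ERR_PTR(-ENOTSUPP);

	/* One table at (example) priority 0, auto-grouped into at most 4 groups */
	ft = mlx5_create_auto_grouped_flow_table(ns, 0, 1024, 4);
	if (IS_ERR(ft))
		return ERR_CAST(ft);

	match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
	match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
	if (!match_c || !match_v) {
		rule = ERR_PTR(-ENOMEM);
		goto out;
	}

	/* Match on outer IP protocol == TCP, as in parse_flow_attr() above */
	headers_c = MLX5_ADDR_OF(fte_match_param, match_c, outer_headers);
	headers_v = MLX5_ADDR_OF(fte_match_param, match_v, outer_headers);
	MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, 0xff);
	MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_TCP);

	/* Forward matching packets to an (example) TIR */
	dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
	dest.tir_num = 0x10;

	rule = mlx5_add_flow_rule(ft, 1 << 0 /* outer headers only */,
				  match_c, match_v,
				  MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
				  MLX5_FS_DEFAULT_FLOW_TAG, &dest);
out:
	if (IS_ERR(rule))
		mlx5_destroy_flow_table(ft);
	kfree(match_c);
	kfree(match_v);
	return rule;
}

A rule created this way would later be torn down with mlx5_del_flow_rule() followed by mlx5_destroy_flow_table(), mirroring the cleanup order used by mlx5_ib_destroy_flow() in the patches above.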