From eec517cdb4810b3843eb7707971de3164088bff1 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 20 Apr 2020 00:11:50 +0200 Subject: net: Add IF_OPER_TESTING RFC 2863 defines the operational state testing. Add support for this state, both as an IF_LINK_MODE_ and __LINK_STATE_. Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/netdevice.h | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 130a668049ab..0750b54b3765 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -288,6 +288,7 @@ enum netdev_state_t { __LINK_STATE_NOCARRIER, __LINK_STATE_LINKWATCH_PENDING, __LINK_STATE_DORMANT, + __LINK_STATE_TESTING, }; @@ -3907,6 +3908,46 @@ static inline bool netif_dormant(const struct net_device *dev) } +/** + * netif_testing_on - mark device as under test. + * @dev: network device + * + * Mark device as under test (as per RFC2863). + * + * The testing state indicates that some test(s) must be performed on + * the interface. After completion of the test, the interface state + * will change to up, dormant, or down, as appropriate. + */ +static inline void netif_testing_on(struct net_device *dev) +{ + if (!test_and_set_bit(__LINK_STATE_TESTING, &dev->state)) + linkwatch_fire_event(dev); +} + +/** + * netif_testing_off - set device as not under test. + * @dev: network device + * + * Device is not in testing state. 
+ */ +static inline void netif_testing_off(struct net_device *dev) +{ + if (test_and_clear_bit(__LINK_STATE_TESTING, &dev->state)) + linkwatch_fire_event(dev); +} + +/** + * netif_testing - test if device is under test + * @dev: network device + * + * Check if device is under test + */ +static inline bool netif_testing(const struct net_device *dev) +{ + return test_bit(__LINK_STATE_TESTING, &dev->state); +} + + /** * netif_oper_up - test if device is operational * @dev: network device -- cgit v1.2.3-71-gd317 From 6f8b12d661d09b488b9ac879b8eafbd2cc4a1450 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 22 Apr 2020 09:13:27 -0700 Subject: net: napi: add hard irqs deferral feature Back in commit 3b47d30396ba ("net: gro: add a per device gro flush timer") we added the ability to arm one high resolution timer, that we used to keep not-complete packets in GRO engine a bit longer, hoping that further frames might be added to them. Since then, we added the napi_complete_done() interface, and commit 364b6055738b ("net: busy-poll: return busypolling status to drivers") allowed drivers to avoid re-arming NIC interrupts if we made a promise that their NAPI poll() handler would be called in the near future. This infrastructure can be leveraged, thanks to a new device parameter, which allows to arm the napi hrtimer, instead of re-arming the device hard IRQ. We have noticed that on some servers with 32 RX queues or more, the chit-chat between the NIC and the host caused by IRQ delivery and re-arming could hurt throughput by ~20% on 100Gbit NIC. In contrast, hrtimers are using local (percpu) resources and might have lower cost. The new tunable, named napi_defer_hard_irqs, is placed in the same hierarchy than gro_flush_timeout (/sys/class/net/ethX/) By default, both gro_flush_timeout and napi_defer_hard_irqs are zero. This patch does not change the prior behavior of gro_flush_timeout if used alone : NIC hard irqs should be rearmed as before. 
One concrete usage can be : echo 20000 >/sys/class/net/eth1/gro_flush_timeout echo 10 >/sys/class/net/eth1/napi_defer_hard_irqs If at least one packet is retired, then we will reset napi counter to 10 (napi_defer_hard_irqs), ensuring at least 10 periodic scans of the queue. On busy queues, this should avoid NIC hard IRQ, while before this patch IRQ avoidance was only possible if napi->poll() was exhausting its budget and not call napi_complete_done(). This feature also can be used to work around some non-optimal NIC irq coalescing strategies. Having the ability to insert XX usec delays between each napi->poll() can increase cache efficiency, since we increase batch sizes. It also keeps serving cpus not idle too long, reducing tail latencies. Co-developed-by: Luigi Rizzo Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ net/core/dev.c | 29 ++++++++++++++++++----------- net/core/net-sysfs.c | 18 ++++++++++++++++++ 3 files changed, 38 insertions(+), 11 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0750b54b3765..5a8d40f1ffe2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -329,6 +329,7 @@ struct napi_struct { unsigned long state; int weight; + int defer_hard_irqs_count; unsigned long gro_bitmask; int (*poll)(struct napi_struct *, int); #ifdef CONFIG_NETPOLL @@ -1995,6 +1996,7 @@ struct net_device { struct bpf_prog __rcu *xdp_prog; unsigned long gro_flush_timeout; + int napi_defer_hard_irqs; rx_handler_func_t __rcu *rx_handler; void __rcu *rx_handler_data; diff --git a/net/core/dev.c b/net/core/dev.c index fb61522b1ce1..67585484ad32 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6227,7 +6227,8 @@ EXPORT_SYMBOL(__napi_schedule_irqoff); bool napi_complete_done(struct napi_struct *n, int work_done) { - unsigned long flags, val, new; + unsigned long flags, val, new, timeout = 0; + bool ret = true; /* * 1) Don't let 
napi dequeue from the cpu poll list @@ -6239,20 +6240,23 @@ bool napi_complete_done(struct napi_struct *n, int work_done) NAPIF_STATE_IN_BUSY_POLL))) return false; - if (n->gro_bitmask) { - unsigned long timeout = 0; - - if (work_done) + if (work_done) { + if (n->gro_bitmask) timeout = n->dev->gro_flush_timeout; - + n->defer_hard_irqs_count = n->dev->napi_defer_hard_irqs; + } + if (n->defer_hard_irqs_count > 0) { + n->defer_hard_irqs_count--; + timeout = n->dev->gro_flush_timeout; + if (timeout) + ret = false; + } + if (n->gro_bitmask) { /* When the NAPI instance uses a timeout and keeps postponing * it, we need to bound somehow the time packets are kept in * the GRO layer */ napi_gro_flush(n, !!timeout); - if (timeout) - hrtimer_start(&n->timer, ns_to_ktime(timeout), - HRTIMER_MODE_REL_PINNED); } gro_normal_list(n); @@ -6284,7 +6288,10 @@ bool napi_complete_done(struct napi_struct *n, int work_done) return false; } - return true; + if (timeout) + hrtimer_start(&n->timer, ns_to_ktime(timeout), + HRTIMER_MODE_REL_PINNED); + return ret; } EXPORT_SYMBOL(napi_complete_done); @@ -6464,7 +6471,7 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer) /* Note : we use a relaxed variant of napi_schedule_prep() not setting * NAPI_STATE_MISSED, since we do not react to a device IRQ. 
*/ - if (napi->gro_bitmask && !napi_disable_pending(napi) && + if (!napi_disable_pending(napi) && !test_and_set_bit(NAPI_STATE_SCHED, &napi->state)) __napi_schedule_irqoff(napi); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 0d9e46de205e..f3b650cd0923 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -382,6 +382,23 @@ static ssize_t gro_flush_timeout_store(struct device *dev, } NETDEVICE_SHOW_RW(gro_flush_timeout, fmt_ulong); +static int change_napi_defer_hard_irqs(struct net_device *dev, unsigned long val) +{ + dev->napi_defer_hard_irqs = val; + return 0; +} + +static ssize_t napi_defer_hard_irqs_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + return netdev_store(dev, attr, buf, len, change_napi_defer_hard_irqs); +} +NETDEVICE_SHOW_RW(napi_defer_hard_irqs, fmt_dec); + static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { @@ -545,6 +562,7 @@ static struct attribute *net_class_attrs[] __ro_after_init = { &dev_attr_flags.attr, &dev_attr_tx_queue_len.attr, &dev_attr_gro_flush_timeout.attr, + &dev_attr_napi_defer_hard_irqs.attr, &dev_attr_phys_port_id.attr, &dev_attr_phys_port_name.attr, &dev_attr_phys_switch_id.attr, -- cgit v1.2.3-71-gd317 From cff9f12b18915d957a2130885a00f8ab15cff7e4 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 30 Apr 2020 22:21:31 +0300 Subject: net/core: Introduce netdev_get_xmit_slave Add new ndo to get the xmit slave of master device. The reference counters are not incremented so the caller must be careful with locks. User can ask to get the xmit slave assume all the slaves can transmit by set all_slaves arg to true. Signed-off-by: Maor Gottlieb Reviewed-by: Jiri Pirko Reviewed-by: David Ahern Acked-by: David S. 
Miller Signed-off-by: Saeed Mahameed --- include/linux/netdevice.h | 12 ++++++++++++ net/core/dev.c | 22 ++++++++++++++++++++++ 2 files changed, 34 insertions(+) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 130a668049ab..26bc0f11b7ad 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1146,6 +1146,12 @@ struct netdev_net_notifier { * int (*ndo_del_slave)(struct net_device *dev, struct net_device *slave_dev); * Called to release previously enslaved netdev. * + * struct net_device *(*ndo_get_xmit_slave)(struct net_device *dev, + * struct sk_buff *skb, + * bool all_slaves); + * Get the xmit slave of master device. If all_slaves is true, function + * assume all the slaves can transmit. + * * Feature/offload setting functions. * netdev_features_t (*ndo_fix_features)(struct net_device *dev, * netdev_features_t features); @@ -1389,6 +1395,9 @@ struct net_device_ops { struct netlink_ext_ack *extack); int (*ndo_del_slave)(struct net_device *dev, struct net_device *slave_dev); + struct net_device* (*ndo_get_xmit_slave)(struct net_device *dev, + struct sk_buff *skb, + bool all_slaves); netdev_features_t (*ndo_fix_features)(struct net_device *dev, netdev_features_t features); int (*ndo_set_features)(struct net_device *dev, @@ -2731,6 +2740,9 @@ void netdev_freemem(struct net_device *dev); void synchronize_net(void); int init_dummy_netdev(struct net_device *dev); +struct net_device *netdev_get_xmit_slave(struct net_device *dev, + struct sk_buff *skb, + bool all_slaves); struct net_device *dev_get_by_index(struct net *net, int ifindex); struct net_device *__dev_get_by_index(struct net *net, int ifindex); struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); diff --git a/net/core/dev.c b/net/core/dev.c index 9c9e763bfe0e..e6c10980abfd 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7785,6 +7785,28 @@ void netdev_bonding_info_change(struct net_device *dev, } 
EXPORT_SYMBOL(netdev_bonding_info_change); +/** + * netdev_get_xmit_slave - Get the xmit slave of master device + * @skb: The packet + * @all_slaves: assume all the slaves are active + * + * The reference counters are not incremented so the caller must be + * careful with locks. The caller must hold RCU lock. + * %NULL is returned if no slave is found. + */ + +struct net_device *netdev_get_xmit_slave(struct net_device *dev, + struct sk_buff *skb, + bool all_slaves) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + if (!ops->ndo_get_xmit_slave) + return NULL; + return ops->ndo_get_xmit_slave(dev, skb, all_slaves); +} +EXPORT_SYMBOL(netdev_get_xmit_slave); + static void netdev_adjacent_add_links(struct net_device *dev) { struct netdev_adjacent *iter; -- cgit v1.2.3-71-gd317 From 1a33e10e4a95cb109ff1145098175df3113313ef Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 2 May 2020 22:22:19 -0700 Subject: net: partially revert dynamic lockdep key changes This patch reverts the following commits: commit 064ff66e2bef84f1153087612032b5b9eab005bd "bonding: add missing netdev_update_lockdep_key()" commit 53d374979ef147ab51f5d632dfe20b14aebeccd0 "net: avoid updating qdisc_xmit_lock_key in netdev_update_lockdep_key()" commit 1f26c0d3d24125992ab0026b0dab16c08df947c7 "net: fix kernel-doc warning in <linux/netdevice.h>" commit ab92d68fc22f9afab480153bd82a20f6e2533769 "net: core: add generic lockdep keys" but keeps the addr_list_lock_key because we still lock addr_list_lock nestedly on stacked devices; unlike xmit_lock, this is safe because we don't take addr_list_lock on any fast path. Reported-and-tested-by: syzbot+aaa6fa4949cc5d9b7b25@syzkaller.appspotmail.com Cc: Dmitry Vyukov Cc: Taehee Yoo Signed-off-by: Cong Wang Acked-by: Taehee Yoo Signed-off-by: David S. 
Miller --- drivers/net/bonding/bond_main.c | 1 + drivers/net/ethernet/netronome/nfp/nfp_net_repr.c | 16 ++++ drivers/net/hamradio/bpqether.c | 20 +++++ drivers/net/hyperv/netvsc_drv.c | 2 + drivers/net/ipvlan/ipvlan_main.c | 2 + drivers/net/macsec.c | 2 + drivers/net/macvlan.c | 2 + drivers/net/ppp/ppp_generic.c | 2 + drivers/net/team/team.c | 1 + drivers/net/vrf.c | 1 + drivers/net/wireless/intersil/hostap/hostap_hw.c | 22 ++++++ include/linux/netdevice.h | 27 +++++-- net/8021q/vlan_dev.c | 21 ++++++ net/batman-adv/soft-interface.c | 30 ++++++++ net/bluetooth/6lowpan.c | 8 ++ net/core/dev.c | 90 ++++++++++++++++++----- net/dsa/slave.c | 12 +++ net/ieee802154/6lowpan/core.c | 8 ++ net/l2tp/l2tp_eth.c | 1 + net/netrom/af_netrom.c | 21 ++++++ net/rose/af_rose.c | 21 ++++++ net/sched/sch_generic.c | 17 +++-- 22 files changed, 294 insertions(+), 33 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 2e70e43c5df5..d01871321d22 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4898,6 +4898,7 @@ static int bond_init(struct net_device *bond_dev) spin_lock_init(&bond->stats_lock); lockdep_register_key(&bond->stats_lock_key); lockdep_set_class(&bond->stats_lock, &bond->stats_lock_key); + netdev_lockdep_set_classes(bond_dev); list_add_tail(&bond->bond_list, &bn->dev_list); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c index 79d72c88bbef..b3cabc274121 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c @@ -299,6 +299,20 @@ static void nfp_repr_clean(struct nfp_repr *repr) nfp_port_free(repr->port); } +static struct lock_class_key nfp_repr_netdev_xmit_lock_key; + +static void nfp_repr_set_lockdep_class_one(struct net_device *dev, + struct netdev_queue *txq, + void *_unused) +{ + lockdep_set_class(&txq->_xmit_lock, 
&nfp_repr_netdev_xmit_lock_key); +} + +static void nfp_repr_set_lockdep_class(struct net_device *dev) +{ + netdev_for_each_tx_queue(dev, nfp_repr_set_lockdep_class_one, NULL); +} + int nfp_repr_init(struct nfp_app *app, struct net_device *netdev, u32 cmsg_port_id, struct nfp_port *port, struct net_device *pf_netdev) @@ -308,6 +322,8 @@ int nfp_repr_init(struct nfp_app *app, struct net_device *netdev, u32 repr_cap = nn->tlv_caps.repr_cap; int err; + nfp_repr_set_lockdep_class(netdev); + repr->port = port; repr->dst = metadata_dst_alloc(0, METADATA_HW_PORT_MUX, GFP_KERNEL); if (!repr->dst) diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c index fbea6f232819..206688154fdf 100644 --- a/drivers/net/hamradio/bpqether.c +++ b/drivers/net/hamradio/bpqether.c @@ -107,6 +107,25 @@ struct bpqdev { static LIST_HEAD(bpq_devices); +/* + * bpqether network devices are paired with ethernet devices below them, so + * form a special "super class" of normal ethernet devices; split their locks + * off into a separate class since they always nest. 
+ */ +static struct lock_class_key bpq_netdev_xmit_lock_key; + +static void bpq_set_lockdep_class_one(struct net_device *dev, + struct netdev_queue *txq, + void *_unused) +{ + lockdep_set_class(&txq->_xmit_lock, &bpq_netdev_xmit_lock_key); +} + +static void bpq_set_lockdep_class(struct net_device *dev) +{ + netdev_for_each_tx_queue(dev, bpq_set_lockdep_class_one, NULL); +} + /* ------------------------------------------------------------------------ */ @@ -477,6 +496,7 @@ static int bpq_new_device(struct net_device *edev) err = register_netdevice(ndev); if (err) goto error; + bpq_set_lockdep_class(ndev); /* List protected by RTNL */ list_add_rcu(&bpq->bpq_list, &bpq_devices); diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index d8e86bdbfba1..c0b647a4c893 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -2456,6 +2456,8 @@ static int netvsc_probe(struct hv_device *dev, NETIF_F_HW_VLAN_CTAG_RX; net->vlan_features = net->features; + netdev_lockdep_set_classes(net); + /* MTU range: 68 - 1500 or 65521 */ net->min_mtu = NETVSC_MTU_MIN; if (nvdev->nvsp_version >= NVSP_PROTOCOL_VERSION_2) diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index f195f278a83a..15e87c097b0b 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -131,6 +131,8 @@ static int ipvlan_init(struct net_device *dev) dev->gso_max_segs = phy_dev->gso_max_segs; dev->hard_header_len = phy_dev->hard_header_len; + netdev_lockdep_set_classes(dev); + ipvlan->pcpu_stats = netdev_alloc_pcpu_stats(struct ipvl_pcpu_stats); if (!ipvlan->pcpu_stats) return -ENOMEM; diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 758baf7cb8a1..ea3f25cc79ef 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -4047,6 +4047,8 @@ static int macsec_newlink(struct net *net, struct net_device *dev, if (err < 0) return err; + netdev_lockdep_set_classes(dev); + err = 
netdev_upper_dev_link(real_dev, dev, extack); if (err < 0) goto unregister; diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index d45600e0a38c..34eb073cdd74 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -890,6 +890,8 @@ static int macvlan_init(struct net_device *dev) dev->gso_max_segs = lowerdev->gso_max_segs; dev->hard_header_len = lowerdev->hard_header_len; + netdev_lockdep_set_classes(dev); + vlan->pcpu_stats = netdev_alloc_pcpu_stats(struct vlan_pcpu_stats); if (!vlan->pcpu_stats) return -ENOMEM; diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 22cc2cb9d878..7d005896a0f9 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -1410,6 +1410,8 @@ static int ppp_dev_init(struct net_device *dev) { struct ppp *ppp; + netdev_lockdep_set_classes(dev); + ppp = netdev_priv(dev); /* Let the netdevice take a reference on the ppp file. This ensures * that ppp_destroy_interface() won't run before the device gets diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 04845a4017f9..8c1e02752ff6 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1647,6 +1647,7 @@ static int team_init(struct net_device *dev) lockdep_register_key(&team->team_lock_key); __mutex_init(&team->lock, "team->team_lock_key", &team->team_lock_key); + netdev_lockdep_set_classes(dev); return 0; diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 56f8aab46f89..43928a1c2f2a 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -867,6 +867,7 @@ static int vrf_dev_init(struct net_device *dev) /* similarly, oper state is irrelevant; set to up to avoid confusion */ dev->operstate = IF_OPER_UP; + netdev_lockdep_set_classes(dev); return 0; out_rth: diff --git a/drivers/net/wireless/intersil/hostap/hostap_hw.c b/drivers/net/wireless/intersil/hostap/hostap_hw.c index 58212c532c90..aadf3dec5bf3 100644 --- a/drivers/net/wireless/intersil/hostap/hostap_hw.c +++ 
b/drivers/net/wireless/intersil/hostap/hostap_hw.c @@ -3041,6 +3041,27 @@ static void prism2_clear_set_tim_queue(local_info_t *local) } } + +/* + * HostAP uses two layers of net devices, where the inner + * layer gets called all the time from the outer layer. + * This is a natural nesting, which needs a split lock type. + */ +static struct lock_class_key hostap_netdev_xmit_lock_key; + +static void prism2_set_lockdep_class_one(struct net_device *dev, + struct netdev_queue *txq, + void *_unused) +{ + lockdep_set_class(&txq->_xmit_lock, + &hostap_netdev_xmit_lock_key); +} + +static void prism2_set_lockdep_class(struct net_device *dev) +{ + netdev_for_each_tx_queue(dev, prism2_set_lockdep_class_one, NULL); +} + static struct net_device * prism2_init_local_data(struct prism2_helper_functions *funcs, int card_idx, struct device *sdev) @@ -3199,6 +3220,7 @@ while (0) if (ret >= 0) ret = register_netdevice(dev); + prism2_set_lockdep_class(dev); rtnl_unlock(); if (ret < 0) { printk(KERN_WARNING "%s: register netdevice failed!\n", diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5a8d40f1ffe2..7725efd6e48a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1805,13 +1805,11 @@ enum netdev_priv_flags { * @phydev: Physical device may attach itself * for hardware timestamping * @sfp_bus: attached &struct sfp_bus structure. 
- * @qdisc_tx_busylock_key: lockdep class annotating Qdisc->busylock - * spinlock - * @qdisc_running_key: lockdep class annotating Qdisc->running seqcount - * @qdisc_xmit_lock_key: lockdep class annotating - * netdev_queue->_xmit_lock spinlock + * * @addr_list_lock_key: lockdep class annotating * net_device->addr_list_lock spinlock + * @qdisc_tx_busylock: lockdep class annotating Qdisc->busylock spinlock + * @qdisc_running_key: lockdep class annotating Qdisc->running seqcount * * @proto_down: protocol port state information can be sent to the * switch driver and used to set the phys state of the @@ -2112,10 +2110,9 @@ struct net_device { #endif struct phy_device *phydev; struct sfp_bus *sfp_bus; - struct lock_class_key qdisc_tx_busylock_key; - struct lock_class_key qdisc_running_key; - struct lock_class_key qdisc_xmit_lock_key; struct lock_class_key addr_list_lock_key; + struct lock_class_key *qdisc_tx_busylock; + struct lock_class_key *qdisc_running_key; bool proto_down; unsigned wol_enabled:1; @@ -2200,6 +2197,20 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev, f(dev, &dev->_tx[i], arg); } +#define netdev_lockdep_set_classes(dev) \ +{ \ + static struct lock_class_key qdisc_tx_busylock_key; \ + static struct lock_class_key qdisc_running_key; \ + static struct lock_class_key qdisc_xmit_lock_key; \ + unsigned int i; \ + \ + (dev)->qdisc_tx_busylock = &qdisc_tx_busylock_key; \ + (dev)->qdisc_running_key = &qdisc_running_key; \ + for (i = 0; i < (dev)->num_tx_queues; i++) \ + lockdep_set_class(&(dev)->_tx[i]._xmit_lock, \ + &qdisc_xmit_lock_key); \ +} + u16 netdev_pick_tx(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev); struct netdev_queue *netdev_core_pick_tx(struct net_device *dev, diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 990b9fde28c6..319220b2341d 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -489,6 +489,25 @@ static void vlan_dev_set_rx_mode(struct net_device *vlan_dev) 
dev_uc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev); } +/* + * vlan network devices have devices nesting below it, and are a special + * "super class" of normal network devices; split their locks off into a + * separate class since they always nest. + */ +static struct lock_class_key vlan_netdev_xmit_lock_key; + +static void vlan_dev_set_lockdep_one(struct net_device *dev, + struct netdev_queue *txq, + void *unused) +{ + lockdep_set_class(&txq->_xmit_lock, &vlan_netdev_xmit_lock_key); +} + +static void vlan_dev_set_lockdep_class(struct net_device *dev) +{ + netdev_for_each_tx_queue(dev, vlan_dev_set_lockdep_one, NULL); +} + static const struct header_ops vlan_header_ops = { .create = vlan_dev_hard_header, .parse = eth_header_parse, @@ -579,6 +598,8 @@ static int vlan_dev_init(struct net_device *dev) SET_NETDEV_DEVTYPE(dev, &vlan_type); + vlan_dev_set_lockdep_class(dev); + vlan->vlan_pcpu_stats = netdev_alloc_pcpu_stats(struct vlan_pcpu_stats); if (!vlan->vlan_pcpu_stats) return -ENOMEM; diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 5f05a728f347..822af540b854 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -739,6 +739,34 @@ static int batadv_interface_kill_vid(struct net_device *dev, __be16 proto, return 0; } +/* batman-adv network devices have devices nesting below it and are a special + * "super class" of normal network devices; split their locks off into a + * separate class since they always nest. 
+ */ +static struct lock_class_key batadv_netdev_xmit_lock_key; + +/** + * batadv_set_lockdep_class_one() - Set lockdep class for a single tx queue + * @dev: device which owns the tx queue + * @txq: tx queue to modify + * @_unused: always NULL + */ +static void batadv_set_lockdep_class_one(struct net_device *dev, + struct netdev_queue *txq, + void *_unused) +{ + lockdep_set_class(&txq->_xmit_lock, &batadv_netdev_xmit_lock_key); +} + +/** + * batadv_set_lockdep_class() - Set txq and addr_list lockdep class + * @dev: network device to modify + */ +static void batadv_set_lockdep_class(struct net_device *dev) +{ + netdev_for_each_tx_queue(dev, batadv_set_lockdep_class_one, NULL); +} + /** * batadv_softif_init_late() - late stage initialization of soft interface * @dev: registered network device to modify @@ -752,6 +780,8 @@ static int batadv_softif_init_late(struct net_device *dev) int ret; size_t cnt_len = sizeof(u64) * BATADV_CNT_NUM; + batadv_set_lockdep_class(dev); + bat_priv = netdev_priv(dev); bat_priv->soft_iface = dev; diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 4febc82a7c76..bb55d92691b0 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -571,7 +571,15 @@ static netdev_tx_t bt_xmit(struct sk_buff *skb, struct net_device *netdev) return err < 0 ? 
NET_XMIT_DROP : err; } +static int bt_dev_init(struct net_device *dev) +{ + netdev_lockdep_set_classes(dev); + + return 0; +} + static const struct net_device_ops netdev_ops = { + .ndo_init = bt_dev_init, .ndo_start_xmit = bt_xmit, }; diff --git a/net/core/dev.c b/net/core/dev.c index afff16849c26..f8d83922a6af 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -398,6 +398,74 @@ static RAW_NOTIFIER_HEAD(netdev_chain); DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data); EXPORT_PER_CPU_SYMBOL(softnet_data); +#ifdef CONFIG_LOCKDEP +/* + * register_netdevice() inits txq->_xmit_lock and sets lockdep class + * according to dev->type + */ +static const unsigned short netdev_lock_type[] = { + ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25, + ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET, + ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM, + ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP, + ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD, + ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25, + ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP, + ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD, + ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI, + ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE, + ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET, + ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL, + ARPHRD_FCFABRIC, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM, + ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, ARPHRD_PHONET_PIPE, + ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE}; + +static const char *const netdev_lock_name[] = { + "_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25", + "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET", + "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM", + "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP", + "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD", + "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", 
"_xmit_HWX25", + "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP", + "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD", + "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI", + "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE", + "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET", + "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL", + "_xmit_FCFABRIC", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM", + "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", "_xmit_PHONET_PIPE", + "_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"}; + +static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)]; + +static inline unsigned short netdev_lock_pos(unsigned short dev_type) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++) + if (netdev_lock_type[i] == dev_type) + return i; + /* the last key is used by default */ + return ARRAY_SIZE(netdev_lock_type) - 1; +} + +static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock, + unsigned short dev_type) +{ + int i; + + i = netdev_lock_pos(dev_type); + lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i], + netdev_lock_name[i]); +} +#else +static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock, + unsigned short dev_type) +{ +} +#endif + /******************************************************************************* * * Protocol management and registration routines @@ -9208,7 +9276,7 @@ static void netdev_init_one_queue(struct net_device *dev, { /* Initialize queue lock */ spin_lock_init(&queue->_xmit_lock); - lockdep_set_class(&queue->_xmit_lock, &dev->qdisc_xmit_lock_key); + netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type); queue->xmit_lock_owner = -1; netdev_queue_numa_node_write(queue, NUMA_NO_NODE); queue->dev = dev; @@ -9255,22 +9323,6 @@ void netif_tx_stop_all_queues(struct net_device *dev) } EXPORT_SYMBOL(netif_tx_stop_all_queues); -static void netdev_register_lockdep_key(struct net_device *dev) -{ - 
lockdep_register_key(&dev->qdisc_tx_busylock_key); - lockdep_register_key(&dev->qdisc_running_key); - lockdep_register_key(&dev->qdisc_xmit_lock_key); - lockdep_register_key(&dev->addr_list_lock_key); -} - -static void netdev_unregister_lockdep_key(struct net_device *dev) -{ - lockdep_unregister_key(&dev->qdisc_tx_busylock_key); - lockdep_unregister_key(&dev->qdisc_running_key); - lockdep_unregister_key(&dev->qdisc_xmit_lock_key); - lockdep_unregister_key(&dev->addr_list_lock_key); -} - void netdev_update_lockdep_key(struct net_device *dev) { lockdep_unregister_key(&dev->addr_list_lock_key); @@ -9837,7 +9889,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev_net_set(dev, &init_net); - netdev_register_lockdep_key(dev); + lockdep_register_key(&dev->addr_list_lock_key); dev->gso_max_size = GSO_MAX_SIZE; dev->gso_max_segs = GSO_MAX_SEGS; @@ -9926,7 +9978,7 @@ void free_netdev(struct net_device *dev) free_percpu(dev->xdp_bulkq); dev->xdp_bulkq = NULL; - netdev_unregister_lockdep_key(dev); + lockdep_unregister_key(&dev->addr_list_lock_key); /* Compatibility with error handling in drivers */ if (dev->reg_state == NETREG_UNINITIALIZED) { diff --git a/net/dsa/slave.c b/net/dsa/slave.c index ba8bf90dc0cc..fa2634043751 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1671,6 +1671,15 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev) return ret; } +static struct lock_class_key dsa_slave_netdev_xmit_lock_key; +static void dsa_slave_set_lockdep_class_one(struct net_device *dev, + struct netdev_queue *txq, + void *_unused) +{ + lockdep_set_class(&txq->_xmit_lock, + &dsa_slave_netdev_xmit_lock_key); +} + int dsa_slave_suspend(struct net_device *slave_dev) { struct dsa_port *dp = dsa_slave_to_port(slave_dev); @@ -1754,6 +1763,9 @@ int dsa_slave_create(struct dsa_port *port) slave_dev->max_mtu = ETH_MAX_MTU; SET_NETDEV_DEVTYPE(slave_dev, &dsa_type); + netdev_for_each_tx_queue(slave_dev, dsa_slave_set_lockdep_class_one, + NULL); + 
SET_NETDEV_DEV(slave_dev, port->ds->dev); slave_dev->dev.of_node = port->dn; slave_dev->vlan_features = master->vlan_features; diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c index c0b107cdd715..3297e7fa9945 100644 --- a/net/ieee802154/6lowpan/core.c +++ b/net/ieee802154/6lowpan/core.c @@ -58,6 +58,13 @@ static const struct header_ops lowpan_header_ops = { .create = lowpan_header_create, }; +static int lowpan_dev_init(struct net_device *ldev) +{ + netdev_lockdep_set_classes(ldev); + + return 0; +} + static int lowpan_open(struct net_device *dev) { if (!open_count) @@ -89,6 +96,7 @@ static int lowpan_get_iflink(const struct net_device *dev) } static const struct net_device_ops lowpan_netdev_ops = { + .ndo_init = lowpan_dev_init, .ndo_start_xmit = lowpan_xmit, .ndo_open = lowpan_open, .ndo_stop = lowpan_stop, diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index d3b520b9b2c9..fd5ac2788e45 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -56,6 +56,7 @@ static int l2tp_eth_dev_init(struct net_device *dev) { eth_hw_addr_random(dev); eth_broadcast_addr(dev->broadcast); + netdev_lockdep_set_classes(dev); return 0; } diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 7b1a74f74aad..eccc7d366e17 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -63,6 +63,26 @@ static DEFINE_SPINLOCK(nr_list_lock); static const struct proto_ops nr_proto_ops; +/* + * NETROM network devices are virtual network devices encapsulating NETROM + * frames into AX.25 which will be sent through an AX.25 device, so form a + * special "super class" of normal net devices; split their locks off into a + * separate class since they always nest. 
+ */ +static struct lock_class_key nr_netdev_xmit_lock_key; + +static void nr_set_lockdep_one(struct net_device *dev, + struct netdev_queue *txq, + void *_unused) +{ + lockdep_set_class(&txq->_xmit_lock, &nr_netdev_xmit_lock_key); +} + +static void nr_set_lockdep_key(struct net_device *dev) +{ + netdev_for_each_tx_queue(dev, nr_set_lockdep_one, NULL); +} + /* * Socket removal during an interrupt is now safe. */ @@ -1394,6 +1414,7 @@ static int __init nr_proto_init(void) free_netdev(dev); goto fail; } + nr_set_lockdep_key(dev); dev_nr[i] = dev; } diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 1e8eeb044b07..e7a872207b46 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -64,6 +64,26 @@ static const struct proto_ops rose_proto_ops; ax25_address rose_callsign; +/* + * ROSE network devices are virtual network devices encapsulating ROSE + * frames into AX.25 which will be sent through an AX.25 device, so form a + * special "super class" of normal net devices; split their locks off into a + * separate class since they always nest. + */ +static struct lock_class_key rose_netdev_xmit_lock_key; + +static void rose_set_lockdep_one(struct net_device *dev, + struct netdev_queue *txq, + void *_unused) +{ + lockdep_set_class(&txq->_xmit_lock, &rose_netdev_xmit_lock_key); +} + +static void rose_set_lockdep_key(struct net_device *dev) +{ + netdev_for_each_tx_queue(dev, rose_set_lockdep_one, NULL); +} + /* * Convert a ROSE address into text. 
*/ @@ -1511,6 +1531,7 @@ static int __init rose_proto_init(void) free_netdev(dev); goto fail; } + rose_set_lockdep_key(dev); dev_rose[i] = dev; } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index ad24fa1a51e6..ebc55d884247 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -794,6 +794,9 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = { }; EXPORT_SYMBOL(pfifo_fast_ops); +static struct lock_class_key qdisc_tx_busylock; +static struct lock_class_key qdisc_running_key; + struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, const struct Qdisc_ops *ops, struct netlink_ext_ack *extack) @@ -846,9 +849,17 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, } spin_lock_init(&sch->busylock); + lockdep_set_class(&sch->busylock, + dev->qdisc_tx_busylock ?: &qdisc_tx_busylock); + /* seqlock has the same scope of busylock, for NOLOCK qdisc */ spin_lock_init(&sch->seqlock); + lockdep_set_class(&sch->busylock, + dev->qdisc_tx_busylock ?: &qdisc_tx_busylock); + seqcount_init(&sch->running); + lockdep_set_class(&sch->running, + dev->qdisc_running_key ?: &qdisc_running_key); sch->ops = ops; sch->flags = ops->static_flags; @@ -859,12 +870,6 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, dev_hold(dev); refcount_set(&sch->refcnt, 1); - if (sch != &noop_qdisc) { - lockdep_set_class(&sch->busylock, &dev->qdisc_tx_busylock_key); - lockdep_set_class(&sch->seqlock, &dev->qdisc_tx_busylock_key); - lockdep_set_class(&sch->running, &dev->qdisc_running_key); - } - return sch; errout1: kfree(p); -- cgit v1.2.3-71-gd317 From 607259a695312cdfac2b52fb9d5b5890c834d573 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 May 2020 15:03:13 +0200 Subject: net: add a new ndo_tunnel_ioctl method This method is used to properly allow kernel callers of the IPv4 route management ioctls. 
The existing ip_tunnel_ioctl helper is renamed to ip_tunnel_ctl to better reflect that it doesn't directly implement ioctls touching user memory, and is used for the guts of ndo_tunnel_ctl implementations. A new ip_tunnel_ioctl helper is added that can be wired up directly to the ndo_do_ioctl method and takes care of the copy to and from userspace. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 ++++++ include/net/ip_tunnels.h | 3 ++- net/ipv4/ip_gre.c | 35 ++++++++++++++--------------------- net/ipv4/ip_tunnel.c | 16 +++++++++++++++- net/ipv4/ip_vti.c | 32 +++++++++++++------------------- net/ipv4/ipip.c | 30 +++++++++--------------------- 6 files changed, 59 insertions(+), 63 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6a8f8daef09d..a18f8fdf4260 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -53,6 +53,7 @@ struct netpoll_info; struct device; struct phy_device; struct dsa_port; +struct ip_tunnel_parm; struct macsec_context; struct macsec_ops; @@ -1274,6 +1275,9 @@ struct netdev_net_notifier { * Get devlink port instance associated with a given netdev. * Called with a reference on the netdevice and devlink locks only, * rtnl_lock is not held. + * int (*ndo_tunnel_ctl)(struct net_device *dev, struct ip_tunnel_parm *p, + * int cmd); + * Add, change, delete or get information on an IPv4 tunnel. 
*/ struct net_device_ops { int (*ndo_init)(struct net_device *dev); @@ -1479,6 +1483,8 @@ struct net_device_ops { int (*ndo_xsk_wakeup)(struct net_device *dev, u32 queue_id, u32 flags); struct devlink_port * (*ndo_get_devlink_port)(struct net_device *dev); + int (*ndo_tunnel_ctl)(struct net_device *dev, + struct ip_tunnel_parm *p, int cmd); }; /** diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 236503a50759..076e5d7db7d3 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -269,7 +269,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params, const u8 protocol); void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const u8 proto, int tunnel_hlen); -int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd); +int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd); +int ip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict); int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 0ce9b91ff55c..4e31f23e4117 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -768,45 +768,37 @@ static void ipgre_link_update(struct net_device *dev, bool set_mtu) } } -static int ipgre_tunnel_ioctl(struct net_device *dev, - struct ifreq *ifr, int cmd) +static int ipgre_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, + int cmd) { - struct ip_tunnel_parm p; int err; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) - return -EFAULT; - if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) { - if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || - p.iph.ihl != 5 || (p.iph.frag_off & htons(~IP_DF)) || - ((p.i_flags | p.o_flags) & (GRE_VERSION | GRE_ROUTING))) + if (p->iph.version != 4 || p->iph.protocol != IPPROTO_GRE || + p->iph.ihl != 5 || (p->iph.frag_off & 
htons(~IP_DF)) || + ((p->i_flags | p->o_flags) & (GRE_VERSION | GRE_ROUTING))) return -EINVAL; } - p.i_flags = gre_flags_to_tnl_flags(p.i_flags); - p.o_flags = gre_flags_to_tnl_flags(p.o_flags); + p->i_flags = gre_flags_to_tnl_flags(p->i_flags); + p->o_flags = gre_flags_to_tnl_flags(p->o_flags); - err = ip_tunnel_ioctl(dev, &p, cmd); + err = ip_tunnel_ctl(dev, p, cmd); if (err) return err; if (cmd == SIOCCHGTUNNEL) { struct ip_tunnel *t = netdev_priv(dev); - t->parms.i_flags = p.i_flags; - t->parms.o_flags = p.o_flags; + t->parms.i_flags = p->i_flags; + t->parms.o_flags = p->o_flags; if (strcmp(dev->rtnl_link_ops->kind, "erspan")) ipgre_link_update(dev, true); } - p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags); - p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags); - - if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) - return -EFAULT; - + p->i_flags = gre_tnl_flags_to_gre_flags(p->i_flags); + p->o_flags = gre_tnl_flags_to_gre_flags(p->o_flags); return 0; } @@ -924,10 +916,11 @@ static const struct net_device_ops ipgre_netdev_ops = { .ndo_stop = ipgre_close, #endif .ndo_start_xmit = ipgre_xmit, - .ndo_do_ioctl = ipgre_tunnel_ioctl, + .ndo_do_ioctl = ip_tunnel_ioctl, .ndo_change_mtu = ip_tunnel_change_mtu, .ndo_get_stats64 = ip_tunnel_get_stats64, .ndo_get_iflink = ip_tunnel_get_iflink, + .ndo_tunnel_ctl = ipgre_tunnel_ctl, }; #define GRE_FEATURES (NETIF_F_SG | \ diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index cd4b84310d92..f4f1d11eab50 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -860,7 +860,7 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn, netdev_state_change(dev); } -int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) +int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) { int err = 0; struct ip_tunnel *t = netdev_priv(dev); @@ -960,6 +960,20 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) done: return err; } 
+EXPORT_SYMBOL_GPL(ip_tunnel_ctl); + +int ip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + struct ip_tunnel_parm p; + int err; + + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + return -EFAULT; + err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, cmd); + if (!err && copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + return -EFAULT; + return err; +} EXPORT_SYMBOL_GPL(ip_tunnel_ioctl); int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 1b4e6f298648..c8974360a99f 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -378,38 +378,31 @@ static int vti4_err(struct sk_buff *skb, u32 info) } static int -vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +vti_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) { int err = 0; - struct ip_tunnel_parm p; - - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) - return -EFAULT; if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) { - if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP || - p.iph.ihl != 5) + if (p->iph.version != 4 || p->iph.protocol != IPPROTO_IPIP || + p->iph.ihl != 5) return -EINVAL; } - if (!(p.i_flags & GRE_KEY)) - p.i_key = 0; - if (!(p.o_flags & GRE_KEY)) - p.o_key = 0; + if (!(p->i_flags & GRE_KEY)) + p->i_key = 0; + if (!(p->o_flags & GRE_KEY)) + p->o_key = 0; - p.i_flags = VTI_ISVTI; + p->i_flags = VTI_ISVTI; - err = ip_tunnel_ioctl(dev, &p, cmd); + err = ip_tunnel_ctl(dev, p, cmd); if (err) return err; if (cmd != SIOCDELTUNNEL) { - p.i_flags |= GRE_KEY; - p.o_flags |= GRE_KEY; + p->i_flags |= GRE_KEY; + p->o_flags |= GRE_KEY; } - - if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) - return -EFAULT; return 0; } @@ -417,10 +410,11 @@ static const struct net_device_ops vti_netdev_ops = { .ndo_init = vti_tunnel_init, .ndo_uninit = ip_tunnel_uninit, .ndo_start_xmit = vti_tunnel_xmit, - .ndo_do_ioctl = vti_tunnel_ioctl, + 
.ndo_do_ioctl = ip_tunnel_ioctl, .ndo_change_mtu = ip_tunnel_change_mtu, .ndo_get_stats64 = ip_tunnel_get_stats64, .ndo_get_iflink = ip_tunnel_get_iflink, + .ndo_tunnel_ctl = vti_tunnel_ctl, }; static void vti_tunnel_setup(struct net_device *dev) diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 2f01cf6fa0de..df663baf2516 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -327,41 +327,29 @@ static bool ipip_tunnel_ioctl_verify_protocol(u8 ipproto) } static int -ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +ipip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) { - int err = 0; - struct ip_tunnel_parm p; - - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) - return -EFAULT; - if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) { - if (p.iph.version != 4 || - !ipip_tunnel_ioctl_verify_protocol(p.iph.protocol) || - p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF))) + if (p->iph.version != 4 || + !ipip_tunnel_ioctl_verify_protocol(p->iph.protocol) || + p->iph.ihl != 5 || (p->iph.frag_off & htons(~IP_DF))) return -EINVAL; } - p.i_key = p.o_key = 0; - p.i_flags = p.o_flags = 0; - err = ip_tunnel_ioctl(dev, &p, cmd); - if (err) - return err; - - if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) - return -EFAULT; - - return 0; + p->i_key = p->o_key = 0; + p->i_flags = p->o_flags = 0; + return ip_tunnel_ctl(dev, p, cmd); } static const struct net_device_ops ipip_netdev_ops = { .ndo_init = ipip_tunnel_init, .ndo_uninit = ip_tunnel_uninit, .ndo_start_xmit = ipip_tunnel_xmit, - .ndo_do_ioctl = ipip_tunnel_ioctl, + .ndo_do_ioctl = ip_tunnel_ioctl, .ndo_change_mtu = ip_tunnel_change_mtu, .ndo_get_stats64 = ip_tunnel_get_stats64, .ndo_get_iflink = ip_tunnel_get_iflink, + .ndo_tunnel_ctl = ipip_tunnel_ctl, }; #define IPIP_FEATURES (NETIF_F_SG | \ -- cgit v1.2.3-71-gd317 From cd16627fc0468564fdd60f20ad52420b87195127 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Sat, 23 May 2020 15:27:10 +0200 Subject: 
net: devres: provide devm_register_netdev() Provide devm_register_netdev() - a device resource managed variant of register_netdev(). This new helper will only work for net_device structs that are also already managed by devres. Signed-off-by: Bartosz Golaszewski Signed-off-by: David S. Miller --- Documentation/driver-api/driver-model/devres.rst | 1 + include/linux/netdevice.h | 2 + net/devres.c | 55 ++++++++++++++++++++++++ 3 files changed, 58 insertions(+) (limited to 'include/linux/netdevice.h') diff --git a/Documentation/driver-api/driver-model/devres.rst b/Documentation/driver-api/driver-model/devres.rst index 50df28d20fa7..fc242ed4bde5 100644 --- a/Documentation/driver-api/driver-model/devres.rst +++ b/Documentation/driver-api/driver-model/devres.rst @@ -375,6 +375,7 @@ MUX NET devm_alloc_etherdev() devm_alloc_etherdev_mqs() + devm_register_netdev() PER-CPU MEM devm_alloc_percpu() diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a18f8fdf4260..1a96e9c4ec36 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4280,6 +4280,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, int register_netdev(struct net_device *dev); void unregister_netdev(struct net_device *dev); +int devm_register_netdev(struct device *dev, struct net_device *ndev); + /* General hardware address lists handling functions */ int __hw_addr_sync(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr_list *from_list, int addr_len); diff --git a/net/devres.c b/net/devres.c index b97b0c5a8216..57a6a88d11f6 100644 --- a/net/devres.c +++ b/net/devres.c @@ -38,3 +38,58 @@ struct net_device *devm_alloc_etherdev_mqs(struct device *dev, int sizeof_priv, return dr->ndev; } EXPORT_SYMBOL(devm_alloc_etherdev_mqs); + +static void devm_netdev_release(struct device *dev, void *this) +{ + struct net_device_devres *res = this; + + unregister_netdev(res->ndev); +} + +static int netdev_devres_match(struct device *dev, void *this, void 
*match_data) +{ + struct net_device_devres *res = this; + struct net_device *ndev = match_data; + + return ndev == res->ndev; +} + +/** + * devm_register_netdev - resource managed variant of register_netdev() + * @dev: managing device for this netdev - usually the parent device + * @ndev: device to register + * + * This is a devres variant of register_netdev() for which the unregister + * function will be called automatically when the managing device is + * detached. Note: the net_device used must also be resource managed by + * the same struct device. + */ +int devm_register_netdev(struct device *dev, struct net_device *ndev) +{ + struct net_device_devres *dr; + int ret; + + /* struct net_device must itself be managed. For now a managed netdev + * can only be allocated by devm_alloc_etherdev_mqs() so the check is + * straightforward. + */ + if (WARN_ON(!devres_find(dev, devm_free_netdev, + netdev_devres_match, ndev))) + return -EINVAL; + + dr = devres_alloc(devm_netdev_release, sizeof(*dr), GFP_KERNEL); + if (!dr) + return -ENOMEM; + + ret = register_netdev(ndev); + if (ret) { + devres_free(dr); + return ret; + } + + dr->ndev = ndev; + devres_add(ndev->dev.parent, dr); + + return 0; +} +EXPORT_SYMBOL(devm_register_netdev); -- cgit v1.2.3-71-gd317