summaryrefslogtreecommitdiffstats
path: root/kernel/bpf/offload.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-08-15 15:04:25 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2018-08-15 15:04:25 -0700
commit9a76aba02a37718242d7cdc294f0a3901928aa57 (patch)
tree2040d038f85d2120f21af83b0793efd5af1864e3 /kernel/bpf/offload.c
parent0a957467c5fd46142bc9c52758ffc552d4c5e2f7 (diff)
parent26a1ccc6c117be8e33e0410fce8c5298b0015b99 (diff)
downloadcachepc-linux-9a76aba02a37718242d7cdc294f0a3901928aa57.tar.gz
cachepc-linux-9a76aba02a37718242d7cdc294f0a3901928aa57.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: "Highlights: - Gustavo A. R. Silva keeps working on the implicit switch fallthru changes. - Support 802.11ax High-Efficiency wireless in cfg80211 et al, From Luca Coelho. - Re-enable ASPM in r8169, from Kai-Heng Feng. - Add virtual XFRM interfaces, which avoids all of the limitations of existing IPSEC tunnels. From Steffen Klassert. - Convert GRO over to use a hash table, so that when we have many flows active we don't traverse a long list during accumluation. - Many new self tests for routing, TC, tunnels, etc. Too many contributors to mention them all, but I'm really happy to keep seeing this stuff. - Hardware timestamping support for dpaa_eth/fsl-fman from Yangbo Lu. - Lots of cleanups and fixes in L2TP code from Guillaume Nault. - Add IPSEC offload support to netdevsim, from Shannon Nelson. - Add support for slotting with non-uniform distribution to netem packet scheduler, from Yousuk Seung. - Add UDP GSO support to mlx5e, from Boris Pismenny. - Support offloading of Team LAG in NFP, from John Hurley. - Allow to configure TX queue selection based upon RX queue, from Amritha Nambiar. - Support ethtool ring size configuration in aquantia, from Anton Mikaev. - Support DSCP and flowlabel per-transport in SCTP, from Xin Long. - Support list based batching and stack traversal of SKBs, this is very exciting work. From Edward Cree. - Busyloop optimizations in vhost_net, from Toshiaki Makita. - Introduce the ETF qdisc, which allows time based transmissions. IGB can offload this in hardware. From Vinicius Costa Gomes. - Add parameter support to devlink, from Moshe Shemesh. - Several multiplication and division optimizations for BPF JIT in nfp driver, from Jiong Wang. - Lots of prepatory work to make more of the packet scheduler layer lockless, when possible, from Vlad Buslov. - Add ACK filter and NAT awareness to sch_cake packet scheduler, from Toke Høiland-Jørgensen. - Support regions and region snapshots in devlink, from Alex Vesker. - Allow to attach XDP programs to both HW and SW at the same time on a given device, with initial support in nfp. From Jakub Kicinski. - Add TLS RX offload and support in mlx5, from Ilya Lesokhin. - Use PHYLIB in r8169 driver, from Heiner Kallweit. - All sorts of changes to support Spectrum 2 in mlxsw driver, from Ido Schimmel. - PTP support in mv88e6xxx DSA driver, from Andrew Lunn. - Make TCP_USER_TIMEOUT socket option more accurate, from Jon Maxwell. - Support for templates in packet scheduler classifier, from Jiri Pirko. - IPV6 support in RDS, from Ka-Cheong Poon. - Native tproxy support in nf_tables, from Máté Eckl. - Maintain IP fragment queue in an rbtree, but optimize properly for in-order frags. From Peter Oskolkov. - Improvde handling of ACKs on hole repairs, from Yuchung Cheng" * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1996 commits) bpf: test: fix spelling mistake "REUSEEPORT" -> "REUSEPORT" hv/netvsc: Fix NULL dereference at single queue mode fallback net: filter: mark expected switch fall-through xen-netfront: fix warn message as irq device name has '/' cxgb4: Add new T5 PCI device ids 0x50af and 0x50b0 net: dsa: mv88e6xxx: missing unlock on error path rds: fix building with IPV6=m inet/connection_sock: prefer _THIS_IP_ to current_text_addr net: dsa: mv88e6xxx: bitwise vs logical bug net: sock_diag: Fix spectre v1 gadget in __sock_diag_cmd() ieee802154: hwsim: using right kind of iteration net: hns3: Add vlan filter setting by ethtool command -K net: hns3: Set tx ring' tc info when netdev is up net: hns3: Remove tx ring BD len register in hns3_enet net: hns3: Fix desc num set to default when setting channel net: hns3: Fix for phy link issue when using marvell phy driver net: hns3: Fix for information of phydev lost problem when down/up net: hns3: Fix for command format parsing error in hclge_is_all_function_id_zero net: hns3: Add support for serdes loopback selftest bnxt_en: take coredump_record structure off stack ...
Diffstat (limited to 'kernel/bpf/offload.c')
-rw-r--r--kernel/bpf/offload.c223
1 files changed, 174 insertions, 49 deletions
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index ac747d5cf7c6..177a52436394 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -18,19 +18,43 @@
#include <linux/bug.h>
#include <linux/kdev_t.h>
#include <linux/list.h>
+#include <linux/lockdep.h>
#include <linux/netdevice.h>
#include <linux/printk.h>
#include <linux/proc_ns.h>
+#include <linux/rhashtable.h>
#include <linux/rtnetlink.h>
#include <linux/rwsem.h>
-/* Protects bpf_prog_offload_devs, bpf_map_offload_devs and offload members
+/* Protects offdevs, members of bpf_offload_netdev and offload members
* of all progs.
* RTNL lock cannot be taken when holding this lock.
*/
static DECLARE_RWSEM(bpf_devs_lock);
-static LIST_HEAD(bpf_prog_offload_devs);
-static LIST_HEAD(bpf_map_offload_devs);
+
+struct bpf_offload_dev {
+ struct list_head netdevs;
+};
+
+struct bpf_offload_netdev {
+ struct rhash_head l;
+ struct net_device *netdev;
+ struct bpf_offload_dev *offdev;
+ struct list_head progs;
+ struct list_head maps;
+ struct list_head offdev_netdevs;
+};
+
+static const struct rhashtable_params offdevs_params = {
+ .nelem_hint = 4,
+ .key_len = sizeof(struct net_device *),
+ .key_offset = offsetof(struct bpf_offload_netdev, netdev),
+ .head_offset = offsetof(struct bpf_offload_netdev, l),
+ .automatic_shrinking = true,
+};
+
+static struct rhashtable offdevs;
+static bool offdevs_inited;
static int bpf_dev_offload_check(struct net_device *netdev)
{
@@ -41,8 +65,19 @@ static int bpf_dev_offload_check(struct net_device *netdev)
return 0;
}
+static struct bpf_offload_netdev *
+bpf_offload_find_netdev(struct net_device *netdev)
+{
+ lockdep_assert_held(&bpf_devs_lock);
+
+ if (!offdevs_inited)
+ return NULL;
+ return rhashtable_lookup_fast(&offdevs, &netdev, offdevs_params);
+}
+
int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
{
+ struct bpf_offload_netdev *ondev;
struct bpf_prog_offload *offload;
int err;
@@ -66,12 +101,13 @@ int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
goto err_maybe_put;
down_write(&bpf_devs_lock);
- if (offload->netdev->reg_state != NETREG_REGISTERED) {
+ ondev = bpf_offload_find_netdev(offload->netdev);
+ if (!ondev) {
err = -EINVAL;
goto err_unlock;
}
prog->aux->offload = offload;
- list_add_tail(&offload->offloads, &bpf_prog_offload_devs);
+ list_add_tail(&offload->offloads, &ondev->progs);
dev_put(offload->netdev);
up_write(&bpf_devs_lock);
@@ -294,6 +330,7 @@ static int bpf_map_offload_ndo(struct bpf_offloaded_map *offmap,
struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr)
{
struct net *net = current->nsproxy->net_ns;
+ struct bpf_offload_netdev *ondev;
struct bpf_offloaded_map *offmap;
int err;
@@ -316,11 +353,17 @@ struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr)
if (err)
goto err_unlock;
+ ondev = bpf_offload_find_netdev(offmap->netdev);
+ if (!ondev) {
+ err = -EINVAL;
+ goto err_unlock;
+ }
+
err = bpf_map_offload_ndo(offmap, BPF_OFFLOAD_MAP_ALLOC);
if (err)
goto err_unlock;
- list_add_tail(&offmap->offloads, &bpf_map_offload_devs);
+ list_add_tail(&offmap->offloads, &ondev->maps);
up_write(&bpf_devs_lock);
rtnl_unlock();
@@ -468,77 +511,159 @@ int bpf_map_offload_info_fill(struct bpf_map_info *info, struct bpf_map *map)
return 0;
}
-bool bpf_offload_dev_match(struct bpf_prog *prog, struct bpf_map *map)
+static bool __bpf_offload_dev_match(struct bpf_prog *prog,
+ struct net_device *netdev)
{
- struct bpf_offloaded_map *offmap;
+ struct bpf_offload_netdev *ondev1, *ondev2;
struct bpf_prog_offload *offload;
- bool ret;
if (!bpf_prog_is_dev_bound(prog->aux))
return false;
- if (!bpf_map_is_dev_bound(map))
- return bpf_map_offload_neutral(map);
- down_read(&bpf_devs_lock);
offload = prog->aux->offload;
- offmap = map_to_offmap(map);
+ if (!offload)
+ return false;
+ if (offload->netdev == netdev)
+ return true;
- ret = offload && offload->netdev == offmap->netdev;
+ ondev1 = bpf_offload_find_netdev(offload->netdev);
+ ondev2 = bpf_offload_find_netdev(netdev);
+
+ return ondev1 && ondev2 && ondev1->offdev == ondev2->offdev;
+}
+
+bool bpf_offload_dev_match(struct bpf_prog *prog, struct net_device *netdev)
+{
+ bool ret;
+
+ down_read(&bpf_devs_lock);
+ ret = __bpf_offload_dev_match(prog, netdev);
up_read(&bpf_devs_lock);
return ret;
}
+EXPORT_SYMBOL_GPL(bpf_offload_dev_match);
-static void bpf_offload_orphan_all_progs(struct net_device *netdev)
+bool bpf_offload_prog_map_match(struct bpf_prog *prog, struct bpf_map *map)
{
- struct bpf_prog_offload *offload, *tmp;
+ struct bpf_offloaded_map *offmap;
+ bool ret;
- list_for_each_entry_safe(offload, tmp, &bpf_prog_offload_devs, offloads)
- if (offload->netdev == netdev)
- __bpf_prog_offload_destroy(offload->prog);
+ if (!bpf_map_is_dev_bound(map))
+ return bpf_map_offload_neutral(map);
+ offmap = map_to_offmap(map);
+
+ down_read(&bpf_devs_lock);
+ ret = __bpf_offload_dev_match(prog, offmap->netdev);
+ up_read(&bpf_devs_lock);
+
+ return ret;
}
-static void bpf_offload_orphan_all_maps(struct net_device *netdev)
+int bpf_offload_dev_netdev_register(struct bpf_offload_dev *offdev,
+ struct net_device *netdev)
{
- struct bpf_offloaded_map *offmap, *tmp;
+ struct bpf_offload_netdev *ondev;
+ int err;
- list_for_each_entry_safe(offmap, tmp, &bpf_map_offload_devs, offloads)
- if (offmap->netdev == netdev)
- __bpf_map_offload_destroy(offmap);
+ ondev = kzalloc(sizeof(*ondev), GFP_KERNEL);
+ if (!ondev)
+ return -ENOMEM;
+
+ ondev->netdev = netdev;
+ ondev->offdev = offdev;
+ INIT_LIST_HEAD(&ondev->progs);
+ INIT_LIST_HEAD(&ondev->maps);
+
+ down_write(&bpf_devs_lock);
+ err = rhashtable_insert_fast(&offdevs, &ondev->l, offdevs_params);
+ if (err) {
+ netdev_warn(netdev, "failed to register for BPF offload\n");
+ goto err_unlock_free;
+ }
+
+ list_add(&ondev->offdev_netdevs, &offdev->netdevs);
+ up_write(&bpf_devs_lock);
+ return 0;
+
+err_unlock_free:
+ up_write(&bpf_devs_lock);
+ kfree(ondev);
+ return err;
}
+EXPORT_SYMBOL_GPL(bpf_offload_dev_netdev_register);
-static int bpf_offload_notification(struct notifier_block *notifier,
- ulong event, void *ptr)
+void bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev,
+ struct net_device *netdev)
{
- struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+ struct bpf_offload_netdev *ondev, *altdev;
+ struct bpf_offloaded_map *offmap, *mtmp;
+ struct bpf_prog_offload *offload, *ptmp;
ASSERT_RTNL();
- switch (event) {
- case NETDEV_UNREGISTER:
- /* ignore namespace changes */
- if (netdev->reg_state != NETREG_UNREGISTERING)
- break;
-
- down_write(&bpf_devs_lock);
- bpf_offload_orphan_all_progs(netdev);
- bpf_offload_orphan_all_maps(netdev);
- up_write(&bpf_devs_lock);
- break;
- default:
- break;
+ down_write(&bpf_devs_lock);
+ ondev = rhashtable_lookup_fast(&offdevs, &netdev, offdevs_params);
+ if (WARN_ON(!ondev))
+ goto unlock;
+
+ WARN_ON(rhashtable_remove_fast(&offdevs, &ondev->l, offdevs_params));
+ list_del(&ondev->offdev_netdevs);
+
+ /* Try to move the objects to another netdev of the device */
+ altdev = list_first_entry_or_null(&offdev->netdevs,
+ struct bpf_offload_netdev,
+ offdev_netdevs);
+ if (altdev) {
+ list_for_each_entry(offload, &ondev->progs, offloads)
+ offload->netdev = altdev->netdev;
+ list_splice_init(&ondev->progs, &altdev->progs);
+
+ list_for_each_entry(offmap, &ondev->maps, offloads)
+ offmap->netdev = altdev->netdev;
+ list_splice_init(&ondev->maps, &altdev->maps);
+ } else {
+ list_for_each_entry_safe(offload, ptmp, &ondev->progs, offloads)
+ __bpf_prog_offload_destroy(offload->prog);
+ list_for_each_entry_safe(offmap, mtmp, &ondev->maps, offloads)
+ __bpf_map_offload_destroy(offmap);
}
- return NOTIFY_OK;
-}
-static struct notifier_block bpf_offload_notifier = {
- .notifier_call = bpf_offload_notification,
-};
+ WARN_ON(!list_empty(&ondev->progs));
+ WARN_ON(!list_empty(&ondev->maps));
+ kfree(ondev);
+unlock:
+ up_write(&bpf_devs_lock);
+}
+EXPORT_SYMBOL_GPL(bpf_offload_dev_netdev_unregister);
-static int __init bpf_offload_init(void)
+struct bpf_offload_dev *bpf_offload_dev_create(void)
{
- register_netdevice_notifier(&bpf_offload_notifier);
- return 0;
+ struct bpf_offload_dev *offdev;
+ int err;
+
+ down_write(&bpf_devs_lock);
+ if (!offdevs_inited) {
+ err = rhashtable_init(&offdevs, &offdevs_params);
+ if (err)
+ return ERR_PTR(err);
+ offdevs_inited = true;
+ }
+ up_write(&bpf_devs_lock);
+
+ offdev = kzalloc(sizeof(*offdev), GFP_KERNEL);
+ if (!offdev)
+ return ERR_PTR(-ENOMEM);
+
+ INIT_LIST_HEAD(&offdev->netdevs);
+
+ return offdev;
}
+EXPORT_SYMBOL_GPL(bpf_offload_dev_create);
-subsys_initcall(bpf_offload_init);
+void bpf_offload_dev_destroy(struct bpf_offload_dev *offdev)
+{
+ WARN_ON(!list_empty(&offdev->netdevs));
+ kfree(offdev);
+}
+EXPORT_SYMBOL_GPL(bpf_offload_dev_destroy);