summaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2016-09-28 04:48:20 -0400
committerDavid S. Miller <davem@davemloft.net>2016-09-28 04:48:20 -0400
commit9c5982fe260a28e84d167e894123dc342e76c39f (patch)
treebdd2565cbf373e53c32a47ff30c11e4de0028ce7 /include
parenteb523f42d77a43f80bb9c57a34fbdc8406c7b075 (diff)
parentfd41b0eaa06a8a0516f9e0b0a5889035bf423784 (diff)
downloadcachepc-linux-9c5982fe260a28e84d167e894123dc342e76c39f.tar.gz
cachepc-linux-9c5982fe260a28e84d167e894123dc342e76c39f.zip
Merge branch 'fib-offload-notifications'
Jiri Pirko says: ==================== fib offload: switch to notifier The goal of this patchset is to allow drivers to propagate all prefixes configured in the kernel down to HW. This is necessary for routing to work as expected. If we don't do that, HW might incorrectly forward prefixes known to the kernel. Take an example where a default route is set in switch HW and there is an IP address set on a management (non-switch) port. Currently, only FIB entries related to the switch port netdev are offloaded using switchdev ops. This model is not extensible, so the first patch introduces a replacement: a notifier to propagate FIB entry additions and removals to whoever is interested. The second patch introduces a couple of helpers to deal with the RTNH_F_OFFLOAD flag. Currently it is set in switchdev core. There the assumption is that only one offload device exists. But for the FIB notifier, we assume multiple offload devices. So the patch introduces a per FIB entry reference counter, and the helpers use it in order to achieve this: 0 means RTNH_F_OFFLOAD is not set, no device offloads this entry; n means RTNH_F_OFFLOAD is set and the entry is offloaded by n devices. Patches 3 and 4 convert mlxsw and rocker to adopt this new way, registering one notifier block for each asic instance. Both of these patches also implement an internal "abort" mechanism. Using switchdev ops, "abort" is called by switchdev core whenever there is an error during FIB entry add offload. This leads to removal of all offloaded entries on the system by fib_trie code. Now the new notifier assumes the driver takes care of the abort action. Here's why: 1) The fact that one HW cannot offload an entry does not mean that the others can't do it. So let only one entity abort and leave the rest to work happily. 2) The driver knows what to do in order to properly abort. For example, currently abort is broken for mlxsw, as for Spectrum there is a need to set a 0.0.0.0/0 trap in the RALUE register. 
The fifth patch removes the old, no longer used FIB offload infrastructure. The last patch reflects the changes in the switchdev documentation file. --- v2->v3: -patch 3/6 -fixed offload inc/dec to be done in fib4_entry_init/fini and only in case !trap as suggested by Ido v1->v2: -patch 3/6: -fixed lpm tree setup and binding for abort as pointed out by Ido -do nexthop checks as suggested by Ido -fix use after free during abort -patch 6/6: -fixed texts as suggested by Ido ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
-rw-r--r--include/net/ip_fib.h49
-rw-r--r--include/net/switchdev.h40
2 files changed, 44 insertions, 45 deletions
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 7d4a72e75f33..b9314b48e39f 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -22,6 +22,7 @@
#include <net/fib_rules.h>
#include <net/inetpeer.h>
#include <linux/percpu.h>
+#include <linux/notifier.h>
struct fib_config {
u8 fc_dst_len;
@@ -122,6 +123,7 @@ struct fib_info {
#ifdef CONFIG_IP_ROUTE_MULTIPATH
int fib_weight;
#endif
+ unsigned int fib_offload_cnt;
struct rcu_head rcu;
struct fib_nh fib_nh[0];
#define fib_dev fib_nh[0].nh_dev
@@ -173,6 +175,18 @@ struct fib_result_nl {
__be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh);
+static inline void fib_info_offload_inc(struct fib_info *fi)
+{
+ fi->fib_offload_cnt++;
+ fi->fib_flags |= RTNH_F_OFFLOAD;
+}
+
+static inline void fib_info_offload_dec(struct fib_info *fi)
+{
+ if (--fi->fib_offload_cnt == 0)
+ fi->fib_flags &= ~RTNH_F_OFFLOAD;
+}
+
#define FIB_RES_SADDR(net, res) \
((FIB_RES_NH(res).nh_saddr_genid == \
atomic_read(&(net)->ipv4.dev_addr_genid)) ? \
@@ -185,6 +199,33 @@ __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh);
#define FIB_RES_PREFSRC(net, res) ((res).fi->fib_prefsrc ? : \
FIB_RES_SADDR(net, res))
+struct fib_notifier_info {
+ struct net *net;
+};
+
+struct fib_entry_notifier_info {
+ struct fib_notifier_info info; /* must be first */
+ u32 dst;
+ int dst_len;
+ struct fib_info *fi;
+ u8 tos;
+ u8 type;
+ u32 tb_id;
+ u32 nlflags;
+};
+
+enum fib_event_type {
+ FIB_EVENT_ENTRY_ADD,
+ FIB_EVENT_ENTRY_DEL,
+ FIB_EVENT_RULE_ADD,
+ FIB_EVENT_RULE_DEL,
+};
+
+int register_fib_notifier(struct notifier_block *nb);
+int unregister_fib_notifier(struct notifier_block *nb);
+int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
+ struct fib_notifier_info *info);
+
struct fib_table {
struct hlist_node tb_hlist;
u32 tb_id;
@@ -196,13 +237,12 @@ struct fib_table {
int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
struct fib_result *res, int fib_flags);
-int fib_table_insert(struct fib_table *, struct fib_config *);
-int fib_table_delete(struct fib_table *, struct fib_config *);
+int fib_table_insert(struct net *, struct fib_table *, struct fib_config *);
+int fib_table_delete(struct net *, struct fib_table *, struct fib_config *);
int fib_table_dump(struct fib_table *table, struct sk_buff *skb,
struct netlink_callback *cb);
-int fib_table_flush(struct fib_table *table);
+int fib_table_flush(struct net *net, struct fib_table *table);
struct fib_table *fib_trie_unmerge(struct fib_table *main_tb);
-void fib_table_flush_external(struct fib_table *table);
void fib_free_table(struct fib_table *tb);
#ifndef CONFIG_IP_MULTIPLE_TABLES
@@ -315,7 +355,6 @@ static inline int fib_num_tclassid_users(struct net *net)
}
#endif
int fib_unmerge(struct net *net);
-void fib_flush_external(struct net *net);
/* Exported by fib_semantics.c */
int ip_fib_check_default(__be32 gw, struct net_device *dev);
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index 729fe1534160..eba80c4fc56f 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -68,7 +68,6 @@ struct switchdev_attr {
enum switchdev_obj_id {
SWITCHDEV_OBJ_ID_UNDEFINED,
SWITCHDEV_OBJ_ID_PORT_VLAN,
- SWITCHDEV_OBJ_ID_IPV4_FIB,
SWITCHDEV_OBJ_ID_PORT_FDB,
SWITCHDEV_OBJ_ID_PORT_MDB,
};
@@ -92,21 +91,6 @@ struct switchdev_obj_port_vlan {
#define SWITCHDEV_OBJ_PORT_VLAN(obj) \
container_of(obj, struct switchdev_obj_port_vlan, obj)
-/* SWITCHDEV_OBJ_ID_IPV4_FIB */
-struct switchdev_obj_ipv4_fib {
- struct switchdev_obj obj;
- u32 dst;
- int dst_len;
- struct fib_info *fi;
- u8 tos;
- u8 type;
- u32 nlflags;
- u32 tb_id;
-};
-
-#define SWITCHDEV_OBJ_IPV4_FIB(obj) \
- container_of(obj, struct switchdev_obj_ipv4_fib, obj)
-
/* SWITCHDEV_OBJ_ID_PORT_FDB */
struct switchdev_obj_port_fdb {
struct switchdev_obj obj;
@@ -209,11 +193,6 @@ int switchdev_port_bridge_setlink(struct net_device *dev,
struct nlmsghdr *nlh, u16 flags);
int switchdev_port_bridge_dellink(struct net_device *dev,
struct nlmsghdr *nlh, u16 flags);
-int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
- u8 tos, u8 type, u32 nlflags, u32 tb_id);
-int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
- u8 tos, u8 type, u32 tb_id);
-void switchdev_fib_ipv4_abort(struct fib_info *fi);
int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *dev, const unsigned char *addr,
u16 vid, u16 nlm_flags);
@@ -304,25 +283,6 @@ static inline int switchdev_port_bridge_dellink(struct net_device *dev,
return -EOPNOTSUPP;
}
-static inline int switchdev_fib_ipv4_add(u32 dst, int dst_len,
- struct fib_info *fi,
- u8 tos, u8 type,
- u32 nlflags, u32 tb_id)
-{
- return 0;
-}
-
-static inline int switchdev_fib_ipv4_del(u32 dst, int dst_len,
- struct fib_info *fi,
- u8 tos, u8 type, u32 tb_id)
-{
- return 0;
-}
-
-static inline void switchdev_fib_ipv4_abort(struct fib_info *fi)
-{
-}
-
static inline int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *dev,
const unsigned char *addr,