From b83d23a2a38b1770da0491257ae81d52307f7816 Mon Sep 17 00:00:00 2001 From: Mark Gray Date: Thu, 15 Jul 2021 08:27:54 -0400 Subject: openvswitch: Introduce per-cpu upcall dispatch The Open vSwitch kernel module uses the upcall mechanism to send packets from kernel space to user space when it misses in the kernel space flow table. The upcall sends packets via a Netlink socket. Currently, a Netlink socket is created for every vport. In this way, there is a 1:1 mapping between a vport and a Netlink socket. When a packet is received by a vport, if it needs to be sent to user space, it is sent via the corresponding Netlink socket. This mechanism, with various iterations of the corresponding user space code, has seen some limitations and issues: * On systems with a large number of vports, there is a correspondingly large number of Netlink sockets which can limit scaling. (https://bugzilla.redhat.com/show_bug.cgi?id=1526306) * Packet reordering on upcalls. (https://bugzilla.redhat.com/show_bug.cgi?id=1844576) * A thundering herd issue. (https://bugzilla.redhat.com/show_bug.cgi?id=1834444) This patch introduces an alternative, feature-negotiated, upcall mode using a per-cpu dispatch rather than a per-vport dispatch. In this mode, the Netlink socket to be used for the upcall is selected based on the CPU of the thread that is executing the upcall. In this way, it resolves the issues above as: a) The number of Netlink sockets scales with the number of CPUs rather than the number of vports. b) Ordering per-flow is maintained as packets are distributed to CPUs based on mechanisms such as RSS and flows are distributed to a single user space thread. c) Packets from a flow can only wake up one user space thread. The corresponding user space code can be found at: https://mail.openvswitch.org/pipermail/ovs-dev/2021-July/385139.html Bugzilla: https://bugzilla.redhat.com/1844576 Signed-off-by: Mark Gray Acked-by: Flavio Leitner Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/linux/openvswitch.h') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 8d16744edc31..6571b57b2268 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -70,6 +70,8 @@ enum ovs_datapath_cmd { * set on the datapath port (for OVS_ACTION_ATTR_MISS). Only valid on * %OVS_DP_CMD_NEW requests. A value of zero indicates that upcalls should * not be sent. + * OVS_DP_ATTR_PER_CPU_PIDS: Per-cpu array of PIDs for upcalls when + * OVS_DP_F_DISPATCH_UPCALL_PER_CPU feature is set. * @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the * datapath. Always present in notifications. * @OVS_DP_ATTR_MEGAFLOW_STATS: Statistics about mega flow masks usage for the @@ -87,6 +89,9 @@ enum ovs_datapath_attr { OVS_DP_ATTR_USER_FEATURES, /* OVS_DP_F_* */ OVS_DP_ATTR_PAD, OVS_DP_ATTR_MASKS_CACHE_SIZE, + OVS_DP_ATTR_PER_CPU_PIDS, /* Netlink PIDS to receive upcalls in per-cpu + * dispatch mode + */ __OVS_DP_ATTR_MAX }; @@ -127,6 +132,9 @@ struct ovs_vport_stats { /* Allow tc offload recirc sharing */ #define OVS_DP_F_TC_RECIRC_SHARING (1 << 2) +/* Allow per-cpu dispatch of upcalls */ +#define OVS_DP_F_DISPATCH_UPCALL_PER_CPU (1 << 3) + /* Fixed logical ports. */ #define OVSP_LOCAL ((__u32)0) -- cgit v1.2.3-71-gd317 From e4252cb66637b846b916cca7c2cdb4ed22ab2fc3 Mon Sep 17 00:00:00 2001 From: Mark Gray Date: Fri, 23 Jul 2021 10:24:12 -0400 Subject: openvswitch: update kdoc OVS_DP_ATTR_PER_CPU_PIDS Signed-off-by: Mark Gray Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux/openvswitch.h') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 6571b57b2268..0e436a3755f1 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -70,7 +70,7 @@ enum ovs_datapath_cmd { * set on the datapath port (for OVS_ACTION_ATTR_MISS). Only valid on * %OVS_DP_CMD_NEW requests. A value of zero indicates that upcalls should * not be sent. - * OVS_DP_ATTR_PER_CPU_PIDS: Per-cpu array of PIDs for upcalls when + * @OVS_DP_ATTR_PER_CPU_PIDS: Per-cpu array of PIDs for upcalls when * OVS_DP_F_DISPATCH_UPCALL_PER_CPU feature is set. * @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the * datapath. Always present in notifications. -- cgit v1.2.3-71-gd317 From 784dcfa56e0453bb197601ba0b8196f6f892ebcb Mon Sep 17 00:00:00 2001 From: Mark Gray Date: Fri, 23 Jul 2021 10:24:13 -0400 Subject: openvswitch: fix alignment issues Signed-off-by: Mark Gray Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 4 ++-- net/openvswitch/actions.c | 6 ++++-- net/openvswitch/datapath.c | 16 ++++++++++------ 3 files changed, 16 insertions(+), 10 deletions(-) (limited to 'include/uapi/linux/openvswitch.h') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 0e436a3755f1..150bcff49b1c 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -89,8 +89,8 @@ enum ovs_datapath_attr { OVS_DP_ATTR_USER_FEATURES, /* OVS_DP_F_* */ OVS_DP_ATTR_PAD, OVS_DP_ATTR_MASKS_CACHE_SIZE, - OVS_DP_ATTR_PER_CPU_PIDS, /* Netlink PIDS to receive upcalls in per-cpu - * dispatch mode + OVS_DP_ATTR_PER_CPU_PIDS, /* Netlink PIDS to receive upcalls in + * per-cpu dispatch mode */ __OVS_DP_ATTR_MAX }; diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index f79679746c62..076774034bb9 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -924,9 +924,11 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, break; case OVS_USERSPACE_ATTR_PID: - if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU) + if (dp->user_features & + OVS_DP_F_DISPATCH_UPCALL_PER_CPU) upcall.portid = - ovs_dp_get_upcall_portid(dp, smp_processor_id()); + ovs_dp_get_upcall_portid(dp, + smp_processor_id()); else upcall.portid = nla_get_u32(a); break; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 7a4edafdc685..e6f0ae5618dd 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -244,7 +244,8 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) upcall.cmd = OVS_PACKET_CMD_MISS; if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU) - upcall.portid = ovs_dp_get_upcall_portid(dp, smp_processor_id()); + upcall.portid = + ovs_dp_get_upcall_portid(dp, smp_processor_id()); else upcall.portid = ovs_vport_find_upcall_portid(p, skb); @@ -1636,13 +1637,16 @@ u32 ovs_dp_get_upcall_portid(const struct datapath *dp, uint32_t cpu_id) if (dp_nlsk_pids) { if (cpu_id < dp_nlsk_pids->n_pids) { return dp_nlsk_pids->pids[cpu_id]; - } else if (dp_nlsk_pids->n_pids > 0 && cpu_id >= dp_nlsk_pids->n_pids) { - /* If the number of netlink PIDs is mismatched with the number of - * CPUs as seen by the kernel, log this and send the upcall to an - * arbitrary socket (0) in order to not drop packets + } else if (dp_nlsk_pids->n_pids > 0 && + cpu_id >= dp_nlsk_pids->n_pids) { + /* If the number of netlink PIDs is mismatched with + * the number of CPUs as seen by the kernel, log this + * and send the upcall to an arbitrary socket (0) in + * order to not drop packets */ pr_info_ratelimited("cpu_id mismatch with handler threads"); - return dp_nlsk_pids->pids[cpu_id % dp_nlsk_pids->n_pids]; + return dp_nlsk_pids->pids[cpu_id % + dp_nlsk_pids->n_pids]; } else { return 0; } -- cgit v1.2.3-71-gd317