summaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2020-01-09 18:41:42 -0800
committerDavid S. Miller <davem@davemloft.net>2020-01-09 18:41:42 -0800
commit9f120e7659b022c790416a3409dfdb54b4a775d5 (patch)
tree92e71bf4a33294a1d0dc22f9f9529a105f6a381a /include
parent6b3acfc3cc3d54a99cc5148960edfc38c94a93f2 (diff)
parent8b69a803814bb8b14155ea60df83f6d57527e69e (diff)
downloadcachepc-linux-9f120e7659b022c790416a3409dfdb54b4a775d5.tar.gz
cachepc-linux-9f120e7659b022c790416a3409dfdb54b4a775d5.zip
Merge branch 'mptcp-prereq'
Mat Martineau says: ==================== Multipath TCP: Prerequisites v6 -> v7: Rename/move ULP clone helper to make inline-friendly (patch 5) v5 -> v6: Fix BPF accessors for sk_type and sk_protocol (patch 2), fix the width of an __unused bitfield (patch 6), and add some commit message and comment text (patches 5 & 7). v4 -> v5: Cover letter subject fix. No changes to commits. v3 -> v4: Update coalesce/collapse of incoming MPTCP skbs (patch 7) v2 -> v3: Ensure sk_type alignment in struct sock (patch 2) v1 -> v2: sk_pacing_shift left as a regular struct member (patch 2), and modified SACK space check based on recent -net fix (patch 9). The MPTCP upstreaming community has been collaborating on an upstreamable MPTCP implementation that complies with RFC 8684. A minimal set of features to comply with the specification involves a sizeable set of code changes, so David requested that we split this work in to multiple, smaller patch sets to build up MPTCP infrastructure. The minimal MPTCP feature set we are proposing for review in the v5.6 timeframe begins with these three parts: Part 1 (this patch set): MPTCP prerequisites. Introduce some MPTCP definitions, additional ULP and skb extension features, TCP option space checking, and a few exported symbols. Part 2: Single subflow implementation and self tests. Part 3: Switch from MPTCP v0 (RFC 6824) to MPTCP v1 (new RFC 8684, publication expected in the next few days). Additional patches for multiple subflow support, path management, active backup, and other features are in the pipeline for submission after making progress with the above reviews. Clone/fetch: https://github.com/multipath-tcp/mptcp_net-next.git (tag: netdev-v7-part1) Browse: https://github.com/multipath-tcp/mptcp_net-next/tree/netdev-v7-part1 Thank you for your review. You can find us at mptcp@lists.01.org and https://is.gd/mptcp_upstream ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
-rw-r--r--include/linux/skbuff.h6
-rw-r--r--include/net/mptcp.h85
-rw-r--r--include/net/sock.h27
-rw-r--r--include/net/tcp.h20
-rw-r--r--include/trace/events/sock.h5
-rw-r--r--include/uapi/linux/in.h2
6 files changed, 122 insertions, 23 deletions
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 64e5b1be9ff5..016b3c4ab99a 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -4097,6 +4097,9 @@ enum skb_ext_id {
#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
TC_SKB_EXT,
#endif
+#if IS_ENABLED(CONFIG_MPTCP)
+ SKB_EXT_MPTCP,
+#endif
SKB_EXT_NUM, /* must be last */
};
@@ -4117,6 +4120,9 @@ struct skb_ext {
char data[0] __aligned(8);
};
+struct skb_ext *__skb_ext_alloc(void);
+void *__skb_ext_set(struct sk_buff *skb, enum skb_ext_id id,
+ struct skb_ext *ext);
void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id);
void __skb_ext_del(struct sk_buff *skb, enum skb_ext_id id);
void __skb_ext_put(struct skb_ext *ext);
diff --git a/include/net/mptcp.h b/include/net/mptcp.h
new file mode 100644
index 000000000000..0573ae75c3db
--- /dev/null
+++ b/include/net/mptcp.h
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Multipath TCP
+ *
+ * Copyright (c) 2017 - 2019, Intel Corporation.
+ */
+
+#ifndef __NET_MPTCP_H
+#define __NET_MPTCP_H
+
+#include <linux/skbuff.h>
+#include <linux/types.h>
+
+/* MPTCP sk_buff extension data.
+ *
+ * This is the payload looked up with skb_ext_find(skb, SKB_EXT_MPTCP).
+ * mptcp_ext_matches() compares two instances with memcmp() over
+ * sizeof(struct mptcp_ext), so every byte of the struct, including the
+ * unused flag bits and the trailing hole, takes part in that comparison.
+ *
+ * NOTE(review): the field names suggest data-level sequence/ack numbers,
+ * a per-subflow sequence number and a mapping length, with one-bit flags
+ * describing which of them are valid or 64 bit wide; confirm against the
+ * code that fills the extension in.
+ */
+struct mptcp_ext {
+ u64 data_ack;
+ u64 data_seq;
+ u32 subflow_seq;
+ u16 data_len;
+ u8 use_map:1,
+ dsn64:1,
+ data_fin:1,
+ use_ack:1,
+ ack64:1,
+ __unused:3; /* 5 flag bits + 3 unused bits fill the u8 exactly */
+ /* one byte hole; it is included in the memcmp() done by
+ * mptcp_ext_matches()
+ */
+};
+
+#ifdef CONFIG_MPTCP
+
+/* move the skb extension ownership, with the assumption that 'to' is
+ * newly allocated
+ *
+ * Nothing happens unless @from carries an SKB_EXT_MPTCP extension.
+ * Otherwise the complete extension state of @from (the active_extensions
+ * mask and the extensions pointer, not just the MPTCP entry) is handed
+ * over to @to, and the active_extensions mask of @from is cleared.
+ * from->extensions itself is not modified.
+ *
+ * If @to already has active extensions, contrary to the assumption
+ * above, WARN_ON_ONCE() fires and skb_ext_put() is called on @to before
+ * the hand-over.
+ */
+static inline void mptcp_skb_ext_move(struct sk_buff *to,
+ struct sk_buff *from)
+{
+ if (!skb_ext_exist(from, SKB_EXT_MPTCP))
+ return;
+
+ if (WARN_ON_ONCE(to->active_extensions))
+ skb_ext_put(to);
+
+ to->active_extensions = from->active_extensions;
+ to->extensions = from->extensions;
+ from->active_extensions = 0;
+}
+
+static inline bool mptcp_ext_matches(const struct mptcp_ext *to_ext,
+ const struct mptcp_ext *from_ext)
+{
+ /* Returns true when @from_ext is NULL, or when both pointers are
+ * non-NULL and the two structs are bytewise identical. A NULL
+ * @to_ext combined with a non-NULL @from_ext yields false.
+ *
+ * MPTCP always clears the ext when adding it to the skb, so
+ * holes do not bother us here
+ */
+ return !from_ext ||
+ (to_ext && from_ext &&
+ !memcmp(from_ext, to_ext, sizeof(struct mptcp_ext)));
+}
+
+/* check if skbs can be collapsed.
+ * MPTCP collapse is allowed if @from carries no MPTCP extension (whether
+ * or not @to carries one), or if both @to and @from carry one and the
+ * extension of @to is bytewise the same as that of @from.
+ * Collapsing is not possible if @to lacks an extension, but @from carries one.
+ * The decision itself is made by mptcp_ext_matches() on the results of
+ * skb_ext_find(..., SKB_EXT_MPTCP) for the two skbs.
+ */
+static inline bool mptcp_skb_can_collapse(const struct sk_buff *to,
+ const struct sk_buff *from)
+{
+ return mptcp_ext_matches(skb_ext_find(to, SKB_EXT_MPTCP),
+ skb_ext_find(from, SKB_EXT_MPTCP));
+}
+
+#else
+
+static inline void mptcp_skb_ext_move(struct sk_buff *to,
+ const struct sk_buff *from)
+{ /* !CONFIG_MPTCP stub: intentionally empty, nothing is moved */
+}
+
+static inline bool mptcp_skb_can_collapse(const struct sk_buff *to,
+ const struct sk_buff *from)
+{
+ return true; /* !CONFIG_MPTCP stub: MPTCP never vetoes a collapse */
+}
+
+#endif /* CONFIG_MPTCP */
+#endif /* __NET_MPTCP_H */
diff --git a/include/net/sock.h b/include/net/sock.h
index 8dff68b4c316..432ff73d20f3 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -436,31 +436,15 @@ struct sock {
* Because of non atomicity rules, all
* changes are protected by socket lock.
*/
- unsigned int __sk_flags_offset[0];
-#ifdef __BIG_ENDIAN_BITFIELD
-#define SK_FL_PROTO_SHIFT 16
-#define SK_FL_PROTO_MASK 0x00ff0000
-
-#define SK_FL_TYPE_SHIFT 0
-#define SK_FL_TYPE_MASK 0x0000ffff
-#else
-#define SK_FL_PROTO_SHIFT 8
-#define SK_FL_PROTO_MASK 0x0000ff00
-
-#define SK_FL_TYPE_SHIFT 16
-#define SK_FL_TYPE_MASK 0xffff0000
-#endif
-
- unsigned int sk_padding : 1,
+ u8 sk_padding : 1,
sk_kern_sock : 1,
sk_no_check_tx : 1,
sk_no_check_rx : 1,
- sk_userlocks : 4,
- sk_protocol : 8,
- sk_type : 16;
-#define SK_PROTOCOL_MAX U8_MAX
- u16 sk_gso_max_segs;
+ sk_userlocks : 4;
u8 sk_pacing_shift;
+ u16 sk_type;
+ u16 sk_protocol;
+ u16 sk_gso_max_segs;
unsigned long sk_lingertime;
struct proto *sk_prot_creator;
rwlock_t sk_callback_lock;
@@ -1480,6 +1464,7 @@ static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
sk_mem_uncharge(sk, skb->truesize);
if (static_branch_unlikely(&tcp_tx_skb_cache_key) &&
!sk->sk_tx_skb_cache && !skb_cloned(skb)) {
+ skb_ext_reset(skb);
skb_zcopy_clear(skb, true);
sk->sk_tx_skb_cache = skb;
return;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 7df37e2fddca..5e4133d09b9d 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -39,6 +39,7 @@
#include <net/tcp_states.h>
#include <net/inet_ecn.h>
#include <net/dst.h>
+#include <net/mptcp.h>
#include <linux/seq_file.h>
#include <linux/memcontrol.h>
@@ -182,6 +183,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
#define TCPOPT_SACK 5 /* SACK Block */
#define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */
#define TCPOPT_MD5SIG 19 /* MD5 Signature (RFC2385) */
+#define TCPOPT_MPTCP 30 /* Multipath TCP (RFC6824) */
#define TCPOPT_FASTOPEN 34 /* Fast open (RFC7413) */
#define TCPOPT_EXP 254 /* Experimental */
/* Magic number to be after the option value for sharing TCP
@@ -328,6 +330,9 @@ int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset,
size_t size, int flags);
ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
size_t size, int flags);
+int tcp_send_mss(struct sock *sk, int *size_goal, int flags);
+void tcp_push(struct sock *sk, int flags, int mss_now, int nonagle,
+ int size_goal);
void tcp_release_cb(struct sock *sk);
void tcp_wfree(struct sk_buff *skb);
void tcp_write_timer_handler(struct sock *sk);
@@ -977,6 +982,13 @@ static inline bool tcp_skb_can_collapse_to(const struct sk_buff *skb)
return likely(!TCP_SKB_CB(skb)->eor);
}
+static inline bool tcp_skb_can_collapse(const struct sk_buff *to,
+ const struct sk_buff *from)
+{
+ return likely(tcp_skb_can_collapse_to(to) && /* @to is not marked eor */
+ mptcp_skb_can_collapse(to, from)); /* and MPTCP allows merging @from into @to */
+}
+
/* Events passed to congestion control interface */
enum tcp_ca_event {
CA_EVENT_TX_START, /* first transmit when no packets in flight */
@@ -2002,6 +2014,11 @@ struct tcp_request_sock_ops {
enum tcp_synack_type synack_type);
};
+extern const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops;
+#if IS_ENABLED(CONFIG_IPV6)
+extern const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops;
+#endif
+
#ifdef CONFIG_SYN_COOKIES
static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops,
const struct sock *sk, struct sk_buff *skb,
@@ -2153,6 +2170,9 @@ struct tcp_ulp_ops {
/* diagnostic */
int (*get_info)(const struct sock *sk, struct sk_buff *skb);
size_t (*get_info_size)(const struct sock *sk);
+ /* clone ulp */
+ void (*clone)(const struct request_sock *req, struct sock *newsk,
+ const gfp_t priority);
char name[TCP_ULP_NAME_MAX];
struct module *owner;
diff --git a/include/trace/events/sock.h b/include/trace/events/sock.h
index 51fe9f6719eb..a966d4b5ab37 100644
--- a/include/trace/events/sock.h
+++ b/include/trace/events/sock.h
@@ -19,7 +19,8 @@
#define inet_protocol_names \
EM(IPPROTO_TCP) \
EM(IPPROTO_DCCP) \
- EMe(IPPROTO_SCTP)
+ EM(IPPROTO_SCTP) \
+ EMe(IPPROTO_MPTCP)
#define tcp_state_names \
EM(TCP_ESTABLISHED) \
@@ -147,7 +148,7 @@ TRACE_EVENT(inet_sock_set_state,
__field(__u16, sport)
__field(__u16, dport)
__field(__u16, family)
- __field(__u8, protocol)
+ __field(__u16, protocol)
__array(__u8, saddr, 4)
__array(__u8, daddr, 4)
__array(__u8, saddr_v6, 16)
diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
index e7ad9d350a28..1521073b6348 100644
--- a/include/uapi/linux/in.h
+++ b/include/uapi/linux/in.h
@@ -76,6 +76,8 @@ enum {
#define IPPROTO_MPLS IPPROTO_MPLS
IPPROTO_RAW = 255, /* Raw IP packets */
#define IPPROTO_RAW IPPROTO_RAW
+ IPPROTO_MPTCP = 262, /* Multipath TCP connection */
+#define IPPROTO_MPTCP IPPROTO_MPTCP
IPPROTO_MAX
};
#endif