cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

protocol.h (30823B)


/* SPDX-License-Identifier: GPL-2.0 */
/* Multipath TCP
 *
 * Copyright (c) 2017 - 2019, Intel Corporation.
 */

#ifndef __MPTCP_PROTOCOL_H
#define __MPTCP_PROTOCOL_H

#include <linux/random.h>
#include <net/tcp.h>
#include <net/inet_connection_sock.h>
#include <uapi/linux/mptcp.h>
#include <net/genetlink.h>

#define MPTCP_SUPPORTED_VERSION	1

/* MPTCP option bits */
#define OPTION_MPTCP_MPC_SYN	BIT(0)
#define OPTION_MPTCP_MPC_SYNACK	BIT(1)
#define OPTION_MPTCP_MPC_ACK	BIT(2)
#define OPTION_MPTCP_MPJ_SYN	BIT(3)
#define OPTION_MPTCP_MPJ_SYNACK	BIT(4)
#define OPTION_MPTCP_MPJ_ACK	BIT(5)
#define OPTION_MPTCP_ADD_ADDR	BIT(6)
#define OPTION_MPTCP_RM_ADDR	BIT(7)
#define OPTION_MPTCP_FASTCLOSE	BIT(8)
#define OPTION_MPTCP_PRIO	BIT(9)
#define OPTION_MPTCP_RST	BIT(10)
#define OPTION_MPTCP_DSS	BIT(11)
#define OPTION_MPTCP_FAIL	BIT(12)

#define OPTION_MPTCP_CSUMREQD	BIT(13)

#define OPTIONS_MPTCP_MPC	(OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK | \
				 OPTION_MPTCP_MPC_ACK)
#define OPTIONS_MPTCP_MPJ	(OPTION_MPTCP_MPJ_SYN | OPTION_MPTCP_MPJ_SYNACK | \
				 OPTION_MPTCP_MPJ_ACK)

/* MPTCP option subtypes */
#define MPTCPOPT_MP_CAPABLE	0
#define MPTCPOPT_MP_JOIN	1
#define MPTCPOPT_DSS		2
#define MPTCPOPT_ADD_ADDR	3
#define MPTCPOPT_RM_ADDR	4
#define MPTCPOPT_MP_PRIO	5
#define MPTCPOPT_MP_FAIL	6
#define MPTCPOPT_MP_FASTCLOSE	7
#define MPTCPOPT_RST		8

/* MPTCP suboption lengths */
#define TCPOLEN_MPTCP_MPC_SYN		4
#define TCPOLEN_MPTCP_MPC_SYNACK	12
#define TCPOLEN_MPTCP_MPC_ACK		20
#define TCPOLEN_MPTCP_MPC_ACK_DATA	22
#define TCPOLEN_MPTCP_MPJ_SYN		12
#define TCPOLEN_MPTCP_MPJ_SYNACK	16
#define TCPOLEN_MPTCP_MPJ_ACK		24
#define TCPOLEN_MPTCP_DSS_BASE		4
#define TCPOLEN_MPTCP_DSS_ACK32		4
#define TCPOLEN_MPTCP_DSS_ACK64		8
#define TCPOLEN_MPTCP_DSS_MAP32		10
#define TCPOLEN_MPTCP_DSS_MAP64		14
#define TCPOLEN_MPTCP_DSS_CHECKSUM	2
#define TCPOLEN_MPTCP_ADD_ADDR		16
#define TCPOLEN_MPTCP_ADD_ADDR_PORT	18
#define TCPOLEN_MPTCP_ADD_ADDR_BASE	8
#define TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT	10
#define TCPOLEN_MPTCP_ADD_ADDR6		28
#define TCPOLEN_MPTCP_ADD_ADDR6_PORT	30
#define TCPOLEN_MPTCP_ADD_ADDR6_BASE	20
#define TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT	22
#define TCPOLEN_MPTCP_PORT_LEN		2
#define TCPOLEN_MPTCP_PORT_ALIGN	2
#define TCPOLEN_MPTCP_RM_ADDR_BASE	3
#define TCPOLEN_MPTCP_PRIO		3
#define TCPOLEN_MPTCP_PRIO_ALIGN	4
#define TCPOLEN_MPTCP_FASTCLOSE		12
#define TCPOLEN_MPTCP_RST		4
#define TCPOLEN_MPTCP_FAIL		12

#define TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM	(TCPOLEN_MPTCP_DSS_CHECKSUM + TCPOLEN_MPTCP_MPC_ACK_DATA)

/* MPTCP MP_JOIN flags */
#define MPTCPOPT_BACKUP		BIT(0)
#define MPTCPOPT_HMAC_LEN	20
#define MPTCPOPT_THMAC_LEN	8

/* MPTCP MP_CAPABLE flags */
#define MPTCP_VERSION_MASK	(0x0F)
#define MPTCP_CAP_CHECKSUM_REQD	BIT(7)
#define MPTCP_CAP_EXTENSIBILITY	BIT(6)
#define MPTCP_CAP_DENY_JOIN_ID0	BIT(5)
#define MPTCP_CAP_HMAC_SHA256	BIT(0)
#define MPTCP_CAP_FLAG_MASK	(0x1F)

/* MPTCP DSS flags */
#define MPTCP_DSS_DATA_FIN	BIT(4)
#define MPTCP_DSS_DSN64		BIT(3)
#define MPTCP_DSS_HAS_MAP	BIT(2)
#define MPTCP_DSS_ACK64		BIT(1)
#define MPTCP_DSS_HAS_ACK	BIT(0)
#define MPTCP_DSS_FLAG_MASK	(0x1F)

/* MPTCP ADD_ADDR flags */
#define MPTCP_ADDR_ECHO		BIT(0)

/* MPTCP MP_PRIO flags */
#define MPTCP_PRIO_BKUP		BIT(0)

/* MPTCP TCPRST flags */
#define MPTCP_RST_TRANSIENT	BIT(0)

/* MPTCP socket atomic flags */
#define MPTCP_NOSPACE		1
#define MPTCP_WORK_RTX		2
#define MPTCP_WORK_EOF		3
#define MPTCP_FALLBACK_DONE	4
#define MPTCP_WORK_CLOSE_SUBFLOW 5

/* MPTCP socket release cb flags */
#define MPTCP_PUSH_PENDING	1
#define MPTCP_CLEAN_UNA		2
#define MPTCP_ERROR_REPORT	3
#define MPTCP_RETRANSMIT	4
#define MPTCP_FLUSH_JOIN_LIST	5
#define MPTCP_CONNECTED		6
#define MPTCP_RESET_SCHEDULER	7

static inline bool before64(__u64 seq1, __u64 seq2)
{
	return (__s64)(seq1 - seq2) < 0;
}

#define after64(seq2, seq1)	before64(seq1, seq2)

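/* Illustrative note (not part of the original header): before64()/after64()
 * compare sequence numbers modulo 2^64, so they stay correct across
 * wraparound. For example:
 *
 *	before64(0xfffffffffffffffeULL, 1ULL)	== true
 *	after64(1ULL, 0xfffffffffffffffeULL)	== true
 *
 * because (__s64)(0xfffffffffffffffe - 1) is negative.
 */
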
struct mptcp_options_received {
	u64	sndr_key;
	u64	rcvr_key;
	u64	data_ack;
	u64	data_seq;
	u32	subflow_seq;
	u16	data_len;
	__sum16	csum;
	u16	suboptions;
	u32	token;
	u32	nonce;
	u16	use_map:1,
		dsn64:1,
		data_fin:1,
		use_ack:1,
		ack64:1,
		mpc_map:1,
		reset_reason:4,
		reset_transient:1,
		echo:1,
		backup:1,
		deny_join_id0:1,
		__unused:2;
	u8	join_id;
	u64	thmac;
	u8	hmac[MPTCPOPT_HMAC_LEN];
	struct mptcp_addr_info addr;
	struct mptcp_rm_list rm_list;
	u64	ahmac;
	u64	fail_seq;
};

static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field)
{
	return htonl((TCPOPT_MPTCP << 24) | (len << 16) | (subopt << 12) |
		     ((nib & 0xF) << 8) | field);
}

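/* Illustrative sketch (hedged; modeled on how net/mptcp/options.c builds
 * suboptions): packing a TCPRST suboption word into the TCP option area.
 * "ptr" is a hypothetical __be32 cursor into the option buffer; the nibble
 * carries the transient flag, the last byte the reset reason.
 *
 *	*ptr++ = mptcp_option(MPTCPOPT_RST, TCPOLEN_MPTCP_RST,
 *			      0, MPTCP_RST_EUNSPEC);
 */
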
enum mptcp_pm_status {
	MPTCP_PM_ADD_ADDR_RECEIVED,
	MPTCP_PM_ADD_ADDR_SEND_ACK,
	MPTCP_PM_RM_ADDR_RECEIVED,
	MPTCP_PM_ESTABLISHED,
	MPTCP_PM_SUBFLOW_ESTABLISHED,
	MPTCP_PM_ALREADY_ESTABLISHED,	/* persistent status, set after ESTABLISHED event */
	MPTCP_PM_MPC_ENDPOINT_ACCOUNTED /* persistent status, set after MPC local address is
					 * accounted in id_avail_bitmap
					 */
};

enum mptcp_pm_type {
	MPTCP_PM_TYPE_KERNEL = 0,
	MPTCP_PM_TYPE_USERSPACE,

	__MPTCP_PM_TYPE_NR,
	__MPTCP_PM_TYPE_MAX = __MPTCP_PM_TYPE_NR - 1,
};

/* Status bits below MPTCP_PM_ALREADY_ESTABLISHED need pm worker actions */
#define MPTCP_PM_WORK_MASK ((1 << MPTCP_PM_ALREADY_ESTABLISHED) - 1)

enum mptcp_addr_signal_status {
	MPTCP_ADD_ADDR_SIGNAL,
	MPTCP_ADD_ADDR_ECHO,
	MPTCP_RM_ADDR_SIGNAL,
};

/* max value of mptcp_addr_info.id */
#define MPTCP_PM_MAX_ADDR_ID		U8_MAX

struct mptcp_pm_data {
	struct mptcp_addr_info local;
	struct mptcp_addr_info remote;
	struct list_head anno_list;
	struct list_head userspace_pm_local_addr_list;

	spinlock_t	lock;		/* protects the whole PM data */

	u8		addr_signal;
	bool		server_side;
	bool		work_pending;
	bool		accept_addr;
	bool		accept_subflow;
	bool		remote_deny_join_id0;
	u8		add_addr_signaled;
	u8		add_addr_accepted;
	u8		local_addr_used;
	u8		pm_type;
	u8		subflows;
	u8		status;
	DECLARE_BITMAP(id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
	struct mptcp_rm_list rm_list_tx;
	struct mptcp_rm_list rm_list_rx;
};

struct mptcp_pm_addr_entry {
	struct list_head	list;
	struct mptcp_addr_info	addr;
	u8			flags;
	int			ifindex;
	struct socket		*lsk;
};

struct mptcp_data_frag {
	struct list_head list;
	u64 data_seq;
	u16 data_len;
	u16 offset;
	u16 overhead;
	u16 already_sent;
	struct page *page;
};

/* MPTCP connection sock */
struct mptcp_sock {
	/* inet_connection_sock must be the first member */
	struct inet_connection_sock sk;
	u64		local_key;
	u64		remote_key;
	u64		write_seq;
	u64		snd_nxt;
	u64		ack_seq;
	atomic64_t	rcv_wnd_sent;
	u64		rcv_data_fin_seq;
	int		rmem_fwd_alloc;
	struct sock	*last_snd;
	int		snd_burst;
	int		old_wspace;
	u64		recovery_snd_nxt;	/* in recovery mode accept up to this seq;
						 * recovery related fields are under data_lock
						 * protection
						 */
	u64		snd_una;
	u64		wnd_end;
	unsigned long	timer_ival;
	u32		token;
	int		rmem_released;
	unsigned long	flags;
	unsigned long	cb_flags;
	unsigned long	push_pending;
	bool		recovery;		/* closing subflow write queue reinjected */
	bool		can_ack;
	bool		fully_established;
	bool		rcv_data_fin;
	bool		snd_data_fin_enable;
	bool		rcv_fastclose;
	bool		use_64bit_ack; /* Set when we received a 64-bit DSN */
	bool		csum_enabled;
	bool		allow_infinite_fallback;
	u8		recvmsg_inq:1,
			cork:1,
			nodelay:1;
	struct work_struct work;
	struct sk_buff  *ooo_last_skb;
	struct rb_root  out_of_order_queue;
	struct sk_buff_head receive_queue;
	struct list_head conn_list;
	struct list_head rtx_queue;
	struct mptcp_data_frag *first_pending;
	struct list_head join_list;
	struct socket	*subflow; /* outgoing connect/listener/!mp_capable */
	struct sock	*first;
	struct mptcp_pm_data	pm;
	struct {
		u32	space;	/* bytes copied in last measurement window */
		u32	copied; /* bytes copied in this measurement window */
		u64	time;	/* start time of measurement window */
		u64	rtt_us; /* last maximum rtt of subflows */
	} rcvq_space;

	u32 setsockopt_seq;
	char		ca_name[TCP_CA_NAME_MAX];
	struct mptcp_sock	*dl_next;
};

#define mptcp_data_lock(sk) spin_lock_bh(&(sk)->sk_lock.slock)
#define mptcp_data_unlock(sk) spin_unlock_bh(&(sk)->sk_lock.slock)

#define mptcp_for_each_subflow(__msk, __subflow)			\
	list_for_each_entry(__subflow, &((__msk)->conn_list), node)

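/* Illustrative usage (hedged; hypothetical caller holding the msk socket
 * lock): walking all subflows to count the active ones.
 *
 *	struct mptcp_subflow_context *subflow;
 *	int active = 0;
 *
 *	mptcp_for_each_subflow(msk, subflow)
 *		if (__mptcp_subflow_active(subflow))
 *			active++;
 */
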
static inline void msk_owned_by_me(const struct mptcp_sock *msk)
{
	sock_owned_by_me((const struct sock *)msk);
}

static inline struct mptcp_sock *mptcp_sk(const struct sock *sk)
{
	return (struct mptcp_sock *)sk;
}

/* the msk socket doesn't use the backlog; also account for the memory
 * scheduled for bulk freeing (rmem_released)
 */
static inline int __mptcp_rmem(const struct sock *sk)
{
	return atomic_read(&sk->sk_rmem_alloc) - READ_ONCE(mptcp_sk(sk)->rmem_released);
}

static inline int __mptcp_space(const struct sock *sk)
{
	return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) - __mptcp_rmem(sk));
}

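/* Worked example (illustrative, with made-up numbers): with sk_rcvbuf at
 * 256 KiB, sk_rmem_alloc at 64 KiB and rmem_released at 16 KiB, the
 * accounted rmem is 48 KiB, so __mptcp_space() reports
 * tcp_win_from_space(sk, 262144 - 49152), i.e. the remaining buffer
 * scaled by the usual tcp_adv_win_scale policy.
 */
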
static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk)
{
	const struct mptcp_sock *msk = mptcp_sk(sk);

	return READ_ONCE(msk->first_pending);
}

static inline struct mptcp_data_frag *mptcp_send_next(struct sock *sk)
{
	struct mptcp_sock *msk = mptcp_sk(sk);
	struct mptcp_data_frag *cur;

	cur = msk->first_pending;
	return list_is_last(&cur->list, &msk->rtx_queue) ? NULL :
						     list_next_entry(cur, list);
}

static inline struct mptcp_data_frag *mptcp_pending_tail(const struct sock *sk)
{
	struct mptcp_sock *msk = mptcp_sk(sk);

	if (!msk->first_pending)
		return NULL;

	if (WARN_ON_ONCE(list_empty(&msk->rtx_queue)))
		return NULL;

	return list_last_entry(&msk->rtx_queue, struct mptcp_data_frag, list);
}

static inline struct mptcp_data_frag *mptcp_rtx_head(const struct sock *sk)
{
	struct mptcp_sock *msk = mptcp_sk(sk);

	if (msk->snd_una == READ_ONCE(msk->snd_nxt))
		return NULL;

	return list_first_entry_or_null(&msk->rtx_queue, struct mptcp_data_frag, list);
}

struct csum_pseudo_header {
	__be64 data_seq;
	__be32 subflow_seq;
	__be16 data_len;
	__sum16 csum;
};

struct mptcp_subflow_request_sock {
	struct	tcp_request_sock sk;
	u16	mp_capable : 1,
		mp_join : 1,
		backup : 1,
		csum_reqd : 1,
		allow_join_id0 : 1;
	u8	local_id;
	u8	remote_id;
	u64	local_key;
	u64	idsn;
	u32	token;
	u32	ssn_offset;
	u64	thmac;
	u32	local_nonce;
	u32	remote_nonce;
	struct mptcp_sock	*msk;
	struct hlist_nulls_node token_node;
};

static inline struct mptcp_subflow_request_sock *
mptcp_subflow_rsk(const struct request_sock *rsk)
{
	return (struct mptcp_subflow_request_sock *)rsk;
}

enum mptcp_data_avail {
	MPTCP_SUBFLOW_NODATA,
	MPTCP_SUBFLOW_DATA_AVAIL,
};

struct mptcp_delegated_action {
	struct napi_struct napi;
	struct list_head head;
};

DECLARE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);

#define MPTCP_DELEGATE_SEND		0
#define MPTCP_DELEGATE_ACK		1

/* MPTCP subflow context */
struct mptcp_subflow_context {
	struct	list_head node; /* conn_list of subflows */

	struct_group(reset,

	unsigned long avg_pacing_rate; /* protected by msk socket lock */
	u64	local_key;
	u64	remote_key;
	u64	idsn;
	u64	map_seq;
	u32	snd_isn;
	u32	token;
	u32	rel_write_seq;
	u32	map_subflow_seq;
	u32	ssn_offset;
	u32	map_data_len;
	__wsum	map_data_csum;
	u32	map_csum_len;
	u32	request_mptcp : 1,  /* send MP_CAPABLE */
		request_join : 1,   /* send MP_JOIN */
		request_bkup : 1,
		mp_capable : 1,	    /* remote is MPTCP capable */
		mp_join : 1,	    /* remote is JOINing */
		fully_established : 1,	    /* path validated */
		pm_notified : 1,    /* PM hook called for established status */
		conn_finished : 1,
		map_valid : 1,
		map_csum_reqd : 1,
		map_data_fin : 1,
		mpc_map : 1,
		backup : 1,
		send_mp_prio : 1,
		send_mp_fail : 1,
		send_fastclose : 1,
		send_infinite_map : 1,
		rx_eof : 1,
		can_ack : 1,        /* only after processing the remote key */
		disposable : 1,	    /* ctx can be freed at ulp release time */
		stale : 1,	    /* unable to snd/rcv data, do not use for xmit */
		local_id_valid : 1, /* local_id is correctly initialized */
		valid_csum_seen : 1;        /* at least one csum validated */
	enum mptcp_data_avail data_avail;
	u32	remote_nonce;
	u64	thmac;
	u32	local_nonce;
	u32	remote_token;
	u8	hmac[MPTCPOPT_HMAC_LEN];
	u8	local_id;
	u8	remote_id;
	u8	reset_seen:1;
	u8	reset_transient:1;
	u8	reset_reason:4;
	u8	stale_count;

	long	delegated_status;
	unsigned long	fail_tout;

	);

	struct	list_head delegated_node;   /* link into delegated_action, protected by local BH */

	u32	setsockopt_seq;
	u32	stale_rcv_tstamp;

	struct	sock *tcp_sock;	    /* tcp sk backpointer */
	struct	sock *conn;	    /* parent mptcp_sock */
	const	struct inet_connection_sock_af_ops *icsk_af_ops;
	void	(*tcp_state_change)(struct sock *sk);
	void	(*tcp_error_report)(struct sock *sk);

	struct	rcu_head rcu;
};

static inline struct mptcp_subflow_context *
mptcp_subflow_ctx(const struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	/* Use RCU on icsk_ulp_data only for sock diag code */
	return (__force struct mptcp_subflow_context *)icsk->icsk_ulp_data;
}

static inline struct sock *
mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow)
{
	return subflow->tcp_sock;
}

static inline void
mptcp_subflow_ctx_reset(struct mptcp_subflow_context *subflow)
{
	memset(&subflow->reset, 0, sizeof(subflow->reset));
	subflow->request_mptcp = 1;
}

static inline u64
mptcp_subflow_get_map_offset(const struct mptcp_subflow_context *subflow)
{
	return tcp_sk(mptcp_subflow_tcp_sock(subflow))->copied_seq -
		      subflow->ssn_offset -
		      subflow->map_subflow_seq;
}

static inline u64
mptcp_subflow_get_mapped_dsn(const struct mptcp_subflow_context *subflow)
{
	return subflow->map_seq + mptcp_subflow_get_map_offset(subflow);
}

void mptcp_subflow_process_delegated(struct sock *ssk);

static inline void mptcp_subflow_delegate(struct mptcp_subflow_context *subflow, int action)
{
	struct mptcp_delegated_action *delegated;
	bool schedule;

	/* the caller must hold the subflow bh socket lock */
	lockdep_assert_in_softirq();

	/* The implied barrier pairs with mptcp_subflow_delegated_done(), and
	 * ensures the below list check sees list updates done prior to status
	 * bit changes
	 */
	if (!test_and_set_bit(action, &subflow->delegated_status)) {
		/* still on delegated list from previous scheduling */
		if (!list_empty(&subflow->delegated_node))
			return;

		delegated = this_cpu_ptr(&mptcp_delegated_actions);
		schedule = list_empty(&delegated->head);
		list_add_tail(&subflow->delegated_node, &delegated->head);
		sock_hold(mptcp_subflow_tcp_sock(subflow));
		if (schedule)
			napi_schedule(&delegated->napi);
	}
}

static inline struct mptcp_subflow_context *
mptcp_subflow_delegated_next(struct mptcp_delegated_action *delegated)
{
	struct mptcp_subflow_context *ret;

	if (list_empty(&delegated->head))
		return NULL;

	ret = list_first_entry(&delegated->head, struct mptcp_subflow_context, delegated_node);
	list_del_init(&ret->delegated_node);
	return ret;
}

static inline bool mptcp_subflow_has_delegated_action(const struct mptcp_subflow_context *subflow)
{
	return !!READ_ONCE(subflow->delegated_status);
}

static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *subflow, int action)
{
	/* pairs with mptcp_subflow_delegate(), ensures delegated_node is
	 * updated before touching the status bit
	 */
	smp_wmb();
	clear_bit(action, &subflow->delegated_status);
}

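/* Illustrative consumer sketch (hedged; the real loop lives in the NAPI
 * poll callback in net/mptcp/protocol.c, with locking and budget handling
 * omitted here): draining the per-CPU delegated-action list that
 * mptcp_subflow_delegate() feeds.
 *
 *	struct mptcp_delegated_action *delegated =
 *		this_cpu_ptr(&mptcp_delegated_actions);
 *	struct mptcp_subflow_context *subflow;
 *
 *	while ((subflow = mptcp_subflow_delegated_next(delegated)) != NULL) {
 *		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 *
 *		mptcp_subflow_process_delegated(ssk);
 *		sock_put(ssk);	// ref taken by mptcp_subflow_delegate()
 *	}
 */
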
int mptcp_is_enabled(const struct net *net);
unsigned int mptcp_get_add_addr_timeout(const struct net *net);
int mptcp_is_checksum_enabled(const struct net *net);
int mptcp_allow_join_id0(const struct net *net);
unsigned int mptcp_stale_loss_cnt(const struct net *net);
int mptcp_get_pm_type(const struct net *net);
void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
				     struct mptcp_options_received *mp_opt);
bool __mptcp_retransmit_pending_data(struct sock *sk);
void mptcp_check_and_set_pending(struct sock *sk);
void __mptcp_push_pending(struct sock *sk, unsigned int flags);
bool mptcp_subflow_data_available(struct sock *sk);
void __init mptcp_subflow_init(void);
void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how);
void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
		     struct mptcp_subflow_context *subflow);
void __mptcp_subflow_send_ack(struct sock *ssk);
void mptcp_subflow_send_ack(struct sock *ssk);
void mptcp_subflow_reset(struct sock *ssk);
void mptcp_subflow_queue_clean(struct sock *ssk);
void mptcp_sock_graft(struct sock *sk, struct socket *parent);
struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk);

bool mptcp_addresses_equal(const struct mptcp_addr_info *a,
			   const struct mptcp_addr_info *b, bool use_port);

/* called with sk socket lock held */
int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
			    const struct mptcp_addr_info *remote);
int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock);
void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
			 struct sockaddr_storage *addr,
			 unsigned short family);

static inline bool __mptcp_subflow_active(struct mptcp_subflow_context *subflow)
{
	struct sock *ssk = mptcp_subflow_tcp_sock(subflow);

	/* can't send if JOIN hasn't completed yet (i.e. subflow is not yet usable for mptcp) */
	if (subflow->request_join && !subflow->fully_established)
		return false;

	/* only send if our side has not closed yet */
	return ((1 << ssk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT));
}

void mptcp_subflow_set_active(struct mptcp_subflow_context *subflow);

bool mptcp_subflow_active(struct mptcp_subflow_context *subflow);

static inline void mptcp_subflow_tcp_fallback(struct sock *sk,
					      struct mptcp_subflow_context *ctx)
{
	sk->sk_data_ready = sock_def_readable;
	sk->sk_state_change = ctx->tcp_state_change;
	sk->sk_write_space = sk_stream_write_space;
	sk->sk_error_report = ctx->tcp_error_report;

	inet_csk(sk)->icsk_af_ops = ctx->icsk_af_ops;
}

void __init mptcp_proto_init(void);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
int __init mptcp_proto_v6_init(void);
#endif

struct sock *mptcp_sk_clone(const struct sock *sk,
			    const struct mptcp_options_received *mp_opt,
			    struct request_sock *req);
void mptcp_get_options(const struct sk_buff *skb,
		       struct mptcp_options_received *mp_opt);

void mptcp_finish_connect(struct sock *sk);
void __mptcp_set_connected(struct sock *sk);
void mptcp_reset_timeout(struct mptcp_sock *msk, unsigned long fail_tout);
static inline bool mptcp_is_fully_established(struct sock *sk)
{
	return inet_sk_state_load(sk) == TCP_ESTABLISHED &&
	       READ_ONCE(mptcp_sk(sk)->fully_established);
}
void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk);
void mptcp_data_ready(struct sock *sk, struct sock *ssk);
bool mptcp_finish_join(struct sock *sk);
bool mptcp_schedule_work(struct sock *sk);
int mptcp_setsockopt(struct sock *sk, int level, int optname,
		     sockptr_t optval, unsigned int optlen);
int mptcp_getsockopt(struct sock *sk, int level, int optname,
		     char __user *optval, int __user *option);

u64 __mptcp_expand_seq(u64 old_seq, u64 cur_seq);
static inline u64 mptcp_expand_seq(u64 old_seq, u64 cur_seq, bool use_64bit)
{
	if (use_64bit)
		return cur_seq;

	return __mptcp_expand_seq(old_seq, cur_seq);
}
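
/* Illustrative (hedged) use: when a DSS option carries only 32-bit
 * sequence fields, the missing upper bits are recovered from the last
 * known 64-bit value. A hypothetical call with a 32-bit data ack:
 *
 *	u64 new_ack = mptcp_expand_seq(msk->ack_seq, (u32)raw_ack, false);
 */
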
void __mptcp_check_push(struct sock *sk, struct sock *ssk);
void __mptcp_data_acked(struct sock *sk);
void __mptcp_error_report(struct sock *sk);
void mptcp_subflow_eof(struct sock *sk);
bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit);
static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk)
{
	return READ_ONCE(msk->snd_data_fin_enable) &&
	       READ_ONCE(msk->write_seq) == READ_ONCE(msk->snd_nxt);
}

static inline bool mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk)
{
	if ((sk->sk_userlocks & SOCK_SNDBUF_LOCK) || ssk->sk_sndbuf <= READ_ONCE(sk->sk_sndbuf))
		return false;

	WRITE_ONCE(sk->sk_sndbuf, ssk->sk_sndbuf);
	return true;
}

static inline void mptcp_write_space(struct sock *sk)
{
	if (sk_stream_is_writeable(sk)) {
		/* pairs with memory barrier in mptcp_poll */
		smp_mb();
		if (test_and_clear_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags))
			sk_stream_write_space(sk);
	}
}

void mptcp_destroy_common(struct mptcp_sock *msk);

#define MPTCP_TOKEN_MAX_RETRIES	4

void __init mptcp_token_init(void);
static inline void mptcp_token_init_request(struct request_sock *req)
{
	mptcp_subflow_rsk(req)->token_node.pprev = NULL;
}

int mptcp_token_new_request(struct request_sock *req);
void mptcp_token_destroy_request(struct request_sock *req);
int mptcp_token_new_connect(struct sock *sk);
void mptcp_token_accept(struct mptcp_subflow_request_sock *r,
			struct mptcp_sock *msk);
bool mptcp_token_exists(u32 token);
struct mptcp_sock *mptcp_token_get_sock(struct net *net, u32 token);
struct mptcp_sock *mptcp_token_iter_next(const struct net *net, long *s_slot,
					 long *s_num);
void mptcp_token_destroy(struct mptcp_sock *msk);

void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn);

void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac);
__sum16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum);

void __init mptcp_pm_init(void);
void mptcp_pm_data_init(struct mptcp_sock *msk);
void mptcp_pm_data_reset(struct mptcp_sock *msk);
int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info,
			struct mptcp_addr_info *addr);
int mptcp_pm_parse_entry(struct nlattr *attr, struct genl_info *info,
			 bool require_family,
			 struct mptcp_pm_addr_entry *entry);
void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk);
void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk);
void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side);
void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp);
bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk);
void mptcp_pm_connection_closed(struct mptcp_sock *msk);
void mptcp_pm_subflow_established(struct mptcp_sock *msk);
bool mptcp_pm_nl_check_work_pending(struct mptcp_sock *msk);
void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk,
				 const struct mptcp_subflow_context *subflow);
void mptcp_pm_add_addr_received(const struct sock *ssk,
				const struct mptcp_addr_info *addr);
void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk,
			      const struct mptcp_addr_info *addr);
void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk);
void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk);
void mptcp_pm_rm_addr_received(struct mptcp_sock *msk,
			       const struct mptcp_rm_list *rm_list);
void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup);
void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq);
int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
				 struct mptcp_addr_info *addr,
				 struct mptcp_addr_info *rem,
				 u8 bkup);
bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
			      const struct mptcp_pm_addr_entry *entry);
void mptcp_pm_free_anno_list(struct mptcp_sock *msk);
bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk);
struct mptcp_pm_add_entry *
mptcp_pm_del_add_timer(struct mptcp_sock *msk,
		       const struct mptcp_addr_info *addr, bool check_id);
struct mptcp_pm_add_entry *
mptcp_lookup_anno_list_by_saddr(const struct mptcp_sock *msk,
				const struct mptcp_addr_info *addr);
int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
					 unsigned int id,
					 u8 *flags, int *ifindex);
int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
						   unsigned int id,
						   u8 *flags, int *ifindex);
int mptcp_userspace_pm_set_flags(struct net *net, struct nlattr *token,
				 struct mptcp_pm_addr_entry *loc,
				 struct mptcp_pm_addr_entry *rem, u8 bkup);
int mptcp_pm_announce_addr(struct mptcp_sock *msk,
			   const struct mptcp_addr_info *addr,
			   bool echo);
int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list);
int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list);
void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk,
					struct list_head *rm_list);

int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
					     struct mptcp_pm_addr_entry *entry);
void mptcp_free_local_addr_list(struct mptcp_sock *msk);
int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info);
int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info);
int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info);
int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info);

void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk,
		 const struct sock *ssk, gfp_t gfp);
void mptcp_event_addr_announced(const struct sock *ssk, const struct mptcp_addr_info *info);
void mptcp_event_addr_removed(const struct mptcp_sock *msk, u8 id);
bool mptcp_userspace_pm_active(const struct mptcp_sock *msk);

static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk)
{
	return READ_ONCE(msk->pm.addr_signal) &
		(BIT(MPTCP_ADD_ADDR_SIGNAL) | BIT(MPTCP_ADD_ADDR_ECHO));
}

static inline bool mptcp_pm_should_add_signal_addr(struct mptcp_sock *msk)
{
	return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_SIGNAL);
}

static inline bool mptcp_pm_should_add_signal_echo(struct mptcp_sock *msk)
{
	return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_ECHO);
}

static inline bool mptcp_pm_should_rm_signal(struct mptcp_sock *msk)
{
	return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_RM_ADDR_SIGNAL);
}

static inline bool mptcp_pm_is_userspace(const struct mptcp_sock *msk)
{
	return READ_ONCE(msk->pm.pm_type) == MPTCP_PM_TYPE_USERSPACE;
}

static inline bool mptcp_pm_is_kernel(const struct mptcp_sock *msk)
{
	return READ_ONCE(msk->pm.pm_type) == MPTCP_PM_TYPE_KERNEL;
}

static inline unsigned int mptcp_add_addr_len(int family, bool echo, bool port)
{
	u8 len = TCPOLEN_MPTCP_ADD_ADDR_BASE;

	if (family == AF_INET6)
		len = TCPOLEN_MPTCP_ADD_ADDR6_BASE;
	if (!echo)
		len += MPTCPOPT_THMAC_LEN;
	/* account for 2 trailing 'nop' options */
	if (port)
		len += TCPOLEN_MPTCP_PORT_LEN + TCPOLEN_MPTCP_PORT_ALIGN;

	return len;
}

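/* Worked example (illustrative): an IPv4 ADD_ADDR that carries an HMAC
 * (!echo) and a port takes 8 (base) + 8 (truncated HMAC) + 2 (port) +
 * 2 (alignment nops) = 20 bytes of option space, while the suboption
 * length on the wire is TCPOLEN_MPTCP_ADD_ADDR_PORT (18) plus padding.
 */
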
static inline int mptcp_rm_addr_len(const struct mptcp_rm_list *rm_list)
{
	if (rm_list->nr == 0 || rm_list->nr > MPTCP_RM_IDS_MAX)
		return -EINVAL;

	return TCPOLEN_MPTCP_RM_ADDR_BASE + roundup(rm_list->nr - 1, 4) + 1;
}

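/* Worked example (illustrative): for rm_list->nr == 3 the option takes
 * TCPOLEN_MPTCP_RM_ADDR_BASE (3) + roundup(2, 4) + 1 = 3 + 4 + 1 = 8
 * bytes, i.e. the three address ids padded out to a 4-byte TCP option
 * boundary.
 */
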
bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb,
			      unsigned int opt_size, unsigned int remaining,
			      struct mptcp_addr_info *addr, bool *echo,
			      bool *drop_other_suboptions);
bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
			     struct mptcp_rm_list *rm_list);
int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc);

void __init mptcp_pm_nl_init(void);
void mptcp_pm_nl_work(struct mptcp_sock *msk);
void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk,
				     const struct mptcp_rm_list *rm_list);
int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk);
unsigned int mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk);
unsigned int mptcp_pm_get_subflows_max(const struct mptcp_sock *msk);
unsigned int mptcp_pm_get_local_addr_max(const struct mptcp_sock *msk);

/* called under PM lock */
static inline void __mptcp_pm_close_subflow(struct mptcp_sock *msk)
{
	if (--msk->pm.subflows < mptcp_pm_get_subflows_max(msk))
		WRITE_ONCE(msk->pm.accept_subflow, true);
}

static inline void mptcp_pm_close_subflow(struct mptcp_sock *msk)
{
	spin_lock_bh(&msk->pm.lock);
	__mptcp_pm_close_subflow(msk);
	spin_unlock_bh(&msk->pm.lock);
}

void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk);
void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk);

static inline struct mptcp_ext *mptcp_get_ext(const struct sk_buff *skb)
{
	return (struct mptcp_ext *)skb_ext_find(skb, SKB_EXT_MPTCP);
}

void mptcp_diag_subflow_init(struct tcp_ulp_ops *ops);

static inline bool __mptcp_check_fallback(const struct mptcp_sock *msk)
{
	return test_bit(MPTCP_FALLBACK_DONE, &msk->flags);
}

static inline bool mptcp_check_fallback(const struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct mptcp_sock *msk = mptcp_sk(subflow->conn);

	return __mptcp_check_fallback(msk);
}

static inline void __mptcp_do_fallback(struct mptcp_sock *msk)
{
	if (test_bit(MPTCP_FALLBACK_DONE, &msk->flags)) {
		pr_debug("TCP fallback already done (msk=%p)", msk);
		return;
	}
	set_bit(MPTCP_FALLBACK_DONE, &msk->flags);
}

static inline void mptcp_do_fallback(struct sock *ssk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	struct sock *sk = subflow->conn;
	struct mptcp_sock *msk;

	msk = mptcp_sk(sk);
	__mptcp_do_fallback(msk);
	if (READ_ONCE(msk->snd_data_fin_enable) && !(ssk->sk_shutdown & SEND_SHUTDOWN)) {
		gfp_t saved_allocation = ssk->sk_allocation;

		/* we are in an atomic (BH) scope, override ssk default for data
		 * fin allocation
		 */
		ssk->sk_allocation = GFP_ATOMIC;
		ssk->sk_shutdown |= SEND_SHUTDOWN;
		tcp_shutdown(ssk, SEND_SHUTDOWN);
		ssk->sk_allocation = saved_allocation;
	}
}

#define pr_fallback(a) pr_debug("%s:fallback to TCP (msk=%p)", __func__, a)

static inline bool mptcp_check_infinite_map(struct sk_buff *skb)
{
	struct mptcp_ext *mpext;

	mpext = skb ? mptcp_get_ext(skb) : NULL;
	if (mpext && mpext->infinite_map)
		return true;

	return false;
}

static inline bool is_active_ssk(struct mptcp_subflow_context *subflow)
{
	return (subflow->request_mptcp || subflow->request_join);
}

static inline bool subflow_simultaneous_connect(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

	return sk->sk_state == TCP_ESTABLISHED &&
	       is_active_ssk(subflow) &&
	       !subflow->conn_finished;
}

#ifdef CONFIG_SYN_COOKIES
void subflow_init_req_cookie_join_save(const struct mptcp_subflow_request_sock *subflow_req,
				       struct sk_buff *skb);
bool mptcp_token_join_cookie_init_state(struct mptcp_subflow_request_sock *subflow_req,
					struct sk_buff *skb);
void __init mptcp_join_cookie_init(void);
#else
static inline void
subflow_init_req_cookie_join_save(const struct mptcp_subflow_request_sock *subflow_req,
				  struct sk_buff *skb) {}
static inline bool
mptcp_token_join_cookie_init_state(struct mptcp_subflow_request_sock *subflow_req,
				   struct sk_buff *skb)
{
	return false;
}

static inline void mptcp_join_cookie_init(void) {}
#endif

#endif /* __MPTCP_PROTOCOL_H */