inet_connection_sock.c - cachepc-linux - Fork of AMDESE/linux with modifications for CachePC side-channel attack

	cachepc-linux Fork of AMDESE/linux with modifications for CachePC side-channel attack
	git clone https://git.sinitax.com/sinitax/cachepc-linux
	Log \| Files \| Refs \| README \| LICENSE \| sfeed.txt
inet_connection_sock.c (36518B)
      1// SPDX-License-Identifier: GPL-2.0-or-later
      2/*
      3 * INET		An implementation of the TCP/IP protocol suite for the LINUX
      4 *		operating system.  INET is implemented using the  BSD Socket
      5 *		interface as the means of communication with the user level.
      6 *
      7 *		Support for INET connection oriented protocols.
      8 *
      9 * Authors:	See the TCP sources
     10 */
     11
     12#include <linux/module.h>
     13#include <linux/jhash.h>
     14
     15#include <net/inet_connection_sock.h>
     16#include <net/inet_hashtables.h>
     17#include <net/inet_timewait_sock.h>
     18#include <net/ip.h>
     19#include <net/route.h>
     20#include <net/tcp_states.h>
     21#include <net/xfrm.h>
     22#include <net/tcp.h>
     23#include <net/sock_reuseport.h>
     24#include <net/addrconf.h>
     25
     26#if IS_ENABLED(CONFIG_IPV6)
     27/* match_sk*_wildcard == true:  IPV6_ADDR_ANY equals to any IPv6 addresses
     28 *				if IPv6 only, and any IPv4 addresses
     29 *				if not IPv6 only
     30 * match_sk*_wildcard == false: addresses must be exactly the same, i.e.
     31 *				IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY,
     32 *				and 0.0.0.0 equals to 0.0.0.0 only
     33 */
     34static bool ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6,
     35				 const struct in6_addr *sk2_rcv_saddr6,
     36				 __be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr,
     37				 bool sk1_ipv6only, bool sk2_ipv6only,
     38				 bool match_sk1_wildcard,
     39				 bool match_sk2_wildcard)
     40{
     41	int addr_type = ipv6_addr_type(sk1_rcv_saddr6);
     42	int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
     43
     44	/* if both are mapped, treat as IPv4 */
     45	if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) {
     46		if (!sk2_ipv6only) {
     47			if (sk1_rcv_saddr == sk2_rcv_saddr)
     48				return true;
     49			return (match_sk1_wildcard && !sk1_rcv_saddr) ||
     50				(match_sk2_wildcard && !sk2_rcv_saddr);
     51		}
     52		return false;
     53	}
     54
     55	if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY)
     56		return true;
     57
     58	if (addr_type2 == IPV6_ADDR_ANY && match_sk2_wildcard &&
     59	    !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
     60		return true;
     61
     62	if (addr_type == IPV6_ADDR_ANY && match_sk1_wildcard &&
     63	    !(sk1_ipv6only && addr_type2 == IPV6_ADDR_MAPPED))
     64		return true;
     65
     66	if (sk2_rcv_saddr6 &&
     67	    ipv6_addr_equal(sk1_rcv_saddr6, sk2_rcv_saddr6))
     68		return true;
     69
     70	return false;
     71}
     72#endif
     73
     74/* match_sk*_wildcard == true:  0.0.0.0 equals to any IPv4 addresses
     75 * match_sk*_wildcard == false: addresses must be exactly the same, i.e.
     76 *				0.0.0.0 only equals to 0.0.0.0
     77 */
     78static bool ipv4_rcv_saddr_equal(__be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr,
     79				 bool sk2_ipv6only, bool match_sk1_wildcard,
     80				 bool match_sk2_wildcard)
     81{
     82	if (!sk2_ipv6only) {
     83		if (sk1_rcv_saddr == sk2_rcv_saddr)
     84			return true;
     85		return (match_sk1_wildcard && !sk1_rcv_saddr) ||
     86			(match_sk2_wildcard && !sk2_rcv_saddr);
     87	}
     88	return false;
     89}
     90
     91bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
     92			  bool match_wildcard)
     93{
     94#if IS_ENABLED(CONFIG_IPV6)
     95	if (sk->sk_family == AF_INET6)
     96		return ipv6_rcv_saddr_equal(&sk->sk_v6_rcv_saddr,
     97					    inet6_rcv_saddr(sk2),
     98					    sk->sk_rcv_saddr,
     99					    sk2->sk_rcv_saddr,
    100					    ipv6_only_sock(sk),
    101					    ipv6_only_sock(sk2),
    102					    match_wildcard,
    103					    match_wildcard);
    104#endif
    105	return ipv4_rcv_saddr_equal(sk->sk_rcv_saddr, sk2->sk_rcv_saddr,
    106				    ipv6_only_sock(sk2), match_wildcard,
    107				    match_wildcard);
    108}
    109EXPORT_SYMBOL(inet_rcv_saddr_equal);
    110
    111bool inet_rcv_saddr_any(const struct sock *sk)
    112{
    113#if IS_ENABLED(CONFIG_IPV6)
    114	if (sk->sk_family == AF_INET6)
    115		return ipv6_addr_any(&sk->sk_v6_rcv_saddr);
    116#endif
    117	return !sk->sk_rcv_saddr;
    118}
    119
    120void inet_get_local_port_range(struct net *net, int *low, int *high)
    121{
    122	unsigned int seq;
    123
    124	do {
    125		seq = read_seqbegin(&net->ipv4.ip_local_ports.lock);
    126
    127		*low = net->ipv4.ip_local_ports.range[0];
    128		*high = net->ipv4.ip_local_ports.range[1];
    129	} while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq));
    130}
    131EXPORT_SYMBOL(inet_get_local_port_range);
    132
       /*
        * Decide whether @sk may share the port of bind bucket @tb with the
        * sockets already on tb->owners.
        *
        * @relax and @reuseport_ok select how strictly SO_REUSEADDR and
        * SO_REUSEPORT sharing is judged; see the two branches inside the loop.
        *
        * Returns non-zero when the walk stopped at a conflicting owner and 0
        * when it ran to completion.
        */
    133static int inet_csk_bind_conflict(const struct sock *sk,
    134				  const struct inet_bind_bucket *tb,
    135				  bool relax, bool reuseport_ok)
    136{
    137	struct sock *sk2;
    138	bool reuseport_cb_ok;
    139	bool reuse = sk->sk_reuse;
    140	bool reuseport = !!sk->sk_reuseport;
    141	struct sock_reuseport *reuseport_cb;
    142	kuid_t uid = sock_i_uid((struct sock *)sk);
    143
       	/*
       	 * reuseport_cb_ok is true when @sk has no reuseport group, or when
       	 * its group reports a non-zero num_closed_socks.
       	 */
    144	rcu_read_lock();
    145	reuseport_cb = rcu_dereference(sk->sk_reuseport_cb);
    146	/* paired with WRITE_ONCE() in __reuseport_(add|detach)_closed_sock */
    147	reuseport_cb_ok = !reuseport_cb || READ_ONCE(reuseport_cb->num_closed_socks);
    148	rcu_read_unlock();
    149
    150	/*
    151	 * Unlike other sk lookup places we do not check
    152	 * for sk_net here, since _all_ the socks listed
    153	 * in tb->owners list belong to the same net - the
    154	 * one this bucket belongs to.
    155	 */
    156
    157	sk_for_each_bound(sk2, &tb->owners) {
    158		int bound_dev_if2;
    159
       		/* A socket never conflicts with itself. */
    160		if (sk == sk2)
    161			continue;
    162		bound_dev_if2 = READ_ONCE(sk2->sk_bound_dev_if);
       		/*
       		 * Only owners on a compatible device are considered: either
       		 * side is not bound to a device, or both use the same one.
       		 */
    163		if ((!sk->sk_bound_dev_if ||
    164		     !bound_dev_if2 ||
    165		     sk->sk_bound_dev_if == bound_dev_if2)) {
       			/*
       			 * Both sides have sk_reuse set and the owner is not
       			 * listening: an equal address is a conflict when
       			 * !relax, or when the caller passed !reuseport_ok and
       			 * the reuseport conditions below all hold.
       			 */
    166			if (reuse && sk2->sk_reuse &&
    167			    sk2->sk_state != TCP_LISTEN) {
    168				if ((!relax ||
    169				     (!reuseport_ok &&
    170				      reuseport && sk2->sk_reuseport &&
    171				      reuseport_cb_ok &&
    172				      (sk2->sk_state == TCP_TIME_WAIT ||
    173				       uid_eq(uid, sock_i_uid(sk2))))) &&
    174				    inet_rcv_saddr_equal(sk, sk2, true))
    175					break;
    176			} else if (!reuseport_ok ||
    177				   !reuseport || !sk2->sk_reuseport ||
    178				   !reuseport_cb_ok ||
    179				   (sk2->sk_state != TCP_TIME_WAIT &&
    180				    !uid_eq(uid, sock_i_uid(sk2)))) {
       				/*
       				 * SO_REUSEPORT sharing does not apply here
       				 * (not allowed by the caller, not set on both
       				 * sockets, group check failed, or a
       				 * non-TIME_WAIT owner with another uid), so an
       				 * equal address is a conflict.
       				 */
    181				if (inet_rcv_saddr_equal(sk, sk2, true))
    182					break;
    183			}
    184		}
    185	}
       	/*
       	 * NOTE(review): relies on sk_for_each_bound() leaving sk2 NULL after
       	 * a complete walk, so non-NULL means "left via break" - confirm.
       	 */
    186	return sk2 != NULL;
    187}
    188
    189/*
    190 * Find an open port number for the socket.  Returns with the
    191 * inet_bind_hashbucket lock held.
        *
        * On success the chosen port is stored in *@port_ret and *@tb_ret is
        * either the existing bucket for that port (no conflict with @sk) or
        * NULL when no bucket exists yet.  On failure NULL is returned and no
        * bucket lock is held.
    192 */
    193static struct inet_bind_hashbucket *
    194inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret, int *port_ret)
    195{
    196	struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
    197	int port = 0;
    198	struct inet_bind_hashbucket *head;
    199	struct net *net = sock_net(sk);
    200	bool relax = false;
    201	int i, low, high, attempt_half;
    202	struct inet_bind_bucket *tb;
    203	u32 remaining, offset;
    204	int l3mdev;
    205
    206	l3mdev = inet_sk_bound_l3mdev(sk);
    207ports_exhausted:
       	/*
       	 * With sk_reuse == SK_CAN_REUSE the lower half of the range is
       	 * scanned first (attempt_half == 1) and the upper half afterwards
       	 * (attempt_half == 2); otherwise the whole range is scanned at once.
       	 */
    208	attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0;
    209other_half_scan:
    210	inet_get_local_port_range(net, &low, &high);
    211	high++; /* [32768, 60999] -> [32768, 61000[ */
       	/* A range of fewer than four ports is not split. */
    212	if (high - low < 4)
    213		attempt_half = 0;
    214	if (attempt_half) {
    215		int half = low + (((high - low) >> 2) << 1);
    216
    217		if (attempt_half == 1)
    218			high = half;
    219		else
    220			low = half;
    221	}
    222	remaining = high - low;
       	/* Round down to an even count so that stepping by 2 keeps parity. */
    223	if (likely(remaining > 1))
    224		remaining &= ~1U;
    225
    226	offset = prandom_u32() % remaining;
    227	/* __inet_hash_connect() favors ports having @low parity
    228	 * We do the opposite to not pollute connect() users.
    229	 */
    230	offset |= 1U;
    231
    232other_parity_scan:
    233	port = low + offset;
    234	for (i = 0; i < remaining; i += 2, port += 2) {
       		/* Wrap around to the start of the range. */
    235		if (unlikely(port >= high))
    236			port -= remaining;
    237		if (inet_is_local_reserved_port(net, port))
    238			continue;
    239		head = &hinfo->bhash[inet_bhashfn(net, port,
    240						  hinfo->bhash_size)];
    241		spin_lock_bh(&head->lock);
    242		inet_bind_bucket_for_each(tb, &head->chain)
    243			if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
    244			    tb->port == port) {
    245				if (!inet_csk_bind_conflict(sk, tb, relax, false))
    246					goto success;
    247				goto next_port;
    248			}
       		/* No bucket for this port yet: report it with a NULL bucket. */
    249		tb = NULL;
    250		goto success;
    251next_port:
       		/* Port is taken: release the bucket lock before moving on. */
    252		spin_unlock_bh(&head->lock);
    253		cond_resched();
    254	}
    255
       	/*
       	 * The first pass used an odd offset.  Decrementing makes it even,
       	 * which triggers one more pass over the other parity; after that
       	 * pass the offset is odd again and execution falls through.
       	 */
    256	offset--;
    257	if (!(offset & 1))
    258		goto other_parity_scan;
    259
    260	if (attempt_half == 1) {
    261		/* OK we now try the upper half of the range */
    262		attempt_half = 2;
    263		goto other_half_scan;
    264	}
    265
       	/* Last resort: one full rescan with relaxed conflict checking. */
    266	if (net->ipv4.sysctl_ip_autobind_reuse && !relax) {
    267		/* We still have a chance to connect to different destinations */
    268		relax = true;
    269		goto ports_exhausted;
    270	}
    271	return NULL;
    272success:
       	/* head->lock is still held here and is handed to the caller. */
    273	*port_ret = port;
    274	*tb_ret = tb;
    275	return head;
    276}
    277
    278static inline int sk_reuseport_match(struct inet_bind_bucket *tb,
    279				     struct sock *sk)
    280{
    281	kuid_t uid = sock_i_uid(sk);
    282
    283	if (tb->fastreuseport <= 0)
    284		return 0;
    285	if (!sk->sk_reuseport)
    286		return 0;
    287	if (rcu_access_pointer(sk->sk_reuseport_cb))
    288		return 0;
    289	if (!uid_eq(tb->fastuid, uid))
    290		return 0;
    291	/* We only need to check the rcv_saddr if this tb was once marked
    292	 * without fastreuseport and then was reset, as we can only know that
    293	 * the fast_*rcv_saddr doesn't have any conflicts with the socks on the
    294	 * owners list.
    295	 */
    296	if (tb->fastreuseport == FASTREUSEPORT_ANY)
    297		return 1;
    298#if IS_ENABLED(CONFIG_IPV6)
    299	if (tb->fast_sk_family == AF_INET6)
    300		return ipv6_rcv_saddr_equal(&tb->fast_v6_rcv_saddr,
    301					    inet6_rcv_saddr(sk),
    302					    tb->fast_rcv_saddr,
    303					    sk->sk_rcv_saddr,
    304					    tb->fast_ipv6_only,
    305					    ipv6_only_sock(sk), true, false);
    306#endif
    307	return ipv4_rcv_saddr_equal(tb->fast_rcv_saddr, sk->sk_rcv_saddr,
    308				    ipv6_only_sock(sk), true, false);
    309}
    310
    311void inet_csk_update_fastreuse(struct inet_bind_bucket *tb,
    312			       struct sock *sk)
    313{
    314	kuid_t uid = sock_i_uid(sk);
    315	bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;
    316
    317	if (hlist_empty(&tb->owners)) {
    318		tb->fastreuse = reuse;
    319		if (sk->sk_reuseport) {
    320			tb->fastreuseport = FASTREUSEPORT_ANY;
    321			tb->fastuid = uid;
    322			tb->fast_rcv_saddr = sk->sk_rcv_saddr;
    323			tb->fast_ipv6_only = ipv6_only_sock(sk);
    324			tb->fast_sk_family = sk->sk_family;
    325#if IS_ENABLED(CONFIG_IPV6)
    326			tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
    327#endif
    328		} else {
    329			tb->fastreuseport = 0;
    330		}
    331	} else {
    332		if (!reuse)
    333			tb->fastreuse = 0;
    334		if (sk->sk_reuseport) {
    335			/* We didn't match or we don't have fastreuseport set on
    336			 * the tb, but we have sk_reuseport set on this socket
    337			 * and we know that there are no bind conflicts with
    338			 * this socket in this tb, so reset our tb's reuseport
    339			 * settings so that any subsequent sockets that match
    340			 * our current socket will be put on the fast path.
    341			 *
    342			 * If we reset we need to set FASTREUSEPORT_STRICT so we
    343			 * do extra checking for all subsequent sk_reuseport
    344			 * socks.
    345			 */
    346			if (!sk_reuseport_match(tb, sk)) {
    347				tb->fastreuseport = FASTREUSEPORT_STRICT;
    348				tb->fastuid = uid;
    349				tb->fast_rcv_saddr = sk->sk_rcv_saddr;
    350				tb->fast_ipv6_only = ipv6_only_sock(sk);
    351				tb->fast_sk_family = sk->sk_family;
    352#if IS_ENABLED(CONFIG_IPV6)
    353				tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
    354#endif
    355			}
    356		} else {
    357			tb->fastreuseport = 0;
    358		}
    359	}
    360}
    361
    362/* Obtain a reference to a local port for the given sock,
    363 * if snum is zero it means select any available local port.
    364 * We try to allocate an odd port (and leave even ports for connect())
        *
        * Returns 0 on success and 1 on failure (no free port, bucket
        * allocation failure, or a bind conflict).
    365 */
    366int inet_csk_get_port(struct sock *sk, unsigned short snum)
    367{
    368	bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;
    369	struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
    370	int ret = 1, port = snum;
    371	struct inet_bind_hashbucket *head;
    372	struct net *net = sock_net(sk);
    373	struct inet_bind_bucket *tb = NULL;
    374	int l3mdev;
    375
    376	l3mdev = inet_sk_bound_l3mdev(sk);
    377
    378	if (!port) {
       		/*
       		 * Automatic selection.  A non-NULL head comes back with its
       		 * lock held; a NULL tb means the bucket still has to be
       		 * created for the chosen port.
       		 */
    379		head = inet_csk_find_open_port(sk, &tb, &port);
    380		if (!head)
    381			return ret;
    382		if (!tb)
    383			goto tb_not_found;
    384		goto success;
    385	}
       	/* Explicit port: look for its bucket under the hash chain lock. */
    386	head = &hinfo->bhash[inet_bhashfn(net, port,
    387					  hinfo->bhash_size)];
    388	spin_lock_bh(&head->lock);
    389	inet_bind_bucket_for_each(tb, &head->chain)
    390		if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
    391		    tb->port == port)
    392			goto tb_found;
    393tb_not_found:
    394	tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
    395				     net, head, port, l3mdev);
    396	if (!tb)
    397		goto fail_unlock;
    398tb_found:
    399	if (!hlist_empty(&tb->owners)) {
       		/* SK_FORCE_REUSE skips every conflict check. */
    400		if (sk->sk_reuse == SK_FORCE_REUSE)
    401			goto success;
    402
       		/* Fast path based on the hints cached in the bucket. */
    403		if ((tb->fastreuse > 0 && reuse) ||
    404		    sk_reuseport_match(tb, sk))
    405			goto success;
       		/* Slow path: compare against every owner of the bucket. */
    406		if (inet_csk_bind_conflict(sk, tb, true, true))
    407			goto fail_unlock;
    408	}
    409success:
    410	inet_csk_update_fastreuse(tb, sk);
    411
       	/* Attach the socket to the bucket unless it is already bound. */
    412	if (!inet_csk(sk)->icsk_bind_hash)
    413		inet_bind_hash(sk, tb, port);
    414	WARN_ON(inet_csk(sk)->icsk_bind_hash != tb);
    415	ret = 0;
    416
       	/* The success path falls through here too, to drop the lock. */
    417fail_unlock:
    418	spin_unlock_bh(&head->lock);
    419	return ret;
    420}
    421EXPORT_SYMBOL_GPL(inet_csk_get_port);
    422
    423/*
    424 * Wait for an incoming connection, avoid race conditions. This must be called
    425 * with the socket locked.
        *
        * @timeo is the remaining time to wait.  Returns 0 once the accept queue
        * is non-empty, -EINVAL if the socket is no longer in TCP_LISTEN, the
        * value of sock_intr_errno() if a signal is pending, or -EAGAIN when
        * the timeout has run out.  The socket is locked again on return.
    426 */
    427static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
    428{
    429	struct inet_connection_sock *icsk = inet_csk(sk);
    430	DEFINE_WAIT(wait);
    431	int err;
    432
    433	/*
    434	 * True wake-one mechanism for incoming connections: only
    435	 * one process gets woken up, not the 'whole herd'.
    436	 * Since we do not 'race & poll' for established sockets
    437	 * anymore, the common case will execute the loop only once.
    438	 *
    439	 * Subtle issue: "add_wait_queue_exclusive()" will be added
    440	 * after any current non-exclusive waiters, and we know that
    441	 * it will always _stay_ after any new non-exclusive waiters
    442	 * because all non-exclusive waiters are added at the
    443	 * beginning of the wait-queue. As such, it's ok to "drop"
    444	 * our exclusiveness temporarily when we get woken up without
    445	 * having to remove and re-insert us on the wait queue.
    446	 */
    447	for (;;) {
    448		prepare_to_wait_exclusive(sk_sleep(sk), &wait,
    449					  TASK_INTERRUPTIBLE);
       		/* The socket lock is dropped while sleeping. */
    450		release_sock(sk);
    451		if (reqsk_queue_empty(&icsk->icsk_accept_queue))
    452			timeo = schedule_timeout(timeo);
    453		sched_annotate_sleep();
    454		lock_sock(sk);
       		/*
       		 * err is set before each test so that the matching break
       		 * leaves the loop with the right return value.
       		 */
    455		err = 0;
    456		if (!reqsk_queue_empty(&icsk->icsk_accept_queue))
    457			break;
    458		err = -EINVAL;
    459		if (sk->sk_state != TCP_LISTEN)
    460			break;
    461		err = sock_intr_errno(timeo);
    462		if (signal_pending(current))
    463			break;
    464		err = -EAGAIN;
    465		if (!timeo)
    466			break;
    467	}
    468	finish_wait(sk_sleep(sk), &wait);
    469	return err;
    470}
    471
    472/*
    473 * This will accept the next outstanding connection.
        *
        * @sk:    listening socket
        * @flags: only O_NONBLOCK is looked at here, to pick the receive timeout
        * @err:   written with a negative errno on failure only
        * @kern:  not used by this function
        *
        * Returns the child socket taken from the accept queue, or NULL on
        * failure.
    474 */
    475struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern)
    476{
    477	struct inet_connection_sock *icsk = inet_csk(sk);
    478	struct request_sock_queue *queue = &icsk->icsk_accept_queue;
    479	struct request_sock *req;
    480	struct sock *newsk;
    481	int error;
    482
    483	lock_sock(sk);
    484
    485	/* We need to make sure that this socket is listening,
    486	 * and that it has something pending.
    487	 */
    488	error = -EINVAL;
    489	if (sk->sk_state != TCP_LISTEN)
    490		goto out_err;
    491
    492	/* Find already established connection */
    493	if (reqsk_queue_empty(queue)) {
    494		long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
    495
    496		/* If this is a non blocking socket don't sleep */
    497		error = -EAGAIN;
    498		if (!timeo)
    499			goto out_err;
    500
    501		error = inet_csk_wait_for_connect(sk, timeo);
    502		if (error)
    503			goto out_err;
    504	}
    505	req = reqsk_queue_remove(queue, sk);
    506	newsk = req->sk;
    507
       	/*
       	 * tfo_listener is tested once without the lock and again under
       	 * fastopenq.lock before the request is detached from the child.
       	 */
    508	if (sk->sk_protocol == IPPROTO_TCP &&
    509	    tcp_rsk(req)->tfo_listener) {
    510		spin_lock_bh(&queue->fastopenq.lock);
    511		if (tcp_rsk(req)->tfo_listener) {
    512			/* We are still waiting for the final ACK from 3WHS
    513			 * so can't free req now. Instead, we set req->sk to
    514			 * NULL to signify that the child socket is taken
    515			 * so reqsk_fastopen_remove() will free the req
    516			 * when 3WHS finishes (or is aborted).
    517			 */
    518			req->sk = NULL;
    519			req = NULL;
    520		}
    521		spin_unlock_bh(&queue->fastopenq.lock);
    522	}
    523
    524out:
    525	release_sock(sk);
       	/* Memcg accounting of the child happens with the listener unlocked. */
    526	if (newsk && mem_cgroup_sockets_enabled) {
    527		int amt;
    528
    529		/* atomically get the memory usage, set and charge the
    530		 * newsk->sk_memcg.
    531		 */
    532		lock_sock(newsk);
    533
    534		/* The socket has not been accepted yet, no need to look at
    535		 * newsk->sk_wmem_queued.
    536		 */
    537		amt = sk_mem_pages(newsk->sk_forward_alloc +
    538				   atomic_read(&newsk->sk_rmem_alloc));
    539		mem_cgroup_sk_alloc(newsk);
    540		if (newsk->sk_memcg && amt)
    541			mem_cgroup_charge_skmem(newsk->sk_memcg, amt,
    542						GFP_KERNEL | __GFP_NOFAIL);
    543
    544		release_sock(newsk);
    545	}
       	/* req is NULL on the error path and when the TFO branch kept it. */
    546	if (req)
    547		reqsk_put(req);
    548	return newsk;
    549out_err:
       	/* Error exit: report the errno, then share the unlock path above. */
    550	newsk = NULL;
    551	req = NULL;
    552	*err = error;
    553	goto out;
    554}
    555EXPORT_SYMBOL(inet_csk_accept);
    556
    557/*
    558 * Using different timers for retransmit, delayed acks and probes
    559 * We may wish to use just one timer maintaining a list of expire jiffies
    560 * to optimize.
    561 */
    562void inet_csk_init_xmit_timers(struct sock *sk,
    563			       void (*retransmit_handler)(struct timer_list *t),
    564			       void (*delack_handler)(struct timer_list *t),
    565			       void (*keepalive_handler)(struct timer_list *t))
    566{
    567	struct inet_connection_sock *icsk = inet_csk(sk);
    568
    569	timer_setup(&icsk->icsk_retransmit_timer, retransmit_handler, 0);
    570	timer_setup(&icsk->icsk_delack_timer, delack_handler, 0);
    571	timer_setup(&sk->sk_timer, keepalive_handler, 0);
    572	icsk->icsk_pending = icsk->icsk_ack.pending = 0;
    573}
    574EXPORT_SYMBOL(inet_csk_init_xmit_timers);
    575
    576void inet_csk_clear_xmit_timers(struct sock *sk)
    577{
    578	struct inet_connection_sock *icsk = inet_csk(sk);
    579
    580	icsk->icsk_pending = icsk->icsk_ack.pending = 0;
    581
    582	sk_stop_timer(sk, &icsk->icsk_retransmit_timer);
    583	sk_stop_timer(sk, &icsk->icsk_delack_timer);
    584	sk_stop_timer(sk, &sk->sk_timer);
    585}
    586EXPORT_SYMBOL(inet_csk_clear_xmit_timers);
    587
       /* Stop the keepalive timer of @sk, which is kept in sk->sk_timer. */
    588void inet_csk_delete_keepalive_timer(struct sock *sk)
    589{
    590	sk_stop_timer(sk, &sk->sk_timer);
    591}
    592EXPORT_SYMBOL(inet_csk_delete_keepalive_timer);
    593
    594void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len)
    595{
    596	sk_reset_timer(sk, &sk->sk_timer, jiffies + len);
    597}
    598EXPORT_SYMBOL(inet_csk_reset_keepalive_timer);
    599
    600struct dst_entry *inet_csk_route_req(const struct sock *sk,
    601				     struct flowi4 *fl4,
    602				     const struct request_sock *req)
    603{
    604	const struct inet_request_sock *ireq = inet_rsk(req);
    605	struct net *net = read_pnet(&ireq->ireq_net);
    606	struct ip_options_rcu *opt;
    607	struct rtable *rt;
    608
    609	rcu_read_lock();
    610	opt = rcu_dereference(ireq->ireq_opt);
    611
    612	flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
    613			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
    614			   sk->sk_protocol, inet_sk_flowi_flags(sk),
    615			   (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
    616			   ireq->ir_loc_addr, ireq->ir_rmt_port,
    617			   htons(ireq->ir_num), sk->sk_uid);
    618	security_req_classify_flow(req, flowi4_to_flowi_common(fl4));
    619	rt = ip_route_output_flow(net, fl4, sk);
    620	if (IS_ERR(rt))
    621		goto no_route;
    622	if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
    623		goto route_err;
    624	rcu_read_unlock();
    625	return &rt->dst;
    626
    627route_err:
    628	ip_rt_put(rt);
    629no_route:
    630	rcu_read_unlock();
    631	__IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
    632	return NULL;
    633}
    634EXPORT_SYMBOL_GPL(inet_csk_route_req);
    635
    636struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
    637					    struct sock *newsk,
    638					    const struct request_sock *req)
    639{
    640	const struct inet_request_sock *ireq = inet_rsk(req);
    641	struct net *net = read_pnet(&ireq->ireq_net);
    642	struct inet_sock *newinet = inet_sk(newsk);
    643	struct ip_options_rcu *opt;
    644	struct flowi4 *fl4;
    645	struct rtable *rt;
    646
    647	opt = rcu_dereference(ireq->ireq_opt);
    648	fl4 = &newinet->cork.fl.u.ip4;
    649
    650	flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
    651			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
    652			   sk->sk_protocol, inet_sk_flowi_flags(sk),
    653			   (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
    654			   ireq->ir_loc_addr, ireq->ir_rmt_port,
    655			   htons(ireq->ir_num), sk->sk_uid);
    656	security_req_classify_flow(req, flowi4_to_flowi_common(fl4));
    657	rt = ip_route_output_flow(net, fl4, sk);
    658	if (IS_ERR(rt))
    659		goto no_route;
    660	if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
    661		goto route_err;
    662	return &rt->dst;
    663
    664route_err:
    665	ip_rt_put(rt);
    666no_route:
    667	__IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
    668	return NULL;
    669}
    670EXPORT_SYMBOL_GPL(inet_csk_route_child_sock);
    671
    672/* Decide when to expire the request and when to resend SYN-ACK */
    673static void syn_ack_recalc(struct request_sock *req,
    674			   const int max_syn_ack_retries,
    675			   const u8 rskq_defer_accept,
    676			   int *expire, int *resend)
    677{
    678	if (!rskq_defer_accept) {
    679		*expire = req->num_timeout >= max_syn_ack_retries;
    680		*resend = 1;
    681		return;
    682	}
    683	*expire = req->num_timeout >= max_syn_ack_retries &&
    684		  (!inet_rsk(req)->acked || req->num_timeout >= rskq_defer_accept);
    685	/* Do not resend while waiting for data after ACK,
    686	 * start to resend on end of deferring period to give
    687	 * last chance for data or ACK to create established socket.
    688	 */
    689	*resend = !inet_rsk(req)->acked ||
    690		  req->num_timeout >= rskq_defer_accept - 1;
    691}
    692
    693int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req)
    694{
    695	int err = req->rsk_ops->rtx_syn_ack(parent, req);
    696
    697	if (!err)
    698		req->num_retrans++;
    699	return err;
    700}
    701EXPORT_SYMBOL(inet_rtx_syn_ack);
    702
       /*
        * Duplicate request sock @req so that it can be owned by listener @sk.
        *
        * Returns the clone, or NULL if the allocation failed; in that case the
        * failure counter is bumped and the reference held on @sk is dropped.
        * The clone's refcount is not initialised here.
        */
    703static struct request_sock *inet_reqsk_clone(struct request_sock *req,
    704					     struct sock *sk)
    705{
    706	struct sock *req_sk, *nreq_sk;
    707	struct request_sock *nreq;
    708
    709	nreq = kmem_cache_alloc(req->rsk_ops->slab, GFP_ATOMIC | __GFP_NOWARN);
    710	if (!nreq) {
    711		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE);
    712
    713		/* paired with refcount_inc_not_zero() in reuseport_migrate_sock() */
    714		sock_put(sk);
    715		return NULL;
    716	}
    717
    718	req_sk = req_to_sk(req);
    719	nreq_sk = req_to_sk(nreq);
    720
       	/*
       	 * Copy the object in two pieces, leaving out the region between
       	 * sk_dontcopy_begin and sk_dontcopy_end.
       	 */
    721	memcpy(nreq_sk, req_sk,
    722	       offsetof(struct sock, sk_dontcopy_begin));
    723	memcpy(&nreq_sk->sk_dontcopy_end, &req_sk->sk_dontcopy_end,
    724	       req->rsk_ops->obj_size - offsetof(struct sock, sk_dontcopy_end));
    725
       	/* Fields set individually after the two bulk copies above. */
    726	sk_node_init(&nreq_sk->sk_node);
    727	nreq_sk->sk_tx_queue_mapping = req_sk->sk_tx_queue_mapping;
    728#ifdef CONFIG_SOCK_RX_QUEUE_MAPPING
    729	nreq_sk->sk_rx_queue_mapping = req_sk->sk_rx_queue_mapping;
    730#endif
    731	nreq_sk->sk_incoming_cpu = req_sk->sk_incoming_cpu;
    732
       	/* The clone belongs to the new listener. */
    733	nreq->rsk_listener = sk;
    734
    735	/* We need not acquire fastopenq->lock
    736	 * because the child socket is locked in inet_csk_listen_stop().
    737	 */
    738	if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(nreq)->tfo_listener)
    739		rcu_assign_pointer(tcp_sk(nreq->sk)->fastopen_rsk, nreq);
    740
    741	return nreq;
    742}
    743
    744static void reqsk_queue_migrated(struct request_sock_queue *queue,
    745				 const struct request_sock *req)
    746{
    747	if (req->num_timeout == 0)
    748		atomic_inc(&queue->young);
    749	atomic_inc(&queue->qlen);
    750}
    751
    752static void reqsk_migrate_reset(struct request_sock *req)
    753{
    754	req->saved_syn = NULL;
    755#if IS_ENABLED(CONFIG_IPV6)
    756	inet_rsk(req)->ipv6_opt = NULL;
    757	inet_rsk(req)->pktopts = NULL;
    758#else
    759	inet_rsk(req)->ireq_opt = NULL;
    760#endif
    761}
    762
    763/* return true if req was found in the ehash table */
    764static bool reqsk_queue_unlink(struct request_sock *req)
    765{
    766	struct inet_hashinfo *hashinfo = req_to_sk(req)->sk_prot->h.hashinfo;
    767	bool found = false;
    768
    769	if (sk_hashed(req_to_sk(req))) {
    770		spinlock_t *lock = inet_ehash_lockp(hashinfo, req->rsk_hash);
    771
    772		spin_lock(lock);
    773		found = __sk_nulls_del_node_init_rcu(req_to_sk(req));
    774		spin_unlock(lock);
    775	}
    776	if (timer_pending(&req->rsk_timer) && del_timer_sync(&req->rsk_timer))
    777		reqsk_put(req);
    778	return found;
    779}
    780
    781bool inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req)
    782{
    783	bool unlinked = reqsk_queue_unlink(req);
    784
    785	if (unlinked) {
    786		reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
    787		reqsk_put(req);
    788	}
    789	return unlinked;
    790}
    791EXPORT_SYMBOL(inet_csk_reqsk_queue_drop);
    792
       /*
        * Drop @req from the request queue of @sk, then release one more
        * reference on it, whether or not the drop found the request.
        */
    793void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req)
    794{
    795	inet_csk_reqsk_queue_drop(sk, req);
    796	reqsk_put(req);
    797}
    798EXPORT_SYMBOL(inet_csk_reqsk_queue_drop_and_put);
    799
       /*
        * Timer callback of a request sock (rsk_timer).
        *
        * If the listener has left TCP_LISTEN, the request is first cloned for
        * another socket picked by reuseport_migrate_sock().  The handler then
        * decides whether the request expires or stays: a surviving request
        * gets its timer re-armed (after a SYN-ACK retransmit when one is due),
        * an expired one is dropped.
        */
    800static void reqsk_timer_handler(struct timer_list *t)
    801{
    802	struct request_sock *req = from_timer(req, t, rsk_timer);
    803	struct request_sock *nreq = NULL, *oreq = req;
    804	struct sock *sk_listener = req->rsk_listener;
    805	struct inet_connection_sock *icsk;
    806	struct request_sock_queue *queue;
    807	struct net *net;
    808	int max_syn_ack_retries, qlen, expire = 0, resend = 0;
    809
       	/*
       	 * Listener is not listening any more: try to migrate.  From here on
       	 * "req" is the clone and "oreq" the request this timer belongs to.
       	 */
    810	if (inet_sk_state_load(sk_listener) != TCP_LISTEN) {
    811		struct sock *nsk;
    812
    813		nsk = reuseport_migrate_sock(sk_listener, req_to_sk(req), NULL);
    814		if (!nsk)
    815			goto drop;
    816
    817		nreq = inet_reqsk_clone(req, nsk);
    818		if (!nreq)
    819			goto drop;
    820
    821		/* The new timer for the cloned req can decrease the 2
    822		 * by calling inet_csk_reqsk_queue_drop_and_put(), so
    823		 * hold another count to prevent use-after-free and
    824		 * call reqsk_put() just before return.
    825		 */
    826		refcount_set(&nreq->rsk_refcnt, 2 + 1);
    827		timer_setup(&nreq->rsk_timer, reqsk_timer_handler, TIMER_PINNED);
    828		reqsk_queue_migrated(&inet_csk(nsk)->icsk_accept_queue, req);
    829
    830		req = nreq;
    831		sk_listener = nsk;
    832	}
    833
    834	icsk = inet_csk(sk_listener);
    835	net = sock_net(sk_listener);
       	/* A per-socket retry count overrides the sysctl when it is non-zero. */
    836	max_syn_ack_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries;
    837	/* Normally all the openreqs are young and become mature
    838	 * (i.e. converted to established socket) for first timeout.
    839	 * If synack was not acknowledged for 1 second, it means
    840	 * one of the following things: synack was lost, ack was lost,
    841	 * rtt is high or nobody planned to ack (i.e. synflood).
    842	 * When server is a bit loaded, queue is populated with old
    843	 * open requests, reducing effective size of queue.
    844	 * When server is well loaded, queue size reduces to zero
    845	 * after several minutes of work. It is not synflood,
    846	 * it is normal operation. The solution is pruning
    847	 * too old entries overriding normal timeout, when
    848	 * situation becomes dangerous.
    849	 *
    850	 * Essentially, we reserve half of room for young
    851	 * embryos; and abort old ones without pity, if old
    852	 * ones are about to clog our table.
    853	 */
    854	queue = &icsk->icsk_accept_queue;
    855	qlen = reqsk_queue_len(queue);
       	/*
       	 * Queue more than half full (with a floor of 8): lower the retry
       	 * limit step by step, but never below 2, doubling "young" on each
       	 * step until it exceeds qlen.
       	 */
    856	if ((qlen << 1) > max(8U, READ_ONCE(sk_listener->sk_max_ack_backlog))) {
    857		int young = reqsk_queue_len_young(queue) << 1;
    858
    859		while (max_syn_ack_retries > 2) {
    860			if (qlen < young)
    861				break;
    862			max_syn_ack_retries--;
    863			young <<= 1;
    864		}
    865	}
    866	syn_ack_recalc(req, max_syn_ack_retries, READ_ONCE(queue->rskq_defer_accept),
    867		       &expire, &resend);
    868	req->rsk_ops->syn_ack_timeout(req);
       	/*
       	 * Keep the request if it has not expired and either no resend is
       	 * due, the retransmit succeeded, or the request is already ACKed.
       	 */
    869	if (!expire &&
    870	    (!resend ||
    871	     !inet_rtx_syn_ack(sk_listener, req) ||
    872	     inet_rsk(req)->acked)) {
       		/* The first timeout turns a young request into an old one. */
    873		if (req->num_timeout++ == 0)
    874			atomic_dec(&queue->young);
    875		mod_timer(&req->rsk_timer, jiffies + reqsk_timeout(req, TCP_RTO_MAX));
    876
       		/* No migration took place: nothing more to do. */
    877		if (!nreq)
    878			return;
    879
       		/* Put the clone into the ehash in place of the original. */
    880		if (!inet_ehash_insert(req_to_sk(nreq), req_to_sk(oreq), NULL)) {
    881			/* delete timer */
    882			inet_csk_reqsk_queue_drop(sk_listener, nreq);
    883			goto no_ownership;
    884		}
    885
       		/* Migration succeeded: retire the original request. */
    886		__NET_INC_STATS(net, LINUX_MIB_TCPMIGRATEREQSUCCESS);
    887		reqsk_migrate_reset(oreq);
    888		reqsk_queue_removed(&inet_csk(oreq->rsk_listener)->icsk_accept_queue, oreq);
    889		reqsk_put(oreq);
    890
       		/* Drop the extra count taken when the clone was set up. */
    891		reqsk_put(nreq);
    892		return;
    893	}
    894
    895	/* Even if we can clone the req, we may need not retransmit any more
    896	 * SYN+ACKs (nreq->num_timeout > max_syn_ack_retries, etc), or another
    897	 * CPU may win the "own_req" race so that inet_ehash_insert() fails.
    898	 */
    899	if (nreq) {
    900		__NET_INC_STATS(net, LINUX_MIB_TCPMIGRATEREQFAILURE);
    901no_ownership:
       		/* Undo the clone: clear its pointers, unaccount and free it. */
    902		reqsk_migrate_reset(nreq);
    903		reqsk_queue_removed(queue, nreq);
    904		__reqsk_free(nreq);
    905	}
    906
    907drop:
       	/* Always applied to the original request and its own listener. */
    908	inet_csk_reqsk_queue_drop_and_put(oreq->rsk_listener, oreq);
    909}
    910
    911static void reqsk_queue_hash_req(struct request_sock *req,
    912				 unsigned long timeout)
    913{
    914	timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED);
    915	mod_timer(&req->rsk_timer, jiffies + timeout);
    916
    917	inet_ehash_insert(req_to_sk(req), NULL, NULL);
    918	/* before letting lookups find us, make sure all req fields
    919	 * are committed to memory and refcnt initialized.
    920	 */
    921	smp_wmb();
    922	refcount_set(&req->rsk_refcnt, 2 + 1);
    923}
    924
    925void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
    926				   unsigned long timeout)
    927{
    928	reqsk_queue_hash_req(req, timeout);
    929	inet_csk_reqsk_queue_added(sk);
    930}
    931EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);
    932
    933static void inet_clone_ulp(const struct request_sock *req, struct sock *newsk,
    934			   const gfp_t priority)
    935{
    936	struct inet_connection_sock *icsk = inet_csk(newsk);
    937
    938	if (!icsk->icsk_ulp_ops)
    939		return;
    940
    941	if (icsk->icsk_ulp_ops->clone)
    942		icsk->icsk_ulp_ops->clone(req, newsk, priority);
    943}
    944
    945/**
    946 *	inet_csk_clone_lock - clone an inet socket, and lock its clone
    947 *	@sk: the socket to clone
    948 *	@req: request_sock
    949 *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
    950 *
    951 *	Caller must unlock socket even in error path (bh_unlock_sock(newsk))
    952 */
    953struct sock *inet_csk_clone_lock(const struct sock *sk,
    954				 const struct request_sock *req,
    955				 const gfp_t priority)
    956{
    957	struct sock *newsk = sk_clone_lock(sk, priority);
    958
    959	if (newsk) {
    960		struct inet_connection_sock *newicsk = inet_csk(newsk);
    961
    962		inet_sk_set_state(newsk, TCP_SYN_RECV);
    963		newicsk->icsk_bind_hash = NULL;
    964
    965		inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port;
    966		inet_sk(newsk)->inet_num = inet_rsk(req)->ir_num;
    967		inet_sk(newsk)->inet_sport = htons(inet_rsk(req)->ir_num);
    968
    969		/* listeners have SOCK_RCU_FREE, not the children */
    970		sock_reset_flag(newsk, SOCK_RCU_FREE);
    971
    972		inet_sk(newsk)->mc_list = NULL;
    973
    974		newsk->sk_mark = inet_rsk(req)->ir_mark;
    975		atomic64_set(&newsk->sk_cookie,
    976			     atomic64_read(&inet_rsk(req)->ir_cookie));
    977
    978		newicsk->icsk_retransmits = 0;
    979		newicsk->icsk_backoff	  = 0;
    980		newicsk->icsk_probes_out  = 0;
    981		newicsk->icsk_probes_tstamp = 0;
    982
    983		/* Deinitialize accept_queue to trap illegal accesses. */
    984		memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue));
    985
    986		inet_clone_ulp(req, newsk, priority);
    987
    988		security_inet_csk_clone(newsk, req);
    989	}
    990	return newsk;
    991}
    992EXPORT_SYMBOL_GPL(inet_csk_clone_lock);
    993
    994/*
    995 * At this point, there should be no process reference to this
    996 * socket, and thus no user references at all.  Therefore we
    997 * can assume the socket waitqueue is inactive and nobody will
    998 * try to jump onto it.
    999 */
   1000void inet_csk_destroy_sock(struct sock *sk)
   1001{
   1002	WARN_ON(sk->sk_state != TCP_CLOSE);
   1003	WARN_ON(!sock_flag(sk, SOCK_DEAD));
   1004
   1005	/* It cannot be in hash table! */
   1006	WARN_ON(!sk_unhashed(sk));
   1007
   1008	/* If it has not 0 inet_sk(sk)->inet_num, it must be bound */
   1009	WARN_ON(inet_sk(sk)->inet_num && !inet_csk(sk)->icsk_bind_hash);
   1010
   1011	sk->sk_prot->destroy(sk);
   1012
   1013	sk_stream_kill_queues(sk);
   1014
   1015	xfrm_sk_free_policy(sk);
   1016
   1017	sk_refcnt_debug_release(sk);
   1018
   1019	this_cpu_dec(*sk->sk_prot->orphan_count);
   1020
   1021	sock_put(sk);
   1022}
   1023EXPORT_SYMBOL(inet_csk_destroy_sock);
   1024
   1025/* This function allows to force a closure of a socket after the call to
   1026 * tcp/dccp_create_openreq_child().
   1027 */
   1028void inet_csk_prepare_forced_close(struct sock *sk)
   1029	__releases(&sk->sk_lock.slock)
   1030{
   1031	/* sk_clone_lock locked the socket and set refcnt to 2 */
   1032	bh_unlock_sock(sk);
   1033	sock_put(sk);
   1034	inet_csk_prepare_for_destroy_sock(sk);
   1035	inet_sk(sk)->inet_num = 0;
   1036}
   1037EXPORT_SYMBOL(inet_csk_prepare_forced_close);
   1038
   1039int inet_csk_listen_start(struct sock *sk)
   1040{
   1041	struct inet_connection_sock *icsk = inet_csk(sk);
   1042	struct inet_sock *inet = inet_sk(sk);
   1043	int err = -EADDRINUSE;
   1044
   1045	reqsk_queue_alloc(&icsk->icsk_accept_queue);
   1046
   1047	sk->sk_ack_backlog = 0;
   1048	inet_csk_delack_init(sk);
   1049
   1050	if (sk->sk_txrehash == SOCK_TXREHASH_DEFAULT)
   1051		sk->sk_txrehash = READ_ONCE(sock_net(sk)->core.sysctl_txrehash);
   1052
   1053	/* There is race window here: we announce ourselves listening,
   1054	 * but this transition is still not validated by get_port().
   1055	 * It is OK, because this socket enters to hash table only
   1056	 * after validation is complete.
   1057	 */
   1058	inet_sk_state_store(sk, TCP_LISTEN);
   1059	if (!sk->sk_prot->get_port(sk, inet->inet_num)) {
   1060		inet->inet_sport = htons(inet->inet_num);
   1061
   1062		sk_dst_reset(sk);
   1063		err = sk->sk_prot->hash(sk);
   1064
   1065		if (likely(!err))
   1066			return 0;
   1067	}
   1068
   1069	inet_sk_set_state(sk, TCP_CLOSE);
   1070	return err;
   1071}
   1072EXPORT_SYMBOL_GPL(inet_csk_listen_start);
   1073
   1074static void inet_child_forget(struct sock *sk, struct request_sock *req,
   1075			      struct sock *child)
   1076{
   1077	sk->sk_prot->disconnect(child, O_NONBLOCK);
   1078
   1079	sock_orphan(child);
   1080
   1081	this_cpu_inc(*sk->sk_prot->orphan_count);
   1082
   1083	if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) {
   1084		BUG_ON(rcu_access_pointer(tcp_sk(child)->fastopen_rsk) != req);
   1085		BUG_ON(sk != req->rsk_listener);
   1086
   1087		/* Paranoid, to prevent race condition if
   1088		 * an inbound pkt destined for child is
   1089		 * blocked by sock lock in tcp_v4_rcv().
   1090		 * Also to satisfy an assertion in
   1091		 * tcp_v4_destroy_sock().
   1092		 */
   1093		RCU_INIT_POINTER(tcp_sk(child)->fastopen_rsk, NULL);
   1094	}
   1095	inet_csk_destroy_sock(child);
   1096}
   1097
   1098struct sock *inet_csk_reqsk_queue_add(struct sock *sk,
   1099				      struct request_sock *req,
   1100				      struct sock *child)
   1101{
   1102	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
   1103
   1104	spin_lock(&queue->rskq_lock);
   1105	if (unlikely(sk->sk_state != TCP_LISTEN)) {
   1106		inet_child_forget(sk, req, child);
   1107		child = NULL;
   1108	} else {
   1109		req->sk = child;
   1110		req->dl_next = NULL;
   1111		if (queue->rskq_accept_head == NULL)
   1112			WRITE_ONCE(queue->rskq_accept_head, req);
   1113		else
   1114			queue->rskq_accept_tail->dl_next = req;
   1115		queue->rskq_accept_tail = req;
   1116		sk_acceptq_added(sk);
   1117	}
   1118	spin_unlock(&queue->rskq_lock);
   1119	return child;
   1120}
   1121EXPORT_SYMBOL(inet_csk_reqsk_queue_add);
   1122
   1123struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child,
   1124					 struct request_sock *req, bool own_req)
   1125{
   1126	if (own_req) {
   1127		inet_csk_reqsk_queue_drop(req->rsk_listener, req);
   1128		reqsk_queue_removed(&inet_csk(req->rsk_listener)->icsk_accept_queue, req);
   1129
   1130		if (sk != req->rsk_listener) {
   1131			/* another listening sk has been selected,
   1132			 * migrate the req to it.
   1133			 */
   1134			struct request_sock *nreq;
   1135
   1136			/* hold a refcnt for the nreq->rsk_listener
   1137			 * which is assigned in inet_reqsk_clone()
   1138			 */
   1139			sock_hold(sk);
   1140			nreq = inet_reqsk_clone(req, sk);
   1141			if (!nreq) {
   1142				inet_child_forget(sk, req, child);
   1143				goto child_put;
   1144			}
   1145
   1146			refcount_set(&nreq->rsk_refcnt, 1);
   1147			if (inet_csk_reqsk_queue_add(sk, nreq, child)) {
   1148				__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQSUCCESS);
   1149				reqsk_migrate_reset(req);
   1150				reqsk_put(req);
   1151				return child;
   1152			}
   1153
   1154			__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE);
   1155			reqsk_migrate_reset(nreq);
   1156			__reqsk_free(nreq);
   1157		} else if (inet_csk_reqsk_queue_add(sk, req, child)) {
   1158			return child;
   1159		}
   1160	}
   1161	/* Too bad, another child took ownership of the request, undo. */
   1162child_put:
   1163	bh_unlock_sock(child);
   1164	sock_put(child);
   1165	return NULL;
   1166}
   1167EXPORT_SYMBOL(inet_csk_complete_hashdance);
   1168
   1169/*
   1170 *	This routine closes sockets which have been at least partially
   1171 *	opened, but not yet accepted.
   1172 */
   1173void inet_csk_listen_stop(struct sock *sk)
   1174{
   1175	struct inet_connection_sock *icsk = inet_csk(sk);
   1176	struct request_sock_queue *queue = &icsk->icsk_accept_queue;
   1177	struct request_sock *next, *req;
   1178
   1179	/* Following specs, it would be better either to send FIN
   1180	 * (and enter FIN-WAIT-1, it is normal close)
   1181	 * or to send active reset (abort).
   1182	 * Certainly, it is pretty dangerous while synflood, but it is
   1183	 * bad justification for our negligence 8)
   1184	 * To be honest, we are not able to make either
   1185	 * of the variants now.			--ANK
   1186	 */
   1187	while ((req = reqsk_queue_remove(queue, sk)) != NULL) {
   1188		struct sock *child = req->sk, *nsk;
   1189		struct request_sock *nreq;
   1190
   1191		local_bh_disable();
   1192		bh_lock_sock(child);
   1193		WARN_ON(sock_owned_by_user(child));
   1194		sock_hold(child);
   1195
   1196		nsk = reuseport_migrate_sock(sk, child, NULL);
   1197		if (nsk) {
   1198			nreq = inet_reqsk_clone(req, nsk);
   1199			if (nreq) {
   1200				refcount_set(&nreq->rsk_refcnt, 1);
   1201
   1202				if (inet_csk_reqsk_queue_add(nsk, nreq, child)) {
   1203					__NET_INC_STATS(sock_net(nsk),
   1204							LINUX_MIB_TCPMIGRATEREQSUCCESS);
   1205					reqsk_migrate_reset(req);
   1206				} else {
   1207					__NET_INC_STATS(sock_net(nsk),
   1208							LINUX_MIB_TCPMIGRATEREQFAILURE);
   1209					reqsk_migrate_reset(nreq);
   1210					__reqsk_free(nreq);
   1211				}
   1212
   1213				/* inet_csk_reqsk_queue_add() has already
   1214				 * called inet_child_forget() on failure case.
   1215				 */
   1216				goto skip_child_forget;
   1217			}
   1218		}
   1219
   1220		inet_child_forget(sk, req, child);
   1221skip_child_forget:
   1222		reqsk_put(req);
   1223		bh_unlock_sock(child);
   1224		local_bh_enable();
   1225		sock_put(child);
   1226
   1227		cond_resched();
   1228	}
   1229	if (queue->fastopenq.rskq_rst_head) {
   1230		/* Free all the reqs queued in rskq_rst_head. */
   1231		spin_lock_bh(&queue->fastopenq.lock);
   1232		req = queue->fastopenq.rskq_rst_head;
   1233		queue->fastopenq.rskq_rst_head = NULL;
   1234		spin_unlock_bh(&queue->fastopenq.lock);
   1235		while (req != NULL) {
   1236			next = req->dl_next;
   1237			reqsk_put(req);
   1238			req = next;
   1239		}
   1240	}
   1241	WARN_ON_ONCE(sk->sk_ack_backlog);
   1242}
   1243EXPORT_SYMBOL_GPL(inet_csk_listen_stop);
   1244
   1245void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
   1246{
   1247	struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
   1248	const struct inet_sock *inet = inet_sk(sk);
   1249
   1250	sin->sin_family		= AF_INET;
   1251	sin->sin_addr.s_addr	= inet->inet_daddr;
   1252	sin->sin_port		= inet->inet_dport;
   1253}
   1254EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr);
   1255
   1256static struct dst_entry *inet_csk_rebuild_route(struct sock *sk, struct flowi *fl)
   1257{
   1258	const struct inet_sock *inet = inet_sk(sk);
   1259	const struct ip_options_rcu *inet_opt;
   1260	__be32 daddr = inet->inet_daddr;
   1261	struct flowi4 *fl4;
   1262	struct rtable *rt;
   1263
   1264	rcu_read_lock();
   1265	inet_opt = rcu_dereference(inet->inet_opt);
   1266	if (inet_opt && inet_opt->opt.srr)
   1267		daddr = inet_opt->opt.faddr;
   1268	fl4 = &fl->u.ip4;
   1269	rt = ip_route_output_ports(sock_net(sk), fl4, sk, daddr,
   1270				   inet->inet_saddr, inet->inet_dport,
   1271				   inet->inet_sport, sk->sk_protocol,
   1272				   RT_CONN_FLAGS(sk), sk->sk_bound_dev_if);
   1273	if (IS_ERR(rt))
   1274		rt = NULL;
   1275	if (rt)
   1276		sk_setup_caps(sk, &rt->dst);
   1277	rcu_read_unlock();
   1278
   1279	return &rt->dst;
   1280}
   1281
   1282struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu)
   1283{
   1284	struct dst_entry *dst = __sk_dst_check(sk, 0);
   1285	struct inet_sock *inet = inet_sk(sk);
   1286
   1287	if (!dst) {
   1288		dst = inet_csk_rebuild_route(sk, &inet->cork.fl);
   1289		if (!dst)
   1290			goto out;
   1291	}
   1292	dst->ops->update_pmtu(dst, sk, NULL, mtu, true);
   1293
   1294	dst = __sk_dst_check(sk, 0);
   1295	if (!dst)
   1296		dst = inet_csk_rebuild_route(sk, &inet->cork.fl);
   1297out:
   1298	return dst;
   1299}
   1300EXPORT_SYMBOL_GPL(inet_csk_update_pmtu);