cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

chtls_cm.c (57638B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * Copyright (c) 2018 Chelsio Communications, Inc.
      4 *
      5 * Written by: Atul Gupta (atul.gupta@chelsio.com)
      6 */
      7
      8#include <linux/module.h>
      9#include <linux/list.h>
     10#include <linux/workqueue.h>
     11#include <linux/skbuff.h>
     12#include <linux/timer.h>
     13#include <linux/notifier.h>
     14#include <linux/inetdevice.h>
     15#include <linux/ip.h>
     16#include <linux/tcp.h>
     17#include <linux/sched/signal.h>
     18#include <linux/kallsyms.h>
     19#include <linux/kprobes.h>
     20#include <linux/if_vlan.h>
     21#include <linux/ipv6.h>
     22#include <net/ipv6.h>
     23#include <net/transp_v6.h>
     24#include <net/ip6_route.h>
     25#include <net/inet_common.h>
     26#include <net/tcp.h>
     27#include <net/dst.h>
     28#include <net/tls.h>
     29#include <net/addrconf.h>
     30#include <net/secure_seq.h>
     31
     32#include "chtls.h"
     33#include "chtls_cm.h"
     34#include "clip_tbl.h"
     35#include "t4_tcb.h"
     36
     37/*
     38 * State transitions and actions for close.  Note that if we are in SYN_SENT
     39 * we remain in that state as we cannot control a connection while it's in
     40 * SYN_SENT; such connections are allowed to establish and are then aborted.
     41 */
     42static unsigned char new_state[16] = {
     43	/* current state:     new state:      action: */
     44	/* (Invalid)       */ TCP_CLOSE,
     45	/* TCP_ESTABLISHED */ TCP_FIN_WAIT1 | TCP_ACTION_FIN,
     46	/* TCP_SYN_SENT    */ TCP_SYN_SENT,
     47	/* TCP_SYN_RECV    */ TCP_FIN_WAIT1 | TCP_ACTION_FIN,
     48	/* TCP_FIN_WAIT1   */ TCP_FIN_WAIT1,
     49	/* TCP_FIN_WAIT2   */ TCP_FIN_WAIT2,
     50	/* TCP_TIME_WAIT   */ TCP_CLOSE,
     51	/* TCP_CLOSE       */ TCP_CLOSE,
     52	/* TCP_CLOSE_WAIT  */ TCP_LAST_ACK | TCP_ACTION_FIN,
     53	/* TCP_LAST_ACK    */ TCP_LAST_ACK,
     54	/* TCP_LISTEN      */ TCP_CLOSE,
     55	/* TCP_CLOSING     */ TCP_CLOSING,
     56};
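        /*
         * Each entry above packs the next TCP state into its low bits and may
         * OR in TCP_ACTION_FIN; make_close_transition() below decodes it.
         * For example, starting from TCP_ESTABLISHED:
         *
         *	next = new_state[TCP_ESTABLISHED];         is TCP_FIN_WAIT1 | TCP_ACTION_FIN
         *	tcp_set_state(sk, next & TCP_STATE_MASK);  enters TCP_FIN_WAIT1
         *	return next & TCP_ACTION_FIN;              non-zero, so a FIN is sent
         */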
     57
     58static struct chtls_sock *chtls_sock_create(struct chtls_dev *cdev)
     59{
     60	struct chtls_sock *csk = kzalloc(sizeof(*csk), GFP_ATOMIC);
     61
     62	if (!csk)
     63		return NULL;
     64
     65	csk->txdata_skb_cache = alloc_skb(TXDATA_SKB_LEN, GFP_ATOMIC);
     66	if (!csk->txdata_skb_cache) {
     67		kfree(csk);
     68		return NULL;
     69	}
     70
     71	kref_init(&csk->kref);
     72	csk->cdev = cdev;
     73	skb_queue_head_init(&csk->txq);
     74	csk->wr_skb_head = NULL;
     75	csk->wr_skb_tail = NULL;
     76	csk->mss = MAX_MSS;
     77	csk->tlshws.ofld = 1;
     78	csk->tlshws.txkey = -1;
     79	csk->tlshws.rxkey = -1;
     80	csk->tlshws.mfs = TLS_MFS;
     81	skb_queue_head_init(&csk->tlshws.sk_recv_queue);
     82	return csk;
     83}
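        /*
         * The csk returned here holds the single reference taken by
         * kref_init(); it is dropped with kref_put(&csk->kref,
         * chtls_sock_release) once the connection is torn down (see
         * chtls_destroy_sock() below).
         */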
     84
     85static void chtls_sock_release(struct kref *ref)
     86{
     87	struct chtls_sock *csk =
     88		container_of(ref, struct chtls_sock, kref);
     89
     90	kfree(csk);
     91}
     92
     93static struct net_device *chtls_find_netdev(struct chtls_dev *cdev,
     94					    struct sock *sk)
     95{
     96	struct adapter *adap = pci_get_drvdata(cdev->pdev);
     97	struct net_device *ndev = cdev->ports[0];
     98#if IS_ENABLED(CONFIG_IPV6)
     99	struct net_device *temp;
    100	int addr_type;
    101#endif
    102	int i;
    103
    104	switch (sk->sk_family) {
    105	case PF_INET:
    106		if (likely(!inet_sk(sk)->inet_rcv_saddr))
    107			return ndev;
    108		ndev = __ip_dev_find(&init_net, inet_sk(sk)->inet_rcv_saddr, false);
    109		break;
    110#if IS_ENABLED(CONFIG_IPV6)
    111	case PF_INET6:
    112		addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
    113		if (likely(addr_type == IPV6_ADDR_ANY))
    114			return ndev;
    115
    116		for_each_netdev_rcu(&init_net, temp) {
    117			if (ipv6_chk_addr(&init_net, (struct in6_addr *)
    118					  &sk->sk_v6_rcv_saddr, temp, 1)) {
    119				ndev = temp;
    120				break;
    121			}
    122		}
     123		break;
    124#endif
    125	default:
    126		return NULL;
    127	}
    128
    129	if (!ndev)
    130		return NULL;
    131
    132	if (is_vlan_dev(ndev))
    133		ndev = vlan_dev_real_dev(ndev);
    134
    135	for_each_port(adap, i)
    136		if (cdev->ports[i] == ndev)
    137			return ndev;
    138	return NULL;
    139}
    140
    141static void assign_rxopt(struct sock *sk, unsigned int opt)
    142{
    143	const struct chtls_dev *cdev;
    144	struct chtls_sock *csk;
    145	struct tcp_sock *tp;
    146
    147	csk = rcu_dereference_sk_user_data(sk);
    148	tp = tcp_sk(sk);
    149
    150	cdev = csk->cdev;
    151	tp->tcp_header_len           = sizeof(struct tcphdr);
    152	tp->rx_opt.mss_clamp         = cdev->mtus[TCPOPT_MSS_G(opt)] - 40;
    153	tp->mss_cache                = tp->rx_opt.mss_clamp;
    154	tp->rx_opt.tstamp_ok         = TCPOPT_TSTAMP_G(opt);
    155	tp->rx_opt.snd_wscale        = TCPOPT_SACK_G(opt);
    156	tp->rx_opt.wscale_ok         = TCPOPT_WSCALE_OK_G(opt);
    157	SND_WSCALE(tp)               = TCPOPT_SND_WSCALE_G(opt);
    158	if (!tp->rx_opt.wscale_ok)
    159		tp->rx_opt.rcv_wscale = 0;
    160	if (tp->rx_opt.tstamp_ok) {
    161		tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED;
    162		tp->rx_opt.mss_clamp -= TCPOLEN_TSTAMP_ALIGNED;
    163	} else if (csk->opt2 & TSTAMPS_EN_F) {
    164		csk->opt2 &= ~TSTAMPS_EN_F;
    165		csk->mtu_idx = TCPOPT_MSS_G(opt);
    166	}
    167}
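        /*
         * Worked example, assuming the hardware MTU table entry selected by
         * TCPOPT_MSS_G(opt) is 1500: mss_clamp starts at 1500 - 40 = 1460
         * (fixed IPv4 + TCP headers) and drops to 1460 - 12 = 1448 when
         * timestamps were negotiated, since TCPOLEN_TSTAMP_ALIGNED is 12.
         */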
    168
    169static void chtls_purge_receive_queue(struct sock *sk)
    170{
    171	struct sk_buff *skb;
    172
    173	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
    174		skb_dst_set(skb, (void *)NULL);
    175		kfree_skb(skb);
    176	}
    177}
    178
    179static void chtls_purge_write_queue(struct sock *sk)
    180{
    181	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
    182	struct sk_buff *skb;
    183
    184	while ((skb = __skb_dequeue(&csk->txq))) {
    185		sk->sk_wmem_queued -= skb->truesize;
    186		__kfree_skb(skb);
    187	}
    188}
    189
    190static void chtls_purge_recv_queue(struct sock *sk)
    191{
    192	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
    193	struct chtls_hws *tlsk = &csk->tlshws;
    194	struct sk_buff *skb;
    195
    196	while ((skb = __skb_dequeue(&tlsk->sk_recv_queue)) != NULL) {
    197		skb_dst_set(skb, NULL);
    198		kfree_skb(skb);
    199	}
    200}
    201
    202static void abort_arp_failure(void *handle, struct sk_buff *skb)
    203{
    204	struct cpl_abort_req *req = cplhdr(skb);
    205	struct chtls_dev *cdev;
    206
    207	cdev = (struct chtls_dev *)handle;
    208	req->cmd = CPL_ABORT_NO_RST;
    209	cxgb4_ofld_send(cdev->lldi->ports[0], skb);
    210}
    211
    212static struct sk_buff *alloc_ctrl_skb(struct sk_buff *skb, int len)
    213{
    214	if (likely(skb && !skb_shared(skb) && !skb_cloned(skb))) {
    215		__skb_trim(skb, 0);
    216		refcount_inc(&skb->users);
    217	} else {
    218		skb = alloc_skb(len, GFP_KERNEL | __GFP_NOFAIL);
    219	}
    220	return skb;
    221}
    222
    223static void chtls_send_abort(struct sock *sk, int mode, struct sk_buff *skb)
    224{
    225	struct cpl_abort_req *req;
    226	struct chtls_sock *csk;
    227	struct tcp_sock *tp;
    228
    229	csk = rcu_dereference_sk_user_data(sk);
    230	tp = tcp_sk(sk);
    231
    232	if (!skb)
    233		skb = alloc_ctrl_skb(csk->txdata_skb_cache, sizeof(*req));
    234
    235	req = (struct cpl_abort_req *)skb_put(skb, sizeof(*req));
    236	INIT_TP_WR_CPL(req, CPL_ABORT_REQ, csk->tid);
    237	skb_set_queue_mapping(skb, (csk->txq_idx << 1) | CPL_PRIORITY_DATA);
    238	req->rsvd0 = htonl(tp->snd_nxt);
    239	req->rsvd1 = !csk_flag_nochk(csk, CSK_TX_DATA_SENT);
    240	req->cmd = mode;
    241	t4_set_arp_err_handler(skb, csk->cdev, abort_arp_failure);
    242	send_or_defer(sk, tp, skb, mode == CPL_ABORT_SEND_RST);
    243}
    244
    245static void chtls_send_reset(struct sock *sk, int mode, struct sk_buff *skb)
    246{
    247	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
    248
    249	if (unlikely(csk_flag_nochk(csk, CSK_ABORT_SHUTDOWN) ||
    250		     !csk->cdev)) {
    251		if (sk->sk_state == TCP_SYN_RECV)
    252			csk_set_flag(csk, CSK_RST_ABORTED);
    253		goto out;
    254	}
    255
    256	if (!csk_flag_nochk(csk, CSK_TX_DATA_SENT)) {
    257		struct tcp_sock *tp = tcp_sk(sk);
    258
    259		if (send_tx_flowc_wr(sk, 0, tp->snd_nxt, tp->rcv_nxt) < 0)
    260			WARN_ONCE(1, "send tx flowc error");
    261		csk_set_flag(csk, CSK_TX_DATA_SENT);
    262	}
    263
    264	csk_set_flag(csk, CSK_ABORT_RPL_PENDING);
    265	chtls_purge_write_queue(sk);
    266
    267	csk_set_flag(csk, CSK_ABORT_SHUTDOWN);
    268	if (sk->sk_state != TCP_SYN_RECV)
    269		chtls_send_abort(sk, mode, skb);
    270	else
    271		chtls_set_tcb_field_rpl_skb(sk, TCB_T_FLAGS_W,
    272					    TCB_T_FLAGS_V(TCB_T_FLAGS_M), 0,
    273					    TCB_FIELD_COOKIE_TFLAG, 1);
    274
    275	return;
    276out:
    277	kfree_skb(skb);
    278}
    279
    280static void release_tcp_port(struct sock *sk)
    281{
    282	if (inet_csk(sk)->icsk_bind_hash)
    283		inet_put_port(sk);
    284}
    285
    286static void tcp_uncork(struct sock *sk)
    287{
    288	struct tcp_sock *tp = tcp_sk(sk);
    289
    290	if (tp->nonagle & TCP_NAGLE_CORK) {
    291		tp->nonagle &= ~TCP_NAGLE_CORK;
    292		chtls_tcp_push(sk, 0);
    293	}
    294}
    295
    296static void chtls_close_conn(struct sock *sk)
    297{
    298	struct cpl_close_con_req *req;
    299	struct chtls_sock *csk;
    300	struct sk_buff *skb;
    301	unsigned int tid;
    302	unsigned int len;
    303
    304	len = roundup(sizeof(struct cpl_close_con_req), 16);
    305	csk = rcu_dereference_sk_user_data(sk);
    306	tid = csk->tid;
    307
    308	skb = alloc_skb(len, GFP_KERNEL | __GFP_NOFAIL);
    309	req = (struct cpl_close_con_req *)__skb_put(skb, len);
    310	memset(req, 0, len);
    311	req->wr.wr_hi = htonl(FW_WR_OP_V(FW_TP_WR) |
    312			      FW_WR_IMMDLEN_V(sizeof(*req) -
    313					      sizeof(req->wr)));
    314	req->wr.wr_mid = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*req), 16)) |
    315			       FW_WR_FLOWID_V(tid));
    316
    317	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid));
    318
    319	tcp_uncork(sk);
    320	skb_entail(sk, skb, ULPCB_FLAG_NO_HDR | ULPCB_FLAG_NO_APPEND);
    321	if (sk->sk_state != TCP_SYN_SENT)
    322		chtls_push_frames(csk, 1);
    323}
    324
    325/*
    326 * Perform a state transition during close and return the actions indicated
     327 * for the transition.  Do not make this function inline; the main reason
    328 * it exists at all is to avoid multiple inlining of tcp_set_state.
    329 */
    330static int make_close_transition(struct sock *sk)
    331{
    332	int next = (int)new_state[sk->sk_state];
    333
    334	tcp_set_state(sk, next & TCP_STATE_MASK);
    335	return next & TCP_ACTION_FIN;
    336}
    337
    338void chtls_close(struct sock *sk, long timeout)
    339{
    340	int data_lost, prev_state;
    341	struct chtls_sock *csk;
    342
    343	csk = rcu_dereference_sk_user_data(sk);
    344
    345	lock_sock(sk);
    346	sk->sk_shutdown |= SHUTDOWN_MASK;
    347
    348	data_lost = skb_queue_len(&sk->sk_receive_queue);
    349	data_lost |= skb_queue_len(&csk->tlshws.sk_recv_queue);
    350	chtls_purge_recv_queue(sk);
    351	chtls_purge_receive_queue(sk);
    352
    353	if (sk->sk_state == TCP_CLOSE) {
    354		goto wait;
    355	} else if (data_lost || sk->sk_state == TCP_SYN_SENT) {
    356		chtls_send_reset(sk, CPL_ABORT_SEND_RST, NULL);
    357		release_tcp_port(sk);
    358		goto unlock;
    359	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
    360		sk->sk_prot->disconnect(sk, 0);
    361	} else if (make_close_transition(sk)) {
    362		chtls_close_conn(sk);
    363	}
    364wait:
    365	if (timeout)
    366		sk_stream_wait_close(sk, timeout);
    367
    368unlock:
    369	prev_state = sk->sk_state;
    370	sock_hold(sk);
    371	sock_orphan(sk);
    372
    373	release_sock(sk);
    374
    375	local_bh_disable();
    376	bh_lock_sock(sk);
    377
    378	if (prev_state != TCP_CLOSE && sk->sk_state == TCP_CLOSE)
    379		goto out;
    380
    381	if (sk->sk_state == TCP_FIN_WAIT2 && tcp_sk(sk)->linger2 < 0 &&
    382	    !csk_flag(sk, CSK_ABORT_SHUTDOWN)) {
    383		struct sk_buff *skb;
    384
    385		skb = alloc_skb(sizeof(struct cpl_abort_req), GFP_ATOMIC);
    386		if (skb)
    387			chtls_send_reset(sk, CPL_ABORT_SEND_RST, skb);
    388	}
    389
    390	if (sk->sk_state == TCP_CLOSE)
    391		inet_csk_destroy_sock(sk);
    392
    393out:
    394	bh_unlock_sock(sk);
    395	local_bh_enable();
    396	sock_put(sk);
    397}
    398
    399/*
     400 * Wait until a socket enters one of the given states.
    401 */
    402static int wait_for_states(struct sock *sk, unsigned int states)
    403{
    404	DECLARE_WAITQUEUE(wait, current);
    405	struct socket_wq _sk_wq;
    406	long current_timeo;
    407	int err = 0;
    408
    409	current_timeo = 200;
    410
    411	/*
    412	 * We want this to work even when there's no associated struct socket.
    413	 * In that case we provide a temporary wait_queue_head_t.
    414	 */
    415	if (!sk->sk_wq) {
    416		init_waitqueue_head(&_sk_wq.wait);
    417		_sk_wq.fasync_list = NULL;
    418		init_rcu_head_on_stack(&_sk_wq.rcu);
    419		RCU_INIT_POINTER(sk->sk_wq, &_sk_wq);
    420	}
    421
    422	add_wait_queue(sk_sleep(sk), &wait);
    423	while (!sk_in_state(sk, states)) {
    424		if (!current_timeo) {
    425			err = -EBUSY;
    426			break;
    427		}
    428		if (signal_pending(current)) {
    429			err = sock_intr_errno(current_timeo);
    430			break;
    431		}
    432		set_current_state(TASK_UNINTERRUPTIBLE);
    433		release_sock(sk);
    434		if (!sk_in_state(sk, states))
    435			current_timeo = schedule_timeout(current_timeo);
    436		__set_current_state(TASK_RUNNING);
    437		lock_sock(sk);
    438	}
    439	remove_wait_queue(sk_sleep(sk), &wait);
    440
    441	if (rcu_dereference(sk->sk_wq) == &_sk_wq)
    442		sk->sk_wq = NULL;
    443	return err;
    444}
    445
    446int chtls_disconnect(struct sock *sk, int flags)
    447{
    448	struct tcp_sock *tp;
    449	int err;
    450
    451	tp = tcp_sk(sk);
    452	chtls_purge_recv_queue(sk);
    453	chtls_purge_receive_queue(sk);
    454	chtls_purge_write_queue(sk);
    455
    456	if (sk->sk_state != TCP_CLOSE) {
    457		sk->sk_err = ECONNRESET;
    458		chtls_send_reset(sk, CPL_ABORT_SEND_RST, NULL);
    459		err = wait_for_states(sk, TCPF_CLOSE);
    460		if (err)
    461			return err;
    462	}
    463	chtls_purge_recv_queue(sk);
    464	chtls_purge_receive_queue(sk);
    465	tp->max_window = 0xFFFF << (tp->rx_opt.snd_wscale);
    466	return tcp_disconnect(sk, flags);
    467}
    468
    469#define SHUTDOWN_ELIGIBLE_STATE (TCPF_ESTABLISHED | \
    470				 TCPF_SYN_RECV | TCPF_CLOSE_WAIT)
    471void chtls_shutdown(struct sock *sk, int how)
    472{
    473	if ((how & SEND_SHUTDOWN) &&
    474	    sk_in_state(sk, SHUTDOWN_ELIGIBLE_STATE) &&
    475	    make_close_transition(sk))
    476		chtls_close_conn(sk);
    477}
    478
    479void chtls_destroy_sock(struct sock *sk)
    480{
    481	struct chtls_sock *csk;
    482
    483	csk = rcu_dereference_sk_user_data(sk);
    484	chtls_purge_recv_queue(sk);
    485	csk->ulp_mode = ULP_MODE_NONE;
    486	chtls_purge_write_queue(sk);
    487	free_tls_keyid(sk);
    488	kref_put(&csk->kref, chtls_sock_release);
    489	if (sk->sk_family == AF_INET)
    490		sk->sk_prot = &tcp_prot;
    491#if IS_ENABLED(CONFIG_IPV6)
    492	else
    493		sk->sk_prot = &tcpv6_prot;
    494#endif
    495	sk->sk_prot->destroy(sk);
    496}
    497
    498static void reset_listen_child(struct sock *child)
    499{
    500	struct chtls_sock *csk = rcu_dereference_sk_user_data(child);
    501	struct sk_buff *skb;
    502
    503	skb = alloc_ctrl_skb(csk->txdata_skb_cache,
    504			     sizeof(struct cpl_abort_req));
    505
    506	chtls_send_reset(child, CPL_ABORT_SEND_RST, skb);
    507	sock_orphan(child);
    508	INC_ORPHAN_COUNT(child);
    509	if (child->sk_state == TCP_CLOSE)
    510		inet_csk_destroy_sock(child);
    511}
    512
    513static void chtls_disconnect_acceptq(struct sock *listen_sk)
    514{
    515	struct request_sock **pprev;
    516
    517	pprev = ACCEPT_QUEUE(listen_sk);
    518	while (*pprev) {
    519		struct request_sock *req = *pprev;
    520
    521		if (req->rsk_ops == &chtls_rsk_ops ||
    522		    req->rsk_ops == &chtls_rsk_opsv6) {
    523			struct sock *child = req->sk;
    524
    525			*pprev = req->dl_next;
    526			sk_acceptq_removed(listen_sk);
    527			reqsk_put(req);
    528			sock_hold(child);
    529			local_bh_disable();
    530			bh_lock_sock(child);
    531			release_tcp_port(child);
    532			reset_listen_child(child);
    533			bh_unlock_sock(child);
    534			local_bh_enable();
    535			sock_put(child);
    536		} else {
    537			pprev = &req->dl_next;
    538		}
    539	}
    540}
    541
    542static int listen_hashfn(const struct sock *sk)
    543{
    544	return ((unsigned long)sk >> 10) & (LISTEN_INFO_HASH_SIZE - 1);
    545}
    546
    547static struct listen_info *listen_hash_add(struct chtls_dev *cdev,
    548					   struct sock *sk,
    549					   unsigned int stid)
    550{
    551	struct listen_info *p = kmalloc(sizeof(*p), GFP_KERNEL);
    552
    553	if (p) {
    554		int key = listen_hashfn(sk);
    555
    556		p->sk = sk;
    557		p->stid = stid;
    558		spin_lock(&cdev->listen_lock);
    559		p->next = cdev->listen_hash_tab[key];
    560		cdev->listen_hash_tab[key] = p;
    561		spin_unlock(&cdev->listen_lock);
    562	}
    563	return p;
    564}
    565
    566static int listen_hash_find(struct chtls_dev *cdev,
    567			    struct sock *sk)
    568{
    569	struct listen_info *p;
    570	int stid = -1;
    571	int key;
    572
    573	key = listen_hashfn(sk);
    574
    575	spin_lock(&cdev->listen_lock);
    576	for (p = cdev->listen_hash_tab[key]; p; p = p->next)
    577		if (p->sk == sk) {
    578			stid = p->stid;
    579			break;
    580		}
    581	spin_unlock(&cdev->listen_lock);
    582	return stid;
    583}
    584
    585static int listen_hash_del(struct chtls_dev *cdev,
    586			   struct sock *sk)
    587{
    588	struct listen_info *p, **prev;
    589	int stid = -1;
    590	int key;
    591
    592	key = listen_hashfn(sk);
    593	prev = &cdev->listen_hash_tab[key];
    594
    595	spin_lock(&cdev->listen_lock);
    596	for (p = *prev; p; prev = &p->next, p = p->next)
    597		if (p->sk == sk) {
    598			stid = p->stid;
    599			*prev = p->next;
    600			kfree(p);
    601			break;
    602		}
    603	spin_unlock(&cdev->listen_lock);
    604	return stid;
    605}
    606
    607static void cleanup_syn_rcv_conn(struct sock *child, struct sock *parent)
    608{
    609	struct request_sock *req;
    610	struct chtls_sock *csk;
    611
    612	csk = rcu_dereference_sk_user_data(child);
    613	req = csk->passive_reap_next;
    614
    615	reqsk_queue_removed(&inet_csk(parent)->icsk_accept_queue, req);
    616	__skb_unlink((struct sk_buff *)&csk->synq, &csk->listen_ctx->synq);
    617	chtls_reqsk_free(req);
    618	csk->passive_reap_next = NULL;
    619}
    620
    621static void chtls_reset_synq(struct listen_ctx *listen_ctx)
    622{
    623	struct sock *listen_sk = listen_ctx->lsk;
    624
    625	while (!skb_queue_empty(&listen_ctx->synq)) {
    626		struct chtls_sock *csk =
    627			container_of((struct synq *)skb_peek
    628				(&listen_ctx->synq), struct chtls_sock, synq);
    629		struct sock *child = csk->sk;
    630
    631		cleanup_syn_rcv_conn(child, listen_sk);
    632		sock_hold(child);
    633		local_bh_disable();
    634		bh_lock_sock(child);
    635		release_tcp_port(child);
    636		reset_listen_child(child);
    637		bh_unlock_sock(child);
    638		local_bh_enable();
    639		sock_put(child);
    640	}
    641}
    642
    643int chtls_listen_start(struct chtls_dev *cdev, struct sock *sk)
    644{
    645	struct net_device *ndev;
    646#if IS_ENABLED(CONFIG_IPV6)
    647	bool clip_valid = false;
    648#endif
    649	struct listen_ctx *ctx;
    650	struct adapter *adap;
    651	struct port_info *pi;
    652	int ret = 0;
    653	int stid;
    654
    655	rcu_read_lock();
    656	ndev = chtls_find_netdev(cdev, sk);
    657	rcu_read_unlock();
    658	if (!ndev)
    659		return -EBADF;
    660
    661	pi = netdev_priv(ndev);
    662	adap = pi->adapter;
    663	if (!(adap->flags & CXGB4_FULL_INIT_DONE))
    664		return -EBADF;
    665
    666	if (listen_hash_find(cdev, sk) >= 0)   /* already have it */
    667		return -EADDRINUSE;
    668
    669	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
    670	if (!ctx)
    671		return -ENOMEM;
    672
    673	__module_get(THIS_MODULE);
    674	ctx->lsk = sk;
    675	ctx->cdev = cdev;
    676	ctx->state = T4_LISTEN_START_PENDING;
    677	skb_queue_head_init(&ctx->synq);
    678
    679	stid = cxgb4_alloc_stid(cdev->tids, sk->sk_family, ctx);
    680	if (stid < 0)
    681		goto free_ctx;
    682
    683	sock_hold(sk);
    684	if (!listen_hash_add(cdev, sk, stid))
    685		goto free_stid;
    686
    687	if (sk->sk_family == PF_INET) {
    688		ret = cxgb4_create_server(ndev, stid,
    689					  inet_sk(sk)->inet_rcv_saddr,
    690					  inet_sk(sk)->inet_sport, 0,
    691					  cdev->lldi->rxq_ids[0]);
    692#if IS_ENABLED(CONFIG_IPV6)
    693	} else {
    694		int addr_type;
    695
    696		addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
    697		if (addr_type != IPV6_ADDR_ANY) {
    698			ret = cxgb4_clip_get(ndev, (const u32 *)
    699					     &sk->sk_v6_rcv_saddr, 1);
    700			if (ret)
    701				goto del_hash;
    702			clip_valid = true;
    703		}
    704		ret = cxgb4_create_server6(ndev, stid,
    705					   &sk->sk_v6_rcv_saddr,
    706					   inet_sk(sk)->inet_sport,
    707					   cdev->lldi->rxq_ids[0]);
    708#endif
    709	}
    710	if (ret > 0)
    711		ret = net_xmit_errno(ret);
    712	if (ret)
    713		goto del_hash;
    714	return 0;
    715del_hash:
    716#if IS_ENABLED(CONFIG_IPV6)
    717	if (clip_valid)
    718		cxgb4_clip_release(ndev, (const u32 *)&sk->sk_v6_rcv_saddr, 1);
    719#endif
    720	listen_hash_del(cdev, sk);
    721free_stid:
    722	cxgb4_free_stid(cdev->tids, stid, sk->sk_family);
    723	sock_put(sk);
    724free_ctx:
    725	kfree(ctx);
    726	module_put(THIS_MODULE);
    727	return -EBADF;
    728}
    729
    730void chtls_listen_stop(struct chtls_dev *cdev, struct sock *sk)
    731{
    732	struct listen_ctx *listen_ctx;
    733	int stid;
    734
    735	stid = listen_hash_del(cdev, sk);
    736	if (stid < 0)
    737		return;
    738
    739	listen_ctx = (struct listen_ctx *)lookup_stid(cdev->tids, stid);
    740	chtls_reset_synq(listen_ctx);
    741
    742	cxgb4_remove_server(cdev->lldi->ports[0], stid,
    743			    cdev->lldi->rxq_ids[0], sk->sk_family == PF_INET6);
    744
    745#if IS_ENABLED(CONFIG_IPV6)
    746	if (sk->sk_family == PF_INET6) {
    747		struct net_device *ndev = chtls_find_netdev(cdev, sk);
    748		int addr_type = 0;
    749
    750		addr_type = ipv6_addr_type((const struct in6_addr *)
    751					  &sk->sk_v6_rcv_saddr);
    752		if (addr_type != IPV6_ADDR_ANY)
    753			cxgb4_clip_release(ndev, (const u32 *)
    754					   &sk->sk_v6_rcv_saddr, 1);
    755	}
    756#endif
    757	chtls_disconnect_acceptq(sk);
    758}
    759
    760static int chtls_pass_open_rpl(struct chtls_dev *cdev, struct sk_buff *skb)
    761{
    762	struct cpl_pass_open_rpl *rpl = cplhdr(skb) + RSS_HDR;
    763	unsigned int stid = GET_TID(rpl);
    764	struct listen_ctx *listen_ctx;
    765
    766	listen_ctx = (struct listen_ctx *)lookup_stid(cdev->tids, stid);
    767	if (!listen_ctx)
    768		return CPL_RET_BUF_DONE;
    769
    770	if (listen_ctx->state == T4_LISTEN_START_PENDING) {
    771		listen_ctx->state = T4_LISTEN_STARTED;
    772		return CPL_RET_BUF_DONE;
    773	}
    774
    775	if (rpl->status != CPL_ERR_NONE) {
    776		pr_info("Unexpected PASS_OPEN_RPL status %u for STID %u\n",
    777			rpl->status, stid);
    778	} else {
    779		cxgb4_free_stid(cdev->tids, stid, listen_ctx->lsk->sk_family);
    780		sock_put(listen_ctx->lsk);
    781		kfree(listen_ctx);
    782		module_put(THIS_MODULE);
    783	}
    784	return CPL_RET_BUF_DONE;
    785}
    786
    787static int chtls_close_listsrv_rpl(struct chtls_dev *cdev, struct sk_buff *skb)
    788{
    789	struct cpl_close_listsvr_rpl *rpl = cplhdr(skb) + RSS_HDR;
    790	struct listen_ctx *listen_ctx;
    791	unsigned int stid;
    792	void *data;
    793
    794	stid = GET_TID(rpl);
    795	data = lookup_stid(cdev->tids, stid);
    796	listen_ctx = (struct listen_ctx *)data;
    797
    798	if (rpl->status != CPL_ERR_NONE) {
    799		pr_info("Unexpected CLOSE_LISTSRV_RPL status %u for STID %u\n",
    800			rpl->status, stid);
    801	} else {
    802		cxgb4_free_stid(cdev->tids, stid, listen_ctx->lsk->sk_family);
    803		sock_put(listen_ctx->lsk);
    804		kfree(listen_ctx);
    805		module_put(THIS_MODULE);
    806	}
    807	return CPL_RET_BUF_DONE;
    808}
    809
    810static void chtls_purge_wr_queue(struct sock *sk)
    811{
    812	struct sk_buff *skb;
    813
    814	while ((skb = dequeue_wr(sk)) != NULL)
    815		kfree_skb(skb);
    816}
    817
    818static void chtls_release_resources(struct sock *sk)
    819{
    820	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
    821	struct chtls_dev *cdev = csk->cdev;
    822	unsigned int tid = csk->tid;
    823	struct tid_info *tids;
    824
    825	if (!cdev)
    826		return;
    827
    828	tids = cdev->tids;
    829	kfree_skb(csk->txdata_skb_cache);
    830	csk->txdata_skb_cache = NULL;
    831
    832	if (csk->wr_credits != csk->wr_max_credits) {
    833		chtls_purge_wr_queue(sk);
    834		chtls_reset_wr_list(csk);
    835	}
    836
    837	if (csk->l2t_entry) {
    838		cxgb4_l2t_release(csk->l2t_entry);
    839		csk->l2t_entry = NULL;
    840	}
    841
    842	if (sk->sk_state != TCP_SYN_SENT) {
    843		cxgb4_remove_tid(tids, csk->port_id, tid, sk->sk_family);
    844		sock_put(sk);
    845	}
    846}
    847
    848static void chtls_conn_done(struct sock *sk)
    849{
    850	if (sock_flag(sk, SOCK_DEAD))
    851		chtls_purge_receive_queue(sk);
    852	sk_wakeup_sleepers(sk, 0);
    853	tcp_done(sk);
    854}
    855
    856static void do_abort_syn_rcv(struct sock *child, struct sock *parent)
    857{
    858	/*
     859	 * If the server is still open, we clean up the child connection;
     860	 * otherwise the server has already done the cleanup while purging
     861	 * its SYN queue, and the skb was just sitting in its backlog.
    862	 */
    863	if (likely(parent->sk_state == TCP_LISTEN)) {
    864		cleanup_syn_rcv_conn(child, parent);
     865		/* Without the call to sock_orphan() below we leak the
     866		 * socket resource in the syn_flood test, because
     867		 * inet_csk_destroy_sock() will not be called from
     868		 * tcp_done() since the SOCK_DEAD flag is not set.
     869		 * The kernel handles this differently, as it creates
     870		 * the new socket only after the 3-way handshake is done.
     871		 */
    872		sock_orphan(child);
    873		INC_ORPHAN_COUNT(child);
    874		chtls_release_resources(child);
    875		chtls_conn_done(child);
    876	} else {
    877		if (csk_flag(child, CSK_RST_ABORTED)) {
    878			chtls_release_resources(child);
    879			chtls_conn_done(child);
    880		}
    881	}
    882}
    883
    884static void pass_open_abort(struct sock *child, struct sock *parent,
    885			    struct sk_buff *skb)
    886{
    887	do_abort_syn_rcv(child, parent);
    888	kfree_skb(skb);
    889}
    890
    891static void bl_pass_open_abort(struct sock *lsk, struct sk_buff *skb)
    892{
    893	pass_open_abort(skb->sk, lsk, skb);
    894}
    895
    896static void chtls_pass_open_arp_failure(struct sock *sk,
    897					struct sk_buff *skb)
    898{
    899	const struct request_sock *oreq;
    900	struct chtls_sock *csk;
    901	struct chtls_dev *cdev;
    902	struct sock *parent;
    903	void *data;
    904
    905	csk = rcu_dereference_sk_user_data(sk);
    906	cdev = csk->cdev;
    907
    908	/*
    909	 * If the connection is being aborted due to the parent listening
     910	 * socket going away, there's nothing to do; the ABORT_REQ will close
    911	 * the connection.
    912	 */
    913	if (csk_flag(sk, CSK_ABORT_RPL_PENDING)) {
    914		kfree_skb(skb);
    915		return;
    916	}
    917
    918	oreq = csk->passive_reap_next;
    919	data = lookup_stid(cdev->tids, oreq->ts_recent);
    920	parent = ((struct listen_ctx *)data)->lsk;
    921
    922	bh_lock_sock(parent);
    923	if (!sock_owned_by_user(parent)) {
    924		pass_open_abort(sk, parent, skb);
    925	} else {
    926		BLOG_SKB_CB(skb)->backlog_rcv = bl_pass_open_abort;
    927		__sk_add_backlog(parent, skb);
    928	}
    929	bh_unlock_sock(parent);
    930}
    931
    932static void chtls_accept_rpl_arp_failure(void *handle,
    933					 struct sk_buff *skb)
    934{
    935	struct sock *sk = (struct sock *)handle;
    936
    937	sock_hold(sk);
    938	process_cpl_msg(chtls_pass_open_arp_failure, sk, skb);
    939	sock_put(sk);
    940}
    941
    942static unsigned int chtls_select_mss(const struct chtls_sock *csk,
    943				     unsigned int pmtu,
    944				     struct cpl_pass_accept_req *req)
    945{
    946	struct chtls_dev *cdev;
    947	struct dst_entry *dst;
    948	unsigned int tcpoptsz;
    949	unsigned int iphdrsz;
    950	unsigned int mtu_idx;
    951	struct tcp_sock *tp;
    952	unsigned int mss;
    953	struct sock *sk;
    954
    955	mss = ntohs(req->tcpopt.mss);
    956	sk = csk->sk;
    957	dst = __sk_dst_get(sk);
    958	cdev = csk->cdev;
    959	tp = tcp_sk(sk);
    960	tcpoptsz = 0;
    961
    962#if IS_ENABLED(CONFIG_IPV6)
    963	if (sk->sk_family == AF_INET6)
    964		iphdrsz = sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
    965	else
    966#endif
    967		iphdrsz = sizeof(struct iphdr) + sizeof(struct tcphdr);
    968	if (req->tcpopt.tstamp)
    969		tcpoptsz += round_up(TCPOLEN_TIMESTAMP, 4);
    970
    971	tp->advmss = dst_metric_advmss(dst);
    972	if (USER_MSS(tp) && tp->advmss > USER_MSS(tp))
    973		tp->advmss = USER_MSS(tp);
    974	if (tp->advmss > pmtu - iphdrsz)
    975		tp->advmss = pmtu - iphdrsz;
    976	if (mss && tp->advmss > mss)
    977		tp->advmss = mss;
    978
    979	tp->advmss = cxgb4_best_aligned_mtu(cdev->lldi->mtus,
    980					    iphdrsz + tcpoptsz,
    981					    tp->advmss - tcpoptsz,
    982					    8, &mtu_idx);
    983	tp->advmss -= iphdrsz;
    984
    985	inet_csk(sk)->icsk_pmtu_cookie = pmtu;
    986	return mtu_idx;
    987}
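        /*
         * For scale: iphdrsz is 20 + 20 = 40 bytes for IPv4 (40 + 20 = 60 for
         * IPv6) and tcpoptsz is round_up(TCPOLEN_TIMESTAMP, 4) = 12 bytes when
         * the peer offered timestamps, so a 1500-byte path MTU clamps the IPv4
         * advertised MSS to at most 1460 before cxgb4_best_aligned_mtu() snaps
         * it to an entry of the adapter's MTU table.
         */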
    988
    989static unsigned int select_rcv_wscale(int space, int wscale_ok, int win_clamp)
    990{
    991	int wscale = 0;
    992
    993	if (space > MAX_RCV_WND)
    994		space = MAX_RCV_WND;
    995	if (win_clamp && win_clamp < space)
    996		space = win_clamp;
    997
    998	if (wscale_ok) {
    999		while (wscale < 14 && (65535 << wscale) < space)
   1000			wscale++;
   1001	}
   1002	return wscale;
   1003}
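        /*
         * Example: with wscale_ok set and 512 KiB of receive space (and no
         * smaller win_clamp), the loop stops at wscale = 4, because
         * 65535 << 3 = 524280 is still below 524288 while 65535 << 4 is not.
         */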
   1004
   1005static void chtls_pass_accept_rpl(struct sk_buff *skb,
   1006				  struct cpl_pass_accept_req *req,
   1007				  unsigned int tid)
   1008
   1009{
   1010	struct cpl_t5_pass_accept_rpl *rpl5;
   1011	struct cxgb4_lld_info *lldi;
   1012	const struct tcphdr *tcph;
   1013	const struct tcp_sock *tp;
   1014	struct chtls_sock *csk;
   1015	unsigned int len;
   1016	struct sock *sk;
   1017	u32 opt2, hlen;
   1018	u64 opt0;
   1019
   1020	sk = skb->sk;
   1021	tp = tcp_sk(sk);
   1022	csk = sk->sk_user_data;
   1023	csk->tid = tid;
   1024	lldi = csk->cdev->lldi;
   1025	len = roundup(sizeof(*rpl5), 16);
   1026
   1027	rpl5 = __skb_put_zero(skb, len);
   1028	INIT_TP_WR(rpl5, tid);
   1029
   1030	OPCODE_TID(rpl5) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
   1031						     csk->tid));
   1032	csk->mtu_idx = chtls_select_mss(csk, dst_mtu(__sk_dst_get(sk)),
   1033					req);
   1034	opt0 = TCAM_BYPASS_F |
   1035	       WND_SCALE_V(RCV_WSCALE(tp)) |
   1036	       MSS_IDX_V(csk->mtu_idx) |
   1037	       L2T_IDX_V(csk->l2t_entry->idx) |
   1038	       NAGLE_V(!(tp->nonagle & TCP_NAGLE_OFF)) |
   1039	       TX_CHAN_V(csk->tx_chan) |
   1040	       SMAC_SEL_V(csk->smac_idx) |
   1041	       DSCP_V(csk->tos >> 2) |
   1042	       ULP_MODE_V(ULP_MODE_TLS) |
   1043	       RCV_BUFSIZ_V(min(tp->rcv_wnd >> 10, RCV_BUFSIZ_M));
   1044
   1045	opt2 = RX_CHANNEL_V(0) |
   1046		RSS_QUEUE_VALID_F | RSS_QUEUE_V(csk->rss_qid);
   1047
   1048	if (!is_t5(lldi->adapter_type))
   1049		opt2 |= RX_FC_DISABLE_F;
   1050	if (req->tcpopt.tstamp)
   1051		opt2 |= TSTAMPS_EN_F;
   1052	if (req->tcpopt.sack)
   1053		opt2 |= SACK_EN_F;
   1054	hlen = ntohl(req->hdr_len);
   1055
   1056	tcph = (struct tcphdr *)((u8 *)(req + 1) +
   1057			T6_ETH_HDR_LEN_G(hlen) + T6_IP_HDR_LEN_G(hlen));
   1058	if (tcph->ece && tcph->cwr)
   1059		opt2 |= CCTRL_ECN_V(1);
   1060	opt2 |= CONG_CNTRL_V(CONG_ALG_NEWRENO);
   1061	opt2 |= T5_ISS_F;
   1062	opt2 |= T5_OPT_2_VALID_F;
   1063	opt2 |= WND_SCALE_EN_V(WSCALE_OK(tp));
   1064	rpl5->opt0 = cpu_to_be64(opt0);
   1065	rpl5->opt2 = cpu_to_be32(opt2);
   1066	rpl5->iss = cpu_to_be32((prandom_u32() & ~7UL) - 1);
   1067	set_wr_txq(skb, CPL_PRIORITY_SETUP, csk->port_id);
   1068	t4_set_arp_err_handler(skb, sk, chtls_accept_rpl_arp_failure);
   1069	cxgb4_l2t_send(csk->egress_dev, skb, csk->l2t_entry);
   1070}
   1071
   1072static void inet_inherit_port(struct inet_hashinfo *hash_info,
   1073			      struct sock *lsk, struct sock *newsk)
   1074{
   1075	local_bh_disable();
   1076	__inet_inherit_port(lsk, newsk);
   1077	local_bh_enable();
   1078}
   1079
   1080static int chtls_backlog_rcv(struct sock *sk, struct sk_buff *skb)
   1081{
   1082	if (skb->protocol) {
   1083		kfree_skb(skb);
   1084		return 0;
   1085	}
   1086	BLOG_SKB_CB(skb)->backlog_rcv(sk, skb);
   1087	return 0;
   1088}
   1089
   1090static void chtls_set_tcp_window(struct chtls_sock *csk)
   1091{
   1092	struct net_device *ndev = csk->egress_dev;
   1093	struct port_info *pi = netdev_priv(ndev);
   1094	unsigned int linkspeed;
   1095	u8 scale;
   1096
   1097	linkspeed = pi->link_cfg.speed;
   1098	scale = linkspeed / SPEED_10000;
   1099#define CHTLS_10G_RCVWIN (256 * 1024)
   1100	csk->rcv_win = CHTLS_10G_RCVWIN;
   1101	if (scale)
   1102		csk->rcv_win *= scale;
   1103#define CHTLS_10G_SNDWIN (256 * 1024)
   1104	csk->snd_win = CHTLS_10G_SNDWIN;
   1105	if (scale)
   1106		csk->snd_win *= scale;
   1107}
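        /*
         * pi->link_cfg.speed is reported in Mb/s, so a 100Gb/s port yields
         * scale = 100000 / SPEED_10000 = 10 and both windows grow to
         * 10 * 256KB = 2.5MB, while links below 10Gb/s leave scale at 0 and
         * keep the 256KB defaults.
         */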
   1108
   1109static struct sock *chtls_recv_sock(struct sock *lsk,
   1110				    struct request_sock *oreq,
   1111				    void *network_hdr,
   1112				    const struct cpl_pass_accept_req *req,
   1113				    struct chtls_dev *cdev)
   1114{
   1115	struct adapter *adap = pci_get_drvdata(cdev->pdev);
   1116	struct neighbour *n = NULL;
   1117	struct inet_sock *newinet;
   1118	const struct iphdr *iph;
   1119	struct tls_context *ctx;
   1120	struct net_device *ndev;
   1121	struct chtls_sock *csk;
   1122	struct dst_entry *dst;
   1123	struct tcp_sock *tp;
   1124	struct sock *newsk;
   1125	bool found = false;
   1126	u16 port_id;
   1127	int rxq_idx;
   1128	int step, i;
   1129
   1130	iph = (const struct iphdr *)network_hdr;
   1131	newsk = tcp_create_openreq_child(lsk, oreq, cdev->askb);
   1132	if (!newsk)
   1133		goto free_oreq;
   1134
   1135	if (lsk->sk_family == AF_INET) {
   1136		dst = inet_csk_route_child_sock(lsk, newsk, oreq);
   1137		if (!dst)
   1138			goto free_sk;
   1139
   1140		n = dst_neigh_lookup(dst, &iph->saddr);
   1141#if IS_ENABLED(CONFIG_IPV6)
   1142	} else {
   1143		const struct ipv6hdr *ip6h;
   1144		struct flowi6 fl6;
   1145
   1146		ip6h = (const struct ipv6hdr *)network_hdr;
   1147		memset(&fl6, 0, sizeof(fl6));
   1148		fl6.flowi6_proto = IPPROTO_TCP;
   1149		fl6.saddr = ip6h->daddr;
   1150		fl6.daddr = ip6h->saddr;
   1151		fl6.fl6_dport = inet_rsk(oreq)->ir_rmt_port;
   1152		fl6.fl6_sport = htons(inet_rsk(oreq)->ir_num);
   1153		security_req_classify_flow(oreq, flowi6_to_flowi_common(&fl6));
   1154		dst = ip6_dst_lookup_flow(sock_net(lsk), lsk, &fl6, NULL);
   1155		if (IS_ERR(dst))
   1156			goto free_sk;
   1157		n = dst_neigh_lookup(dst, &ip6h->saddr);
   1158#endif
   1159	}
   1160	if (!n || !n->dev)
   1161		goto free_dst;
   1162
   1163	ndev = n->dev;
   1164	if (is_vlan_dev(ndev))
   1165		ndev = vlan_dev_real_dev(ndev);
   1166
   1167	for_each_port(adap, i)
   1168		if (cdev->ports[i] == ndev)
   1169			found = true;
   1170
   1171	if (!found)
   1172		goto free_dst;
   1173
   1174	port_id = cxgb4_port_idx(ndev);
   1175
   1176	csk = chtls_sock_create(cdev);
   1177	if (!csk)
   1178		goto free_dst;
   1179
   1180	csk->l2t_entry = cxgb4_l2t_get(cdev->lldi->l2t, n, ndev, 0);
   1181	if (!csk->l2t_entry)
   1182		goto free_csk;
   1183
   1184	newsk->sk_user_data = csk;
   1185	newsk->sk_backlog_rcv = chtls_backlog_rcv;
   1186
   1187	tp = tcp_sk(newsk);
   1188	newinet = inet_sk(newsk);
   1189
   1190	if (iph->version == 0x4) {
   1191		newinet->inet_daddr = iph->saddr;
   1192		newinet->inet_rcv_saddr = iph->daddr;
   1193		newinet->inet_saddr = iph->daddr;
   1194#if IS_ENABLED(CONFIG_IPV6)
   1195	} else {
   1196		struct tcp6_sock *newtcp6sk = (struct tcp6_sock *)newsk;
   1197		struct inet_request_sock *treq = inet_rsk(oreq);
   1198		struct ipv6_pinfo *newnp = inet6_sk(newsk);
   1199		struct ipv6_pinfo *np = inet6_sk(lsk);
   1200
   1201		inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
   1202		memcpy(newnp, np, sizeof(struct ipv6_pinfo));
   1203		newsk->sk_v6_daddr = treq->ir_v6_rmt_addr;
   1204		newsk->sk_v6_rcv_saddr = treq->ir_v6_loc_addr;
   1205		inet6_sk(newsk)->saddr = treq->ir_v6_loc_addr;
   1206		newnp->ipv6_fl_list = NULL;
   1207		newnp->pktoptions = NULL;
   1208		newsk->sk_bound_dev_if = treq->ir_iif;
   1209		newinet->inet_opt = NULL;
   1210		newinet->inet_daddr = LOOPBACK4_IPV6;
   1211		newinet->inet_saddr = LOOPBACK4_IPV6;
   1212#endif
   1213	}
   1214
   1215	oreq->ts_recent = PASS_OPEN_TID_G(ntohl(req->tos_stid));
   1216	sk_setup_caps(newsk, dst);
   1217	ctx = tls_get_ctx(lsk);
   1218	newsk->sk_destruct = ctx->sk_destruct;
   1219	newsk->sk_prot_creator = lsk->sk_prot_creator;
   1220	csk->sk = newsk;
   1221	csk->passive_reap_next = oreq;
   1222	csk->tx_chan = cxgb4_port_chan(ndev);
   1223	csk->port_id = port_id;
   1224	csk->egress_dev = ndev;
   1225	csk->tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid));
   1226	chtls_set_tcp_window(csk);
   1227	tp->rcv_wnd = csk->rcv_win;
   1228	csk->sndbuf = csk->snd_win;
   1229	csk->ulp_mode = ULP_MODE_TLS;
   1230	step = cdev->lldi->nrxq / cdev->lldi->nchan;
   1231	rxq_idx = port_id * step;
   1232	rxq_idx += cdev->round_robin_cnt++ % step;
   1233	csk->rss_qid = cdev->lldi->rxq_ids[rxq_idx];
   1234	csk->txq_idx = (rxq_idx < cdev->lldi->ntxq) ? rxq_idx :
   1235			port_id * step;
   1236	csk->sndbuf = newsk->sk_sndbuf;
   1237	csk->smac_idx = ((struct port_info *)netdev_priv(ndev))->smt_idx;
   1238	RCV_WSCALE(tp) = select_rcv_wscale(tcp_full_space(newsk),
   1239					   sock_net(newsk)->
   1240						ipv4.sysctl_tcp_window_scaling,
   1241					   tp->window_clamp);
   1242	neigh_release(n);
   1243	inet_inherit_port(&tcp_hashinfo, lsk, newsk);
   1244	csk_set_flag(csk, CSK_CONN_INLINE);
   1245	bh_unlock_sock(newsk); /* tcp_create_openreq_child ->sk_clone_lock */
   1246
   1247	return newsk;
   1248free_csk:
   1249	chtls_sock_release(&csk->kref);
   1250free_dst:
   1251	if (n)
   1252		neigh_release(n);
   1253	dst_release(dst);
   1254free_sk:
   1255	inet_csk_prepare_forced_close(newsk);
   1256	tcp_done(newsk);
   1257free_oreq:
   1258	chtls_reqsk_free(oreq);
   1259	return NULL;
   1260}
   1261
   1262/*
    1263 * Populate a TID_RELEASE WR.  The skb must already be properly sized.
   1264 */
   1265static  void mk_tid_release(struct sk_buff *skb,
   1266			    unsigned int chan, unsigned int tid)
   1267{
   1268	struct cpl_tid_release *req;
   1269	unsigned int len;
   1270
   1271	len = roundup(sizeof(struct cpl_tid_release), 16);
   1272	req = (struct cpl_tid_release *)__skb_put(skb, len);
   1273	memset(req, 0, len);
   1274	set_wr_txq(skb, CPL_PRIORITY_SETUP, chan);
   1275	INIT_TP_WR_CPL(req, CPL_TID_RELEASE, tid);
   1276}
   1277
   1278static int chtls_get_module(struct sock *sk)
   1279{
   1280	struct inet_connection_sock *icsk = inet_csk(sk);
   1281
   1282	if (!try_module_get(icsk->icsk_ulp_ops->owner))
   1283		return -1;
   1284
   1285	return 0;
   1286}
   1287
   1288static void chtls_pass_accept_request(struct sock *sk,
   1289				      struct sk_buff *skb)
   1290{
   1291	struct cpl_t5_pass_accept_rpl *rpl;
   1292	struct cpl_pass_accept_req *req;
   1293	struct listen_ctx *listen_ctx;
   1294	struct vlan_ethhdr *vlan_eh;
   1295	struct request_sock *oreq;
   1296	struct sk_buff *reply_skb;
   1297	struct chtls_sock *csk;
   1298	struct chtls_dev *cdev;
   1299	struct ipv6hdr *ip6h;
   1300	struct tcphdr *tcph;
   1301	struct sock *newsk;
   1302	struct ethhdr *eh;
   1303	struct iphdr *iph;
   1304	void *network_hdr;
   1305	unsigned int stid;
   1306	unsigned int len;
   1307	unsigned int tid;
   1308	bool th_ecn, ect;
   1309	__u8 ip_dsfield; /* IPv4 tos or IPv6 dsfield */
   1310	u16 eth_hdr_len;
   1311	bool ecn_ok;
   1312
   1313	req = cplhdr(skb) + RSS_HDR;
   1314	tid = GET_TID(req);
   1315	cdev = BLOG_SKB_CB(skb)->cdev;
   1316	newsk = lookup_tid(cdev->tids, tid);
   1317	stid = PASS_OPEN_TID_G(ntohl(req->tos_stid));
   1318	if (newsk) {
   1319		pr_info("tid (%d) already in use\n", tid);
   1320		return;
   1321	}
   1322
   1323	len = roundup(sizeof(*rpl), 16);
   1324	reply_skb = alloc_skb(len, GFP_ATOMIC);
   1325	if (!reply_skb) {
   1326		cxgb4_remove_tid(cdev->tids, 0, tid, sk->sk_family);
   1327		kfree_skb(skb);
   1328		return;
   1329	}
   1330
   1331	if (sk->sk_state != TCP_LISTEN)
   1332		goto reject;
   1333
   1334	if (inet_csk_reqsk_queue_is_full(sk))
   1335		goto reject;
   1336
   1337	if (sk_acceptq_is_full(sk))
   1338		goto reject;
   1339
   1340
   1341	eth_hdr_len = T6_ETH_HDR_LEN_G(ntohl(req->hdr_len));
   1342	if (eth_hdr_len == ETH_HLEN) {
   1343		eh = (struct ethhdr *)(req + 1);
   1344		iph = (struct iphdr *)(eh + 1);
   1345		ip6h = (struct ipv6hdr *)(eh + 1);
   1346		network_hdr = (void *)(eh + 1);
   1347	} else {
   1348		vlan_eh = (struct vlan_ethhdr *)(req + 1);
   1349		iph = (struct iphdr *)(vlan_eh + 1);
   1350		ip6h = (struct ipv6hdr *)(vlan_eh + 1);
   1351		network_hdr = (void *)(vlan_eh + 1);
   1352	}
   1353
   1354	if (iph->version == 0x4) {
   1355		tcph = (struct tcphdr *)(iph + 1);
   1356		skb_set_network_header(skb, (void *)iph - (void *)req);
   1357		oreq = inet_reqsk_alloc(&chtls_rsk_ops, sk, true);
   1358	} else {
   1359		tcph = (struct tcphdr *)(ip6h + 1);
   1360		skb_set_network_header(skb, (void *)ip6h - (void *)req);
   1361		oreq = inet_reqsk_alloc(&chtls_rsk_opsv6, sk, false);
   1362	}
   1363
   1364	if (!oreq)
   1365		goto reject;
   1366
   1367	oreq->rsk_rcv_wnd = 0;
   1368	oreq->rsk_window_clamp = 0;
   1369	oreq->syncookie = 0;
   1370	oreq->mss = 0;
   1371	oreq->ts_recent = 0;
   1372
   1373	tcp_rsk(oreq)->tfo_listener = false;
   1374	tcp_rsk(oreq)->rcv_isn = ntohl(tcph->seq);
   1375	chtls_set_req_port(oreq, tcph->source, tcph->dest);
   1376	if (iph->version == 0x4) {
   1377		chtls_set_req_addr(oreq, iph->daddr, iph->saddr);
   1378		ip_dsfield = ipv4_get_dsfield(iph);
   1379#if IS_ENABLED(CONFIG_IPV6)
   1380	} else {
   1381		inet_rsk(oreq)->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
   1382		inet_rsk(oreq)->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
   1383		ip_dsfield = ipv6_get_dsfield(ipv6_hdr(skb));
   1384#endif
   1385	}
   1386	if (req->tcpopt.wsf <= 14 &&
   1387	    sock_net(sk)->ipv4.sysctl_tcp_window_scaling) {
   1388		inet_rsk(oreq)->wscale_ok = 1;
   1389		inet_rsk(oreq)->snd_wscale = req->tcpopt.wsf;
   1390	}
   1391	inet_rsk(oreq)->ir_iif = sk->sk_bound_dev_if;
   1392	th_ecn = tcph->ece && tcph->cwr;
   1393	if (th_ecn) {
   1394		ect = !INET_ECN_is_not_ect(ip_dsfield);
   1395		ecn_ok = sock_net(sk)->ipv4.sysctl_tcp_ecn;
   1396		if ((!ect && ecn_ok) || tcp_ca_needs_ecn(sk))
   1397			inet_rsk(oreq)->ecn_ok = 1;
   1398	}
   1399
   1400	newsk = chtls_recv_sock(sk, oreq, network_hdr, req, cdev);
   1401	if (!newsk)
   1402		goto reject;
   1403
   1404	if (chtls_get_module(newsk))
   1405		goto reject;
   1406	inet_csk_reqsk_queue_added(sk);
   1407	reply_skb->sk = newsk;
   1408	chtls_install_cpl_ops(newsk);
   1409	cxgb4_insert_tid(cdev->tids, newsk, tid, newsk->sk_family);
   1410	csk = rcu_dereference_sk_user_data(newsk);
   1411	listen_ctx = (struct listen_ctx *)lookup_stid(cdev->tids, stid);
   1412	csk->listen_ctx = listen_ctx;
   1413	__skb_queue_tail(&listen_ctx->synq, (struct sk_buff *)&csk->synq);
   1414	chtls_pass_accept_rpl(reply_skb, req, tid);
   1415	kfree_skb(skb);
   1416	return;
   1417
   1418reject:
   1419	mk_tid_release(reply_skb, 0, tid);
   1420	cxgb4_ofld_send(cdev->lldi->ports[0], reply_skb);
   1421	kfree_skb(skb);
   1422}
   1423
   1424/*
   1425 * Handle a CPL_PASS_ACCEPT_REQ message.
   1426 */
   1427static int chtls_pass_accept_req(struct chtls_dev *cdev, struct sk_buff *skb)
   1428{
   1429	struct cpl_pass_accept_req *req = cplhdr(skb) + RSS_HDR;
   1430	struct listen_ctx *ctx;
   1431	unsigned int stid;
   1432	unsigned int tid;
   1433	struct sock *lsk;
   1434	void *data;
   1435
   1436	stid = PASS_OPEN_TID_G(ntohl(req->tos_stid));
   1437	tid = GET_TID(req);
   1438
   1439	data = lookup_stid(cdev->tids, stid);
   1440	if (!data)
   1441		return 1;
   1442
   1443	ctx = (struct listen_ctx *)data;
   1444	lsk = ctx->lsk;
   1445
   1446	if (unlikely(tid_out_of_range(cdev->tids, tid))) {
   1447		pr_info("passive open TID %u too large\n", tid);
   1448		return 1;
   1449	}
   1450
   1451	BLOG_SKB_CB(skb)->cdev = cdev;
   1452	process_cpl_msg(chtls_pass_accept_request, lsk, skb);
   1453	return 0;
   1454}
   1455
   1456/*
   1457 * Completes some final bits of initialization for just established connections
   1458 * and changes their state to TCP_ESTABLISHED.
   1459 *
   1460 * snd_isn here is the ISN after the SYN, i.e., the true ISN + 1.
   1461 */
   1462static void make_established(struct sock *sk, u32 snd_isn, unsigned int opt)
   1463{
   1464	struct tcp_sock *tp = tcp_sk(sk);
   1465
   1466	tp->pushed_seq = snd_isn;
   1467	tp->write_seq = snd_isn;
   1468	tp->snd_nxt = snd_isn;
   1469	tp->snd_una = snd_isn;
   1470	inet_sk(sk)->inet_id = prandom_u32();
   1471	assign_rxopt(sk, opt);
   1472
   1473	if (tp->rcv_wnd > (RCV_BUFSIZ_M << 10))
   1474		tp->rcv_wup -= tp->rcv_wnd - (RCV_BUFSIZ_M << 10);
   1475
   1476	smp_mb();
   1477	tcp_set_state(sk, TCP_ESTABLISHED);
   1478}
   1479
   1480static void chtls_abort_conn(struct sock *sk, struct sk_buff *skb)
   1481{
   1482	struct sk_buff *abort_skb;
   1483
   1484	abort_skb = alloc_skb(sizeof(struct cpl_abort_req), GFP_ATOMIC);
   1485	if (abort_skb)
   1486		chtls_send_reset(sk, CPL_ABORT_SEND_RST, abort_skb);
   1487}
   1488
   1489static struct sock *reap_list;
   1490static DEFINE_SPINLOCK(reap_list_lock);
   1491
   1492/*
   1493 * Process the reap list.
   1494 */
   1495DECLARE_TASK_FUNC(process_reap_list, task_param)
   1496{
   1497	spin_lock_bh(&reap_list_lock);
   1498	while (reap_list) {
   1499		struct sock *sk = reap_list;
   1500		struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
   1501
   1502		reap_list = csk->passive_reap_next;
   1503		csk->passive_reap_next = NULL;
   1504		spin_unlock(&reap_list_lock);
   1505		sock_hold(sk);
   1506
   1507		bh_lock_sock(sk);
   1508		chtls_abort_conn(sk, NULL);
   1509		sock_orphan(sk);
   1510		if (sk->sk_state == TCP_CLOSE)
   1511			inet_csk_destroy_sock(sk);
   1512		bh_unlock_sock(sk);
   1513		sock_put(sk);
   1514		spin_lock(&reap_list_lock);
   1515	}
   1516	spin_unlock_bh(&reap_list_lock);
   1517}
   1518
   1519static DECLARE_WORK(reap_task, process_reap_list);
   1520
   1521static void add_to_reap_list(struct sock *sk)
   1522{
   1523	struct chtls_sock *csk = sk->sk_user_data;
   1524
   1525	local_bh_disable();
   1526	release_tcp_port(sk); /* release the port immediately */
   1527
   1528	spin_lock(&reap_list_lock);
   1529	csk->passive_reap_next = reap_list;
   1530	reap_list = sk;
   1531	if (!csk->passive_reap_next)
   1532		schedule_work(&reap_task);
   1533	spin_unlock(&reap_list_lock);
   1534	local_bh_enable();
   1535}
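        /*
         * Children that cannot be handed to the listener (e.g. when its accept
         * queue is full in add_pass_open_to_parent() below) are chained onto
         * the global reap_list through csk->passive_reap_next; the reap_task
         * work item later aborts and destroys them in process context.
         */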
   1536
   1537static void add_pass_open_to_parent(struct sock *child, struct sock *lsk,
   1538				    struct chtls_dev *cdev)
   1539{
   1540	struct request_sock *oreq;
   1541	struct chtls_sock *csk;
   1542
   1543	if (lsk->sk_state != TCP_LISTEN)
   1544		return;
   1545
   1546	csk = child->sk_user_data;
   1547	oreq = csk->passive_reap_next;
   1548	csk->passive_reap_next = NULL;
   1549
   1550	reqsk_queue_removed(&inet_csk(lsk)->icsk_accept_queue, oreq);
   1551	__skb_unlink((struct sk_buff *)&csk->synq, &csk->listen_ctx->synq);
   1552
   1553	if (sk_acceptq_is_full(lsk)) {
   1554		chtls_reqsk_free(oreq);
   1555		add_to_reap_list(child);
   1556	} else {
   1557		refcount_set(&oreq->rsk_refcnt, 1);
   1558		inet_csk_reqsk_queue_add(lsk, oreq, child);
   1559		lsk->sk_data_ready(lsk);
   1560	}
   1561}
   1562
   1563static void bl_add_pass_open_to_parent(struct sock *lsk, struct sk_buff *skb)
   1564{
   1565	struct sock *child = skb->sk;
   1566
   1567	skb->sk = NULL;
   1568	add_pass_open_to_parent(child, lsk, BLOG_SKB_CB(skb)->cdev);
   1569	kfree_skb(skb);
   1570}
   1571
   1572static int chtls_pass_establish(struct chtls_dev *cdev, struct sk_buff *skb)
   1573{
   1574	struct cpl_pass_establish *req = cplhdr(skb) + RSS_HDR;
   1575	struct chtls_sock *csk;
   1576	struct sock *lsk, *sk;
   1577	unsigned int hwtid;
   1578
   1579	hwtid = GET_TID(req);
   1580	sk = lookup_tid(cdev->tids, hwtid);
   1581	if (!sk)
   1582		return (CPL_RET_UNKNOWN_TID | CPL_RET_BUF_DONE);
   1583
   1584	bh_lock_sock(sk);
   1585	if (unlikely(sock_owned_by_user(sk))) {
   1586		kfree_skb(skb);
   1587	} else {
   1588		unsigned int stid;
   1589		void *data;
   1590
   1591		csk = sk->sk_user_data;
   1592		csk->wr_max_credits = 64;
   1593		csk->wr_credits = 64;
   1594		csk->wr_unacked = 0;
   1595		make_established(sk, ntohl(req->snd_isn), ntohs(req->tcp_opt));
   1596		stid = PASS_OPEN_TID_G(ntohl(req->tos_stid));
   1597		sk->sk_state_change(sk);
   1598		if (unlikely(sk->sk_socket))
   1599			sk_wake_async(sk, 0, POLL_OUT);
   1600
   1601		data = lookup_stid(cdev->tids, stid);
   1602		if (!data) {
   1603			/* listening server close */
   1604			kfree_skb(skb);
   1605			goto unlock;
   1606		}
   1607		lsk = ((struct listen_ctx *)data)->lsk;
   1608
   1609		bh_lock_sock(lsk);
   1610		if (unlikely(skb_queue_empty(&csk->listen_ctx->synq))) {
   1611			/* removed from synq */
   1612			bh_unlock_sock(lsk);
   1613			kfree_skb(skb);
   1614			goto unlock;
   1615		}
   1616
   1617		if (likely(!sock_owned_by_user(lsk))) {
   1618			kfree_skb(skb);
   1619			add_pass_open_to_parent(sk, lsk, cdev);
   1620		} else {
   1621			skb->sk = sk;
   1622			BLOG_SKB_CB(skb)->cdev = cdev;
   1623			BLOG_SKB_CB(skb)->backlog_rcv =
   1624				bl_add_pass_open_to_parent;
   1625			__sk_add_backlog(lsk, skb);
   1626		}
   1627		bh_unlock_sock(lsk);
   1628	}
   1629unlock:
   1630	bh_unlock_sock(sk);
   1631	return 0;
   1632}
   1633
   1634/*
   1635 * Handle receipt of an urgent pointer.
   1636 */
   1637static void handle_urg_ptr(struct sock *sk, u32 urg_seq)
   1638{
   1639	struct tcp_sock *tp = tcp_sk(sk);
   1640
   1641	urg_seq--;
   1642	if (tp->urg_data && !after(urg_seq, tp->urg_seq))
   1643		return;	/* duplicate pointer */
   1644
   1645	sk_send_sigurg(sk);
   1646	if (tp->urg_seq == tp->copied_seq && tp->urg_data &&
   1647	    !sock_flag(sk, SOCK_URGINLINE) &&
   1648	    tp->copied_seq != tp->rcv_nxt) {
   1649		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
   1650
   1651		tp->copied_seq++;
   1652		if (skb && tp->copied_seq - ULP_SKB_CB(skb)->seq >= skb->len)
   1653			chtls_free_skb(sk, skb);
   1654	}
   1655
   1656	tp->urg_data = TCP_URG_NOTYET;
   1657	tp->urg_seq = urg_seq;
   1658}
   1659
   1660static void check_sk_callbacks(struct chtls_sock *csk)
   1661{
   1662	struct sock *sk = csk->sk;
   1663
   1664	if (unlikely(sk->sk_user_data &&
   1665		     !csk_flag_nochk(csk, CSK_CALLBACKS_CHKD)))
   1666		csk_set_flag(csk, CSK_CALLBACKS_CHKD);
   1667}
   1668
   1669/*
   1670 * Handles Rx data that arrives in a state where the socket isn't accepting
   1671 * new data.
   1672 */
   1673static void handle_excess_rx(struct sock *sk, struct sk_buff *skb)
   1674{
   1675	if (!csk_flag(sk, CSK_ABORT_SHUTDOWN))
   1676		chtls_abort_conn(sk, skb);
   1677
   1678	kfree_skb(skb);
   1679}
   1680
   1681static void chtls_recv_data(struct sock *sk, struct sk_buff *skb)
   1682{
   1683	struct cpl_rx_data *hdr = cplhdr(skb) + RSS_HDR;
   1684	struct chtls_sock *csk;
   1685	struct tcp_sock *tp;
   1686
   1687	csk = rcu_dereference_sk_user_data(sk);
   1688	tp = tcp_sk(sk);
   1689
   1690	if (unlikely(sk->sk_shutdown & RCV_SHUTDOWN)) {
   1691		handle_excess_rx(sk, skb);
   1692		return;
   1693	}
   1694
   1695	ULP_SKB_CB(skb)->seq = ntohl(hdr->seq);
   1696	ULP_SKB_CB(skb)->psh = hdr->psh;
   1697	skb_ulp_mode(skb) = ULP_MODE_NONE;
   1698
   1699	skb_reset_transport_header(skb);
   1700	__skb_pull(skb, sizeof(*hdr) + RSS_HDR);
   1701	if (!skb->data_len)
   1702		__skb_trim(skb, ntohs(hdr->len));
   1703
   1704	if (unlikely(hdr->urg))
   1705		handle_urg_ptr(sk, tp->rcv_nxt + ntohs(hdr->urg));
   1706	if (unlikely(tp->urg_data == TCP_URG_NOTYET &&
   1707		     tp->urg_seq - tp->rcv_nxt < skb->len))
   1708		tp->urg_data = TCP_URG_VALID |
   1709			       skb->data[tp->urg_seq - tp->rcv_nxt];
   1710
   1711	if (unlikely(hdr->dack_mode != csk->delack_mode)) {
   1712		csk->delack_mode = hdr->dack_mode;
   1713		csk->delack_seq = tp->rcv_nxt;
   1714	}
   1715
   1716	tcp_hdr(skb)->fin = 0;
   1717	tp->rcv_nxt += skb->len;
   1718
   1719	__skb_queue_tail(&sk->sk_receive_queue, skb);
   1720
   1721	if (!sock_flag(sk, SOCK_DEAD)) {
   1722		check_sk_callbacks(csk);
   1723		sk->sk_data_ready(sk);
   1724	}
   1725}
   1726
   1727static int chtls_rx_data(struct chtls_dev *cdev, struct sk_buff *skb)
   1728{
   1729	struct cpl_rx_data *req = cplhdr(skb) + RSS_HDR;
   1730	unsigned int hwtid = GET_TID(req);
   1731	struct sock *sk;
   1732
   1733	sk = lookup_tid(cdev->tids, hwtid);
   1734	if (unlikely(!sk)) {
   1735		pr_err("can't find conn. for hwtid %u.\n", hwtid);
   1736		return -EINVAL;
   1737	}
   1738	skb_dst_set(skb, NULL);
   1739	process_cpl_msg(chtls_recv_data, sk, skb);
   1740	return 0;
   1741}
   1742
   1743static void chtls_recv_pdu(struct sock *sk, struct sk_buff *skb)
   1744{
   1745	struct cpl_tls_data *hdr = cplhdr(skb);
   1746	struct chtls_sock *csk;
   1747	struct chtls_hws *tlsk;
   1748	struct tcp_sock *tp;
   1749
   1750	csk = rcu_dereference_sk_user_data(sk);
   1751	tlsk = &csk->tlshws;
   1752	tp = tcp_sk(sk);
   1753
   1754	if (unlikely(sk->sk_shutdown & RCV_SHUTDOWN)) {
   1755		handle_excess_rx(sk, skb);
   1756		return;
   1757	}
   1758
   1759	ULP_SKB_CB(skb)->seq = ntohl(hdr->seq);
   1760	ULP_SKB_CB(skb)->flags = 0;
   1761	skb_ulp_mode(skb) = ULP_MODE_TLS;
   1762
   1763	skb_reset_transport_header(skb);
   1764	__skb_pull(skb, sizeof(*hdr));
   1765	if (!skb->data_len)
   1766		__skb_trim(skb,
   1767			   CPL_TLS_DATA_LENGTH_G(ntohl(hdr->length_pkd)));
   1768
   1769	if (unlikely(tp->urg_data == TCP_URG_NOTYET && tp->urg_seq -
   1770		     tp->rcv_nxt < skb->len))
   1771		tp->urg_data = TCP_URG_VALID |
   1772			       skb->data[tp->urg_seq - tp->rcv_nxt];
   1773
   1774	tcp_hdr(skb)->fin = 0;
   1775	tlsk->pldlen = CPL_TLS_DATA_LENGTH_G(ntohl(hdr->length_pkd));
   1776	__skb_queue_tail(&tlsk->sk_recv_queue, skb);
   1777}
   1778
   1779static int chtls_rx_pdu(struct chtls_dev *cdev, struct sk_buff *skb)
   1780{
   1781	struct cpl_tls_data *req = cplhdr(skb);
   1782	unsigned int hwtid = GET_TID(req);
   1783	struct sock *sk;
   1784
   1785	sk = lookup_tid(cdev->tids, hwtid);
   1786	if (unlikely(!sk)) {
   1787		pr_err("can't find conn. for hwtid %u.\n", hwtid);
   1788		return -EINVAL;
   1789	}
   1790	skb_dst_set(skb, NULL);
   1791	process_cpl_msg(chtls_recv_pdu, sk, skb);
   1792	return 0;
   1793}
   1794
   1795static void chtls_set_hdrlen(struct sk_buff *skb, unsigned int nlen)
   1796{
   1797	struct tlsrx_cmp_hdr *tls_cmp_hdr = cplhdr(skb);
   1798
   1799	skb->hdr_len = ntohs((__force __be16)tls_cmp_hdr->length);
   1800	tls_cmp_hdr->length = ntohs((__force __be16)nlen);
   1801}
   1802
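       /*
        * Handle a CPL_RX_TLS_CMP completion.  The skb is trimmed down to the
        * TLS header produced by the hardware and tagged ULPCB_FLAG_TLS_HDR.
        * If a payload skb from an earlier CPL_TLS_DATA is waiting on
        * sk_recv_queue it is paired with this header and both are queued on
        * sk_receive_queue; otherwise the header skb is queued on its own.
        */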
   1803static void chtls_rx_hdr(struct sock *sk, struct sk_buff *skb)
   1804{
   1805	struct tlsrx_cmp_hdr *tls_hdr_pkt;
   1806	struct cpl_rx_tls_cmp *cmp_cpl;
   1807	struct sk_buff *skb_rec;
   1808	struct chtls_sock *csk;
   1809	struct chtls_hws *tlsk;
   1810	struct tcp_sock *tp;
   1811
   1812	cmp_cpl = cplhdr(skb);
   1813	csk = rcu_dereference_sk_user_data(sk);
   1814	tlsk = &csk->tlshws;
   1815	tp = tcp_sk(sk);
   1816
   1817	ULP_SKB_CB(skb)->seq = ntohl(cmp_cpl->seq);
   1818	ULP_SKB_CB(skb)->flags = 0;
   1819
   1820	skb_reset_transport_header(skb);
   1821	__skb_pull(skb, sizeof(*cmp_cpl));
   1822	tls_hdr_pkt = (struct tlsrx_cmp_hdr *)skb->data;
   1823	if (tls_hdr_pkt->res_to_mac_error & TLSRX_HDR_PKT_ERROR_M)
   1824		tls_hdr_pkt->type = CONTENT_TYPE_ERROR;
   1825	if (!skb->data_len)
   1826		__skb_trim(skb, TLS_HEADER_LENGTH);
   1827
   1828	tp->rcv_nxt +=
   1829		CPL_RX_TLS_CMP_PDULENGTH_G(ntohl(cmp_cpl->pdulength_length));
   1830
   1831	ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_TLS_HDR;
   1832	skb_rec = __skb_dequeue(&tlsk->sk_recv_queue);
   1833	if (!skb_rec) {
   1834		__skb_queue_tail(&sk->sk_receive_queue, skb);
   1835	} else {
   1836		chtls_set_hdrlen(skb, tlsk->pldlen);
   1837		tlsk->pldlen = 0;
   1838		__skb_queue_tail(&sk->sk_receive_queue, skb);
   1839		__skb_queue_tail(&sk->sk_receive_queue, skb_rec);
   1840	}
   1841
   1842	if (!sock_flag(sk, SOCK_DEAD)) {
   1843		check_sk_callbacks(csk);
   1844		sk->sk_data_ready(sk);
   1845	}
   1846}
   1847
   1848static int chtls_rx_cmp(struct chtls_dev *cdev, struct sk_buff *skb)
   1849{
   1850	struct cpl_rx_tls_cmp *req = cplhdr(skb);
   1851	unsigned int hwtid = GET_TID(req);
   1852	struct sock *sk;
   1853
   1854	sk = lookup_tid(cdev->tids, hwtid);
   1855	if (unlikely(!sk)) {
   1856		pr_err("can't find conn. for hwtid %u.\n", hwtid);
   1857		return -EINVAL;
   1858	}
   1859	skb_dst_set(skb, NULL);
   1860	process_cpl_msg(chtls_rx_hdr, sk, skb);
   1861
   1862	return 0;
   1863}
   1864
   1865static void chtls_timewait(struct sock *sk)
   1866{
   1867	struct tcp_sock *tp = tcp_sk(sk);
   1868
   1869	tp->rcv_nxt++;
   1870	tp->rx_opt.ts_recent_stamp = ktime_get_seconds();
   1871	tp->srtt_us = 0;
   1872	tcp_time_wait(sk, TCP_TIME_WAIT, 0);
   1873}
   1874
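       /*
        * Handle CPL_PEER_CLOSE: the peer has sent a FIN.  Unless an abort is
        * already pending, mark the receive side shut down, walk the usual
        * FIN state transitions and wake anyone sleeping on the socket.
        */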
   1875static void chtls_peer_close(struct sock *sk, struct sk_buff *skb)
   1876{
   1877	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
   1878
   1879	if (csk_flag_nochk(csk, CSK_ABORT_RPL_PENDING))
   1880		goto out;
   1881
   1882	sk->sk_shutdown |= RCV_SHUTDOWN;
   1883	sock_set_flag(sk, SOCK_DONE);
   1884
   1885	switch (sk->sk_state) {
   1886	case TCP_SYN_RECV:
   1887	case TCP_ESTABLISHED:
   1888		tcp_set_state(sk, TCP_CLOSE_WAIT);
   1889		break;
   1890	case TCP_FIN_WAIT1:
   1891		tcp_set_state(sk, TCP_CLOSING);
   1892		break;
   1893	case TCP_FIN_WAIT2:
   1894		chtls_release_resources(sk);
   1895		if (csk_flag_nochk(csk, CSK_ABORT_RPL_PENDING))
   1896			chtls_conn_done(sk);
   1897		else
   1898			chtls_timewait(sk);
   1899		break;
   1900	default:
   1901		pr_info("cpl_peer_close in bad state %d\n", sk->sk_state);
   1902	}
   1903
   1904	if (!sock_flag(sk, SOCK_DEAD)) {
   1905		sk->sk_state_change(sk);
   1906		/* Do not send POLL_HUP for half-duplex close. */
   1907
   1908		if ((sk->sk_shutdown & SEND_SHUTDOWN) ||
   1909		    sk->sk_state == TCP_CLOSE)
   1910			sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
   1911		else
   1912			sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
   1913	}
   1914out:
   1915	kfree_skb(skb);
   1916}
   1917
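       /*
        * Handle CPL_CLOSE_CON_RPL: the hardware has acknowledged our close
        * request and reports snd_nxt, from which snd_una is derived (the FIN
        * itself is excluded).  This completes the active close state machine.
        */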
   1918static void chtls_close_con_rpl(struct sock *sk, struct sk_buff *skb)
   1919{
   1920	struct cpl_close_con_rpl *rpl = cplhdr(skb) + RSS_HDR;
   1921	struct chtls_sock *csk;
   1922	struct tcp_sock *tp;
   1923
   1924	csk = rcu_dereference_sk_user_data(sk);
   1925
   1926	if (csk_flag_nochk(csk, CSK_ABORT_RPL_PENDING))
   1927		goto out;
   1928
   1929	tp = tcp_sk(sk);
   1930
   1931	tp->snd_una = ntohl(rpl->snd_nxt) - 1;  /* exclude FIN */
   1932
   1933	switch (sk->sk_state) {
   1934	case TCP_CLOSING:
   1935		chtls_release_resources(sk);
   1936		if (csk_flag_nochk(csk, CSK_ABORT_RPL_PENDING))
   1937			chtls_conn_done(sk);
   1938		else
   1939			chtls_timewait(sk);
   1940		break;
   1941	case TCP_LAST_ACK:
   1942		chtls_release_resources(sk);
   1943		chtls_conn_done(sk);
   1944		break;
   1945	case TCP_FIN_WAIT1:
   1946		tcp_set_state(sk, TCP_FIN_WAIT2);
   1947		sk->sk_shutdown |= SEND_SHUTDOWN;
   1948
   1949		if (!sock_flag(sk, SOCK_DEAD))
   1950			sk->sk_state_change(sk);
   1951		else if (tcp_sk(sk)->linger2 < 0 &&
   1952			 !csk_flag_nochk(csk, CSK_ABORT_SHUTDOWN))
   1953			chtls_abort_conn(sk, skb);
   1954		else if (csk_flag_nochk(csk, CSK_TX_DATA_SENT))
   1955			chtls_set_quiesce_ctrl(sk, 0);
   1956		break;
   1957	default:
   1958		pr_info("close_con_rpl in bad state %d\n", sk->sk_state);
   1959	}
   1960out:
   1961	kfree_skb(skb);
   1962}
   1963
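       /*
        * Return an skb suitable for building a CPL reply of @len bytes.  If
        * the received skb is linear and not cloned it is trimmed and reused
        * (with an extra reference taken); otherwise a fresh skb is allocated.
        */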
   1964static struct sk_buff *get_cpl_skb(struct sk_buff *skb,
   1965				   size_t len, gfp_t gfp)
   1966{
   1967	if (likely(!skb_is_nonlinear(skb) && !skb_cloned(skb))) {
   1968		WARN_ONCE(skb->len < len, "skb alloc error");
   1969		__skb_trim(skb, len);
   1970		skb_get(skb);
   1971	} else {
   1972		skb = alloc_skb(len, gfp);
   1973		if (skb)
   1974			__skb_put(skb, len);
   1975	}
   1976	return skb;
   1977}
   1978
   1979static void set_abort_rpl_wr(struct sk_buff *skb, unsigned int tid,
   1980			     int cmd)
   1981{
   1982	struct cpl_abort_rpl *rpl = cplhdr(skb);
   1983
   1984	INIT_TP_WR_CPL(rpl, CPL_ABORT_RPL, tid);
   1985	rpl->cmd = cmd;
   1986}
   1987
   1988static void send_defer_abort_rpl(struct chtls_dev *cdev, struct sk_buff *skb)
   1989{
   1990	struct cpl_abort_req_rss *req = cplhdr(skb);
   1991	struct sk_buff *reply_skb;
   1992
   1993	reply_skb = alloc_skb(sizeof(struct cpl_abort_rpl),
   1994			      GFP_KERNEL | __GFP_NOFAIL);
   1995	__skb_put(reply_skb, sizeof(struct cpl_abort_rpl));
   1996	set_abort_rpl_wr(reply_skb, GET_TID(req),
   1997			 (req->status & CPL_ABORT_NO_RST));
   1998	set_wr_txq(reply_skb, CPL_PRIORITY_DATA, req->status >> 1);
   1999	cxgb4_ofld_send(cdev->lldi->ports[0], reply_skb);
   2000	kfree_skb(skb);
   2001}
   2002
   2003/*
   2004 * Add an skb to the deferred skb queue for processing from process context.
   2005 */
   2006static void t4_defer_reply(struct sk_buff *skb, struct chtls_dev *cdev,
   2007			   defer_handler_t handler)
   2008{
   2009	DEFERRED_SKB_CB(skb)->handler = handler;
   2010	spin_lock_bh(&cdev->deferq.lock);
   2011	__skb_queue_tail(&cdev->deferq, skb);
   2012	if (skb_queue_len(&cdev->deferq) == 1)
   2013		schedule_work(&cdev->deferq_task);
   2014	spin_unlock_bh(&cdev->deferq.lock);
   2015}
   2016
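       /*
        * Send a CPL_ABORT_RPL for the given abort request.  If no reply skb
        * can be obtained, the queue index and RST-suppression status are
        * packed into req->status as (queue << 1) | status and the request is
        * deferred to process context via t4_defer_reply() and
        * send_defer_abort_rpl(), which unpack them again.
        */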
   2017static void chtls_send_abort_rpl(struct sock *sk, struct sk_buff *skb,
   2018				 struct chtls_dev *cdev,
   2019				 int status, int queue)
   2020{
   2021	struct cpl_abort_req_rss *req = cplhdr(skb) + RSS_HDR;
   2022	struct sk_buff *reply_skb;
   2023	struct chtls_sock *csk;
   2024	unsigned int tid;
   2025
   2026	csk = rcu_dereference_sk_user_data(sk);
   2027	tid = GET_TID(req);
   2028
   2029	reply_skb = get_cpl_skb(skb, sizeof(struct cpl_abort_rpl), gfp_any());
   2030	if (!reply_skb) {
   2031		req->status = (queue << 1) | status;
   2032		t4_defer_reply(skb, cdev, send_defer_abort_rpl);
   2033		return;
   2034	}
   2035
   2036	set_abort_rpl_wr(reply_skb, tid, status);
   2037	kfree_skb(skb);
   2038	set_wr_txq(reply_skb, CPL_PRIORITY_DATA, queue);
   2039	if (csk_conn_inline(csk)) {
   2040		struct l2t_entry *e = csk->l2t_entry;
   2041
   2042		if (e && sk->sk_state != TCP_SYN_RECV) {
   2043			cxgb4_l2t_send(csk->egress_dev, reply_skb, e);
   2044			return;
   2045		}
   2046	}
   2047	cxgb4_ofld_send(cdev->lldi->ports[0], reply_skb);
   2048}
   2049
   2050/*
   2051 * This is run from a listener's backlog to abort a child connection in
   2052 * SYN_RCV state (i.e., one on the listener's SYN queue).
   2053 */
   2054static void bl_abort_syn_rcv(struct sock *lsk, struct sk_buff *skb)
   2055{
   2056	struct chtls_sock *csk;
   2057	struct sock *child;
   2058	int queue;
   2059
   2060	child = skb->sk;
   2061	csk = rcu_dereference_sk_user_data(child);
   2062	queue = csk->txq_idx;
   2063
   2064	skb->sk	= NULL;
   2065	chtls_send_abort_rpl(child, skb, BLOG_SKB_CB(skb)->cdev,
   2066			     CPL_ABORT_NO_RST, queue);
   2067	do_abort_syn_rcv(child, lsk);
   2068}
   2069
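       /*
        * Abort handling for a connection still in SYN_RECV.  Such a child is
        * reaped through its listener: the listening socket is looked up via
        * the stid stashed in the pending request (oreq->ts_recent) and,
        * depending on whether the listener is owned by user context, the
        * abort is either handled immediately or queued on the listener's
        * backlog (bl_abort_syn_rcv()).
        */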
   2070static int abort_syn_rcv(struct sock *sk, struct sk_buff *skb)
   2071{
   2072	const struct request_sock *oreq;
   2073	struct listen_ctx *listen_ctx;
   2074	struct chtls_sock *csk;
   2075	struct chtls_dev *cdev;
   2076	struct sock *psk;
   2077	void *ctx;
   2078
   2079	csk = sk->sk_user_data;
   2080	oreq = csk->passive_reap_next;
   2081	cdev = csk->cdev;
   2082
   2083	if (!oreq)
   2084		return -1;
   2085
   2086	ctx = lookup_stid(cdev->tids, oreq->ts_recent);
   2087	if (!ctx)
   2088		return -1;
   2089
   2090	listen_ctx = (struct listen_ctx *)ctx;
   2091	psk = listen_ctx->lsk;
   2092
   2093	bh_lock_sock(psk);
   2094	if (!sock_owned_by_user(psk)) {
   2095		int queue = csk->txq_idx;
   2096
   2097		chtls_send_abort_rpl(sk, skb, cdev, CPL_ABORT_NO_RST, queue);
   2098		do_abort_syn_rcv(sk, psk);
   2099	} else {
   2100		skb->sk = sk;
   2101		BLOG_SKB_CB(skb)->backlog_rcv = bl_abort_syn_rcv;
   2102		__sk_add_backlog(psk, skb);
   2103	}
   2104	bh_unlock_sock(psk);
   2105	return 0;
   2106}
   2107
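       /*
        * Handle CPL_ABORT_REQ_RSS, sent by the hardware when the connection
        * is being torn down abnormally (typically on a peer RST).  Negative
        * advice is ignored; otherwise a flowc WR is sent if none has gone
        * out yet, the abort is acknowledged and the connection is released.
        * Connections still in SYN_RECV take the abort_syn_rcv() path.
        */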
   2108static void chtls_abort_req_rss(struct sock *sk, struct sk_buff *skb)
   2109{
   2110	const struct cpl_abort_req_rss *req = cplhdr(skb) + RSS_HDR;
   2111	struct chtls_sock *csk = sk->sk_user_data;
   2112	int rst_status = CPL_ABORT_NO_RST;
   2113	int queue = csk->txq_idx;
   2114
   2115	if (is_neg_adv(req->status)) {
   2116		kfree_skb(skb);
   2117		return;
   2118	}
   2119
   2120	csk_reset_flag(csk, CSK_ABORT_REQ_RCVD);
   2121
   2122	if (!csk_flag_nochk(csk, CSK_ABORT_SHUTDOWN) &&
   2123	    !csk_flag_nochk(csk, CSK_TX_DATA_SENT)) {
   2124		struct tcp_sock *tp = tcp_sk(sk);
   2125
   2126		if (send_tx_flowc_wr(sk, 0, tp->snd_nxt, tp->rcv_nxt) < 0)
   2127			WARN_ONCE(1, "send_tx_flowc error");
   2128		csk_set_flag(csk, CSK_TX_DATA_SENT);
   2129	}
   2130
   2131	csk_set_flag(csk, CSK_ABORT_SHUTDOWN);
   2132
   2133	if (!csk_flag_nochk(csk, CSK_ABORT_RPL_PENDING)) {
   2134		sk->sk_err = ETIMEDOUT;
   2135
   2136		if (!sock_flag(sk, SOCK_DEAD))
   2137			sk_error_report(sk);
   2138
   2139		if (sk->sk_state == TCP_SYN_RECV && !abort_syn_rcv(sk, skb))
   2140			return;
   2141
   2142	}
   2143
   2144	chtls_send_abort_rpl(sk, skb, BLOG_SKB_CB(skb)->cdev,
   2145			     rst_status, queue);
   2146	chtls_release_resources(sk);
   2147	chtls_conn_done(sk);
   2148}
   2149
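       /*
        * Handle CPL_ABORT_RPL_RSS, the reply to an abort we requested.  Once
        * no abort request from the peer is outstanding, the TID is removed
        * (for connections still in SYN_SENT) and the remaining resources are
        * freed.
        */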
   2150static void chtls_abort_rpl_rss(struct sock *sk, struct sk_buff *skb)
   2151{
   2152	struct cpl_abort_rpl_rss *rpl = cplhdr(skb) + RSS_HDR;
   2153	struct chtls_sock *csk;
   2154	struct chtls_dev *cdev;
   2155
   2156	csk = rcu_dereference_sk_user_data(sk);
   2157	cdev = csk->cdev;
   2158
   2159	if (csk_flag_nochk(csk, CSK_ABORT_RPL_PENDING)) {
   2160		csk_reset_flag(csk, CSK_ABORT_RPL_PENDING);
   2161		if (!csk_flag_nochk(csk, CSK_ABORT_REQ_RCVD)) {
   2162			if (sk->sk_state == TCP_SYN_SENT) {
   2163				cxgb4_remove_tid(cdev->tids,
   2164						 csk->port_id,
   2165						 GET_TID(rpl),
   2166						 sk->sk_family);
   2167				sock_put(sk);
   2168			}
   2169			chtls_release_resources(sk);
   2170			chtls_conn_done(sk);
   2171		}
   2172	}
   2173	kfree_skb(skb);
   2174}
   2175
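       /*
        * Common entry point for the connection-teardown CPLs (CPL_PEER_CLOSE,
        * CPL_CLOSE_CON_RPL, CPL_ABORT_REQ_RSS, CPL_ABORT_RPL_RSS): look up
        * the socket by hardware TID and hand the message to the matching
        * handler through process_cpl_msg().
        */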
   2176static int chtls_conn_cpl(struct chtls_dev *cdev, struct sk_buff *skb)
   2177{
   2178	struct cpl_peer_close *req = cplhdr(skb) + RSS_HDR;
   2179	void (*fn)(struct sock *sk, struct sk_buff *skb);
   2180	unsigned int hwtid = GET_TID(req);
   2181	struct chtls_sock *csk;
   2182	struct sock *sk;
   2183	u8 opcode;
   2184
   2185	opcode = ((const struct rss_header *)cplhdr(skb))->opcode;
   2186
   2187	sk = lookup_tid(cdev->tids, hwtid);
   2188	if (!sk)
   2189		goto rel_skb;
   2190
   2191	csk = sk->sk_user_data;
   2192
   2193	switch (opcode) {
   2194	case CPL_PEER_CLOSE:
   2195		fn = chtls_peer_close;
   2196		break;
   2197	case CPL_CLOSE_CON_RPL:
   2198		fn = chtls_close_con_rpl;
   2199		break;
   2200	case CPL_ABORT_REQ_RSS:
   2201		/*
   2202		 * Save the offload device in the skb; we may process this
   2203		 * message after the socket has closed.
   2204		 */
   2205		BLOG_SKB_CB(skb)->cdev = csk->cdev;
   2206		fn = chtls_abort_req_rss;
   2207		break;
   2208	case CPL_ABORT_RPL_RSS:
   2209		fn = chtls_abort_rpl_rss;
   2210		break;
   2211	default:
   2212		goto rel_skb;
   2213	}
   2214
   2215	process_cpl_msg(fn, sk, skb);
   2216	return 0;
   2217
   2218rel_skb:
   2219	kfree_skb(skb);
   2220	return 0;
   2221}
   2222
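       /*
        * Handle CPL_FW4_ACK: the firmware returns transmit credits and the
        * current snd_una.  Each pending work request on the wr_skb list
        * stores its credit cost in skb->csum; fully paid-for WRs are
        * dequeued and freed, while a partially covered one has its remaining
        * cost written back (for example, a WR costing 3 credits with only 2
        * returned keeps a cost of 1 and the loop stops).  Finally snd_una is
        * advanced and any queued TX data is pushed.
        */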
   2223static void chtls_rx_ack(struct sock *sk, struct sk_buff *skb)
   2224{
   2225	struct cpl_fw4_ack *hdr = cplhdr(skb) + RSS_HDR;
   2226	struct chtls_sock *csk = sk->sk_user_data;
   2227	struct tcp_sock *tp = tcp_sk(sk);
   2228	u32 credits = hdr->credits;
   2229	u32 snd_una;
   2230
   2231	snd_una = ntohl(hdr->snd_una);
   2232	csk->wr_credits += credits;
   2233
   2234	if (csk->wr_unacked > csk->wr_max_credits - csk->wr_credits)
   2235		csk->wr_unacked = csk->wr_max_credits - csk->wr_credits;
   2236
   2237	while (credits) {
   2238		struct sk_buff *pskb = csk->wr_skb_head;
   2239		u32 csum;
   2240
   2241		if (unlikely(!pskb)) {
   2242			if (csk->wr_nondata)
   2243				csk->wr_nondata -= credits;
   2244			break;
   2245		}
   2246		csum = (__force u32)pskb->csum;
   2247		if (unlikely(credits < csum)) {
   2248			pskb->csum = (__force __wsum)(csum - credits);
   2249			break;
   2250		}
   2251		dequeue_wr(sk);
   2252		credits -= csum;
   2253		kfree_skb(pskb);
   2254	}
   2255	if (hdr->seq_vld & CPL_FW4_ACK_FLAGS_SEQVAL) {
   2256		if (unlikely(before(snd_una, tp->snd_una))) {
   2257			kfree_skb(skb);
   2258			return;
   2259		}
   2260
   2261		if (tp->snd_una != snd_una) {
   2262			tp->snd_una = snd_una;
   2263			tp->rcv_tstamp = tcp_time_stamp(tp);
   2264			if (tp->snd_una == tp->snd_nxt &&
   2265			    !csk_flag_nochk(csk, CSK_TX_FAILOVER))
   2266				csk_reset_flag(csk, CSK_TX_WAIT_IDLE);
   2267		}
   2268	}
   2269
   2270	if (hdr->seq_vld & CPL_FW4_ACK_FLAGS_CH) {
   2271		unsigned int fclen16 = roundup(failover_flowc_wr_len, 16);
   2272
   2273		csk->wr_credits -= fclen16;
   2274		csk_reset_flag(csk, CSK_TX_WAIT_IDLE);
   2275		csk_reset_flag(csk, CSK_TX_FAILOVER);
   2276	}
   2277	if (skb_queue_len(&csk->txq) && chtls_push_frames(csk, 0))
   2278		sk->sk_write_space(sk);
   2279
   2280	kfree_skb(skb);
   2281}
   2282
   2283static int chtls_wr_ack(struct chtls_dev *cdev, struct sk_buff *skb)
   2284{
   2285	struct cpl_fw4_ack *rpl = cplhdr(skb) + RSS_HDR;
   2286	unsigned int hwtid = GET_TID(rpl);
   2287	struct sock *sk;
   2288
   2289	sk = lookup_tid(cdev->tids, hwtid);
   2290	if (unlikely(!sk)) {
   2291		pr_err("can't find conn. for hwtid %u.\n", hwtid);
   2292		return -EINVAL;
   2293	}
   2294	process_cpl_msg(chtls_rx_ack, sk, skb);
   2295
   2296	return 0;
   2297}
   2298
   2299static int chtls_set_tcb_rpl(struct chtls_dev *cdev, struct sk_buff *skb)
   2300{
   2301	struct cpl_set_tcb_rpl *rpl = cplhdr(skb) + RSS_HDR;
   2302	unsigned int hwtid = GET_TID(rpl);
   2303	struct sock *sk;
   2304
   2305	sk = lookup_tid(cdev->tids, hwtid);
   2306
   2307	/* Return -EINVAL if the socket doesn't exist */
   2308	if (!sk)
   2309		return -EINVAL;
   2310
   2311	/* Reuse the skb, since the cpl_set_tcb_field structure
   2312	 * is larger than cpl_abort_req.
   2313	 */
   2314	if (TCB_COOKIE_G(rpl->cookie) == TCB_FIELD_COOKIE_TFLAG)
   2315		chtls_send_abort(sk, CPL_ABORT_SEND_RST, NULL);
   2316
   2317	kfree_skb(skb);
   2318	return 0;
   2319}
   2320
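       /*
        * CPL opcode to handler dispatch table for the receive path.  Note
        * that all four connection-teardown opcodes share chtls_conn_cpl(),
        * which demultiplexes on the opcode itself.
        */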
   2321chtls_handler_func chtls_handlers[NUM_CPL_CMDS] = {
   2322	[CPL_PASS_OPEN_RPL]     = chtls_pass_open_rpl,
   2323	[CPL_CLOSE_LISTSRV_RPL] = chtls_close_listsrv_rpl,
   2324	[CPL_PASS_ACCEPT_REQ]   = chtls_pass_accept_req,
   2325	[CPL_PASS_ESTABLISH]    = chtls_pass_establish,
   2326	[CPL_RX_DATA]           = chtls_rx_data,
   2327	[CPL_TLS_DATA]          = chtls_rx_pdu,
   2328	[CPL_RX_TLS_CMP]        = chtls_rx_cmp,
   2329	[CPL_PEER_CLOSE]        = chtls_conn_cpl,
   2330	[CPL_CLOSE_CON_RPL]     = chtls_conn_cpl,
   2331	[CPL_ABORT_REQ_RSS]     = chtls_conn_cpl,
   2332	[CPL_ABORT_RPL_RSS]     = chtls_conn_cpl,
   2333	[CPL_FW4_ACK]		= chtls_wr_ack,
   2334	[CPL_SET_TCB_RPL]	= chtls_set_tcb_rpl,
   2335};