cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

chtls_io.c (45828B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * Copyright (c) 2018 Chelsio Communications, Inc.
      4 *
      5 * Written by: Atul Gupta (atul.gupta@chelsio.com)
      6 */
      7
      8#include <linux/module.h>
      9#include <linux/list.h>
     10#include <linux/workqueue.h>
     11#include <linux/skbuff.h>
     12#include <linux/timer.h>
     13#include <linux/notifier.h>
     14#include <linux/inetdevice.h>
     15#include <linux/ip.h>
     16#include <linux/tcp.h>
     17#include <linux/sched/signal.h>
     18#include <net/tcp.h>
     19#include <net/busy_poll.h>
     20#include <crypto/aes.h>
     21
     22#include "chtls.h"
     23#include "chtls_cm.h"
     24
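        /*
         * A non-negative key id means a key has been programmed for this
         * connection (the id indexes the adapter's key memory, see
         * tls_copy_tx_key() below), so these helpers test whether TLS
         * offload is active in the transmit and receive directions.
         */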
     25static bool is_tls_tx(struct chtls_sock *csk)
     26{
     27	return csk->tlshws.txkey >= 0;
     28}
     29
     30static bool is_tls_rx(struct chtls_sock *csk)
     31{
     32	return csk->tlshws.rxkey >= 0;
     33}
     34
     35static int data_sgl_len(const struct sk_buff *skb)
     36{
     37	unsigned int cnt;
     38
     39	cnt = skb_shinfo(skb)->nr_frags;
     40	return sgl_len(cnt) * 8;
     41}
     42
     43static int nos_ivs(struct sock *sk, unsigned int size)
     44{
     45	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
     46
     47	return DIV_ROUND_UP(size, csk->tlshws.mfs);
     48}
     49
     50static int set_ivs_imm(struct sock *sk, const struct sk_buff *skb)
     51{
     52	int ivs_size = nos_ivs(sk, skb->len) * CIPHER_BLOCK_SIZE;
     53	int hlen = TLS_WR_CPL_LEN + data_sgl_len(skb);
     54
     55	if ((hlen + KEY_ON_MEM_SZ + ivs_size) <
     56	    MAX_IMM_OFLD_TX_DATA_WR_LEN) {
     57		ULP_SKB_CB(skb)->ulp.tls.iv = 1;
     58		return 1;
     59	}
     60	ULP_SKB_CB(skb)->ulp.tls.iv = 0;
     61	return 0;
     62}
     63
     64static int max_ivs_size(struct sock *sk, int size)
     65{
     66	return nos_ivs(sk, size) * CIPHER_BLOCK_SIZE;
     67}
     68
     69static int ivs_size(struct sock *sk, const struct sk_buff *skb)
     70{
     71	return set_ivs_imm(sk, skb) ? (nos_ivs(sk, skb->len) *
     72		 CIPHER_BLOCK_SIZE) : 0;
     73}
     74
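        /*
         * Work-request credits are counted in 16-byte units: round the FLOWC
         * length (header plus nparams mnemonic/value pairs) up to a multiple
         * of 16 and return the credit count, optionally passing back the
         * padded byte length that will actually be transmitted.
         */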
     75static int flowc_wr_credits(int nparams, int *flowclenp)
     76{
     77	int flowclen16, flowclen;
     78
     79	flowclen = offsetof(struct fw_flowc_wr, mnemval[nparams]);
     80	flowclen16 = DIV_ROUND_UP(flowclen, 16);
     81	flowclen = flowclen16 * 16;
     82
     83	if (flowclenp)
     84		*flowclenp = flowclen;
     85
     86	return flowclen16;
     87}
     88
     89static struct sk_buff *create_flowc_wr_skb(struct sock *sk,
     90					   struct fw_flowc_wr *flowc,
     91					   int flowclen)
     92{
     93	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
     94	struct sk_buff *skb;
     95
     96	skb = alloc_skb(flowclen, GFP_ATOMIC);
     97	if (!skb)
     98		return NULL;
     99
    100	__skb_put_data(skb, flowc, flowclen);
    101	skb_set_queue_mapping(skb, (csk->txq_idx << 1) | CPL_PRIORITY_DATA);
    102
    103	return skb;
    104}
    105
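        /*
         * Once data has been sent on the connection, the FLOWC must be queued
         * on the socket's tx queue so it stays ordered behind that data;
         * before then it can be pushed directly, as immediate data if that
         * succeeds or as a deferred skb otherwise.  Returns the credits
         * consumed (flowclen16), 0 if the FLOWC was only queued, or -ENOMEM.
         */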
    106static int send_flowc_wr(struct sock *sk, struct fw_flowc_wr *flowc,
    107			 int flowclen)
    108{
    109	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
    110	struct tcp_sock *tp = tcp_sk(sk);
    111	struct sk_buff *skb;
    112	int flowclen16;
    113	int ret;
    114
    115	flowclen16 = flowclen / 16;
    116
    117	if (csk_flag(sk, CSK_TX_DATA_SENT)) {
    118		skb = create_flowc_wr_skb(sk, flowc, flowclen);
    119		if (!skb)
    120			return -ENOMEM;
    121
    122		skb_entail(sk, skb,
    123			   ULPCB_FLAG_NO_HDR | ULPCB_FLAG_NO_APPEND);
    124		return 0;
    125	}
    126
    127	ret = cxgb4_immdata_send(csk->egress_dev,
    128				 csk->txq_idx,
    129				 flowc, flowclen);
    130	if (!ret)
    131		return flowclen16;
    132	skb = create_flowc_wr_skb(sk, flowc, flowclen);
    133	if (!skb)
    134		return -ENOMEM;
    135	send_or_defer(sk, tp, skb, 0);
    136	return flowclen16;
    137}
    138
    139static u8 tcp_state_to_flowc_state(u8 state)
    140{
    141	switch (state) {
    142	case TCP_ESTABLISHED:
    143		return FW_FLOWC_MNEM_TCPSTATE_ESTABLISHED;
    144	case TCP_CLOSE_WAIT:
    145		return FW_FLOWC_MNEM_TCPSTATE_CLOSEWAIT;
    146	case TCP_FIN_WAIT1:
    147		return FW_FLOWC_MNEM_TCPSTATE_FINWAIT1;
    148	case TCP_CLOSING:
    149		return FW_FLOWC_MNEM_TCPSTATE_CLOSING;
    150	case TCP_LAST_ACK:
    151		return FW_FLOWC_MNEM_TCPSTATE_LASTACK;
    152	case TCP_FIN_WAIT2:
    153		return FW_FLOWC_MNEM_TCPSTATE_FINWAIT2;
    154	}
    155
    156	return FW_FLOWC_MNEM_TCPSTATE_ESTABLISHED;
    157}
    158
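        /*
         * Build and send a FW_FLOWC_WR describing this flow to the firmware:
         * PF/VF, channel, ingress queue, send/receive sequence numbers, send
         * buffer, MSS and TCP state, plus the TLS ULP mode and maximum
         * TX_DATA payload length when TLS offload is in use.
         */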
    159int send_tx_flowc_wr(struct sock *sk, int compl,
    160		     u32 snd_nxt, u32 rcv_nxt)
    161{
    162	struct flowc_packed {
    163		struct fw_flowc_wr fc;
    164		struct fw_flowc_mnemval mnemval[FW_FLOWC_MNEM_MAX];
    165	} __packed sflowc;
    166	int nparams, paramidx, flowclen16, flowclen;
    167	struct fw_flowc_wr *flowc;
    168	struct chtls_sock *csk;
    169	struct tcp_sock *tp;
    170
    171	csk = rcu_dereference_sk_user_data(sk);
    172	tp = tcp_sk(sk);
    173	memset(&sflowc, 0, sizeof(sflowc));
    174	flowc = &sflowc.fc;
    175
    176#define FLOWC_PARAM(__m, __v) \
    177	do { \
    178		flowc->mnemval[paramidx].mnemonic = FW_FLOWC_MNEM_##__m; \
    179		flowc->mnemval[paramidx].val = cpu_to_be32(__v); \
    180		paramidx++; \
    181	} while (0)
    182
    183	paramidx = 0;
    184
    185	FLOWC_PARAM(PFNVFN, FW_PFVF_CMD_PFN_V(csk->cdev->lldi->pf));
    186	FLOWC_PARAM(CH, csk->tx_chan);
    187	FLOWC_PARAM(PORT, csk->tx_chan);
    188	FLOWC_PARAM(IQID, csk->rss_qid);
    189	FLOWC_PARAM(SNDNXT, tp->snd_nxt);
    190	FLOWC_PARAM(RCVNXT, tp->rcv_nxt);
    191	FLOWC_PARAM(SNDBUF, csk->sndbuf);
    192	FLOWC_PARAM(MSS, tp->mss_cache);
    193	FLOWC_PARAM(TCPSTATE, tcp_state_to_flowc_state(sk->sk_state));
    194
    195	if (SND_WSCALE(tp))
    196		FLOWC_PARAM(RCV_SCALE, SND_WSCALE(tp));
    197
    198	if (csk->ulp_mode == ULP_MODE_TLS)
    199		FLOWC_PARAM(ULD_MODE, ULP_MODE_TLS);
    200
    201	if (csk->tlshws.fcplenmax)
    202		FLOWC_PARAM(TXDATAPLEN_MAX, csk->tlshws.fcplenmax);
    203
    204	nparams = paramidx;
    205#undef FLOWC_PARAM
    206
    207	flowclen16 = flowc_wr_credits(nparams, &flowclen);
    208	flowc->op_to_nparams =
    209		cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) |
    210			    FW_WR_COMPL_V(compl) |
    211			    FW_FLOWC_WR_NPARAMS_V(nparams));
    212	flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(flowclen16) |
    213					  FW_WR_FLOWID_V(csk->tid));
    214
    215	return send_flowc_wr(sk, flowc, flowclen);
    216}
    217
    218/* Copy IVs to WR */
    219static int tls_copy_ivs(struct sock *sk, struct sk_buff *skb)
    220
    221{
    222	struct chtls_sock *csk;
    223	unsigned char *iv_loc;
    224	struct chtls_hws *hws;
    225	unsigned char *ivs;
    226	u16 number_of_ivs;
    227	struct page *page;
    228	int err = 0;
    229
    230	csk = rcu_dereference_sk_user_data(sk);
    231	hws = &csk->tlshws;
    232	number_of_ivs = nos_ivs(sk, skb->len);
    233
    234	if (number_of_ivs > MAX_IVS_PAGE) {
    235		pr_warn("MAX IVs in PAGE exceeded %d\n", number_of_ivs);
    236		return -ENOMEM;
    237	}
    238
     239	/* generate the IVs */
    240	ivs = kmalloc_array(CIPHER_BLOCK_SIZE, number_of_ivs, GFP_ATOMIC);
    241	if (!ivs)
    242		return -ENOMEM;
    243	get_random_bytes(ivs, number_of_ivs * CIPHER_BLOCK_SIZE);
    244
    245	if (skb_ulp_tls_iv_imm(skb)) {
    246		/* send the IVs as immediate data in the WR */
    247		iv_loc = (unsigned char *)__skb_push(skb, number_of_ivs *
    248						CIPHER_BLOCK_SIZE);
    249		if (iv_loc)
    250			memcpy(iv_loc, ivs, number_of_ivs * CIPHER_BLOCK_SIZE);
    251
    252		hws->ivsize = number_of_ivs * CIPHER_BLOCK_SIZE;
    253	} else {
     254		/* Send the IVs via DSGL */
     255		/* The IV DSGL entry was already accounted for in the credits */
    256		skb_shinfo(skb)->nr_frags--;
    257		page = alloc_pages(sk->sk_allocation | __GFP_COMP, 0);
    258		if (!page) {
    259			pr_info("%s : Page allocation for IVs failed\n",
    260				__func__);
    261			err = -ENOMEM;
    262			goto out;
    263		}
    264		memcpy(page_address(page), ivs, number_of_ivs *
    265		       CIPHER_BLOCK_SIZE);
    266		skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, page, 0,
    267				   number_of_ivs * CIPHER_BLOCK_SIZE);
    268		hws->ivsize = 0;
    269	}
    270out:
    271	kfree(ivs);
    272	return err;
    273}
    274
    275/* Copy Key to WR */
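        /*
         * The key itself is not placed in the work request: a ULP_TX_SC_MEMRD
         * sub-command is appended that points the hardware at the key's
         * location in on-chip key memory (keyid_to_addr() of the tx key id),
         * preceded by a ULP_TX_SC_NOOP, presumably for padding/alignment.
         */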
    276static void tls_copy_tx_key(struct sock *sk, struct sk_buff *skb)
    277{
    278	struct ulptx_sc_memrd *sc_memrd;
    279	struct chtls_sock *csk;
    280	struct chtls_dev *cdev;
    281	struct ulptx_idata *sc;
    282	struct chtls_hws *hws;
    283	u32 immdlen;
    284	int kaddr;
    285
    286	csk = rcu_dereference_sk_user_data(sk);
    287	hws = &csk->tlshws;
    288	cdev = csk->cdev;
    289
    290	immdlen = sizeof(*sc) + sizeof(*sc_memrd);
    291	kaddr = keyid_to_addr(cdev->kmap.start, hws->txkey);
    292	sc = (struct ulptx_idata *)__skb_push(skb, immdlen);
    293	if (sc) {
    294		sc->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_NOOP));
    295		sc->len = htonl(0);
    296		sc_memrd = (struct ulptx_sc_memrd *)(sc + 1);
    297		sc_memrd->cmd_to_len =
    298				htonl(ULPTX_CMD_V(ULP_TX_SC_MEMRD) |
    299				ULP_TX_SC_MORE_V(1) |
    300				ULPTX_LEN16_V(hws->keylen >> 4));
    301		sc_memrd->addr = htonl(kaddr);
    302	}
    303}
    304
    305static u64 tlstx_incr_seqnum(struct chtls_hws *hws)
    306{
    307	return hws->tx_seq_no++;
    308}
    309
    310static bool is_sg_request(const struct sk_buff *skb)
    311{
    312	return skb->peeked ||
    313		(skb->len > MAX_IMM_ULPTX_WR_LEN);
    314}
    315
    316/*
    317 * Returns true if an sk_buff carries urgent data.
    318 */
    319static bool skb_urgent(struct sk_buff *skb)
    320{
    321	return ULP_SKB_CB(skb)->flags & ULPCB_FLAG_URG;
    322}
    323
    324/* TLS content type for CPL SFO */
    325static unsigned char tls_content_type(unsigned char content_type)
    326{
    327	switch (content_type) {
    328	case TLS_HDR_TYPE_CCS:
    329		return CPL_TX_TLS_SFO_TYPE_CCS;
    330	case TLS_HDR_TYPE_ALERT:
    331		return CPL_TX_TLS_SFO_TYPE_ALERT;
    332	case TLS_HDR_TYPE_HANDSHAKE:
    333		return CPL_TX_TLS_SFO_TYPE_HANDSHAKE;
    334	case TLS_HDR_TYPE_HEARTBEAT:
    335		return CPL_TX_TLS_SFO_TYPE_HEARTBEAT;
    336	}
    337	return CPL_TX_TLS_SFO_TYPE_DATA;
    338}
    339
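        /*
         * Prepend the TLS transmit headers to an skb that already holds the
         * key reference, the (optionally immediate) IVs and the payload: a
         * FW_TLSTX_DATA_WR followed by a CPL_TX_TLS_SFO carrying the record
         * type, segment length and the crypto SCMD state, with the
         * incrementing TX sequence number placed in scmd1.
         */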
    340static void tls_tx_data_wr(struct sock *sk, struct sk_buff *skb,
    341			   int dlen, int tls_immd, u32 credits,
    342			   int expn, int pdus)
    343{
    344	struct fw_tlstx_data_wr *req_wr;
    345	struct cpl_tx_tls_sfo *req_cpl;
    346	unsigned int wr_ulp_mode_force;
    347	struct tls_scmd *updated_scmd;
    348	unsigned char data_type;
    349	struct chtls_sock *csk;
    350	struct net_device *dev;
    351	struct chtls_hws *hws;
    352	struct tls_scmd *scmd;
    353	struct adapter *adap;
    354	unsigned char *req;
    355	int immd_len;
    356	int iv_imm;
    357	int len;
    358
    359	csk = rcu_dereference_sk_user_data(sk);
    360	iv_imm = skb_ulp_tls_iv_imm(skb);
    361	dev = csk->egress_dev;
    362	adap = netdev2adap(dev);
    363	hws = &csk->tlshws;
    364	scmd = &hws->scmd;
    365	len = dlen + expn;
    366
    367	dlen = (dlen < hws->mfs) ? dlen : hws->mfs;
    368	atomic_inc(&adap->chcr_stats.tls_pdu_tx);
    369
    370	updated_scmd = scmd;
    371	updated_scmd->seqno_numivs &= 0xffffff80;
    372	updated_scmd->seqno_numivs |= SCMD_NUM_IVS_V(pdus);
    373	hws->scmd = *updated_scmd;
    374
    375	req = (unsigned char *)__skb_push(skb, sizeof(struct cpl_tx_tls_sfo));
    376	req_cpl = (struct cpl_tx_tls_sfo *)req;
    377	req = (unsigned char *)__skb_push(skb, (sizeof(struct
    378				fw_tlstx_data_wr)));
    379
    380	req_wr = (struct fw_tlstx_data_wr *)req;
    381	immd_len = (tls_immd ? dlen : 0);
    382	req_wr->op_to_immdlen =
    383		htonl(FW_WR_OP_V(FW_TLSTX_DATA_WR) |
    384		FW_TLSTX_DATA_WR_COMPL_V(1) |
    385		FW_TLSTX_DATA_WR_IMMDLEN_V(immd_len));
    386	req_wr->flowid_len16 = htonl(FW_TLSTX_DATA_WR_FLOWID_V(csk->tid) |
    387				     FW_TLSTX_DATA_WR_LEN16_V(credits));
    388	wr_ulp_mode_force = TX_ULP_MODE_V(ULP_MODE_TLS);
    389
    390	if (is_sg_request(skb))
    391		wr_ulp_mode_force |= FW_OFLD_TX_DATA_WR_ALIGNPLD_F |
    392			((tcp_sk(sk)->nonagle & TCP_NAGLE_OFF) ? 0 :
    393			FW_OFLD_TX_DATA_WR_SHOVE_F);
    394
    395	req_wr->lsodisable_to_flags =
    396			htonl(TX_ULP_MODE_V(ULP_MODE_TLS) |
    397			      TX_URG_V(skb_urgent(skb)) |
    398			      T6_TX_FORCE_F | wr_ulp_mode_force |
    399			      TX_SHOVE_V((!csk_flag(sk, CSK_TX_MORE_DATA)) &&
    400					 skb_queue_empty(&csk->txq)));
    401
    402	req_wr->ctxloc_to_exp =
    403			htonl(FW_TLSTX_DATA_WR_NUMIVS_V(pdus) |
    404			      FW_TLSTX_DATA_WR_EXP_V(expn) |
    405			      FW_TLSTX_DATA_WR_CTXLOC_V(CHTLS_KEY_CONTEXT_DDR) |
    406			      FW_TLSTX_DATA_WR_IVDSGL_V(!iv_imm) |
    407			      FW_TLSTX_DATA_WR_KEYSIZE_V(hws->keylen >> 4));
    408
    409	/* Fill in the length */
    410	req_wr->plen = htonl(len);
    411	req_wr->mfs = htons(hws->mfs);
    412	req_wr->adjustedplen_pkd =
    413		htons(FW_TLSTX_DATA_WR_ADJUSTEDPLEN_V(hws->adjustlen));
    414	req_wr->expinplenmax_pkd =
    415		htons(FW_TLSTX_DATA_WR_EXPINPLENMAX_V(hws->expansion));
    416	req_wr->pdusinplenmax_pkd =
    417		FW_TLSTX_DATA_WR_PDUSINPLENMAX_V(hws->pdus);
    418	req_wr->r10 = 0;
    419
    420	data_type = tls_content_type(ULP_SKB_CB(skb)->ulp.tls.type);
    421	req_cpl->op_to_seg_len = htonl(CPL_TX_TLS_SFO_OPCODE_V(CPL_TX_TLS_SFO) |
    422				       CPL_TX_TLS_SFO_DATA_TYPE_V(data_type) |
    423				       CPL_TX_TLS_SFO_CPL_LEN_V(2) |
    424				       CPL_TX_TLS_SFO_SEG_LEN_V(dlen));
    425	req_cpl->pld_len = htonl(len - expn);
    426
    427	req_cpl->type_protover = htonl(CPL_TX_TLS_SFO_TYPE_V
    428		((data_type == CPL_TX_TLS_SFO_TYPE_HEARTBEAT) ?
    429		TLS_HDR_TYPE_HEARTBEAT : 0) |
    430		CPL_TX_TLS_SFO_PROTOVER_V(0));
    431
    432	/* create the s-command */
    433	req_cpl->r1_lo = 0;
    434	req_cpl->seqno_numivs  = cpu_to_be32(hws->scmd.seqno_numivs);
    435	req_cpl->ivgen_hdrlen = cpu_to_be32(hws->scmd.ivgen_hdrlen);
    436	req_cpl->scmd1 = cpu_to_be64(tlstx_incr_seqnum(hws));
    437}
    438
    439/*
    440 * Calculate the TLS data expansion size
    441 */
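        /*
         * For AES-GCM each record expands by a fixed per-PDU overhead of
         * TLS_HEADER_LENGTH + AEAD_EXPLICIT_DATA_SIZE + GCM_TAG_SIZE bytes.
         * The expansion is that overhead times the number of full (and any
         * partial) MFS-sized fragments; in the fullpdu case the PDU count is
         * clamped to the range 1..32 and returned through *pducnt.
         */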
    442static int chtls_expansion_size(struct sock *sk, int data_len,
    443				int fullpdu,
    444				unsigned short *pducnt)
    445{
    446	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
    447	struct chtls_hws *hws = &csk->tlshws;
    448	struct tls_scmd *scmd = &hws->scmd;
    449	int fragsize = hws->mfs;
    450	int expnsize = 0;
    451	int fragleft;
    452	int fragcnt;
    453	int expppdu;
    454
    455	if (SCMD_CIPH_MODE_G(scmd->seqno_numivs) ==
    456	    SCMD_CIPH_MODE_AES_GCM) {
    457		expppdu = GCM_TAG_SIZE + AEAD_EXPLICIT_DATA_SIZE +
    458			  TLS_HEADER_LENGTH;
    459
    460		if (fullpdu) {
    461			*pducnt = data_len / (expppdu + fragsize);
    462			if (*pducnt > 32)
    463				*pducnt = 32;
    464			else if (!*pducnt)
    465				*pducnt = 1;
    466			expnsize = (*pducnt) * expppdu;
    467			return expnsize;
    468		}
    469		fragcnt = (data_len / fragsize);
    470		expnsize =  fragcnt * expppdu;
    471		fragleft = data_len % fragsize;
    472		if (fragleft > 0)
    473			expnsize += expppdu;
    474	}
    475	return expnsize;
    476}
    477
    478/* WR with IV, KEY and CPL SFO added */
    479static void make_tlstx_data_wr(struct sock *sk, struct sk_buff *skb,
    480			       int tls_tx_imm, int tls_len, u32 credits)
    481{
    482	unsigned short pdus_per_ulp = 0;
    483	struct chtls_sock *csk;
    484	struct chtls_hws *hws;
    485	int expn_sz;
    486	int pdus;
    487
    488	csk = rcu_dereference_sk_user_data(sk);
    489	hws = &csk->tlshws;
    490	pdus = DIV_ROUND_UP(tls_len, hws->mfs);
    491	expn_sz = chtls_expansion_size(sk, tls_len, 0, NULL);
    492	if (!hws->compute) {
    493		hws->expansion = chtls_expansion_size(sk,
    494						      hws->fcplenmax,
    495						      1, &pdus_per_ulp);
    496		hws->pdus = pdus_per_ulp;
    497		hws->adjustlen = hws->pdus *
    498			((hws->expansion / hws->pdus) + hws->mfs);
    499		hws->compute = 1;
    500	}
    501	if (tls_copy_ivs(sk, skb))
    502		return;
    503	tls_copy_tx_key(sk, skb);
    504	tls_tx_data_wr(sk, skb, tls_len, tls_tx_imm, credits, expn_sz, pdus);
    505	hws->tx_seq_no += (pdus - 1);
    506}
    507
    508static void make_tx_data_wr(struct sock *sk, struct sk_buff *skb,
    509			    unsigned int immdlen, int len,
    510			    u32 credits, u32 compl)
    511{
    512	struct fw_ofld_tx_data_wr *req;
    513	unsigned int wr_ulp_mode_force;
    514	struct chtls_sock *csk;
    515	unsigned int opcode;
    516
    517	csk = rcu_dereference_sk_user_data(sk);
    518	opcode = FW_OFLD_TX_DATA_WR;
    519
    520	req = (struct fw_ofld_tx_data_wr *)__skb_push(skb, sizeof(*req));
    521	req->op_to_immdlen = htonl(WR_OP_V(opcode) |
    522				FW_WR_COMPL_V(compl) |
    523				FW_WR_IMMDLEN_V(immdlen));
    524	req->flowid_len16 = htonl(FW_WR_FLOWID_V(csk->tid) |
    525				FW_WR_LEN16_V(credits));
    526
    527	wr_ulp_mode_force = TX_ULP_MODE_V(csk->ulp_mode);
    528	if (is_sg_request(skb))
    529		wr_ulp_mode_force |= FW_OFLD_TX_DATA_WR_ALIGNPLD_F |
    530			((tcp_sk(sk)->nonagle & TCP_NAGLE_OFF) ? 0 :
    531				FW_OFLD_TX_DATA_WR_SHOVE_F);
    532
    533	req->tunnel_to_proxy = htonl(wr_ulp_mode_force |
    534			TX_URG_V(skb_urgent(skb)) |
    535			TX_SHOVE_V((!csk_flag(sk, CSK_TX_MORE_DATA)) &&
    536				   skb_queue_empty(&csk->txq)));
    537	req->plen = htonl(len);
    538}
    539
    540static int chtls_wr_size(struct chtls_sock *csk, const struct sk_buff *skb,
    541			 bool size)
    542{
    543	int wr_size;
    544
    545	wr_size = TLS_WR_CPL_LEN;
    546	wr_size += KEY_ON_MEM_SZ;
    547	wr_size += ivs_size(csk->sk, skb);
    548
    549	if (size)
    550		return wr_size;
    551
    552	/* frags counted for IV dsgl */
    553	if (!skb_ulp_tls_iv_imm(skb))
    554		skb_shinfo(skb)->nr_frags++;
    555
    556	return wr_size;
    557}
    558
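        /*
         * Decide whether the skb can be sent as immediate data inside the
         * work request: peeked or oversized skbs always use a gather list;
         * otherwise the payload plus the WR header (the TLS WR size for
         * inline TLS, a plain fw_ofld_tx_data_wr otherwise) must fit within
         * MAX_IMM_OFLD_TX_DATA_WR_LEN.
         */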
    559static bool is_ofld_imm(struct chtls_sock *csk, const struct sk_buff *skb)
    560{
    561	int length = skb->len;
    562
    563	if (skb->peeked || skb->len > MAX_IMM_ULPTX_WR_LEN)
    564		return false;
    565
    566	if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR)) {
    567		/* Check TLS header len for Immediate */
    568		if (csk->ulp_mode == ULP_MODE_TLS &&
    569		    skb_ulp_tls_inline(skb))
    570			length += chtls_wr_size(csk, skb, true);
    571		else
    572			length += sizeof(struct fw_ofld_tx_data_wr);
    573
    574		return length <= MAX_IMM_OFLD_TX_DATA_WR_LEN;
    575	}
    576	return true;
    577}
    578
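        /*
         * A flit is an 8-byte unit on the adapter interface: immdlen / 8
         * covers the WR headers, and sgl_len() converts the number of page
         * fragments (plus one when linear data follows the headers) into the
         * flits occupied by the scatter/gather list.
         */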
    579static unsigned int calc_tx_flits(const struct sk_buff *skb,
    580				  unsigned int immdlen)
    581{
    582	unsigned int flits, cnt;
    583
    584	flits = immdlen / 8;   /* headers */
    585	cnt = skb_shinfo(skb)->nr_frags;
    586	if (skb_tail_pointer(skb) != skb_transport_header(skb))
    587		cnt++;
    588	return flits + sgl_len(cnt);
    589}
    590
    591static void arp_failure_discard(void *handle, struct sk_buff *skb)
    592{
    593	kfree_skb(skb);
    594}
    595
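        /*
         * Drain the socket's tx queue while work-request credits remain:
         * compute each skb's cost in 16-byte credits, emit a FLOWC first if
         * no data has been sent yet, build either a TLS or a plain TX_DATA
         * work request for skbs that still need a header, stash the credit
         * count in skb->csum (apparently for credit-return accounting), and
         * hand the skb to the L2T transmit path.
         */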
    596int chtls_push_frames(struct chtls_sock *csk, int comp)
    597{
    598	struct chtls_hws *hws = &csk->tlshws;
    599	struct tcp_sock *tp;
    600	struct sk_buff *skb;
    601	int total_size = 0;
    602	struct sock *sk;
    603	int wr_size;
    604
    605	wr_size = sizeof(struct fw_ofld_tx_data_wr);
    606	sk = csk->sk;
    607	tp = tcp_sk(sk);
    608
    609	if (unlikely(sk_in_state(sk, TCPF_SYN_SENT | TCPF_CLOSE)))
    610		return 0;
    611
    612	if (unlikely(csk_flag(sk, CSK_ABORT_SHUTDOWN)))
    613		return 0;
    614
    615	while (csk->wr_credits && (skb = skb_peek(&csk->txq)) &&
    616	       (!(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_HOLD) ||
    617		skb_queue_len(&csk->txq) > 1)) {
    618		unsigned int credit_len = skb->len;
    619		unsigned int credits_needed;
    620		unsigned int completion = 0;
     621		int tls_len = skb->len; /* TLS data len before IV/key */
    622		unsigned int immdlen;
    623		int len = skb->len;    /* length [ulp bytes] inserted by hw */
    624		int flowclen16 = 0;
    625		int tls_tx_imm = 0;
    626
    627		immdlen = skb->len;
    628		if (!is_ofld_imm(csk, skb)) {
    629			immdlen = skb_transport_offset(skb);
    630			if (skb_ulp_tls_inline(skb))
    631				wr_size = chtls_wr_size(csk, skb, false);
    632			credit_len = 8 * calc_tx_flits(skb, immdlen);
    633		} else {
    634			if (skb_ulp_tls_inline(skb)) {
    635				wr_size = chtls_wr_size(csk, skb, false);
    636				tls_tx_imm = 1;
    637			}
    638		}
    639		if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR))
    640			credit_len += wr_size;
    641		credits_needed = DIV_ROUND_UP(credit_len, 16);
    642		if (!csk_flag_nochk(csk, CSK_TX_DATA_SENT)) {
    643			flowclen16 = send_tx_flowc_wr(sk, 1, tp->snd_nxt,
    644						      tp->rcv_nxt);
    645			if (flowclen16 <= 0)
    646				break;
    647			csk->wr_credits -= flowclen16;
    648			csk->wr_unacked += flowclen16;
    649			csk->wr_nondata += flowclen16;
    650			csk_set_flag(csk, CSK_TX_DATA_SENT);
    651		}
    652
    653		if (csk->wr_credits < credits_needed) {
    654			if (skb_ulp_tls_inline(skb) &&
    655			    !skb_ulp_tls_iv_imm(skb))
    656				skb_shinfo(skb)->nr_frags--;
    657			break;
    658		}
    659
    660		__skb_unlink(skb, &csk->txq);
    661		skb_set_queue_mapping(skb, (csk->txq_idx << 1) |
    662				      CPL_PRIORITY_DATA);
    663		if (hws->ofld)
    664			hws->txqid = (skb->queue_mapping >> 1);
    665		skb->csum = (__force __wsum)(credits_needed + csk->wr_nondata);
    666		csk->wr_credits -= credits_needed;
    667		csk->wr_unacked += credits_needed;
    668		csk->wr_nondata = 0;
    669		enqueue_wr(csk, skb);
    670
    671		if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR)) {
    672			if ((comp && csk->wr_unacked == credits_needed) ||
    673			    (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_COMPL) ||
    674			    csk->wr_unacked >= csk->wr_max_credits / 2) {
    675				completion = 1;
    676				csk->wr_unacked = 0;
    677			}
    678			if (skb_ulp_tls_inline(skb))
    679				make_tlstx_data_wr(sk, skb, tls_tx_imm,
    680						   tls_len, credits_needed);
    681			else
    682				make_tx_data_wr(sk, skb, immdlen, len,
    683						credits_needed, completion);
    684			tp->snd_nxt += len;
    685			tp->lsndtime = tcp_jiffies32;
    686			if (completion)
    687				ULP_SKB_CB(skb)->flags &= ~ULPCB_FLAG_NEED_HDR;
    688		} else {
    689			struct cpl_close_con_req *req = cplhdr(skb);
    690			unsigned int cmd  = CPL_OPCODE_G(ntohl
    691					     (OPCODE_TID(req)));
    692
    693			if (cmd == CPL_CLOSE_CON_REQ)
    694				csk_set_flag(csk,
    695					     CSK_CLOSE_CON_REQUESTED);
    696
    697			if ((ULP_SKB_CB(skb)->flags & ULPCB_FLAG_COMPL) &&
    698			    (csk->wr_unacked >= csk->wr_max_credits / 2)) {
    699				req->wr.wr_hi |= htonl(FW_WR_COMPL_F);
    700				csk->wr_unacked = 0;
    701			}
    702		}
    703		total_size += skb->truesize;
    704		if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_BARRIER)
    705			csk_set_flag(csk, CSK_TX_WAIT_IDLE);
    706		t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
    707		cxgb4_l2t_send(csk->egress_dev, skb, csk->l2t_entry);
    708	}
    709	sk->sk_wmem_queued -= total_size;
    710	return total_size;
    711}
    712
    713static void mark_urg(struct tcp_sock *tp, int flags,
    714		     struct sk_buff *skb)
    715{
    716	if (unlikely(flags & MSG_OOB)) {
    717		tp->snd_up = tp->write_seq;
    718		ULP_SKB_CB(skb)->flags = ULPCB_FLAG_URG |
    719					 ULPCB_FLAG_BARRIER |
    720					 ULPCB_FLAG_NO_APPEND |
    721					 ULPCB_FLAG_NEED_HDR;
    722	}
    723}
    724
    725/*
     726 * Returns true if a connection should send more data to the TCP engine.
    727 */
    728static bool should_push(struct sock *sk)
    729{
    730	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
    731	struct chtls_dev *cdev = csk->cdev;
    732	struct tcp_sock *tp = tcp_sk(sk);
    733
    734	/*
    735	 * If we've released our offload resources there's nothing to do ...
    736	 */
    737	if (!cdev)
    738		return false;
    739
    740	/*
     741	 * If there aren't any work requests in flight, or there isn't enough
     742	 * data in flight, or Nagle is off, then send the current TX_DATA;
     743	 * otherwise hold it and wait to accumulate more data.
    744	 */
    745	return csk->wr_credits == csk->wr_max_credits ||
    746		(tp->nonagle & TCP_NAGLE_OFF);
    747}
    748
    749/*
    750 * Returns true if a TCP socket is corked.
    751 */
    752static bool corked(const struct tcp_sock *tp, int flags)
    753{
    754	return (flags & MSG_MORE) || (tp->nonagle & TCP_NAGLE_CORK);
    755}
    756
    757/*
    758 * Returns true if a send should try to push new data.
    759 */
    760static bool send_should_push(struct sock *sk, int flags)
    761{
    762	return should_push(sk) && !corked(tcp_sk(sk), flags);
    763}
    764
    765void chtls_tcp_push(struct sock *sk, int flags)
    766{
    767	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
    768	int qlen = skb_queue_len(&csk->txq);
    769
    770	if (likely(qlen)) {
    771		struct sk_buff *skb = skb_peek_tail(&csk->txq);
    772		struct tcp_sock *tp = tcp_sk(sk);
    773
    774		mark_urg(tp, flags, skb);
    775
    776		if (!(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) &&
    777		    corked(tp, flags)) {
    778			ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_HOLD;
    779			return;
    780		}
    781
    782		ULP_SKB_CB(skb)->flags &= ~ULPCB_FLAG_HOLD;
    783		if (qlen == 1 &&
    784		    ((ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) ||
    785		     should_push(sk)))
    786			chtls_push_frames(csk, 1);
    787	}
    788}
    789
    790/*
     791 * Calculate the size for a new send sk_buff.  We use the maximum size so we
     792 * can pack lots of data into it, unless we plan to send it immediately, in
     793 * which case we size it more tightly.
    794 *
    795 * Note: we don't bother compensating for MSS < PAGE_SIZE because it doesn't
    796 * arise in normal cases and when it does we are just wasting memory.
    797 */
    798static int select_size(struct sock *sk, int io_len, int flags, int len)
    799{
    800	const int pgbreak = SKB_MAX_HEAD(len);
    801
    802	/*
    803	 * If the data wouldn't fit in the main body anyway, put only the
    804	 * header in the main body so it can use immediate data and place all
    805	 * the payload in page fragments.
    806	 */
    807	if (io_len > pgbreak)
    808		return 0;
    809
    810	/*
    811	 * If we will be accumulating payload get a large main body.
    812	 */
    813	if (!send_should_push(sk, flags))
    814		return pgbreak;
    815
    816	return io_len;
    817}
    818
    819void skb_entail(struct sock *sk, struct sk_buff *skb, int flags)
    820{
    821	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
    822	struct tcp_sock *tp = tcp_sk(sk);
    823
    824	ULP_SKB_CB(skb)->seq = tp->write_seq;
    825	ULP_SKB_CB(skb)->flags = flags;
    826	__skb_queue_tail(&csk->txq, skb);
    827	sk->sk_wmem_queued += skb->truesize;
    828
    829	if (TCP_PAGE(sk) && TCP_OFF(sk)) {
    830		put_page(TCP_PAGE(sk));
    831		TCP_PAGE(sk) = NULL;
    832		TCP_OFF(sk) = 0;
    833	}
    834}
    835
    836static struct sk_buff *get_tx_skb(struct sock *sk, int size)
    837{
    838	struct sk_buff *skb;
    839
    840	skb = alloc_skb(size + TX_HEADER_LEN, sk->sk_allocation);
    841	if (likely(skb)) {
    842		skb_reserve(skb, TX_HEADER_LEN);
    843		skb_entail(sk, skb, ULPCB_FLAG_NEED_HDR);
    844		skb_reset_transport_header(skb);
    845	}
    846	return skb;
    847}
    848
    849static struct sk_buff *get_record_skb(struct sock *sk, int size, bool zcopy)
    850{
    851	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
    852	struct sk_buff *skb;
    853
    854	skb = alloc_skb(((zcopy ? 0 : size) + TX_TLSHDR_LEN +
    855			KEY_ON_MEM_SZ + max_ivs_size(sk, size)),
    856			sk->sk_allocation);
    857	if (likely(skb)) {
    858		skb_reserve(skb, (TX_TLSHDR_LEN +
    859			    KEY_ON_MEM_SZ + max_ivs_size(sk, size)));
    860		skb_entail(sk, skb, ULPCB_FLAG_NEED_HDR);
    861		skb_reset_transport_header(skb);
    862		ULP_SKB_CB(skb)->ulp.tls.ofld = 1;
    863		ULP_SKB_CB(skb)->ulp.tls.type = csk->tlshws.type;
    864	}
    865	return skb;
    866}
    867
    868static void tx_skb_finalize(struct sk_buff *skb)
    869{
    870	struct ulp_skb_cb *cb = ULP_SKB_CB(skb);
    871
    872	if (!(cb->flags & ULPCB_FLAG_NO_HDR))
    873		cb->flags = ULPCB_FLAG_NEED_HDR;
    874	cb->flags |= ULPCB_FLAG_NO_APPEND;
    875}
    876
    877static void push_frames_if_head(struct sock *sk)
    878{
    879	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
    880
    881	if (skb_queue_len(&csk->txq) == 1)
    882		chtls_push_frames(csk, 1);
    883}
    884
    885static int chtls_skb_copy_to_page_nocache(struct sock *sk,
    886					  struct iov_iter *from,
    887					  struct sk_buff *skb,
    888					  struct page *page,
    889					  int off, int copy)
    890{
    891	int err;
    892
    893	err = skb_do_copy_data_nocache(sk, skb, from, page_address(page) +
    894				       off, copy, skb->len);
    895	if (err)
    896		return err;
    897
    898	skb->len             += copy;
    899	skb->data_len        += copy;
    900	skb->truesize        += copy;
    901	sk->sk_wmem_queued   += copy;
    902	return 0;
    903}
    904
    905static bool csk_mem_free(struct chtls_dev *cdev, struct sock *sk)
    906{
    907	return (cdev->max_host_sndbuf - sk->sk_wmem_queued > 0);
    908}
    909
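        /*
         * Wait for send-buffer space against the chtls device's
         * max_host_sndbuf budget (see csk_mem_free() above); this closely
         * follows the core sk_stream_wait_memory() logic, including the
         * small randomized delay applied when memory is nominally free.
         */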
    910static int csk_wait_memory(struct chtls_dev *cdev,
    911			   struct sock *sk, long *timeo_p)
    912{
    913	DEFINE_WAIT_FUNC(wait, woken_wake_function);
    914	int err = 0;
    915	long current_timeo;
    916	long vm_wait = 0;
    917	bool noblock;
    918
    919	current_timeo = *timeo_p;
    920	noblock = (*timeo_p ? false : true);
    921	if (csk_mem_free(cdev, sk)) {
    922		current_timeo = (prandom_u32() % (HZ / 5)) + 2;
    923		vm_wait = (prandom_u32() % (HZ / 5)) + 2;
    924	}
    925
    926	add_wait_queue(sk_sleep(sk), &wait);
    927	while (1) {
    928		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
    929
    930		if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
    931			goto do_error;
    932		if (!*timeo_p) {
    933			if (noblock)
    934				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
    935			goto do_nonblock;
    936		}
    937		if (signal_pending(current))
    938			goto do_interrupted;
    939		sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
    940		if (csk_mem_free(cdev, sk) && !vm_wait)
    941			break;
    942
    943		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
    944		sk->sk_write_pending++;
    945		sk_wait_event(sk, &current_timeo, sk->sk_err ||
    946			      (sk->sk_shutdown & SEND_SHUTDOWN) ||
    947			      (csk_mem_free(cdev, sk) && !vm_wait), &wait);
    948		sk->sk_write_pending--;
    949
    950		if (vm_wait) {
    951			vm_wait -= current_timeo;
    952			current_timeo = *timeo_p;
    953			if (current_timeo != MAX_SCHEDULE_TIMEOUT) {
    954				current_timeo -= vm_wait;
    955				if (current_timeo < 0)
    956					current_timeo = 0;
    957			}
    958			vm_wait = 0;
    959		}
    960		*timeo_p = current_timeo;
    961	}
    962do_rm_wq:
    963	remove_wait_queue(sk_sleep(sk), &wait);
    964	return err;
    965do_error:
    966	err = -EPIPE;
    967	goto do_rm_wq;
    968do_nonblock:
    969	err = -EAGAIN;
    970	goto do_rm_wq;
    971do_interrupted:
    972	err = sock_intr_errno(*timeo_p);
    973	goto do_rm_wq;
    974}
    975
    976static int chtls_proccess_cmsg(struct sock *sk, struct msghdr *msg,
    977			       unsigned char *record_type)
    978{
    979	struct cmsghdr *cmsg;
    980	int rc = -EINVAL;
    981
    982	for_each_cmsghdr(cmsg, msg) {
    983		if (!CMSG_OK(msg, cmsg))
    984			return -EINVAL;
    985		if (cmsg->cmsg_level != SOL_TLS)
    986			continue;
    987
    988		switch (cmsg->cmsg_type) {
    989		case TLS_SET_RECORD_TYPE:
    990			if (cmsg->cmsg_len < CMSG_LEN(sizeof(*record_type)))
    991				return -EINVAL;
    992
    993			if (msg->msg_flags & MSG_MORE)
    994				return -EINVAL;
    995
    996			*record_type = *(unsigned char *)CMSG_DATA(cmsg);
    997			rc = 0;
    998			break;
    999		default:
   1000			return -EINVAL;
   1001		}
   1002	}
   1003
   1004	return rc;
   1005}
   1006
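        /*
         * sendmsg() for offloaded connections: data is staged on csk->txq in
         * mss-sized skbs (TLS records get headroom reserved for the TLS WR,
         * key reference and IVs via get_record_skb()), the TLS record type is
         * taken from the SOL_TLS control message, and completed skbs are
         * pushed to hardware through chtls_push_frames().
         */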
   1007int chtls_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
   1008{
   1009	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
   1010	struct chtls_dev *cdev = csk->cdev;
   1011	struct tcp_sock *tp = tcp_sk(sk);
   1012	struct sk_buff *skb;
   1013	int mss, flags, err;
   1014	int recordsz = 0;
   1015	int copied = 0;
   1016	long timeo;
   1017
   1018	lock_sock(sk);
   1019	flags = msg->msg_flags;
   1020	timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
   1021
   1022	if (!sk_in_state(sk, TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) {
   1023		err = sk_stream_wait_connect(sk, &timeo);
   1024		if (err)
   1025			goto out_err;
   1026	}
   1027
   1028	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
   1029	err = -EPIPE;
   1030	if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
   1031		goto out_err;
   1032
   1033	mss = csk->mss;
   1034	csk_set_flag(csk, CSK_TX_MORE_DATA);
   1035
   1036	while (msg_data_left(msg)) {
   1037		int copy = 0;
   1038
   1039		skb = skb_peek_tail(&csk->txq);
   1040		if (skb) {
   1041			copy = mss - skb->len;
   1042			skb->ip_summed = CHECKSUM_UNNECESSARY;
   1043		}
   1044		if (!csk_mem_free(cdev, sk))
   1045			goto wait_for_sndbuf;
   1046
   1047		if (is_tls_tx(csk) && !csk->tlshws.txleft) {
   1048			unsigned char record_type = TLS_RECORD_TYPE_DATA;
   1049
   1050			if (unlikely(msg->msg_controllen)) {
   1051				err = chtls_proccess_cmsg(sk, msg,
   1052							  &record_type);
   1053				if (err)
   1054					goto out_err;
   1055
    1056				/* Avoid appending TLS handshake or alert records to TLS data */
   1057				if (skb)
   1058					tx_skb_finalize(skb);
   1059			}
   1060
   1061			recordsz = size;
   1062			csk->tlshws.txleft = recordsz;
   1063			csk->tlshws.type = record_type;
   1064		}
   1065
   1066		if (!skb || (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) ||
   1067		    copy <= 0) {
   1068new_buf:
   1069			if (skb) {
   1070				tx_skb_finalize(skb);
   1071				push_frames_if_head(sk);
   1072			}
   1073
   1074			if (is_tls_tx(csk)) {
   1075				skb = get_record_skb(sk,
   1076						     select_size(sk,
   1077								 recordsz,
   1078								 flags,
   1079								 TX_TLSHDR_LEN),
   1080								 false);
   1081			} else {
   1082				skb = get_tx_skb(sk,
   1083						 select_size(sk, size, flags,
   1084							     TX_HEADER_LEN));
   1085			}
   1086			if (unlikely(!skb))
   1087				goto wait_for_memory;
   1088
   1089			skb->ip_summed = CHECKSUM_UNNECESSARY;
   1090			copy = mss;
   1091		}
   1092		if (copy > size)
   1093			copy = size;
   1094
   1095		if (skb_tailroom(skb) > 0) {
   1096			copy = min(copy, skb_tailroom(skb));
   1097			if (is_tls_tx(csk))
   1098				copy = min_t(int, copy, csk->tlshws.txleft);
   1099			err = skb_add_data_nocache(sk, skb,
   1100						   &msg->msg_iter, copy);
   1101			if (err)
   1102				goto do_fault;
   1103		} else {
   1104			int i = skb_shinfo(skb)->nr_frags;
   1105			struct page *page = TCP_PAGE(sk);
   1106			int pg_size = PAGE_SIZE;
   1107			int off = TCP_OFF(sk);
   1108			bool merge;
   1109
   1110			if (page)
   1111				pg_size = page_size(page);
   1112			if (off < pg_size &&
   1113			    skb_can_coalesce(skb, i, page, off)) {
   1114				merge = true;
   1115				goto copy;
   1116			}
   1117			merge = false;
   1118			if (i == (is_tls_tx(csk) ? (MAX_SKB_FRAGS - 1) :
   1119			    MAX_SKB_FRAGS))
   1120				goto new_buf;
   1121
   1122			if (page && off == pg_size) {
   1123				put_page(page);
   1124				TCP_PAGE(sk) = page = NULL;
   1125				pg_size = PAGE_SIZE;
   1126			}
   1127
   1128			if (!page) {
   1129				gfp_t gfp = sk->sk_allocation;
   1130				int order = cdev->send_page_order;
   1131
   1132				if (order) {
   1133					page = alloc_pages(gfp | __GFP_COMP |
   1134							   __GFP_NOWARN |
   1135							   __GFP_NORETRY,
   1136							   order);
   1137					if (page)
   1138						pg_size <<= order;
   1139				}
   1140				if (!page) {
   1141					page = alloc_page(gfp);
   1142					pg_size = PAGE_SIZE;
   1143				}
   1144				if (!page)
   1145					goto wait_for_memory;
   1146				off = 0;
   1147			}
   1148copy:
   1149			if (copy > pg_size - off)
   1150				copy = pg_size - off;
   1151			if (is_tls_tx(csk))
   1152				copy = min_t(int, copy, csk->tlshws.txleft);
   1153
   1154			err = chtls_skb_copy_to_page_nocache(sk, &msg->msg_iter,
   1155							     skb, page,
   1156							     off, copy);
   1157			if (unlikely(err)) {
   1158				if (!TCP_PAGE(sk)) {
   1159					TCP_PAGE(sk) = page;
   1160					TCP_OFF(sk) = 0;
   1161				}
   1162				goto do_fault;
   1163			}
   1164			/* Update the skb. */
   1165			if (merge) {
   1166				skb_frag_size_add(
   1167						&skb_shinfo(skb)->frags[i - 1],
   1168						copy);
   1169			} else {
   1170				skb_fill_page_desc(skb, i, page, off, copy);
   1171				if (off + copy < pg_size) {
    1172					/* space left, keep the page */
   1173					get_page(page);
   1174					TCP_PAGE(sk) = page;
   1175				} else {
   1176					TCP_PAGE(sk) = NULL;
   1177				}
   1178			}
   1179			TCP_OFF(sk) = off + copy;
   1180		}
   1181		if (unlikely(skb->len == mss))
   1182			tx_skb_finalize(skb);
   1183		tp->write_seq += copy;
   1184		copied += copy;
   1185		size -= copy;
   1186
   1187		if (is_tls_tx(csk))
   1188			csk->tlshws.txleft -= copy;
   1189
   1190		if (corked(tp, flags) &&
   1191		    (sk_stream_wspace(sk) < sk_stream_min_wspace(sk)))
   1192			ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_NO_APPEND;
   1193
   1194		if (size == 0)
   1195			goto out;
   1196
   1197		if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND)
   1198			push_frames_if_head(sk);
   1199		continue;
   1200wait_for_sndbuf:
   1201		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
   1202wait_for_memory:
   1203		err = csk_wait_memory(cdev, sk, &timeo);
   1204		if (err)
   1205			goto do_error;
   1206	}
   1207out:
   1208	csk_reset_flag(csk, CSK_TX_MORE_DATA);
   1209	if (copied)
   1210		chtls_tcp_push(sk, flags);
   1211done:
   1212	release_sock(sk);
   1213	return copied;
   1214do_fault:
   1215	if (!skb->len) {
   1216		__skb_unlink(skb, &csk->txq);
   1217		sk->sk_wmem_queued -= skb->truesize;
   1218		__kfree_skb(skb);
   1219	}
   1220do_error:
   1221	if (copied)
   1222		goto out;
   1223out_err:
   1224	if (csk_conn_inline(csk))
   1225		csk_reset_flag(csk, CSK_TX_MORE_DATA);
   1226	copied = sk_stream_error(sk, flags, err);
   1227	goto done;
   1228}
   1229
   1230int chtls_sendpage(struct sock *sk, struct page *page,
   1231		   int offset, size_t size, int flags)
   1232{
   1233	struct chtls_sock *csk;
   1234	struct chtls_dev *cdev;
   1235	int mss, err, copied;
   1236	struct tcp_sock *tp;
   1237	long timeo;
   1238
   1239	tp = tcp_sk(sk);
   1240	copied = 0;
   1241	csk = rcu_dereference_sk_user_data(sk);
   1242	cdev = csk->cdev;
   1243	lock_sock(sk);
   1244	timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
   1245
   1246	err = sk_stream_wait_connect(sk, &timeo);
   1247	if (!sk_in_state(sk, TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) &&
   1248	    err != 0)
   1249		goto out_err;
   1250
   1251	mss = csk->mss;
   1252	csk_set_flag(csk, CSK_TX_MORE_DATA);
   1253
   1254	while (size > 0) {
   1255		struct sk_buff *skb = skb_peek_tail(&csk->txq);
   1256		int copy, i;
   1257
   1258		if (!skb || (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) ||
   1259		    (copy = mss - skb->len) <= 0) {
   1260new_buf:
   1261			if (!csk_mem_free(cdev, sk))
   1262				goto wait_for_sndbuf;
   1263
   1264			if (is_tls_tx(csk)) {
   1265				skb = get_record_skb(sk,
   1266						     select_size(sk, size,
   1267								 flags,
   1268								 TX_TLSHDR_LEN),
   1269						     true);
   1270			} else {
   1271				skb = get_tx_skb(sk, 0);
   1272			}
   1273			if (!skb)
   1274				goto wait_for_memory;
   1275			copy = mss;
   1276		}
   1277		if (copy > size)
   1278			copy = size;
   1279
   1280		i = skb_shinfo(skb)->nr_frags;
   1281		if (skb_can_coalesce(skb, i, page, offset)) {
   1282			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
   1283		} else if (i < MAX_SKB_FRAGS) {
   1284			get_page(page);
   1285			skb_fill_page_desc(skb, i, page, offset, copy);
   1286		} else {
   1287			tx_skb_finalize(skb);
   1288			push_frames_if_head(sk);
   1289			goto new_buf;
   1290		}
   1291
   1292		skb->len += copy;
   1293		if (skb->len == mss)
   1294			tx_skb_finalize(skb);
   1295		skb->data_len += copy;
   1296		skb->truesize += copy;
   1297		sk->sk_wmem_queued += copy;
   1298		tp->write_seq += copy;
   1299		copied += copy;
   1300		offset += copy;
   1301		size -= copy;
   1302
   1303		if (corked(tp, flags) &&
   1304		    (sk_stream_wspace(sk) < sk_stream_min_wspace(sk)))
   1305			ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_NO_APPEND;
   1306
   1307		if (!size)
   1308			break;
   1309
   1310		if (unlikely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND))
   1311			push_frames_if_head(sk);
   1312		continue;
   1313wait_for_sndbuf:
   1314		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
   1315wait_for_memory:
   1316		err = csk_wait_memory(cdev, sk, &timeo);
   1317		if (err)
   1318			goto do_error;
   1319	}
   1320out:
   1321	csk_reset_flag(csk, CSK_TX_MORE_DATA);
   1322	if (copied)
   1323		chtls_tcp_push(sk, flags);
   1324done:
   1325	release_sock(sk);
   1326	return copied;
   1327
   1328do_error:
   1329	if (copied)
   1330		goto out;
   1331
   1332out_err:
   1333	if (csk_conn_inline(csk))
   1334		csk_reset_flag(csk, CSK_TX_MORE_DATA);
   1335	copied = sk_stream_error(sk, flags, err);
   1336	goto done;
   1337}
   1338
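        /*
         * Pick an advertised receive window of at least MIN_RCV_WND and at
         * most MAX_RCV_WND, tracking tcp_full_space(); when the window grows,
         * rcv_wup is pulled back so the extra space is announced with the
         * next RX credit return.
         */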
   1339static void chtls_select_window(struct sock *sk)
   1340{
   1341	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
   1342	struct tcp_sock *tp = tcp_sk(sk);
   1343	unsigned int wnd = tp->rcv_wnd;
   1344
   1345	wnd = max_t(unsigned int, wnd, tcp_full_space(sk));
   1346	wnd = max_t(unsigned int, MIN_RCV_WND, wnd);
   1347
   1348	if (wnd > MAX_RCV_WND)
   1349		wnd = MAX_RCV_WND;
   1350
   1351/*
   1352 * Check if we need to grow the receive window in response to an increase in
   1353 * the socket's receive buffer size.  Some applications increase the buffer
   1354 * size dynamically and rely on the window to grow accordingly.
   1355 */
   1356
   1357	if (wnd > tp->rcv_wnd) {
   1358		tp->rcv_wup -= wnd - tp->rcv_wnd;
   1359		tp->rcv_wnd = wnd;
   1360		/* Mark the receive window as updated */
   1361		csk_reset_flag(csk, CSK_UPDATE_RCV_WND);
   1362	}
   1363}
   1364
   1365/*
   1366 * Send RX credits through an RX_DATA_ACK CPL message.  We are permitted
   1367 * to return without sending the message in case we cannot allocate
   1368 * an sk_buff.  Returns the number of credits sent.
   1369 */
   1370static u32 send_rx_credits(struct chtls_sock *csk, u32 credits)
   1371{
   1372	struct cpl_rx_data_ack *req;
   1373	struct sk_buff *skb;
   1374
   1375	skb = alloc_skb(sizeof(*req), GFP_ATOMIC);
   1376	if (!skb)
   1377		return 0;
   1378	__skb_put(skb, sizeof(*req));
   1379	req = (struct cpl_rx_data_ack *)skb->head;
   1380
   1381	set_wr_txq(skb, CPL_PRIORITY_ACK, csk->port_id);
   1382	INIT_TP_WR(req, csk->tid);
   1383	OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK,
   1384						    csk->tid));
   1385	req->credit_dack = cpu_to_be32(RX_CREDITS_V(credits) |
   1386				       RX_FORCE_ACK_F);
   1387	cxgb4_ofld_send(csk->cdev->ports[csk->port_id], skb);
   1388	return credits;
   1389}
   1390
   1391#define CREDIT_RETURN_STATE (TCPF_ESTABLISHED | \
   1392			     TCPF_FIN_WAIT1 | \
   1393			     TCPF_FIN_WAIT2)
   1394
   1395/*
   1396 * Called after some received data has been read.  It returns RX credits
   1397 * to the HW for the amount of data processed.
   1398 */
   1399static void chtls_cleanup_rbuf(struct sock *sk, int copied)
   1400{
   1401	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
   1402	struct tcp_sock *tp;
   1403	int must_send;
   1404	u32 credits;
   1405	u32 thres;
   1406
   1407	thres = 15 * 1024;
   1408
   1409	if (!sk_in_state(sk, CREDIT_RETURN_STATE))
   1410		return;
   1411
   1412	chtls_select_window(sk);
   1413	tp = tcp_sk(sk);
   1414	credits = tp->copied_seq - tp->rcv_wup;
   1415	if (unlikely(!credits))
   1416		return;
   1417
   1418/*
   1419 * For coalescing to work effectively ensure the receive window has
   1420 * at least 16KB left.
   1421 */
   1422	must_send = credits + 16384 >= tp->rcv_wnd;
   1423
   1424	if (must_send || credits >= thres)
   1425		tp->rcv_wup += send_rx_credits(csk, credits);
   1426}
   1427
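        /*
         * recvmsg() for TLS-RX offloaded connections: each skb on
         * sk_receive_queue is a record already decrypted by hardware; skbs
         * flagged ULPCB_FLAG_TLS_HDR carry the record header, whose type is
         * handed to userspace through a TLS_GET_RECORD_TYPE cmsg instead of
         * being copied as payload.
         */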
   1428static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
   1429			    int flags, int *addr_len)
   1430{
   1431	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
   1432	struct chtls_hws *hws = &csk->tlshws;
   1433	struct net_device *dev = csk->egress_dev;
   1434	struct adapter *adap = netdev2adap(dev);
   1435	struct tcp_sock *tp = tcp_sk(sk);
   1436	unsigned long avail;
   1437	int buffers_freed;
   1438	int copied = 0;
   1439	int target;
   1440	long timeo;
   1441
   1442	buffers_freed = 0;
   1443
   1444	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
   1445	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
   1446
   1447	if (unlikely(csk_flag(sk, CSK_UPDATE_RCV_WND)))
   1448		chtls_cleanup_rbuf(sk, copied);
   1449
   1450	do {
   1451		struct sk_buff *skb;
   1452		u32 offset = 0;
   1453
   1454		if (unlikely(tp->urg_data &&
   1455			     tp->urg_seq == tp->copied_seq)) {
   1456			if (copied)
   1457				break;
   1458			if (signal_pending(current)) {
   1459				copied = timeo ? sock_intr_errno(timeo) :
   1460					-EAGAIN;
   1461				break;
   1462			}
   1463		}
   1464		skb = skb_peek(&sk->sk_receive_queue);
   1465		if (skb)
   1466			goto found_ok_skb;
   1467		if (csk->wr_credits &&
   1468		    skb_queue_len(&csk->txq) &&
   1469		    chtls_push_frames(csk, csk->wr_credits ==
   1470				      csk->wr_max_credits))
   1471			sk->sk_write_space(sk);
   1472
   1473		if (copied >= target && !READ_ONCE(sk->sk_backlog.tail))
   1474			break;
   1475
   1476		if (copied) {
   1477			if (sk->sk_err || sk->sk_state == TCP_CLOSE ||
   1478			    (sk->sk_shutdown & RCV_SHUTDOWN) ||
   1479			    signal_pending(current))
   1480				break;
   1481
   1482			if (!timeo)
   1483				break;
   1484		} else {
   1485			if (sock_flag(sk, SOCK_DONE))
   1486				break;
   1487			if (sk->sk_err) {
   1488				copied = sock_error(sk);
   1489				break;
   1490			}
   1491			if (sk->sk_shutdown & RCV_SHUTDOWN)
   1492				break;
   1493			if (sk->sk_state == TCP_CLOSE) {
   1494				copied = -ENOTCONN;
   1495				break;
   1496			}
   1497			if (!timeo) {
   1498				copied = -EAGAIN;
   1499				break;
   1500			}
   1501			if (signal_pending(current)) {
   1502				copied = sock_intr_errno(timeo);
   1503				break;
   1504			}
   1505		}
   1506		if (READ_ONCE(sk->sk_backlog.tail)) {
   1507			release_sock(sk);
   1508			lock_sock(sk);
   1509			chtls_cleanup_rbuf(sk, copied);
   1510			continue;
   1511		}
   1512
   1513		if (copied >= target)
   1514			break;
   1515		chtls_cleanup_rbuf(sk, copied);
   1516		sk_wait_data(sk, &timeo, NULL);
   1517		continue;
   1518found_ok_skb:
   1519		if (!skb->len) {
   1520			skb_dst_set(skb, NULL);
   1521			__skb_unlink(skb, &sk->sk_receive_queue);
   1522			kfree_skb(skb);
   1523
   1524			if (!copied && !timeo) {
   1525				copied = -EAGAIN;
   1526				break;
   1527			}
   1528
   1529			if (copied < target) {
   1530				release_sock(sk);
   1531				lock_sock(sk);
   1532				continue;
   1533			}
   1534			break;
   1535		}
   1536		offset = hws->copied_seq;
   1537		avail = skb->len - offset;
   1538		if (len < avail)
   1539			avail = len;
   1540
   1541		if (unlikely(tp->urg_data)) {
   1542			u32 urg_offset = tp->urg_seq - tp->copied_seq;
   1543
   1544			if (urg_offset < avail) {
   1545				if (urg_offset) {
   1546					avail = urg_offset;
   1547				} else if (!sock_flag(sk, SOCK_URGINLINE)) {
   1548					/* First byte is urgent, skip */
   1549					tp->copied_seq++;
   1550					offset++;
   1551					avail--;
   1552					if (!avail)
   1553						goto skip_copy;
   1554				}
   1555			}
   1556		}
   1557		/* Set record type if not already done. For a non-data record,
   1558		 * do not proceed if record type could not be copied.
   1559		 */
   1560		if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_TLS_HDR) {
   1561			struct tls_hdr *thdr = (struct tls_hdr *)skb->data;
   1562			int cerr = 0;
   1563
   1564			cerr = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE,
   1565					sizeof(thdr->type), &thdr->type);
   1566
   1567			if (cerr && thdr->type != TLS_RECORD_TYPE_DATA) {
   1568				copied = -EIO;
   1569				break;
   1570			}
    1571			/* don't copy the TLS header to userspace, skip it */
   1572			goto skip_copy;
   1573		}
   1574
   1575		if (skb_copy_datagram_msg(skb, offset, msg, avail)) {
   1576			if (!copied) {
   1577				copied = -EFAULT;
   1578				break;
   1579			}
   1580		}
   1581
   1582		copied += avail;
   1583		len -= avail;
   1584		hws->copied_seq += avail;
   1585skip_copy:
   1586		if (tp->urg_data && after(tp->copied_seq, tp->urg_seq))
   1587			tp->urg_data = 0;
   1588
   1589		if ((avail + offset) >= skb->len) {
   1590			struct sk_buff *next_skb;
   1591			if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_TLS_HDR) {
   1592				tp->copied_seq += skb->len;
   1593				hws->rcvpld = skb->hdr_len;
   1594			} else {
   1595				atomic_inc(&adap->chcr_stats.tls_pdu_rx);
   1596				tp->copied_seq += hws->rcvpld;
   1597			}
   1598			chtls_free_skb(sk, skb);
   1599			buffers_freed++;
   1600			hws->copied_seq = 0;
   1601			next_skb = skb_peek(&sk->sk_receive_queue);
   1602			if (copied >= target && !next_skb)
   1603				break;
    1604			if (next_skb && (ULP_SKB_CB(next_skb)->flags & ULPCB_FLAG_TLS_HDR))
    1605				break;
   1606		}
   1607	} while (len > 0);
   1608
   1609	if (buffers_freed)
   1610		chtls_cleanup_rbuf(sk, copied);
   1611	release_sock(sk);
   1612	return copied;
   1613}
   1614
   1615/*
   1616 * Peek at data in a socket's receive buffer.
   1617 */
   1618static int peekmsg(struct sock *sk, struct msghdr *msg,
   1619		   size_t len, int flags)
   1620{
   1621	struct tcp_sock *tp = tcp_sk(sk);
   1622	u32 peek_seq, offset;
   1623	struct sk_buff *skb;
   1624	int copied = 0;
   1625	size_t avail;          /* amount of available data in current skb */
   1626	long timeo;
   1627
   1628	lock_sock(sk);
   1629	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
   1630	peek_seq = tp->copied_seq;
   1631
   1632	do {
   1633		if (unlikely(tp->urg_data && tp->urg_seq == peek_seq)) {
   1634			if (copied)
   1635				break;
   1636			if (signal_pending(current)) {
   1637				copied = timeo ? sock_intr_errno(timeo) :
   1638				-EAGAIN;
   1639				break;
   1640			}
   1641		}
   1642
   1643		skb_queue_walk(&sk->sk_receive_queue, skb) {
   1644			offset = peek_seq - ULP_SKB_CB(skb)->seq;
   1645			if (offset < skb->len)
   1646				goto found_ok_skb;
   1647		}
   1648
   1649		/* empty receive queue */
   1650		if (copied)
   1651			break;
   1652		if (sock_flag(sk, SOCK_DONE))
   1653			break;
   1654		if (sk->sk_err) {
   1655			copied = sock_error(sk);
   1656			break;
   1657		}
   1658		if (sk->sk_shutdown & RCV_SHUTDOWN)
   1659			break;
   1660		if (sk->sk_state == TCP_CLOSE) {
   1661			copied = -ENOTCONN;
   1662			break;
   1663		}
   1664		if (!timeo) {
   1665			copied = -EAGAIN;
   1666			break;
   1667		}
   1668		if (signal_pending(current)) {
   1669			copied = sock_intr_errno(timeo);
   1670			break;
   1671		}
   1672
   1673		if (READ_ONCE(sk->sk_backlog.tail)) {
   1674			/* Do not sleep, just process backlog. */
   1675			release_sock(sk);
   1676			lock_sock(sk);
   1677		} else {
   1678			sk_wait_data(sk, &timeo, NULL);
   1679		}
   1680
   1681		if (unlikely(peek_seq != tp->copied_seq)) {
   1682			if (net_ratelimit())
   1683				pr_info("TCP(%s:%d), race in MSG_PEEK.\n",
   1684					current->comm, current->pid);
   1685			peek_seq = tp->copied_seq;
   1686		}
   1687		continue;
   1688
   1689found_ok_skb:
   1690		avail = skb->len - offset;
   1691		if (len < avail)
   1692			avail = len;
   1693		/*
   1694		 * Do we have urgent data here?  We need to skip over the
   1695		 * urgent byte.
   1696		 */
   1697		if (unlikely(tp->urg_data)) {
   1698			u32 urg_offset = tp->urg_seq - peek_seq;
   1699
   1700			if (urg_offset < avail) {
   1701				/*
   1702				 * The amount of data we are preparing to copy
   1703				 * contains urgent data.
   1704				 */
   1705				if (!urg_offset) { /* First byte is urgent */
   1706					if (!sock_flag(sk, SOCK_URGINLINE)) {
   1707						peek_seq++;
   1708						offset++;
   1709						avail--;
   1710					}
   1711					if (!avail)
   1712						continue;
   1713				} else {
   1714					/* stop short of the urgent data */
   1715					avail = urg_offset;
   1716				}
   1717			}
   1718		}
   1719
   1720		/*
   1721		 * If MSG_TRUNC is specified the data is discarded.
   1722		 */
   1723		if (likely(!(flags & MSG_TRUNC)))
   1724			if (skb_copy_datagram_msg(skb, offset, msg, len)) {
   1725				if (!copied) {
   1726					copied = -EFAULT;
   1727					break;
   1728				}
   1729			}
   1730		peek_seq += avail;
   1731		copied += avail;
   1732		len -= avail;
   1733	} while (len > 0);
   1734
   1735	release_sock(sk);
   1736	return copied;
   1737}
   1738
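        /*
         * Top-level recvmsg(): MSG_OOB falls back to the base TCP recvmsg,
         * MSG_PEEK is handled by peekmsg(), TLS-RX sockets take the
         * chtls_pt_recvmsg() path above, and everything else runs the plain
         * offloaded receive loop below.
         */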
   1739int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
   1740		  int flags, int *addr_len)
   1741{
   1742	struct tcp_sock *tp = tcp_sk(sk);
   1743	struct chtls_sock *csk;
   1744	unsigned long avail;    /* amount of available data in current skb */
   1745	int buffers_freed;
   1746	int copied = 0;
   1747	long timeo;
   1748	int target;             /* Read at least this many bytes */
   1749
   1750	buffers_freed = 0;
   1751
   1752	if (unlikely(flags & MSG_OOB))
   1753		return tcp_prot.recvmsg(sk, msg, len, flags, addr_len);
   1754
   1755	if (unlikely(flags & MSG_PEEK))
   1756		return peekmsg(sk, msg, len, flags);
   1757
   1758	if (sk_can_busy_loop(sk) &&
   1759	    skb_queue_empty_lockless(&sk->sk_receive_queue) &&
   1760	    sk->sk_state == TCP_ESTABLISHED)
   1761		sk_busy_loop(sk, flags & MSG_DONTWAIT);
   1762
   1763	lock_sock(sk);
   1764	csk = rcu_dereference_sk_user_data(sk);
   1765
   1766	if (is_tls_rx(csk))
   1767		return chtls_pt_recvmsg(sk, msg, len, flags, addr_len);
   1768
   1769	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
   1770	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
   1771
   1772	if (unlikely(csk_flag(sk, CSK_UPDATE_RCV_WND)))
   1773		chtls_cleanup_rbuf(sk, copied);
   1774
   1775	do {
   1776		struct sk_buff *skb;
   1777		u32 offset;
   1778
   1779		if (unlikely(tp->urg_data && tp->urg_seq == tp->copied_seq)) {
   1780			if (copied)
   1781				break;
   1782			if (signal_pending(current)) {
   1783				copied = timeo ? sock_intr_errno(timeo) :
   1784					-EAGAIN;
   1785				break;
   1786			}
   1787		}
   1788
   1789		skb = skb_peek(&sk->sk_receive_queue);
   1790		if (skb)
   1791			goto found_ok_skb;
   1792
   1793		if (csk->wr_credits &&
   1794		    skb_queue_len(&csk->txq) &&
   1795		    chtls_push_frames(csk, csk->wr_credits ==
   1796				      csk->wr_max_credits))
   1797			sk->sk_write_space(sk);
   1798
   1799		if (copied >= target && !READ_ONCE(sk->sk_backlog.tail))
   1800			break;
   1801
   1802		if (copied) {
   1803			if (sk->sk_err || sk->sk_state == TCP_CLOSE ||
   1804			    (sk->sk_shutdown & RCV_SHUTDOWN) ||
   1805			    signal_pending(current))
   1806				break;
   1807		} else {
   1808			if (sock_flag(sk, SOCK_DONE))
   1809				break;
   1810			if (sk->sk_err) {
   1811				copied = sock_error(sk);
   1812				break;
   1813			}
   1814			if (sk->sk_shutdown & RCV_SHUTDOWN)
   1815				break;
   1816			if (sk->sk_state == TCP_CLOSE) {
   1817				copied = -ENOTCONN;
   1818				break;
   1819			}
   1820			if (!timeo) {
   1821				copied = -EAGAIN;
   1822				break;
   1823			}
   1824			if (signal_pending(current)) {
   1825				copied = sock_intr_errno(timeo);
   1826				break;
   1827			}
   1828		}
   1829
   1830		if (READ_ONCE(sk->sk_backlog.tail)) {
   1831			release_sock(sk);
   1832			lock_sock(sk);
   1833			chtls_cleanup_rbuf(sk, copied);
   1834			continue;
   1835		}
   1836
   1837		if (copied >= target)
   1838			break;
   1839		chtls_cleanup_rbuf(sk, copied);
   1840		sk_wait_data(sk, &timeo, NULL);
   1841		continue;
   1842
   1843found_ok_skb:
   1844		if (!skb->len) {
   1845			chtls_kfree_skb(sk, skb);
   1846			if (!copied && !timeo) {
   1847				copied = -EAGAIN;
   1848				break;
   1849			}
   1850
   1851			if (copied < target)
   1852				continue;
   1853
   1854			break;
   1855		}
   1856
   1857		offset = tp->copied_seq - ULP_SKB_CB(skb)->seq;
   1858		avail = skb->len - offset;
   1859		if (len < avail)
   1860			avail = len;
   1861
   1862		if (unlikely(tp->urg_data)) {
   1863			u32 urg_offset = tp->urg_seq - tp->copied_seq;
   1864
   1865			if (urg_offset < avail) {
   1866				if (urg_offset) {
   1867					avail = urg_offset;
   1868				} else if (!sock_flag(sk, SOCK_URGINLINE)) {
   1869					tp->copied_seq++;
   1870					offset++;
   1871					avail--;
   1872					if (!avail)
   1873						goto skip_copy;
   1874				}
   1875			}
   1876		}
   1877
   1878		if (likely(!(flags & MSG_TRUNC))) {
   1879			if (skb_copy_datagram_msg(skb, offset,
   1880						  msg, avail)) {
   1881				if (!copied) {
   1882					copied = -EFAULT;
   1883					break;
   1884				}
   1885			}
   1886		}
   1887
   1888		tp->copied_seq += avail;
   1889		copied += avail;
   1890		len -= avail;
   1891
   1892skip_copy:
   1893		if (tp->urg_data && after(tp->copied_seq, tp->urg_seq))
   1894			tp->urg_data = 0;
   1895
   1896		if (avail + offset >= skb->len) {
   1897			chtls_free_skb(sk, skb);
   1898			buffers_freed++;
   1899
   1900			if  (copied >= target &&
   1901			     !skb_peek(&sk->sk_receive_queue))
   1902				break;
   1903		}
   1904	} while (len > 0);
   1905
   1906	if (buffers_freed)
   1907		chtls_cleanup_rbuf(sk, copied);
   1908
   1909	release_sock(sk);
   1910	return copied;
   1911}