cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

ccid2.c (23509B)


// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  Copyright (c) 2005, 2006 Andrea Bittau <a.bittau@cs.ucl.ac.uk>
 *
 *  Changes to meet Linux coding standards, and DCCP infrastructure fixes.
 *
 *  Copyright (c) 2006 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 */

/*
 * This implementation should follow RFC 4341
 */
#include <linux/slab.h>
#include "../feat.h"
#include "ccid2.h"


#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
static bool ccid2_debug;
#define ccid2_pr_debug(format, a...)	DCCP_PR_DEBUG(ccid2_debug, format, ##a)
#else
#define ccid2_pr_debug(format, a...)
#endif

static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hc)
{
	struct ccid2_seq *seqp;
	int i;

	/* check if we have space to preserve the pointer to the buffer */
	if (hc->tx_seqbufc >= (sizeof(hc->tx_seqbuf) /
			       sizeof(struct ccid2_seq *)))
		return -ENOMEM;

	/* allocate buffer and initialize linked list */
	seqp = kmalloc_array(CCID2_SEQBUF_LEN, sizeof(struct ccid2_seq),
			     gfp_any());
	if (seqp == NULL)
		return -ENOMEM;

	for (i = 0; i < (CCID2_SEQBUF_LEN - 1); i++) {
		seqp[i].ccid2s_next = &seqp[i + 1];
		seqp[i + 1].ccid2s_prev = &seqp[i];
	}
	seqp[CCID2_SEQBUF_LEN - 1].ccid2s_next = seqp;
	seqp->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1];
	/* This is the first allocation.  Initialise the head and tail. */
	if (hc->tx_seqbufc == 0)
		hc->tx_seqh = hc->tx_seqt = seqp;
	else {
		/* link the existing list with the one we just created */
		hc->tx_seqh->ccid2s_next = seqp;
		seqp->ccid2s_prev = hc->tx_seqh;

		hc->tx_seqt->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1];
		seqp[CCID2_SEQBUF_LEN - 1].ccid2s_next = hc->tx_seqt;
	}

	/* store the original pointer to the buffer so we can free it */
	hc->tx_seqbuf[hc->tx_seqbufc] = seqp;
	hc->tx_seqbufc++;

	return 0;
}
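/*
 * Illustrative note: each successful call above adds one array of
 * CCID2_SEQBUF_LEN entries to the packet history. The first array is closed
 * into a ring on its own; later arrays are spliced in between the current
 * head (hc->tx_seqh) and tail (hc->tx_seqt), so the ring simply grows by
 * CCID2_SEQBUF_LEN free slots. tx_seqbuf[] remembers each array's start
 * address so that ccid2_hc_tx_exit() can kfree() them individually.
 */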

static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
{
	if (ccid2_cwnd_network_limited(ccid2_hc_tx_sk(sk)))
		return CCID_PACKET_WILL_DEQUEUE_LATER;
	return CCID_PACKET_SEND_AT_ONCE;
}

static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
{
	u32 max_ratio = DIV_ROUND_UP(ccid2_hc_tx_sk(sk)->tx_cwnd, 2);

	/*
	 * Ensure that Ack Ratio does not exceed ceil(cwnd/2), which is
	 * requirement (2) from RFC 4341, 6.1.2. We ignore the statement that
	 * Ack Ratio 2 is always acceptable, since that causes starvation or
	 * deadlock whenever cwnd < 2. The same problem arises when Ack Ratio
	 * is 0 (i.e. Ack Ratio disabled).
	 */
	if (val == 0 || val > max_ratio) {
		DCCP_WARN("Limiting Ack Ratio (%u) to %u\n", val, max_ratio);
		val = max_ratio;
	}
	dccp_feat_signal_nn_change(sk, DCCPF_ACK_RATIO,
				   min_t(u32, val, DCCPF_ACK_RATIO_MAX));
}
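/*
 * Worked example (illustrative): with tx_cwnd == 5, max_ratio is
 * DIV_ROUND_UP(5, 2) == 3, so a requested Ack Ratio of 0 (disabled) or of
 * anything above 3 is clamped to 3 before being signalled; the peer is thus
 * always asked to send at least one Ack per ceil(cwnd/2) data packets.
 */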

static void ccid2_check_l_ack_ratio(struct sock *sk)
{
	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);

	/*
	 * After a loss, idle period, application-limited period, or RTO we
	 * need to check that the Ack Ratio is still less than the congestion
	 * window. Otherwise we could send an entire congestion window of
	 * packets and get no response, because the receiver has not yet seen
	 * an Ack Ratio's worth of packets and so generates no Ack.
	 * If the Ack Ratio does need to be reduced, we reduce it to half of
	 * the congestion window (or 1 if that's zero) instead of to the
	 * congestion window. This prevents problems if one Ack is lost.
	 */
	if (dccp_feat_nn_get(sk, DCCPF_ACK_RATIO) > hc->tx_cwnd)
		ccid2_change_l_ack_ratio(sk, hc->tx_cwnd/2 ? : 1U);
}

static void ccid2_change_l_seq_window(struct sock *sk, u64 val)
{
	dccp_feat_signal_nn_change(sk, DCCPF_SEQUENCE_WINDOW,
				   clamp_val(val, DCCPF_SEQ_WMIN,
						  DCCPF_SEQ_WMAX));
}

static void dccp_tasklet_schedule(struct sock *sk)
{
	struct tasklet_struct *t = &dccp_sk(sk)->dccps_xmitlet;

	if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
		sock_hold(sk);
		__tasklet_schedule(t);
	}
}
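/*
 * Note: test_and_set_bit() makes the schedule idempotent, so the reference
 * taken via sock_hold() is acquired at most once per pending run; the
 * matching sock_put() is expected to be issued by the tasklet body itself
 * (dccp_write_xmitlet() in net/dccp/timer.c, in mainline) once it has run.
 */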

static void ccid2_hc_tx_rto_expire(struct timer_list *t)
{
	struct ccid2_hc_tx_sock *hc = from_timer(hc, t, tx_rtotimer);
	struct sock *sk = hc->sk;
	const bool sender_was_blocked = ccid2_cwnd_network_limited(hc);

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk)) {
		sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + HZ / 5);
		goto out;
	}

	ccid2_pr_debug("RTO_EXPIRE\n");

	if (sk->sk_state == DCCP_CLOSED)
		goto out;

	/* back-off timer */
	hc->tx_rto <<= 1;
	if (hc->tx_rto > DCCP_RTO_MAX)
		hc->tx_rto = DCCP_RTO_MAX;

	/* adjust pipe, cwnd etc */
	hc->tx_ssthresh = hc->tx_cwnd / 2;
	if (hc->tx_ssthresh < 2)
		hc->tx_ssthresh = 2;
	hc->tx_cwnd	= 1;
	hc->tx_pipe	= 0;

	/* clear state about stuff we sent */
	hc->tx_seqt = hc->tx_seqh;
	hc->tx_packets_acked = 0;

	/* clear ack ratio state. */
	hc->tx_rpseq    = 0;
	hc->tx_rpdupack = -1;
	ccid2_change_l_ack_ratio(sk, 1);

	/* if we were blocked before, we may now send cwnd=1 packet */
	if (sender_was_blocked)
		dccp_tasklet_schedule(sk);
	/* restart backed-off timer */
	sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
out:
	bh_unlock_sock(sk);
	sock_put(sk);
}
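/*
 * Worked example (illustrative): starting from the initial RTO of
 * DCCP_TIMEOUT_INIT (3s in mainline), back-to-back expirations without any
 * feedback double the timer to 6s, 12s, 24s, ... until it is capped at
 * DCCP_RTO_MAX. Each expiry also collapses the window to tx_cwnd == 1 with
 * tx_ssthresh == max(old_cwnd / 2, 2), mirroring TCP's RTO response.
 */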

/*
 *	Congestion window validation (RFC 2861).
 */
static bool ccid2_do_cwv = true;
module_param(ccid2_do_cwv, bool, 0644);
MODULE_PARM_DESC(ccid2_do_cwv, "Perform RFC2861 Congestion Window Validation");

/**
 * ccid2_update_used_window  -  Track how much of cwnd is actually used
 * @hc: socket to update window
 * @new_wnd: new window values to add into the filter
 *
 * This is done in addition to CWV. The sender needs to have an idea of how many
 * packets may be in flight, to set the local Sequence Window value accordingly
 * (RFC 4340, 7.5.2). The CWV mechanism is exploited to keep track of the
 * maximum-used window. We use an EWMA low-pass filter to filter out noise.
 */
static void ccid2_update_used_window(struct ccid2_hc_tx_sock *hc, u32 new_wnd)
{
	hc->tx_expected_wnd = (3 * hc->tx_expected_wnd + new_wnd) / 4;
}
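/*
 * Worked example (illustrative): the filter weights history 3:1, so with
 * tx_expected_wnd == 20 and new_wnd == 40 the update yields
 * (3 * 20 + 40) / 4 == 25. A sustained change thus needs several samples
 * to be fully reflected, which damps transient spikes in window usage.
 */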

/* This borrows the code of tcp_cwnd_application_limited() */
static void ccid2_cwnd_application_limited(struct sock *sk, const u32 now)
{
	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
	/* don't reduce cwnd below the initial window (IW) */
	u32 init_win = rfc3390_bytes_to_packets(dccp_sk(sk)->dccps_mss_cache),
	    win_used = max(hc->tx_cwnd_used, init_win);

	if (win_used < hc->tx_cwnd) {
		hc->tx_ssthresh = max(hc->tx_ssthresh,
				     (hc->tx_cwnd >> 1) + (hc->tx_cwnd >> 2));
		hc->tx_cwnd = (hc->tx_cwnd + win_used) >> 1;
	}
	hc->tx_cwnd_used  = 0;
	hc->tx_cwnd_stamp = now;

	ccid2_check_l_ack_ratio(sk);
}

/* This borrows the code of tcp_cwnd_restart() */
static void ccid2_cwnd_restart(struct sock *sk, const u32 now)
{
	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
	u32 cwnd = hc->tx_cwnd, restart_cwnd,
	    iwnd = rfc3390_bytes_to_packets(dccp_sk(sk)->dccps_mss_cache);
	s32 delta = now - hc->tx_lsndtime;

	hc->tx_ssthresh = max(hc->tx_ssthresh, (cwnd >> 1) + (cwnd >> 2));

	/* don't reduce cwnd below the initial window (IW) */
	restart_cwnd = min(cwnd, iwnd);

	while ((delta -= hc->tx_rto) >= 0 && cwnd > restart_cwnd)
		cwnd >>= 1;
	hc->tx_cwnd = max(cwnd, restart_cwnd);
	hc->tx_cwnd_stamp = now;
	hc->tx_cwnd_used  = 0;

	ccid2_check_l_ack_ratio(sk);
}
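/*
 * Worked example (illustrative): suppose tx_cwnd == 16, iwnd == 3 and the
 * connection was idle for just over three RTOs. The loop halves cwnd once
 * per elapsed RTO (16 -> 8 -> 4 -> 2) and the final max() lifts the result
 * back to restart_cwnd == 3, so transmission resumes from the initial
 * window rather than from an arbitrarily small value.
 */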

static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
	const u32 now = ccid2_jiffies32;
	struct ccid2_seq *next;

	/* slow-start after idle periods (RFC 2581, RFC 2861) */
	if (ccid2_do_cwv && !hc->tx_pipe &&
	    (s32)(now - hc->tx_lsndtime) >= hc->tx_rto)
		ccid2_cwnd_restart(sk, now);

	hc->tx_lsndtime = now;
	hc->tx_pipe    += 1;

	/* see whether cwnd was fully used (RFC 2861), update expected window */
	if (ccid2_cwnd_network_limited(hc)) {
		ccid2_update_used_window(hc, hc->tx_cwnd);
		hc->tx_cwnd_used  = 0;
		hc->tx_cwnd_stamp = now;
	} else {
		if (hc->tx_pipe > hc->tx_cwnd_used)
			hc->tx_cwnd_used = hc->tx_pipe;

		ccid2_update_used_window(hc, hc->tx_cwnd_used);

		if (ccid2_do_cwv && (s32)(now - hc->tx_cwnd_stamp) >= hc->tx_rto)
			ccid2_cwnd_application_limited(sk, now);
	}

	hc->tx_seqh->ccid2s_seq   = dp->dccps_gss;
	hc->tx_seqh->ccid2s_acked = 0;
	hc->tx_seqh->ccid2s_sent  = now;

	next = hc->tx_seqh->ccid2s_next;
	/* check if we need to alloc more space */
	if (next == hc->tx_seqt) {
		if (ccid2_hc_tx_alloc_seq(hc)) {
			DCCP_CRIT("packet history - out of memory!");
			/* FIXME: find a more graceful way to bail out */
			return;
		}
		next = hc->tx_seqh->ccid2s_next;
		BUG_ON(next == hc->tx_seqt);
	}
	hc->tx_seqh = next;

	ccid2_pr_debug("cwnd=%d pipe=%d\n", hc->tx_cwnd, hc->tx_pipe);

	/*
	 * FIXME: The code below is broken and the variables have been removed
	 * from the socket struct. The `ackloss' variable was always set to 0,
	 * and with arsent there are several problems:
	 *  (i) it doesn't just count the number of Acks, but all sent packets;
	 *  (ii) it is expressed in # of packets, not # of windows, so the
	 *  comparison below uses the wrong formula: Appendix A of RFC 4341
	 *  comes up with the number K = cwnd / (R^2 - R) of consecutive windows
	 *  of data with no lost or marked Ack packets. If arsent were the # of
	 *  consecutive Acks received without loss, then Ack Ratio needs to be
	 *  decreased by 1 when
	 *	      arsent >=  K * cwnd / R  =  cwnd^2 / (R^3 - R^2)
	 *  where cwnd / R is the number of Acks received per window of data
	 *  (cf. RFC 4341, App. A). The problems are that
	 *  - arsent counts other packets as well;
	 *  - the comparison uses a formula different from RFC 4341;
	 *  - computing a cubic/quadratic equation each time is too complicated.
	 *  Hence a different algorithm is needed.
	 */
#if 0
	/* Ack Ratio.  Need to maintain a concept of how many windows we sent */
	hc->tx_arsent++;
	/* We had an ack loss in this window... */
	if (hc->tx_ackloss) {
		if (hc->tx_arsent >= hc->tx_cwnd) {
			hc->tx_arsent  = 0;
			hc->tx_ackloss = 0;
		}
	} else {
		/* No acks lost up to now... */
		/* decrease ack ratio if enough packets were sent */
		if (dp->dccps_l_ack_ratio > 1) {
			/* XXX don't calculate denominator each time */
			int denom = dp->dccps_l_ack_ratio * dp->dccps_l_ack_ratio -
				    dp->dccps_l_ack_ratio;

			denom = hc->tx_cwnd * hc->tx_cwnd / denom;

			if (hc->tx_arsent >= denom) {
				ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio - 1);
				hc->tx_arsent = 0;
			}
		} else {
			/* we can't increase ack ratio further [1] */
			hc->tx_arsent = 0; /* or maybe set it to cwnd*/
		}
	}
#endif
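	/*
	 * Worked example for the formula above (illustrative): with
	 * cwnd == 20 and Ack Ratio R == 2, K = 20 / (4 - 2) = 10 windows,
	 * and cwnd / R = 10 Acks arrive per window, so Ack Ratio could be
	 * decreased by 1 after arsent >= cwnd^2 / (R^3 - R^2) = 400 / 4 = 100
	 * consecutive loss-free Acks; the disabled code fails because arsent
	 * never counted only Acks.
	 */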

	sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);

#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
	do {
		struct ccid2_seq *seqp = hc->tx_seqt;

		while (seqp != hc->tx_seqh) {
			ccid2_pr_debug("out seq=%llu acked=%d time=%u\n",
				       (unsigned long long)seqp->ccid2s_seq,
				       seqp->ccid2s_acked, seqp->ccid2s_sent);
			seqp = seqp->ccid2s_next;
		}
	} while (0);
	ccid2_pr_debug("=========\n");
#endif
}

/**
 * ccid2_rtt_estimator - Sample RTT and compute RTO using RFC2988 algorithm
 * @sk: socket to perform estimator on
 * @mrtt: measured RTT sample, in jiffies
 *
 * This code is almost identical to TCP's tcp_rtt_estimator(), which was
 * adopted since
 * - it has a higher sampling frequency (recommended by RFC 1323),
 * - the RTO does not collapse into RTT due to RTTVAR going towards zero,
 * - it is simple (cf. more complex proposals such as the Eifel timer or
 *   research which suggests that the gain should be set according to
 *   window size),
 * - in tests it was found to work well with CCID2 [gerrit].
 */
static void ccid2_rtt_estimator(struct sock *sk, const long mrtt)
{
	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
	long m = mrtt ? : 1;

	if (hc->tx_srtt == 0) {
		/* First measurement m */
		hc->tx_srtt = m << 3;
		hc->tx_mdev = m << 1;

		hc->tx_mdev_max = max(hc->tx_mdev, tcp_rto_min(sk));
		hc->tx_rttvar   = hc->tx_mdev_max;

		hc->tx_rtt_seq  = dccp_sk(sk)->dccps_gss;
	} else {
		/* Update scaled SRTT as SRTT += 1/8 * (m - SRTT) */
		m -= (hc->tx_srtt >> 3);
		hc->tx_srtt += m;

		/* Similarly, update scaled mdev with regard to |m| */
		if (m < 0) {
			m = -m;
			m -= (hc->tx_mdev >> 2);
			/*
			 * This neutralises RTO increase when RTT < SRTT - mdev
			 * (see P. Sarolahti, A. Kuznetsov,"Congestion Control
			 * in Linux TCP", USENIX 2002, pp. 49-62).
			 */
			if (m > 0)
				m >>= 3;
		} else {
			m -= (hc->tx_mdev >> 2);
		}
		hc->tx_mdev += m;

		if (hc->tx_mdev > hc->tx_mdev_max) {
			hc->tx_mdev_max = hc->tx_mdev;
			if (hc->tx_mdev_max > hc->tx_rttvar)
				hc->tx_rttvar = hc->tx_mdev_max;
		}

		/*
		 * Decay RTTVAR at most once per flight, exploiting that
		 *  1) pipe <= cwnd <= Sequence_Window = W  (RFC 4340, 7.5.2)
		 *  2) AWL = GSS-W+1 <= GAR <= GSS          (RFC 4340, 7.5.1)
		 * GAR is a useful bound for FlightSize = pipe.
		 * AWL is probably too low here, as it over-estimates pipe.
		 */
		if (after48(dccp_sk(sk)->dccps_gar, hc->tx_rtt_seq)) {
			if (hc->tx_mdev_max < hc->tx_rttvar)
				hc->tx_rttvar -= (hc->tx_rttvar -
						  hc->tx_mdev_max) >> 2;
			hc->tx_rtt_seq  = dccp_sk(sk)->dccps_gss;
			hc->tx_mdev_max = tcp_rto_min(sk);
		}
	}

	/*
	 * Set RTO from SRTT and RTTVAR
	 * As in TCP, 4 * RTTVAR >= TCP_RTO_MIN, giving a minimum RTO of 200 ms.
	 * This agrees with RFC 4341, 5:
	 *	"Because DCCP does not retransmit data, DCCP does not require
	 *	 TCP's recommended minimum timeout of one second".
	 */
	hc->tx_rto = (hc->tx_srtt >> 3) + hc->tx_rttvar;

	if (hc->tx_rto > DCCP_RTO_MAX)
		hc->tx_rto = DCCP_RTO_MAX;
}
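/*
 * Worked example (illustrative): with HZ == 1000, a first sample of
 * m == 100 jiffies stores tx_srtt = 800 (SRTT scaled by 8) and
 * tx_mdev = 200 (mdev scaled by 4, i.e. m/2). tx_rttvar then becomes
 * max(200, tcp_rto_min(sk)), and the RTO computed above is
 * (tx_srtt >> 3) + tx_rttvar = 100 + tx_rttvar jiffies, which is the
 * RFC 2988 "SRTT + 4*RTTVAR" with the factor 4 folded into the scaling.
 */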

static void ccid2_new_ack(struct sock *sk, struct ccid2_seq *seqp,
			  unsigned int *maxincr)
{
	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
	struct dccp_sock *dp = dccp_sk(sk);
	int r_seq_used = hc->tx_cwnd / dp->dccps_l_ack_ratio;

	if (hc->tx_cwnd < dp->dccps_l_seq_win &&
	    r_seq_used < dp->dccps_r_seq_win) {
		if (hc->tx_cwnd < hc->tx_ssthresh) {
			if (*maxincr > 0 && ++hc->tx_packets_acked >= 2) {
				hc->tx_cwnd += 1;
				*maxincr    -= 1;
				hc->tx_packets_acked = 0;
			}
		} else if (++hc->tx_packets_acked >= hc->tx_cwnd) {
			hc->tx_cwnd += 1;
			hc->tx_packets_acked = 0;
		}
	}

	/*
	 * Adjust the local sequence window and the ack ratio to allow about
	 * 5 times the number of packets in the network (RFC 4340 7.5.2)
	 */
	if (r_seq_used * CCID2_WIN_CHANGE_FACTOR >= dp->dccps_r_seq_win)
		ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio * 2);
	else if (r_seq_used * CCID2_WIN_CHANGE_FACTOR < dp->dccps_r_seq_win/2)
		ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio / 2 ? : 1U);

	if (hc->tx_cwnd * CCID2_WIN_CHANGE_FACTOR >= dp->dccps_l_seq_win)
		ccid2_change_l_seq_window(sk, dp->dccps_l_seq_win * 2);
	else if (hc->tx_cwnd * CCID2_WIN_CHANGE_FACTOR < dp->dccps_l_seq_win/2)
		ccid2_change_l_seq_window(sk, dp->dccps_l_seq_win / 2);

	/*
	 * FIXME: RTT is sampled several times per acknowledgment (for each
	 * entry in the Ack Vector), instead of once per Ack (as in TCP SACK).
	 * This causes the RTT to be over-estimated, since the older entries
	 * in the Ack Vector have earlier sending times.
	 * The cleanest solution is to not use the ccid2s_sent field at all
	 * and instead use DCCP timestamps: requires changes in other places.
	 */
	ccid2_rtt_estimator(sk, ccid2_jiffies32 - seqp->ccid2s_sent);
}
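/*
 * Worked example (illustrative): in slow-start with Ack Ratio 2, the caller
 * sets *maxincr = DIV_ROUND_UP(2, 2) == 1, so cwnd grows by at most one
 * packet per received Ack, after two newly acknowledged packets. In
 * congestion avoidance, cwnd += 1 only once a full cwnd's worth of packets
 * has been acked, giving the same one-packet-per-RTT slope as TCP Reno.
 */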

static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp)
{
	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);

	if ((s32)(seqp->ccid2s_sent - hc->tx_last_cong) < 0) {
		ccid2_pr_debug("Multiple losses in an RTT---treating as one\n");
		return;
	}

	hc->tx_last_cong = ccid2_jiffies32;

	hc->tx_cwnd      = hc->tx_cwnd / 2 ? : 1U;
	hc->tx_ssthresh  = max(hc->tx_cwnd, 2U);

	ccid2_check_l_ack_ratio(sk);
}
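/*
 * Worked example (illustrative): a loss or ECN mark seen with tx_cwnd == 10
 * halves the window to 5 and sets tx_ssthresh = max(5, 2) == 5. Further
 * marks on packets sent before tx_last_cong fall within the same RTT and
 * are ignored, so each congestion event costs at most one halving.
 */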

static int ccid2_hc_tx_parse_options(struct sock *sk, u8 packet_type,
				     u8 option, u8 *optval, u8 optlen)
{
	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);

	switch (option) {
	case DCCPO_ACK_VECTOR_0:
	case DCCPO_ACK_VECTOR_1:
		return dccp_ackvec_parsed_add(&hc->tx_av_chunks, optval, optlen,
					      option - DCCPO_ACK_VECTOR_0);
	}
	return 0;
}

static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
	const bool sender_was_blocked = ccid2_cwnd_network_limited(hc);
	struct dccp_ackvec_parsed *avp;
	u64 ackno, seqno;
	struct ccid2_seq *seqp;
	int done = 0;
	unsigned int maxincr = 0;

	/* check reverse path congestion */
	seqno = DCCP_SKB_CB(skb)->dccpd_seq;

	/* XXX this whole "algorithm" is broken.  Need to fix it to keep track
	 * of the seqnos of the dupacks so that rpseq and rpdupack are correct
	 * -sorbo.
	 */
	/* need to bootstrap */
	if (hc->tx_rpdupack == -1) {
		hc->tx_rpdupack = 0;
		hc->tx_rpseq    = seqno;
	} else {
		/* check if packet is consecutive */
		if (dccp_delta_seqno(hc->tx_rpseq, seqno) == 1)
			hc->tx_rpseq = seqno;
		/* it's a later packet */
		else if (after48(seqno, hc->tx_rpseq)) {
			hc->tx_rpdupack++;

			/* check if we got enough dupacks */
			if (hc->tx_rpdupack >= NUMDUPACK) {
				hc->tx_rpdupack = -1; /* XXX lame */
				hc->tx_rpseq    = 0;
#ifdef __CCID2_COPES_GRACEFULLY_WITH_ACK_CONGESTION_CONTROL__
				/*
				 * FIXME: Ack Congestion Control is broken; in
				 * the current state instabilities occurred with
				 * Ack Ratios greater than 1; causing hang-ups
				 * and long RTO timeouts. This needs to be fixed
				 * before opening up dynamic changes. -- gerrit
				 */
				ccid2_change_l_ack_ratio(sk, 2 * dp->dccps_l_ack_ratio);
#endif
			}
		}
	}

	/* check forward path congestion */
	if (dccp_packet_without_ack(skb))
		return;

	/* no new data packets have been sent out yet */
	if (hc->tx_seqh == hc->tx_seqt)
		goto done;

	ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq;
	if (after48(ackno, hc->tx_high_ack))
		hc->tx_high_ack = ackno;

	seqp = hc->tx_seqt;
	while (before48(seqp->ccid2s_seq, ackno)) {
		seqp = seqp->ccid2s_next;
		if (seqp == hc->tx_seqh) {
			seqp = hc->tx_seqh->ccid2s_prev;
			break;
		}
	}

	/*
	 * In slow-start, cwnd can increase up to a maximum of Ack Ratio/2
	 * packets per acknowledgement. Rounding up ensures that cwnd still
	 * advances when Ack Ratio is 1, and gives a slight edge otherwise.
	 */
	if (hc->tx_cwnd < hc->tx_ssthresh)
		maxincr = DIV_ROUND_UP(dp->dccps_l_ack_ratio, 2);

	/* go through all ack vectors */
	list_for_each_entry(avp, &hc->tx_av_chunks, node) {
		/* go through this ack vector */
		for (; avp->len--; avp->vec++) {
			u64 ackno_end_rl = SUB48(ackno,
						 dccp_ackvec_runlen(avp->vec));

			ccid2_pr_debug("ackvec %llu |%u,%u|\n",
				       (unsigned long long)ackno,
				       dccp_ackvec_state(avp->vec) >> 6,
				       dccp_ackvec_runlen(avp->vec));
			/* if the seqno we are analyzing is larger than the
			 * current ackno, then move towards the tail of our
			 * seqnos.
			 */
			while (after48(seqp->ccid2s_seq, ackno)) {
				if (seqp == hc->tx_seqt) {
					done = 1;
					break;
				}
				seqp = seqp->ccid2s_prev;
			}
			if (done)
				break;

			/* check all seqnos in the range of the vector
			 * run length
			 */
			while (between48(seqp->ccid2s_seq, ackno_end_rl, ackno)) {
				const u8 state = dccp_ackvec_state(avp->vec);

				/* new packet received or marked */
				if (state != DCCPAV_NOT_RECEIVED &&
				    !seqp->ccid2s_acked) {
					if (state == DCCPAV_ECN_MARKED)
						ccid2_congestion_event(sk,
								       seqp);
					else
						ccid2_new_ack(sk, seqp,
							      &maxincr);

					seqp->ccid2s_acked = 1;
					ccid2_pr_debug("Got ack for %llu\n",
						       (unsigned long long)seqp->ccid2s_seq);
					hc->tx_pipe--;
				}
				if (seqp == hc->tx_seqt) {
					done = 1;
					break;
				}
				seqp = seqp->ccid2s_prev;
			}
			if (done)
				break;

			ackno = SUB48(ackno_end_rl, 1);
		}
		if (done)
			break;
	}

	/* The state of which packets are acked should now be correct.
	 * Check for NUMDUPACK acked packets above a hole, the equivalent of
	 * TCP's duplicate-ACK threshold.
	 */
	seqp = hc->tx_seqt;
	while (before48(seqp->ccid2s_seq, hc->tx_high_ack)) {
		seqp = seqp->ccid2s_next;
		if (seqp == hc->tx_seqh) {
			seqp = hc->tx_seqh->ccid2s_prev;
			break;
		}
	}
	done = 0;
	while (1) {
		if (seqp->ccid2s_acked) {
			done++;
			if (done == NUMDUPACK)
				break;
		}
		if (seqp == hc->tx_seqt)
			break;
		seqp = seqp->ccid2s_prev;
	}

	/* If there are at least 3 acknowledgements, anything unacknowledged
	 * below the last sequence number is considered lost
	 */
	if (done == NUMDUPACK) {
		struct ccid2_seq *last_acked = seqp;

		/* check for lost packets */
		while (1) {
			if (!seqp->ccid2s_acked) {
				ccid2_pr_debug("Packet lost: %llu\n",
					       (unsigned long long)seqp->ccid2s_seq);
				/* XXX need to traverse from tail -> head in
				 * order to detect multiple congestion events in
				 * one ack vector.
				 */
				ccid2_congestion_event(sk, seqp);
				hc->tx_pipe--;
			}
			if (seqp == hc->tx_seqt)
				break;
			seqp = seqp->ccid2s_prev;
		}

		hc->tx_seqt = last_acked;
	}

	/* trim acked packets in tail */
	while (hc->tx_seqt != hc->tx_seqh) {
		if (!hc->tx_seqt->ccid2s_acked)
			break;

		hc->tx_seqt = hc->tx_seqt->ccid2s_next;
	}

	/* restart RTO timer if not all outstanding data has been acked */
	if (hc->tx_pipe == 0)
		sk_stop_timer(sk, &hc->tx_rtotimer);
	else
		sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
done:
	/* check if incoming Acks allow pending packets to be sent */
	if (sender_was_blocked && !ccid2_cwnd_network_limited(hc))
		dccp_tasklet_schedule(sk);
	dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks);
}
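/*
 * Worked example for the loss detection above (illustrative): with
 * NUMDUPACK == 3 and a history holding seqnos 10..14, an Ack Vector that
 * acknowledges 12, 13 and 14 but not 10 and 11 walks back from the newest
 * entry, finds the third acked entry at seqno 12, and therefore treats 10
 * and 11 as lost: each triggers ccid2_congestion_event() (the second call
 * is normally absorbed by the once-per-RTT check) and a tx_pipe decrement,
 * and the tail then advances past the acked entries.
 */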

static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
{
	struct ccid2_hc_tx_sock *hc = ccid_priv(ccid);
	struct dccp_sock *dp = dccp_sk(sk);
	u32 max_ratio;

	/* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */
	hc->tx_ssthresh = ~0U;

	/* Use larger initial windows (RFC 4341, section 5). */
	hc->tx_cwnd = rfc3390_bytes_to_packets(dp->dccps_mss_cache);
	hc->tx_expected_wnd = hc->tx_cwnd;

	/* Make sure that Ack Ratio is enabled and within bounds. */
	max_ratio = DIV_ROUND_UP(hc->tx_cwnd, 2);
	if (dp->dccps_l_ack_ratio == 0 || dp->dccps_l_ack_ratio > max_ratio)
		dp->dccps_l_ack_ratio = max_ratio;

	/* XXX init ~ to window size... */
	if (ccid2_hc_tx_alloc_seq(hc))
		return -ENOMEM;

	hc->tx_rto	 = DCCP_TIMEOUT_INIT;
	hc->tx_rpdupack  = -1;
	hc->tx_last_cong = hc->tx_lsndtime = hc->tx_cwnd_stamp = ccid2_jiffies32;
	hc->tx_cwnd_used = 0;
	hc->sk		 = sk;
	timer_setup(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire, 0);
	INIT_LIST_HEAD(&hc->tx_av_chunks);
	return 0;
}
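/*
 * Worked example (illustrative): rfc3390_bytes_to_packets() converts the
 * cached MSS into an RFC 3390 initial window expressed in packets; a
 * typical Ethernet-sized MSS of 1460 bytes yields tx_cwnd == 3, so
 * max_ratio becomes DIV_ROUND_UP(3, 2) == 2 and an unset (0) or larger
 * Ack Ratio is pulled down to 2.
 */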

static void ccid2_hc_tx_exit(struct sock *sk)
{
	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
	int i;

	sk_stop_timer(sk, &hc->tx_rtotimer);

	for (i = 0; i < hc->tx_seqbufc; i++)
		kfree(hc->tx_seqbuf[i]);
	hc->tx_seqbufc = 0;
	dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks);
}

static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
{
	struct ccid2_hc_rx_sock *hc = ccid2_hc_rx_sk(sk);

	if (!dccp_data_packet(skb))
		return;

	if (++hc->rx_num_data_pkts >= dccp_sk(sk)->dccps_r_ack_ratio) {
		dccp_send_ack(sk);
		hc->rx_num_data_pkts = 0;
	}
}
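/*
 * Note: this is the receive half of Ack Ratio. With dccps_r_ack_ratio == 2,
 * for example, every second data packet triggers dccp_send_ack(), halving
 * the reverse-path Ack load compared to acknowledging every packet.
 */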
    775
    776struct ccid_operations ccid2_ops = {
    777	.ccid_id		  = DCCPC_CCID2,
    778	.ccid_name		  = "TCP-like",
    779	.ccid_hc_tx_obj_size	  = sizeof(struct ccid2_hc_tx_sock),
    780	.ccid_hc_tx_init	  = ccid2_hc_tx_init,
    781	.ccid_hc_tx_exit	  = ccid2_hc_tx_exit,
    782	.ccid_hc_tx_send_packet	  = ccid2_hc_tx_send_packet,
    783	.ccid_hc_tx_packet_sent	  = ccid2_hc_tx_packet_sent,
    784	.ccid_hc_tx_parse_options = ccid2_hc_tx_parse_options,
    785	.ccid_hc_tx_packet_recv	  = ccid2_hc_tx_packet_recv,
    786	.ccid_hc_rx_obj_size	  = sizeof(struct ccid2_hc_rx_sock),
    787	.ccid_hc_rx_packet_recv	  = ccid2_hc_rx_packet_recv,
    788};
    789
    790#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
    791module_param(ccid2_debug, bool, 0644);
    792MODULE_PARM_DESC(ccid2_debug, "Enable CCID-2 debug messages");
    793#endif