cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

transport.c (26327B)


// SPDX-License-Identifier: GPL-2.0-or-later
/* SCTP kernel implementation
 * Copyright (c) 1999-2000 Cisco, Inc.
 * Copyright (c) 1999-2001 Motorola, Inc.
 * Copyright (c) 2001-2003 International Business Machines Corp.
 * Copyright (c) 2001 Intel Corp.
 * Copyright (c) 2001 La Monte H.P. Yarroll
 *
 * This file is part of the SCTP kernel implementation
 *
 * This module provides the abstraction for an SCTP transport representing
 * a remote transport address.  For local transport addresses, we just use
 * union sctp_addr.
 *
 * Please send any bug reports or fixes you make to the
 * email address(es):
 *    lksctp developers <linux-sctp@vger.kernel.org>
 *
 * Written or modified by:
 *    La Monte H.P. Yarroll <piggy@acm.org>
 *    Karl Knutson          <karl@athena.chicago.il.us>
 *    Jon Grimm             <jgrimm@us.ibm.com>
 *    Xingang Guo           <xingang.guo@intel.com>
 *    Hui Huang             <hui.huang@nokia.com>
 *    Sridhar Samudrala	    <sri@us.ibm.com>
 *    Ardelle Fan	    <ardelle.fan@intel.com>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/slab.h>
#include <linux/types.h>
#include <linux/random.h>
#include <net/sctp/sctp.h>
#include <net/sctp/sm.h>

/* 1st Level Abstractions.  */

/* Initialize a new transport from provided memory.  */
static struct sctp_transport *sctp_transport_init(struct net *net,
						  struct sctp_transport *peer,
						  const union sctp_addr *addr,
						  gfp_t gfp)
{
	/* Copy in the address.  */
	peer->af_specific = sctp_get_af_specific(addr->sa.sa_family);
	memcpy(&peer->ipaddr, addr, peer->af_specific->sockaddr_len);
	memset(&peer->saddr, 0, sizeof(union sctp_addr));

	peer->sack_generation = 0;

	/* From 6.3.1 RTO Calculation:
	 *
	 * C1) Until an RTT measurement has been made for a packet sent to the
	 * given destination transport address, set RTO to the protocol
	 * parameter 'RTO.Initial'.
	 */
	peer->rto = msecs_to_jiffies(net->sctp.rto_initial);

	peer->last_time_heard = 0;
	peer->last_time_ecne_reduced = jiffies;

	peer->param_flags = SPP_HB_DISABLE |
			    SPP_PMTUD_ENABLE |
			    SPP_SACKDELAY_ENABLE;

	/* Initialize the default path max_retrans.  */
	peer->pathmaxrxt  = net->sctp.max_retrans_path;
	peer->pf_retrans  = net->sctp.pf_retrans;

	INIT_LIST_HEAD(&peer->transmitted);
	INIT_LIST_HEAD(&peer->send_ready);
	INIT_LIST_HEAD(&peer->transports);

	timer_setup(&peer->T3_rtx_timer, sctp_generate_t3_rtx_event, 0);
	timer_setup(&peer->hb_timer, sctp_generate_heartbeat_event, 0);
	timer_setup(&peer->reconf_timer, sctp_generate_reconf_event, 0);
	timer_setup(&peer->probe_timer, sctp_generate_probe_event, 0);
	timer_setup(&peer->proto_unreach_timer,
		    sctp_generate_proto_unreach_event, 0);

	/* Initialize the 64-bit random nonce sent with heartbeat. */
	get_random_bytes(&peer->hb_nonce, sizeof(peer->hb_nonce));

	refcount_set(&peer->refcnt, 1);

	return peer;
}

/* Allocate and initialize a new transport.  */
struct sctp_transport *sctp_transport_new(struct net *net,
					  const union sctp_addr *addr,
					  gfp_t gfp)
{
	struct sctp_transport *transport;

	transport = kzalloc(sizeof(*transport), gfp);
	if (!transport)
		goto fail;

	if (!sctp_transport_init(net, transport, addr, gfp))
		goto fail_init;

	SCTP_DBG_OBJCNT_INC(transport);

	return transport;

fail_init:
	kfree(transport);

fail:
	return NULL;
}

/* This transport is no longer needed.  Free it up if possible, or
 * delay freeing until the last reference is dropped.
 */
void sctp_transport_free(struct sctp_transport *transport)
{
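	/* Each timer armed on this transport holds its own reference,
	 * taken via sctp_transport_hold() when the timer was started,
	 * so every successful del_timer() below drops one.  The final
	 * sctp_transport_put() releases the caller's reference.
	 */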
	/* Try to delete the heartbeat timer.  */
	if (del_timer(&transport->hb_timer))
		sctp_transport_put(transport);

	/* Delete the T3_rtx timer if it's active.
	 * There is no point in not doing this now and letting the
	 * structure hang around in memory since we know
	 * the transport is going away.
	 */
	if (del_timer(&transport->T3_rtx_timer))
		sctp_transport_put(transport);

	if (del_timer(&transport->reconf_timer))
		sctp_transport_put(transport);

	if (del_timer(&transport->probe_timer))
		sctp_transport_put(transport);

	/* Delete the ICMP proto unreachable timer if it's active. */
	if (del_timer(&transport->proto_unreach_timer))
		sctp_transport_put(transport);

	sctp_transport_put(transport);
}

static void sctp_transport_destroy_rcu(struct rcu_head *head)
{
	struct sctp_transport *transport;

	transport = container_of(head, struct sctp_transport, rcu);

	dst_release(transport->dst);
	kfree(transport);
	SCTP_DBG_OBJCNT_DEC(transport);
}

/* Destroy the transport data structure.
 * Assumes there are no more users of this structure.
 */
static void sctp_transport_destroy(struct sctp_transport *transport)
{
	if (unlikely(refcount_read(&transport->refcnt))) {
		WARN(1, "Attempt to destroy undead transport %p!\n", transport);
		return;
	}

	sctp_packet_free(&transport->packet);

	if (transport->asoc)
		sctp_association_put(transport->asoc);

	call_rcu(&transport->rcu, sctp_transport_destroy_rcu);
}

/* Start T3_rtx timer if it is not already running and update the heartbeat
 * timer.  This routine is called every time a DATA chunk is sent.
 */
void sctp_transport_reset_t3_rtx(struct sctp_transport *transport)
{
	/* RFC 2960 6.3.2 Retransmission Timer Rules
	 *
	 * R1) Every time a DATA chunk is sent to any address (including a
	 * retransmission), if the T3-rtx timer of that address is not running
	 * start it running so that it will expire after the RTO of that
	 * address.
	 */

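	/* mod_timer() returns 0 when the timer was not already pending,
	 * i.e. exactly when it goes from idle to armed, so a reference
	 * is taken only for a newly armed timer; a rescheduled timer
	 * keeps the hold it already has.
	 */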
	if (!timer_pending(&transport->T3_rtx_timer))
		if (!mod_timer(&transport->T3_rtx_timer,
			       jiffies + transport->rto))
			sctp_transport_hold(transport);
}

void sctp_transport_reset_hb_timer(struct sctp_transport *transport)
{
	unsigned long expires;

	/* When a data chunk is sent, reset the heartbeat interval.  */
	expires = jiffies + sctp_transport_timeout(transport);
	if ((time_before(transport->hb_timer.expires, expires) ||
	     !timer_pending(&transport->hb_timer)) &&
	    !mod_timer(&transport->hb_timer,
		       expires + prandom_u32_max(transport->rto)))
		sctp_transport_hold(transport);
}

void sctp_transport_reset_reconf_timer(struct sctp_transport *transport)
{
	if (!timer_pending(&transport->reconf_timer))
		if (!mod_timer(&transport->reconf_timer,
			       jiffies + transport->rto))
			sctp_transport_hold(transport);
}

void sctp_transport_reset_probe_timer(struct sctp_transport *transport)
{
	if (!mod_timer(&transport->probe_timer,
		       jiffies + transport->probe_interval))
		sctp_transport_hold(transport);
}

void sctp_transport_reset_raise_timer(struct sctp_transport *transport)
{
	if (!mod_timer(&transport->probe_timer,
		       jiffies + transport->probe_interval * 30))
		sctp_transport_hold(transport);
}

/* This transport has been assigned to an association.
 * Initialize fields from the association or from the sock itself.
 * Register the reference count in the association.
 */
void sctp_transport_set_owner(struct sctp_transport *transport,
			      struct sctp_association *asoc)
{
	transport->asoc = asoc;
	sctp_association_hold(asoc);
}

/* Initialize the pmtu of a transport. */
void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk)
{
	/* If we don't have a fresh route, look one up */
	if (!transport->dst || transport->dst->obsolete) {
		sctp_transport_dst_release(transport);
		transport->af_specific->get_dst(transport, &transport->saddr,
						&transport->fl, sk);
	}

	if (transport->param_flags & SPP_PMTUD_DISABLE) {
		struct sctp_association *asoc = transport->asoc;

		if (!transport->pathmtu && asoc && asoc->pathmtu)
			transport->pathmtu = asoc->pathmtu;
		if (transport->pathmtu)
			return;
	}

	if (transport->dst)
		transport->pathmtu = sctp_dst_mtu(transport->dst);
	else
		transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;

	sctp_transport_pl_update(transport);
}

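/* Probe-based path MTU discovery (PLPMTUD, RFC 8899) state machine, as a
 * reading aid: BASE confirms that the minimum probe size works, SEARCH
 * grows the probe size until probes are lost, COMPLETE keeps the found
 * PLPMTU and only re-probes occasionally, and ERROR means even
 * SCTP_BASE_PLPMTU failed.  sctp_transport_pl_send() runs when a probe is
 * sent and treats SCTP_MAX_PROBES unacknowledged attempts as a failed
 * size; sctp_transport_pl_recv() runs when a probe is acknowledged.
 */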
void sctp_transport_pl_send(struct sctp_transport *t)
{
	if (t->pl.probe_count < SCTP_MAX_PROBES)
		goto out;

	t->pl.probe_count = 0;
	if (t->pl.state == SCTP_PL_BASE) {
		if (t->pl.probe_size == SCTP_BASE_PLPMTU) { /* BASE_PLPMTU Confirmation Failed */
			t->pl.state = SCTP_PL_ERROR; /* Base -> Error */

			t->pl.pmtu = SCTP_BASE_PLPMTU;
			t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
			sctp_assoc_sync_pmtu(t->asoc);
		}
	} else if (t->pl.state == SCTP_PL_SEARCH) {
		if (t->pl.pmtu == t->pl.probe_size) { /* Black Hole Detected */
			t->pl.state = SCTP_PL_BASE;  /* Search -> Base */
			t->pl.probe_size = SCTP_BASE_PLPMTU;
			t->pl.probe_high = 0;

			t->pl.pmtu = SCTP_BASE_PLPMTU;
			t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
			sctp_assoc_sync_pmtu(t->asoc);
		} else { /* Normal probe failure. */
			t->pl.probe_high = t->pl.probe_size;
			t->pl.probe_size = t->pl.pmtu;
		}
	} else if (t->pl.state == SCTP_PL_COMPLETE) {
		if (t->pl.pmtu == t->pl.probe_size) { /* Black Hole Detected */
			t->pl.state = SCTP_PL_BASE;  /* Search Complete -> Base */
			t->pl.probe_size = SCTP_BASE_PLPMTU;

			t->pl.pmtu = SCTP_BASE_PLPMTU;
			t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
			sctp_assoc_sync_pmtu(t->asoc);
		}
	}

out:
	pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n",
		 __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high);
	t->pl.probe_count++;
}

bool sctp_transport_pl_recv(struct sctp_transport *t)
{
	pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n",
		 __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high);

	t->pl.pmtu = t->pl.probe_size;
	t->pl.probe_count = 0;
	if (t->pl.state == SCTP_PL_BASE) {
		t->pl.state = SCTP_PL_SEARCH; /* Base -> Search */
		t->pl.probe_size += SCTP_PL_BIG_STEP;
	} else if (t->pl.state == SCTP_PL_ERROR) {
		t->pl.state = SCTP_PL_SEARCH; /* Error -> Search */

		t->pl.pmtu = t->pl.probe_size;
		t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
		sctp_assoc_sync_pmtu(t->asoc);
		t->pl.probe_size += SCTP_PL_BIG_STEP;
	} else if (t->pl.state == SCTP_PL_SEARCH) {
		if (!t->pl.probe_high) {
			t->pl.probe_size = min(t->pl.probe_size + SCTP_PL_BIG_STEP,
					       SCTP_MAX_PLPMTU);
			return false;
		}
		t->pl.probe_size += SCTP_PL_MIN_STEP;
		if (t->pl.probe_size >= t->pl.probe_high) {
			t->pl.probe_high = 0;
			t->pl.state = SCTP_PL_COMPLETE; /* Search -> Search Complete */

			t->pl.probe_size = t->pl.pmtu;
			t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
			sctp_assoc_sync_pmtu(t->asoc);
			sctp_transport_reset_raise_timer(t);
		}
	} else if (t->pl.state == SCTP_PL_COMPLETE) {
		/* Raise probe_size again after 30 * interval in Search Complete */
		t->pl.state = SCTP_PL_SEARCH; /* Search Complete -> Search */
		t->pl.probe_size += SCTP_PL_MIN_STEP;
	}

	return t->pl.state == SCTP_PL_COMPLETE;
}

static bool sctp_transport_pl_toobig(struct sctp_transport *t, u32 pmtu)
{
	pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, ptb: %d\n",
		 __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, pmtu);

	if (pmtu < SCTP_MIN_PLPMTU || pmtu >= t->pl.probe_size)
		return false;

	if (t->pl.state == SCTP_PL_BASE) {
		if (pmtu >= SCTP_MIN_PLPMTU && pmtu < SCTP_BASE_PLPMTU) {
			t->pl.state = SCTP_PL_ERROR; /* Base -> Error */

			t->pl.pmtu = SCTP_BASE_PLPMTU;
			t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
			return true;
		}
	} else if (t->pl.state == SCTP_PL_SEARCH) {
		if (pmtu >= SCTP_BASE_PLPMTU && pmtu < t->pl.pmtu) {
			t->pl.state = SCTP_PL_BASE;  /* Search -> Base */
			t->pl.probe_size = SCTP_BASE_PLPMTU;
			t->pl.probe_count = 0;

			t->pl.probe_high = 0;
			t->pl.pmtu = SCTP_BASE_PLPMTU;
			t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
			return true;
		} else if (pmtu > t->pl.pmtu && pmtu < t->pl.probe_size) {
			t->pl.probe_size = pmtu;
			t->pl.probe_count = 0;
		}
	} else if (t->pl.state == SCTP_PL_COMPLETE) {
		if (pmtu >= SCTP_BASE_PLPMTU && pmtu < t->pl.pmtu) {
			t->pl.state = SCTP_PL_BASE;  /* Complete -> Base */
			t->pl.probe_size = SCTP_BASE_PLPMTU;
			t->pl.probe_count = 0;

			t->pl.probe_high = 0;
			t->pl.pmtu = SCTP_BASE_PLPMTU;
			t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
			sctp_transport_reset_probe_timer(t);
			return true;
		}
	}

	return false;
}

bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
{
	struct sock *sk = t->asoc->base.sk;
	struct dst_entry *dst;
	bool change = true;

	if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) {
		pr_warn_ratelimited("%s: Reported pmtu %d too low, using default minimum of %d\n",
				    __func__, pmtu, SCTP_DEFAULT_MINSEGMENT);
		/* Use default minimum segment instead */
		pmtu = SCTP_DEFAULT_MINSEGMENT;
	}
	pmtu = SCTP_TRUNC4(pmtu);

	if (sctp_transport_pl_enabled(t))
		return sctp_transport_pl_toobig(t, pmtu - sctp_transport_pl_hlen(t));

	dst = sctp_transport_dst_check(t);
	if (dst) {
		struct sctp_pf *pf = sctp_get_pf_specific(dst->ops->family);
		union sctp_addr addr;

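		/* ->update_pmtu() works on the socket's destination
		 * address, so save it, temporarily point sk's daddr at
		 * this transport's peer address, apply the update, then
		 * restore the saved address.
		 */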
		pf->af->from_sk(&addr, sk);
		pf->to_sk_daddr(&t->ipaddr, sk);
		dst->ops->update_pmtu(dst, sk, NULL, pmtu, true);
		pf->to_sk_daddr(&addr, sk);

		dst = sctp_transport_dst_check(t);
	}

	if (!dst) {
		t->af_specific->get_dst(t, &t->saddr, &t->fl, sk);
		dst = t->dst;
	}

	if (dst) {
		/* Re-fetch, as underlying layers may have a higher minimum size */
		pmtu = sctp_dst_mtu(dst);
		change = t->pathmtu != pmtu;
	}
	t->pathmtu = pmtu;

	return change;
}

/* Caches the dst entry and source address for a transport's destination
 * address.
 */
void sctp_transport_route(struct sctp_transport *transport,
			  union sctp_addr *saddr, struct sctp_sock *opt)
{
	struct sctp_association *asoc = transport->asoc;
	struct sctp_af *af = transport->af_specific;

	sctp_transport_dst_release(transport);
	af->get_dst(transport, saddr, &transport->fl, sctp_opt2sk(opt));

	if (saddr)
		memcpy(&transport->saddr, saddr, sizeof(union sctp_addr));
	else
		af->get_saddr(opt, transport, &transport->fl);

	sctp_transport_pmtu(transport, sctp_opt2sk(opt));

	/* Initialize sk->sk_rcv_saddr, if the transport is the
	 * association's active path for getsockname().
	 */
	if (transport->dst && asoc &&
	    (!asoc->peer.primary_path || transport == asoc->peer.active_path))
		opt->pf->to_sk_saddr(&transport->saddr, asoc->base.sk);
}

/* Hold a reference to a transport.  */
int sctp_transport_hold(struct sctp_transport *transport)
{
	return refcount_inc_not_zero(&transport->refcnt);
}

/* Release a reference to a transport and clean up
 * if there are no more references.
 */
void sctp_transport_put(struct sctp_transport *transport)
{
	if (refcount_dec_and_test(&transport->refcnt))
		sctp_transport_destroy(transport);
}

/* Update transport's RTO based on the newly calculated RTT. */
void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt)
{
	if (unlikely(!tp->rto_pending))
		/* We should not be doing any RTO updates unless rto_pending is set.  */
		pr_debug("%s: rto_pending not set on transport %p!\n", __func__, tp);

	if (tp->rttvar || tp->srtt) {
		struct net *net = tp->asoc->base.net;
		/* 6.3.1 C3) When a new RTT measurement R' is made, set
		 * RTTVAR <- (1 - RTO.Beta) * RTTVAR + RTO.Beta * |SRTT - R'|
		 * SRTT <- (1 - RTO.Alpha) * SRTT + RTO.Alpha * R'
		 */

		/* Note:  The above algorithm has been rewritten to
		 * express rto_beta and rto_alpha as inverse powers
		 * of two.
		 * For example, assuming the default value of RTO.Alpha of
		 * 1/8, rto_alpha would be expressed as 3.
		 */
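		/* Illustration with the defaults rto_alpha = 3 (1/8) and
		 * rto_beta = 2 (1/4): for srtt = 800, rttvar = 100 and a
		 * new rtt = 400, rttvar becomes 100 - 25 + (400 >> 2) = 175
		 * and srtt becomes 800 - 100 + 50 = 750.
		 */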
		tp->rttvar = tp->rttvar - (tp->rttvar >> net->sctp.rto_beta)
			+ (((__u32)abs((__s64)tp->srtt - (__s64)rtt)) >> net->sctp.rto_beta);
		tp->srtt = tp->srtt - (tp->srtt >> net->sctp.rto_alpha)
			+ (rtt >> net->sctp.rto_alpha);
	} else {
		/* 6.3.1 C2) When the first RTT measurement R is made, set
		 * SRTT <- R, RTTVAR <- R/2.
		 */
		tp->srtt = rtt;
		tp->rttvar = rtt >> 1;
	}

	/* 6.3.1 G1) Whenever RTTVAR is computed, if RTTVAR = 0, then
	 * adjust RTTVAR <- G, where G is the CLOCK GRANULARITY.
	 */
	if (tp->rttvar == 0)
		tp->rttvar = SCTP_CLOCK_GRANULARITY;

	/* 6.3.1 C3) After the computation, update RTO <- SRTT + 4 * RTTVAR. */
	tp->rto = tp->srtt + (tp->rttvar << 2);

	/* 6.3.1 C6) Whenever RTO is computed, if it is less than RTO.Min
	 * seconds then it is rounded up to RTO.Min seconds.
	 */
	if (tp->rto < tp->asoc->rto_min)
		tp->rto = tp->asoc->rto_min;

	/* 6.3.1 C7) A maximum value may be placed on RTO provided it is
	 * at least RTO.max seconds.
	 */
	if (tp->rto > tp->asoc->rto_max)
		tp->rto = tp->asoc->rto_max;

	sctp_max_rto(tp->asoc, tp);
	tp->rtt = rtt;

	/* Reset rto_pending so that a new RTT measurement is started when a
	 * new data chunk is sent.
	 */
	tp->rto_pending = 0;

	pr_debug("%s: transport:%p, rtt:%d, srtt:%d rttvar:%d, rto:%ld\n",
		 __func__, tp, rtt, tp->srtt, tp->rttvar, tp->rto);
}

/* This routine updates the transport's cwnd and partial_bytes_acked
 * parameters based on the bytes acked in the received SACK.
 */
void sctp_transport_raise_cwnd(struct sctp_transport *transport,
			       __u32 sack_ctsn, __u32 bytes_acked)
{
	struct sctp_association *asoc = transport->asoc;
	__u32 cwnd, ssthresh, flight_size, pba, pmtu;

	cwnd = transport->cwnd;
	flight_size = transport->flight_size;

	/* See if we need to exit Fast Recovery first */
	if (asoc->fast_recovery &&
	    TSN_lte(asoc->fast_recovery_exit, sack_ctsn))
		asoc->fast_recovery = 0;

	ssthresh = transport->ssthresh;
	pba = transport->partial_bytes_acked;
	pmtu = transport->asoc->pathmtu;

	if (cwnd <= ssthresh) {
		/* RFC 4960 7.2.1
		 * o  When cwnd is less than or equal to ssthresh, an SCTP
		 *    endpoint MUST use the slow-start algorithm to increase
		 *    cwnd only if the current congestion window is being fully
		 *    utilized, an incoming SACK advances the Cumulative TSN
		 *    Ack Point, and the data sender is not in Fast Recovery.
		 *    Only when these three conditions are met can the cwnd be
		 *    increased; otherwise, the cwnd MUST not be increased.
		 *    If these conditions are met, then cwnd MUST be increased
		 *    by, at most, the lesser of 1) the total size of the
		 *    previously outstanding DATA chunk(s) acknowledged, and
		 *    2) the destination's path MTU.  This upper bound protects
		 *    against the ACK-Splitting attack outlined in [SAVAGE99].
		 */
		if (asoc->fast_recovery)
			return;

		/* The appropriate cwnd increase algorithm is performed
		 * if, and only if the congestion window is being fully
		 * utilized.  Note that RFC4960 Errata 3.22 removed the
		 * other condition on ctsn moving.
		 */
		if (flight_size < cwnd)
			return;

		if (bytes_acked > pmtu)
			cwnd += pmtu;
		else
			cwnd += bytes_acked;
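		/* e.g. with pmtu = 1500, a SACK newly acking 4380 bytes
		 * grows cwnd by one MTU (1500), while one acking only
		 * 1000 bytes grows it by those 1000 bytes.
		 */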

		pr_debug("%s: slow start: transport:%p, bytes_acked:%d, "
			 "cwnd:%d, ssthresh:%d, flight_size:%d, pba:%d\n",
			 __func__, transport, bytes_acked, cwnd, ssthresh,
			 flight_size, pba);
	} else {
		/* RFC 2960 7.2.2 Whenever cwnd is greater than ssthresh,
		 * upon each SACK arrival, increase partial_bytes_acked
		 * by the total number of bytes of all new chunks
		 * acknowledged in that SACK including chunks
		 * acknowledged by the new Cumulative TSN Ack and by Gap
		 * Ack Blocks. (updated by RFC4960 Errata 3.22)
		 *
		 * When partial_bytes_acked is greater than cwnd and
		 * before the arrival of the SACK the sender had less
		 * bytes of data outstanding than cwnd (i.e., before
		 * arrival of the SACK, flightsize was less than cwnd),
		 * reset partial_bytes_acked to cwnd. (RFC 4960 Errata
		 * 3.26)
		 *
		 * When partial_bytes_acked is equal to or greater than
		 * cwnd and before the arrival of the SACK the sender
		 * had cwnd or more bytes of data outstanding (i.e.,
		 * before arrival of the SACK, flightsize was greater
		 * than or equal to cwnd), partial_bytes_acked is reset
		 * to (partial_bytes_acked - cwnd). Next, cwnd is
		 * increased by MTU. (RFC 4960 Errata 3.12)
		 */
		pba += bytes_acked;
		if (pba > cwnd && flight_size < cwnd)
			pba = cwnd;
		if (pba >= cwnd && flight_size >= cwnd) {
			pba = pba - cwnd;
			cwnd += pmtu;
		}

		pr_debug("%s: congestion avoidance: transport:%p, "
			 "bytes_acked:%d, cwnd:%d, ssthresh:%d, "
			 "flight_size:%d, pba:%d\n", __func__,
			 transport, bytes_acked, cwnd, ssthresh,
			 flight_size, pba);
	}

	transport->cwnd = cwnd;
	transport->partial_bytes_acked = pba;
}

/* This routine is used to lower the transport's cwnd when congestion is
 * detected.
 */
void sctp_transport_lower_cwnd(struct sctp_transport *transport,
			       enum sctp_lower_cwnd reason)
{
	struct sctp_association *asoc = transport->asoc;

	switch (reason) {
	case SCTP_LOWER_CWND_T3_RTX:
		/* RFC 2960 Section 7.2.3, sctpimpguide
		 * When the T3-rtx timer expires on an address, SCTP should
		 * perform slow start by:
		 *      ssthresh = max(cwnd/2, 4*MTU)
		 *      cwnd = 1*MTU
		 *      partial_bytes_acked = 0
		 */
		transport->ssthresh = max(transport->cwnd/2,
					  4*asoc->pathmtu);
		transport->cwnd = asoc->pathmtu;

		/* T3-rtx also clears fast recovery */
		asoc->fast_recovery = 0;
		break;

	case SCTP_LOWER_CWND_FAST_RTX:
		/* RFC 2960 7.2.4 Adjust the ssthresh and cwnd of the
		 * destination address(es) to which the missing DATA chunks
		 * were last sent, according to the formula described in
		 * Section 7.2.3.
		 *
		 * RFC 2960 7.2.3, sctpimpguide Upon detection of packet
		 * losses from SACK (see Section 7.2.4), An endpoint
		 * should do the following:
		 *      ssthresh = max(cwnd/2, 4*MTU)
		 *      cwnd = ssthresh
		 *      partial_bytes_acked = 0
		 */
		if (asoc->fast_recovery)
			return;

		/* Mark Fast recovery */
		asoc->fast_recovery = 1;
		asoc->fast_recovery_exit = asoc->next_tsn - 1;

		transport->ssthresh = max(transport->cwnd/2,
					  4*asoc->pathmtu);
		transport->cwnd = transport->ssthresh;
		break;

	case SCTP_LOWER_CWND_ECNE:
		/* RFC 2481 Section 6.1.2.
		 * If the sender receives an ECN-Echo ACK packet
		 * then the sender knows that congestion was encountered in the
		 * network on the path from the sender to the receiver. The
		 * indication of congestion should be treated just as a
		 * congestion loss in non-ECN Capable TCP. That is, the TCP
		 * source halves the congestion window "cwnd" and reduces the
		 * slow start threshold "ssthresh".
		 * A critical condition is that TCP does not react to
		 * congestion indications more than once every window of
		 * data (or more loosely more than once every round-trip time).
		 */
		if (time_after(jiffies, transport->last_time_ecne_reduced +
					transport->rtt)) {
			transport->ssthresh = max(transport->cwnd/2,
						  4*asoc->pathmtu);
			transport->cwnd = transport->ssthresh;
			transport->last_time_ecne_reduced = jiffies;
		}
		break;

	case SCTP_LOWER_CWND_INACTIVE:
		/* RFC 2960 Section 7.2.1, sctpimpguide
		 * When the endpoint does not transmit data on a given
		 * transport address, the cwnd of the transport address
		 * should be adjusted to max(cwnd/2, 4*MTU) per RTO.
		 * NOTE: Although the draft recommends that this check needs
		 * to be done every RTO interval, we do it every heartbeat
		 * interval.
		 */
		transport->cwnd = max(transport->cwnd/2,
					 4*asoc->pathmtu);
		/* RFC 4960 Errata 3.27.2: also adjust ssthresh */
		transport->ssthresh = transport->cwnd;
		break;
	}

	transport->partial_bytes_acked = 0;

	pr_debug("%s: transport:%p, reason:%d, cwnd:%d, ssthresh:%d\n",
		 __func__, transport, reason, transport->cwnd,
		 transport->ssthresh);
}

/* Apply Max.Burst limit to the congestion window:
 * sctpimpguide-05 2.14.2
 * D) When the time comes for the sender to
 * transmit new DATA chunks, the protocol parameter Max.Burst MUST
 * first be applied to limit how many new DATA chunks may be sent.
 * The limit is applied by adjusting cwnd as follows:
 * 	if ((flightsize + Max.Burst * MTU) < cwnd)
 * 		cwnd = flightsize + Max.Burst * MTU
 */
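/* e.g. with flight_size = 3000, Max.Burst = 4 and an MTU of 1500, cwnd
 * is clamped to at most 3000 + 4 * 1500 = 9000 octets for this send,
 * and the pre-clamp cwnd is remembered in burst_limited.
 */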

void sctp_transport_burst_limited(struct sctp_transport *t)
{
	struct sctp_association *asoc = t->asoc;
	u32 old_cwnd = t->cwnd;
	u32 max_burst_bytes;

	if (t->burst_limited || asoc->max_burst == 0)
		return;

	max_burst_bytes = t->flight_size + (asoc->max_burst * asoc->pathmtu);
	if (max_burst_bytes < old_cwnd) {
		t->cwnd = max_burst_bytes;
		t->burst_limited = old_cwnd;
	}
}

/* Restore the old cwnd congestion window, after the burst has had its
 * desired effect.
 */
void sctp_transport_burst_reset(struct sctp_transport *t)
{
	if (t->burst_limited) {
		t->cwnd = t->burst_limited;
		t->burst_limited = 0;
	}
}

/* What is the next timeout value for this transport? */
unsigned long sctp_transport_timeout(struct sctp_transport *trans)
{
	/* RTO + timer slack +/- 50% of RTO */
	unsigned long timeout = trans->rto >> 1;
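	/* sctp_transport_reset_hb_timer() adds random jitter in [0, rto)
	 * on top of the value returned here, so a heartbeat effectively
	 * fires hbinterval + rto +/- 50% of rto after the last send,
	 * matching the comment above.
	 */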

	if (trans->state != SCTP_UNCONFIRMED &&
	    trans->state != SCTP_PF)
		timeout += trans->hbinterval;

	return max_t(unsigned long, timeout, HZ / 5);
}

/* Reset transport variables to their initial values */
void sctp_transport_reset(struct sctp_transport *t)
{
	struct sctp_association *asoc = t->asoc;

	/* RFC 2960 (bis), Section 5.2.4
	 * All the congestion control parameters (e.g., cwnd, ssthresh)
	 * related to this peer MUST be reset to their initial values
	 * (see Section 6.2.1)
	 */
	t->cwnd = min(4*asoc->pathmtu, max_t(__u32, 2*asoc->pathmtu, 4380));
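	/* e.g. with pathmtu = 1500 this is min(6000, max(3000, 4380)) =
	 * 4380 octets, the initial cwnd of RFC 4960, Section 7.2.1.
	 */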
	t->burst_limited = 0;
	t->ssthresh = asoc->peer.i.a_rwnd;
	t->rto = asoc->rto_initial;
	sctp_max_rto(asoc, t);
	t->rtt = 0;
	t->srtt = 0;
	t->rttvar = 0;

	/* Reset these additional variables so that we have a clean slate. */
	t->partial_bytes_acked = 0;
	t->flight_size = 0;
	t->error_count = 0;
	t->rto_pending = 0;
	t->hb_sent = 0;

	/* Initialize the state information for SFR-CACC */
	t->cacc.changeover_active = 0;
	t->cacc.cycling_changeover = 0;
	t->cacc.next_tsn_at_change = 0;
	t->cacc.cacc_saw_newack = 0;
}

/* Schedule retransmission on the given transport */
void sctp_transport_immediate_rtx(struct sctp_transport *t)
{
	/* Stop pending T3_rtx_timer */
	if (del_timer(&t->T3_rtx_timer))
		sctp_transport_put(t);

	sctp_retransmit(&t->asoc->outqueue, t, SCTP_RTXR_T3_RTX);
	if (!timer_pending(&t->T3_rtx_timer)) {
		if (!mod_timer(&t->T3_rtx_timer, jiffies + t->rto))
			sctp_transport_hold(t);
	}
}

/* Drop dst */
void sctp_transport_dst_release(struct sctp_transport *t)
{
	dst_release(t->dst);
	t->dst = NULL;
	t->dst_pending_confirm = 0;
}

/* Schedule neighbour confirm */
void sctp_transport_dst_confirm(struct sctp_transport *t)
{
	t->dst_pending_confirm = 1;
}