transport.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515
  1. /* SCTP kernel reference Implementation
  2. * Copyright (c) 1999-2000 Cisco, Inc.
  3. * Copyright (c) 1999-2001 Motorola, Inc.
  4. * Copyright (c) 2001-2003 International Business Machines Corp.
  5. * Copyright (c) 2001 Intel Corp.
  6. * Copyright (c) 2001 La Monte H.P. Yarroll
  7. *
  8. * This file is part of the SCTP kernel reference Implementation
  9. *
  10. * This module provides the abstraction for an SCTP transport representing
  11. * a remote transport address. For local transport addresses, we just use
  12. * union sctp_addr.
  13. *
  14. * The SCTP reference implementation is free software;
  15. * you can redistribute it and/or modify it under the terms of
  16. * the GNU General Public License as published by
  17. * the Free Software Foundation; either version 2, or (at your option)
  18. * any later version.
  19. *
  20. * The SCTP reference implementation is distributed in the hope that it
  21. * will be useful, but WITHOUT ANY WARRANTY; without even the implied
  22. * ************************
  23. * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  24. * See the GNU General Public License for more details.
  25. *
  26. * You should have received a copy of the GNU General Public License
  27. * along with GNU CC; see the file COPYING. If not, write to
  28. * the Free Software Foundation, 59 Temple Place - Suite 330,
  29. * Boston, MA 02111-1307, USA.
  30. *
  31. * Please send any bug reports or fixes you make to the
  32. * email address(es):
  33. * lksctp developers <lksctp-developers@lists.sourceforge.net>
  34. *
  35. * Or submit a bug report through the following website:
  36. * http://www.sf.net/projects/lksctp
  37. *
  38. * Written or modified by:
  39. * La Monte H.P. Yarroll <piggy@acm.org>
  40. * Karl Knutson <karl@athena.chicago.il.us>
  41. * Jon Grimm <jgrimm@us.ibm.com>
  42. * Xingang Guo <xingang.guo@intel.com>
  43. * Hui Huang <hui.huang@nokia.com>
  44. * Sridhar Samudrala <sri@us.ibm.com>
  45. * Ardelle Fan <ardelle.fan@intel.com>
  46. *
  47. * Any bugs reported given to us we will try to fix... any fixes shared will
  48. * be incorporated into the next SCTP release.
  49. */
  50. #include <linux/types.h>
  51. #include <net/sctp/sctp.h>
  52. #include <net/sctp/sm.h>
  53. /* 1st Level Abstractions. */
  54. /* Initialize a new transport from provided memory. */
  55. static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
  56. const union sctp_addr *addr,
  57. int gfp)
  58. {
  59. /* Copy in the address. */
  60. peer->ipaddr = *addr;
  61. peer->af_specific = sctp_get_af_specific(addr->sa.sa_family);
  62. peer->asoc = NULL;
  63. peer->dst = NULL;
  64. memset(&peer->saddr, 0, sizeof(union sctp_addr));
  65. /* From 6.3.1 RTO Calculation:
  66. *
  67. * C1) Until an RTT measurement has been made for a packet sent to the
  68. * given destination transport address, set RTO to the protocol
  69. * parameter 'RTO.Initial'.
  70. */
  71. peer->rtt = 0;
  72. peer->rto = sctp_rto_initial;
  73. peer->rttvar = 0;
  74. peer->srtt = 0;
  75. peer->rto_pending = 0;
  76. peer->last_time_heard = jiffies;
  77. peer->last_time_used = jiffies;
  78. peer->last_time_ecne_reduced = jiffies;
  79. peer->init_sent_count = 0;
  80. peer->state = SCTP_ACTIVE;
  81. peer->hb_allowed = 0;
  82. /* Initialize the default path max_retrans. */
  83. peer->max_retrans = sctp_max_retrans_path;
  84. peer->error_count = 0;
  85. INIT_LIST_HEAD(&peer->transmitted);
  86. INIT_LIST_HEAD(&peer->send_ready);
  87. INIT_LIST_HEAD(&peer->transports);
  88. /* Set up the retransmission timer. */
  89. init_timer(&peer->T3_rtx_timer);
  90. peer->T3_rtx_timer.function = sctp_generate_t3_rtx_event;
  91. peer->T3_rtx_timer.data = (unsigned long)peer;
  92. /* Set up the heartbeat timer. */
  93. init_timer(&peer->hb_timer);
  94. peer->hb_timer.function = sctp_generate_heartbeat_event;
  95. peer->hb_timer.data = (unsigned long)peer;
  96. atomic_set(&peer->refcnt, 1);
  97. peer->dead = 0;
  98. peer->malloced = 0;
  99. /* Initialize the state information for SFR-CACC */
  100. peer->cacc.changeover_active = 0;
  101. peer->cacc.cycling_changeover = 0;
  102. peer->cacc.next_tsn_at_change = 0;
  103. peer->cacc.cacc_saw_newack = 0;
  104. return peer;
  105. }
  106. /* Allocate and initialize a new transport. */
  107. struct sctp_transport *sctp_transport_new(const union sctp_addr *addr, int gfp)
  108. {
  109. struct sctp_transport *transport;
  110. transport = t_new(struct sctp_transport, gfp);
  111. if (!transport)
  112. goto fail;
  113. if (!sctp_transport_init(transport, addr, gfp))
  114. goto fail_init;
  115. transport->malloced = 1;
  116. SCTP_DBG_OBJCNT_INC(transport);
  117. return transport;
  118. fail_init:
  119. kfree(transport);
  120. fail:
  121. return NULL;
  122. }
  123. /* This transport is no longer needed. Free up if possible, or
  124. * delay until it last reference count.
  125. */
  126. void sctp_transport_free(struct sctp_transport *transport)
  127. {
  128. transport->dead = 1;
  129. /* Try to delete the heartbeat timer. */
  130. if (del_timer(&transport->hb_timer))
  131. sctp_transport_put(transport);
  132. /* Delete the T3_rtx timer if it's active.
  133. * There is no point in not doing this now and letting
  134. * structure hang around in memory since we know
  135. * the tranport is going away.
  136. */
  137. if (timer_pending(&transport->T3_rtx_timer) &&
  138. del_timer(&transport->T3_rtx_timer))
  139. sctp_transport_put(transport);
  140. sctp_transport_put(transport);
  141. }
  142. /* Destroy the transport data structure.
  143. * Assumes there are no more users of this structure.
  144. */
  145. static void sctp_transport_destroy(struct sctp_transport *transport)
  146. {
  147. SCTP_ASSERT(transport->dead, "Transport is not dead", return);
  148. if (transport->asoc)
  149. sctp_association_put(transport->asoc);
  150. sctp_packet_free(&transport->packet);
  151. dst_release(transport->dst);
  152. kfree(transport);
  153. SCTP_DBG_OBJCNT_DEC(transport);
  154. }
  155. /* Start T3_rtx timer if it is not already running and update the heartbeat
  156. * timer. This routine is called every time a DATA chunk is sent.
  157. */
  158. void sctp_transport_reset_timers(struct sctp_transport *transport)
  159. {
  160. /* RFC 2960 6.3.2 Retransmission Timer Rules
  161. *
  162. * R1) Every time a DATA chunk is sent to any address(including a
  163. * retransmission), if the T3-rtx timer of that address is not running
  164. * start it running so that it will expire after the RTO of that
  165. * address.
  166. */
  167. if (!timer_pending(&transport->T3_rtx_timer))
  168. if (!mod_timer(&transport->T3_rtx_timer,
  169. jiffies + transport->rto))
  170. sctp_transport_hold(transport);
  171. /* When a data chunk is sent, reset the heartbeat interval. */
  172. if (!mod_timer(&transport->hb_timer,
  173. sctp_transport_timeout(transport)))
  174. sctp_transport_hold(transport);
  175. }
  176. /* This transport has been assigned to an association.
  177. * Initialize fields from the association or from the sock itself.
  178. * Register the reference count in the association.
  179. */
  180. void sctp_transport_set_owner(struct sctp_transport *transport,
  181. struct sctp_association *asoc)
  182. {
  183. transport->asoc = asoc;
  184. sctp_association_hold(asoc);
  185. }
  186. /* Initialize the pmtu of a transport. */
  187. void sctp_transport_pmtu(struct sctp_transport *transport)
  188. {
  189. struct dst_entry *dst;
  190. dst = transport->af_specific->get_dst(NULL, &transport->ipaddr, NULL);
  191. if (dst) {
  192. transport->pmtu = dst_mtu(dst);
  193. dst_release(dst);
  194. } else
  195. transport->pmtu = SCTP_DEFAULT_MAXSEGMENT;
  196. }
  197. /* Caches the dst entry and source address for a transport's destination
  198. * address.
  199. */
  200. void sctp_transport_route(struct sctp_transport *transport,
  201. union sctp_addr *saddr, struct sctp_sock *opt)
  202. {
  203. struct sctp_association *asoc = transport->asoc;
  204. struct sctp_af *af = transport->af_specific;
  205. union sctp_addr *daddr = &transport->ipaddr;
  206. struct dst_entry *dst;
  207. dst = af->get_dst(asoc, daddr, saddr);
  208. if (saddr)
  209. memcpy(&transport->saddr, saddr, sizeof(union sctp_addr));
  210. else
  211. af->get_saddr(asoc, dst, daddr, &transport->saddr);
  212. transport->dst = dst;
  213. if (dst) {
  214. transport->pmtu = dst_mtu(dst);
  215. /* Initialize sk->sk_rcv_saddr, if the transport is the
  216. * association's active path for getsockname().
  217. */
  218. if (asoc && (transport == asoc->peer.active_path))
  219. af->to_sk_saddr(&transport->saddr, asoc->base.sk);
  220. } else
  221. transport->pmtu = SCTP_DEFAULT_MAXSEGMENT;
  222. }
  223. /* Hold a reference to a transport. */
  224. void sctp_transport_hold(struct sctp_transport *transport)
  225. {
  226. atomic_inc(&transport->refcnt);
  227. }
  228. /* Release a reference to a transport and clean up
  229. * if there are no more references.
  230. */
  231. void sctp_transport_put(struct sctp_transport *transport)
  232. {
  233. if (atomic_dec_and_test(&transport->refcnt))
  234. sctp_transport_destroy(transport);
  235. }
  236. /* Update transport's RTO based on the newly calculated RTT. */
  237. void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt)
  238. {
  239. /* Check for valid transport. */
  240. SCTP_ASSERT(tp, "NULL transport", return);
  241. /* We should not be doing any RTO updates unless rto_pending is set. */
  242. SCTP_ASSERT(tp->rto_pending, "rto_pending not set", return);
  243. if (tp->rttvar || tp->srtt) {
  244. /* 6.3.1 C3) When a new RTT measurement R' is made, set
  245. * RTTVAR <- (1 - RTO.Beta) * RTTVAR + RTO.Beta * |SRTT - R'|
  246. * SRTT <- (1 - RTO.Alpha) * SRTT + RTO.Alpha * R'
  247. */
  248. /* Note: The above algorithm has been rewritten to
  249. * express rto_beta and rto_alpha as inverse powers
  250. * of two.
  251. * For example, assuming the default value of RTO.Alpha of
  252. * 1/8, rto_alpha would be expressed as 3.
  253. */
  254. tp->rttvar = tp->rttvar - (tp->rttvar >> sctp_rto_beta)
  255. + ((abs(tp->srtt - rtt)) >> sctp_rto_beta);
  256. tp->srtt = tp->srtt - (tp->srtt >> sctp_rto_alpha)
  257. + (rtt >> sctp_rto_alpha);
  258. } else {
  259. /* 6.3.1 C2) When the first RTT measurement R is made, set
  260. * SRTT <- R, RTTVAR <- R/2.
  261. */
  262. tp->srtt = rtt;
  263. tp->rttvar = rtt >> 1;
  264. }
  265. /* 6.3.1 G1) Whenever RTTVAR is computed, if RTTVAR = 0, then
  266. * adjust RTTVAR <- G, where G is the CLOCK GRANULARITY.
  267. */
  268. if (tp->rttvar == 0)
  269. tp->rttvar = SCTP_CLOCK_GRANULARITY;
  270. /* 6.3.1 C3) After the computation, update RTO <- SRTT + 4 * RTTVAR. */
  271. tp->rto = tp->srtt + (tp->rttvar << 2);
  272. /* 6.3.1 C6) Whenever RTO is computed, if it is less than RTO.Min
  273. * seconds then it is rounded up to RTO.Min seconds.
  274. */
  275. if (tp->rto < tp->asoc->rto_min)
  276. tp->rto = tp->asoc->rto_min;
  277. /* 6.3.1 C7) A maximum value may be placed on RTO provided it is
  278. * at least RTO.max seconds.
  279. */
  280. if (tp->rto > tp->asoc->rto_max)
  281. tp->rto = tp->asoc->rto_max;
  282. tp->rtt = rtt;
  283. /* Reset rto_pending so that a new RTT measurement is started when a
  284. * new data chunk is sent.
  285. */
  286. tp->rto_pending = 0;
  287. SCTP_DEBUG_PRINTK("%s: transport: %p, rtt: %d, srtt: %d "
  288. "rttvar: %d, rto: %d\n", __FUNCTION__,
  289. tp, rtt, tp->srtt, tp->rttvar, tp->rto);
  290. }
  291. /* This routine updates the transport's cwnd and partial_bytes_acked
  292. * parameters based on the bytes acked in the received SACK.
  293. */
  294. void sctp_transport_raise_cwnd(struct sctp_transport *transport,
  295. __u32 sack_ctsn, __u32 bytes_acked)
  296. {
  297. __u32 cwnd, ssthresh, flight_size, pba, pmtu;
  298. cwnd = transport->cwnd;
  299. flight_size = transport->flight_size;
  300. /* The appropriate cwnd increase algorithm is performed if, and only
  301. * if the cumulative TSN has advanced and the congestion window is
  302. * being fully utilized.
  303. */
  304. if ((transport->asoc->ctsn_ack_point >= sack_ctsn) ||
  305. (flight_size < cwnd))
  306. return;
  307. ssthresh = transport->ssthresh;
  308. pba = transport->partial_bytes_acked;
  309. pmtu = transport->asoc->pmtu;
  310. if (cwnd <= ssthresh) {
  311. /* RFC 2960 7.2.1, sctpimpguide-05 2.14.2 When cwnd is less
  312. * than or equal to ssthresh an SCTP endpoint MUST use the
  313. * slow start algorithm to increase cwnd only if the current
  314. * congestion window is being fully utilized and an incoming
  315. * SACK advances the Cumulative TSN Ack Point. Only when these
  316. * two conditions are met can the cwnd be increased otherwise
  317. * the cwnd MUST not be increased. If these conditions are met
  318. * then cwnd MUST be increased by at most the lesser of
  319. * 1) the total size of the previously outstanding DATA
  320. * chunk(s) acknowledged, and 2) the destination's path MTU.
  321. */
  322. if (bytes_acked > pmtu)
  323. cwnd += pmtu;
  324. else
  325. cwnd += bytes_acked;
  326. SCTP_DEBUG_PRINTK("%s: SLOW START: transport: %p, "
  327. "bytes_acked: %d, cwnd: %d, ssthresh: %d, "
  328. "flight_size: %d, pba: %d\n",
  329. __FUNCTION__,
  330. transport, bytes_acked, cwnd,
  331. ssthresh, flight_size, pba);
  332. } else {
  333. /* RFC 2960 7.2.2 Whenever cwnd is greater than ssthresh,
  334. * upon each SACK arrival that advances the Cumulative TSN Ack
  335. * Point, increase partial_bytes_acked by the total number of
  336. * bytes of all new chunks acknowledged in that SACK including
  337. * chunks acknowledged by the new Cumulative TSN Ack and by
  338. * Gap Ack Blocks.
  339. *
  340. * When partial_bytes_acked is equal to or greater than cwnd
  341. * and before the arrival of the SACK the sender had cwnd or
  342. * more bytes of data outstanding (i.e., before arrival of the
  343. * SACK, flightsize was greater than or equal to cwnd),
  344. * increase cwnd by MTU, and reset partial_bytes_acked to
  345. * (partial_bytes_acked - cwnd).
  346. */
  347. pba += bytes_acked;
  348. if (pba >= cwnd) {
  349. cwnd += pmtu;
  350. pba = ((cwnd < pba) ? (pba - cwnd) : 0);
  351. }
  352. SCTP_DEBUG_PRINTK("%s: CONGESTION AVOIDANCE: "
  353. "transport: %p, bytes_acked: %d, cwnd: %d, "
  354. "ssthresh: %d, flight_size: %d, pba: %d\n",
  355. __FUNCTION__,
  356. transport, bytes_acked, cwnd,
  357. ssthresh, flight_size, pba);
  358. }
  359. transport->cwnd = cwnd;
  360. transport->partial_bytes_acked = pba;
  361. }
  362. /* This routine is used to lower the transport's cwnd when congestion is
  363. * detected.
  364. */
  365. void sctp_transport_lower_cwnd(struct sctp_transport *transport,
  366. sctp_lower_cwnd_t reason)
  367. {
  368. switch (reason) {
  369. case SCTP_LOWER_CWND_T3_RTX:
  370. /* RFC 2960 Section 7.2.3, sctpimpguide
  371. * When the T3-rtx timer expires on an address, SCTP should
  372. * perform slow start by:
  373. * ssthresh = max(cwnd/2, 4*MTU)
  374. * cwnd = 1*MTU
  375. * partial_bytes_acked = 0
  376. */
  377. transport->ssthresh = max(transport->cwnd/2,
  378. 4*transport->asoc->pmtu);
  379. transport->cwnd = transport->asoc->pmtu;
  380. break;
  381. case SCTP_LOWER_CWND_FAST_RTX:
  382. /* RFC 2960 7.2.4 Adjust the ssthresh and cwnd of the
  383. * destination address(es) to which the missing DATA chunks
  384. * were last sent, according to the formula described in
  385. * Section 7.2.3.
  386. *
  387. * RFC 2960 7.2.3, sctpimpguide Upon detection of packet
  388. * losses from SACK (see Section 7.2.4), An endpoint
  389. * should do the following:
  390. * ssthresh = max(cwnd/2, 4*MTU)
  391. * cwnd = ssthresh
  392. * partial_bytes_acked = 0
  393. */
  394. transport->ssthresh = max(transport->cwnd/2,
  395. 4*transport->asoc->pmtu);
  396. transport->cwnd = transport->ssthresh;
  397. break;
  398. case SCTP_LOWER_CWND_ECNE:
  399. /* RFC 2481 Section 6.1.2.
  400. * If the sender receives an ECN-Echo ACK packet
  401. * then the sender knows that congestion was encountered in the
  402. * network on the path from the sender to the receiver. The
  403. * indication of congestion should be treated just as a
  404. * congestion loss in non-ECN Capable TCP. That is, the TCP
  405. * source halves the congestion window "cwnd" and reduces the
  406. * slow start threshold "ssthresh".
  407. * A critical condition is that TCP does not react to
  408. * congestion indications more than once every window of
  409. * data (or more loosely more than once every round-trip time).
  410. */
  411. if ((jiffies - transport->last_time_ecne_reduced) >
  412. transport->rtt) {
  413. transport->ssthresh = max(transport->cwnd/2,
  414. 4*transport->asoc->pmtu);
  415. transport->cwnd = transport->ssthresh;
  416. transport->last_time_ecne_reduced = jiffies;
  417. }
  418. break;
  419. case SCTP_LOWER_CWND_INACTIVE:
  420. /* RFC 2960 Section 7.2.1, sctpimpguide
  421. * When the endpoint does not transmit data on a given
  422. * transport address, the cwnd of the transport address
  423. * should be adjusted to max(cwnd/2, 4*MTU) per RTO.
  424. * NOTE: Although the draft recommends that this check needs
  425. * to be done every RTO interval, we do it every hearbeat
  426. * interval.
  427. */
  428. if ((jiffies - transport->last_time_used) > transport->rto)
  429. transport->cwnd = max(transport->cwnd/2,
  430. 4*transport->asoc->pmtu);
  431. break;
  432. };
  433. transport->partial_bytes_acked = 0;
  434. SCTP_DEBUG_PRINTK("%s: transport: %p reason: %d cwnd: "
  435. "%d ssthresh: %d\n", __FUNCTION__,
  436. transport, reason,
  437. transport->cwnd, transport->ssthresh);
  438. }
  439. /* What is the next timeout value for this transport? */
  440. unsigned long sctp_transport_timeout(struct sctp_transport *t)
  441. {
  442. unsigned long timeout;
  443. timeout = t->hb_interval + t->rto + sctp_jitter(t->rto);
  444. timeout += jiffies;
  445. return timeout;
  446. }