transport.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516
  1. /* SCTP kernel reference Implementation
  2. * Copyright (c) 1999-2000 Cisco, Inc.
  3. * Copyright (c) 1999-2001 Motorola, Inc.
  4. * Copyright (c) 2001-2003 International Business Machines Corp.
  5. * Copyright (c) 2001 Intel Corp.
  6. * Copyright (c) 2001 La Monte H.P. Yarroll
  7. *
  8. * This file is part of the SCTP kernel reference Implementation
  9. *
  10. * This module provides the abstraction for an SCTP transport representing
  11. * a remote transport address. For local transport addresses, we just use
  12. * union sctp_addr.
  13. *
  14. * The SCTP reference implementation is free software;
  15. * you can redistribute it and/or modify it under the terms of
  16. * the GNU General Public License as published by
  17. * the Free Software Foundation; either version 2, or (at your option)
  18. * any later version.
  19. *
  20. * The SCTP reference implementation is distributed in the hope that it
  21. * will be useful, but WITHOUT ANY WARRANTY; without even the implied
  22. * ************************
  23. * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  24. * See the GNU General Public License for more details.
  25. *
  26. * You should have received a copy of the GNU General Public License
  27. * along with GNU CC; see the file COPYING. If not, write to
  28. * the Free Software Foundation, 59 Temple Place - Suite 330,
  29. * Boston, MA 02111-1307, USA.
  30. *
  31. * Please send any bug reports or fixes you make to the
  32. * email address(es):
  33. * lksctp developers <lksctp-developers@lists.sourceforge.net>
  34. *
  35. * Or submit a bug report through the following website:
  36. * http://www.sf.net/projects/lksctp
  37. *
  38. * Written or modified by:
  39. * La Monte H.P. Yarroll <piggy@acm.org>
  40. * Karl Knutson <karl@athena.chicago.il.us>
  41. * Jon Grimm <jgrimm@us.ibm.com>
  42. * Xingang Guo <xingang.guo@intel.com>
  43. * Hui Huang <hui.huang@nokia.com>
  44. * Sridhar Samudrala <sri@us.ibm.com>
  45. * Ardelle Fan <ardelle.fan@intel.com>
  46. *
  47. * Any bugs reported given to us we will try to fix... any fixes shared will
  48. * be incorporated into the next SCTP release.
  49. */
  50. #include <linux/types.h>
  51. #include <net/sctp/sctp.h>
  52. #include <net/sctp/sm.h>
  53. /* 1st Level Abstractions. */
  54. /* Initialize a new transport from provided memory. */
  55. static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
  56. const union sctp_addr *addr,
  57. unsigned int __nocast gfp)
  58. {
  59. /* Copy in the address. */
  60. peer->ipaddr = *addr;
  61. peer->af_specific = sctp_get_af_specific(addr->sa.sa_family);
  62. peer->asoc = NULL;
  63. peer->dst = NULL;
  64. memset(&peer->saddr, 0, sizeof(union sctp_addr));
  65. /* From 6.3.1 RTO Calculation:
  66. *
  67. * C1) Until an RTT measurement has been made for a packet sent to the
  68. * given destination transport address, set RTO to the protocol
  69. * parameter 'RTO.Initial'.
  70. */
  71. peer->rtt = 0;
  72. peer->rto = sctp_rto_initial;
  73. peer->rttvar = 0;
  74. peer->srtt = 0;
  75. peer->rto_pending = 0;
  76. peer->last_time_heard = jiffies;
  77. peer->last_time_used = jiffies;
  78. peer->last_time_ecne_reduced = jiffies;
  79. peer->init_sent_count = 0;
  80. peer->state = SCTP_ACTIVE;
  81. peer->hb_allowed = 0;
  82. /* Initialize the default path max_retrans. */
  83. peer->max_retrans = sctp_max_retrans_path;
  84. peer->error_count = 0;
  85. INIT_LIST_HEAD(&peer->transmitted);
  86. INIT_LIST_HEAD(&peer->send_ready);
  87. INIT_LIST_HEAD(&peer->transports);
  88. /* Set up the retransmission timer. */
  89. init_timer(&peer->T3_rtx_timer);
  90. peer->T3_rtx_timer.function = sctp_generate_t3_rtx_event;
  91. peer->T3_rtx_timer.data = (unsigned long)peer;
  92. /* Set up the heartbeat timer. */
  93. init_timer(&peer->hb_timer);
  94. peer->hb_timer.function = sctp_generate_heartbeat_event;
  95. peer->hb_timer.data = (unsigned long)peer;
  96. atomic_set(&peer->refcnt, 1);
  97. peer->dead = 0;
  98. peer->malloced = 0;
  99. /* Initialize the state information for SFR-CACC */
  100. peer->cacc.changeover_active = 0;
  101. peer->cacc.cycling_changeover = 0;
  102. peer->cacc.next_tsn_at_change = 0;
  103. peer->cacc.cacc_saw_newack = 0;
  104. return peer;
  105. }
  106. /* Allocate and initialize a new transport. */
  107. struct sctp_transport *sctp_transport_new(const union sctp_addr *addr,
  108. unsigned int __nocast gfp)
  109. {
  110. struct sctp_transport *transport;
  111. transport = t_new(struct sctp_transport, gfp);
  112. if (!transport)
  113. goto fail;
  114. if (!sctp_transport_init(transport, addr, gfp))
  115. goto fail_init;
  116. transport->malloced = 1;
  117. SCTP_DBG_OBJCNT_INC(transport);
  118. return transport;
  119. fail_init:
  120. kfree(transport);
  121. fail:
  122. return NULL;
  123. }
  124. /* This transport is no longer needed. Free up if possible, or
  125. * delay until it last reference count.
  126. */
  127. void sctp_transport_free(struct sctp_transport *transport)
  128. {
  129. transport->dead = 1;
  130. /* Try to delete the heartbeat timer. */
  131. if (del_timer(&transport->hb_timer))
  132. sctp_transport_put(transport);
  133. /* Delete the T3_rtx timer if it's active.
  134. * There is no point in not doing this now and letting
  135. * structure hang around in memory since we know
  136. * the tranport is going away.
  137. */
  138. if (timer_pending(&transport->T3_rtx_timer) &&
  139. del_timer(&transport->T3_rtx_timer))
  140. sctp_transport_put(transport);
  141. sctp_transport_put(transport);
  142. }
  143. /* Destroy the transport data structure.
  144. * Assumes there are no more users of this structure.
  145. */
  146. static void sctp_transport_destroy(struct sctp_transport *transport)
  147. {
  148. SCTP_ASSERT(transport->dead, "Transport is not dead", return);
  149. if (transport->asoc)
  150. sctp_association_put(transport->asoc);
  151. sctp_packet_free(&transport->packet);
  152. dst_release(transport->dst);
  153. kfree(transport);
  154. SCTP_DBG_OBJCNT_DEC(transport);
  155. }
  156. /* Start T3_rtx timer if it is not already running and update the heartbeat
  157. * timer. This routine is called every time a DATA chunk is sent.
  158. */
  159. void sctp_transport_reset_timers(struct sctp_transport *transport)
  160. {
  161. /* RFC 2960 6.3.2 Retransmission Timer Rules
  162. *
  163. * R1) Every time a DATA chunk is sent to any address(including a
  164. * retransmission), if the T3-rtx timer of that address is not running
  165. * start it running so that it will expire after the RTO of that
  166. * address.
  167. */
  168. if (!timer_pending(&transport->T3_rtx_timer))
  169. if (!mod_timer(&transport->T3_rtx_timer,
  170. jiffies + transport->rto))
  171. sctp_transport_hold(transport);
  172. /* When a data chunk is sent, reset the heartbeat interval. */
  173. if (!mod_timer(&transport->hb_timer,
  174. sctp_transport_timeout(transport)))
  175. sctp_transport_hold(transport);
  176. }
  177. /* This transport has been assigned to an association.
  178. * Initialize fields from the association or from the sock itself.
  179. * Register the reference count in the association.
  180. */
  181. void sctp_transport_set_owner(struct sctp_transport *transport,
  182. struct sctp_association *asoc)
  183. {
  184. transport->asoc = asoc;
  185. sctp_association_hold(asoc);
  186. }
  187. /* Initialize the pmtu of a transport. */
  188. void sctp_transport_pmtu(struct sctp_transport *transport)
  189. {
  190. struct dst_entry *dst;
  191. dst = transport->af_specific->get_dst(NULL, &transport->ipaddr, NULL);
  192. if (dst) {
  193. transport->pmtu = dst_mtu(dst);
  194. dst_release(dst);
  195. } else
  196. transport->pmtu = SCTP_DEFAULT_MAXSEGMENT;
  197. }
  198. /* Caches the dst entry and source address for a transport's destination
  199. * address.
  200. */
  201. void sctp_transport_route(struct sctp_transport *transport,
  202. union sctp_addr *saddr, struct sctp_sock *opt)
  203. {
  204. struct sctp_association *asoc = transport->asoc;
  205. struct sctp_af *af = transport->af_specific;
  206. union sctp_addr *daddr = &transport->ipaddr;
  207. struct dst_entry *dst;
  208. dst = af->get_dst(asoc, daddr, saddr);
  209. if (saddr)
  210. memcpy(&transport->saddr, saddr, sizeof(union sctp_addr));
  211. else
  212. af->get_saddr(asoc, dst, daddr, &transport->saddr);
  213. transport->dst = dst;
  214. if (dst) {
  215. transport->pmtu = dst_mtu(dst);
  216. /* Initialize sk->sk_rcv_saddr, if the transport is the
  217. * association's active path for getsockname().
  218. */
  219. if (asoc && (transport == asoc->peer.active_path))
  220. af->to_sk_saddr(&transport->saddr, asoc->base.sk);
  221. } else
  222. transport->pmtu = SCTP_DEFAULT_MAXSEGMENT;
  223. }
  224. /* Hold a reference to a transport. */
  225. void sctp_transport_hold(struct sctp_transport *transport)
  226. {
  227. atomic_inc(&transport->refcnt);
  228. }
  229. /* Release a reference to a transport and clean up
  230. * if there are no more references.
  231. */
  232. void sctp_transport_put(struct sctp_transport *transport)
  233. {
  234. if (atomic_dec_and_test(&transport->refcnt))
  235. sctp_transport_destroy(transport);
  236. }
  237. /* Update transport's RTO based on the newly calculated RTT. */
  238. void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt)
  239. {
  240. /* Check for valid transport. */
  241. SCTP_ASSERT(tp, "NULL transport", return);
  242. /* We should not be doing any RTO updates unless rto_pending is set. */
  243. SCTP_ASSERT(tp->rto_pending, "rto_pending not set", return);
  244. if (tp->rttvar || tp->srtt) {
  245. /* 6.3.1 C3) When a new RTT measurement R' is made, set
  246. * RTTVAR <- (1 - RTO.Beta) * RTTVAR + RTO.Beta * |SRTT - R'|
  247. * SRTT <- (1 - RTO.Alpha) * SRTT + RTO.Alpha * R'
  248. */
  249. /* Note: The above algorithm has been rewritten to
  250. * express rto_beta and rto_alpha as inverse powers
  251. * of two.
  252. * For example, assuming the default value of RTO.Alpha of
  253. * 1/8, rto_alpha would be expressed as 3.
  254. */
  255. tp->rttvar = tp->rttvar - (tp->rttvar >> sctp_rto_beta)
  256. + ((abs(tp->srtt - rtt)) >> sctp_rto_beta);
  257. tp->srtt = tp->srtt - (tp->srtt >> sctp_rto_alpha)
  258. + (rtt >> sctp_rto_alpha);
  259. } else {
  260. /* 6.3.1 C2) When the first RTT measurement R is made, set
  261. * SRTT <- R, RTTVAR <- R/2.
  262. */
  263. tp->srtt = rtt;
  264. tp->rttvar = rtt >> 1;
  265. }
  266. /* 6.3.1 G1) Whenever RTTVAR is computed, if RTTVAR = 0, then
  267. * adjust RTTVAR <- G, where G is the CLOCK GRANULARITY.
  268. */
  269. if (tp->rttvar == 0)
  270. tp->rttvar = SCTP_CLOCK_GRANULARITY;
  271. /* 6.3.1 C3) After the computation, update RTO <- SRTT + 4 * RTTVAR. */
  272. tp->rto = tp->srtt + (tp->rttvar << 2);
  273. /* 6.3.1 C6) Whenever RTO is computed, if it is less than RTO.Min
  274. * seconds then it is rounded up to RTO.Min seconds.
  275. */
  276. if (tp->rto < tp->asoc->rto_min)
  277. tp->rto = tp->asoc->rto_min;
  278. /* 6.3.1 C7) A maximum value may be placed on RTO provided it is
  279. * at least RTO.max seconds.
  280. */
  281. if (tp->rto > tp->asoc->rto_max)
  282. tp->rto = tp->asoc->rto_max;
  283. tp->rtt = rtt;
  284. /* Reset rto_pending so that a new RTT measurement is started when a
  285. * new data chunk is sent.
  286. */
  287. tp->rto_pending = 0;
  288. SCTP_DEBUG_PRINTK("%s: transport: %p, rtt: %d, srtt: %d "
  289. "rttvar: %d, rto: %d\n", __FUNCTION__,
  290. tp, rtt, tp->srtt, tp->rttvar, tp->rto);
  291. }
  292. /* This routine updates the transport's cwnd and partial_bytes_acked
  293. * parameters based on the bytes acked in the received SACK.
  294. */
  295. void sctp_transport_raise_cwnd(struct sctp_transport *transport,
  296. __u32 sack_ctsn, __u32 bytes_acked)
  297. {
  298. __u32 cwnd, ssthresh, flight_size, pba, pmtu;
  299. cwnd = transport->cwnd;
  300. flight_size = transport->flight_size;
  301. /* The appropriate cwnd increase algorithm is performed if, and only
  302. * if the cumulative TSN has advanced and the congestion window is
  303. * being fully utilized.
  304. */
  305. if ((transport->asoc->ctsn_ack_point >= sack_ctsn) ||
  306. (flight_size < cwnd))
  307. return;
  308. ssthresh = transport->ssthresh;
  309. pba = transport->partial_bytes_acked;
  310. pmtu = transport->asoc->pmtu;
  311. if (cwnd <= ssthresh) {
  312. /* RFC 2960 7.2.1, sctpimpguide-05 2.14.2 When cwnd is less
  313. * than or equal to ssthresh an SCTP endpoint MUST use the
  314. * slow start algorithm to increase cwnd only if the current
  315. * congestion window is being fully utilized and an incoming
  316. * SACK advances the Cumulative TSN Ack Point. Only when these
  317. * two conditions are met can the cwnd be increased otherwise
  318. * the cwnd MUST not be increased. If these conditions are met
  319. * then cwnd MUST be increased by at most the lesser of
  320. * 1) the total size of the previously outstanding DATA
  321. * chunk(s) acknowledged, and 2) the destination's path MTU.
  322. */
  323. if (bytes_acked > pmtu)
  324. cwnd += pmtu;
  325. else
  326. cwnd += bytes_acked;
  327. SCTP_DEBUG_PRINTK("%s: SLOW START: transport: %p, "
  328. "bytes_acked: %d, cwnd: %d, ssthresh: %d, "
  329. "flight_size: %d, pba: %d\n",
  330. __FUNCTION__,
  331. transport, bytes_acked, cwnd,
  332. ssthresh, flight_size, pba);
  333. } else {
  334. /* RFC 2960 7.2.2 Whenever cwnd is greater than ssthresh,
  335. * upon each SACK arrival that advances the Cumulative TSN Ack
  336. * Point, increase partial_bytes_acked by the total number of
  337. * bytes of all new chunks acknowledged in that SACK including
  338. * chunks acknowledged by the new Cumulative TSN Ack and by
  339. * Gap Ack Blocks.
  340. *
  341. * When partial_bytes_acked is equal to or greater than cwnd
  342. * and before the arrival of the SACK the sender had cwnd or
  343. * more bytes of data outstanding (i.e., before arrival of the
  344. * SACK, flightsize was greater than or equal to cwnd),
  345. * increase cwnd by MTU, and reset partial_bytes_acked to
  346. * (partial_bytes_acked - cwnd).
  347. */
  348. pba += bytes_acked;
  349. if (pba >= cwnd) {
  350. cwnd += pmtu;
  351. pba = ((cwnd < pba) ? (pba - cwnd) : 0);
  352. }
  353. SCTP_DEBUG_PRINTK("%s: CONGESTION AVOIDANCE: "
  354. "transport: %p, bytes_acked: %d, cwnd: %d, "
  355. "ssthresh: %d, flight_size: %d, pba: %d\n",
  356. __FUNCTION__,
  357. transport, bytes_acked, cwnd,
  358. ssthresh, flight_size, pba);
  359. }
  360. transport->cwnd = cwnd;
  361. transport->partial_bytes_acked = pba;
  362. }
  363. /* This routine is used to lower the transport's cwnd when congestion is
  364. * detected.
  365. */
  366. void sctp_transport_lower_cwnd(struct sctp_transport *transport,
  367. sctp_lower_cwnd_t reason)
  368. {
  369. switch (reason) {
  370. case SCTP_LOWER_CWND_T3_RTX:
  371. /* RFC 2960 Section 7.2.3, sctpimpguide
  372. * When the T3-rtx timer expires on an address, SCTP should
  373. * perform slow start by:
  374. * ssthresh = max(cwnd/2, 4*MTU)
  375. * cwnd = 1*MTU
  376. * partial_bytes_acked = 0
  377. */
  378. transport->ssthresh = max(transport->cwnd/2,
  379. 4*transport->asoc->pmtu);
  380. transport->cwnd = transport->asoc->pmtu;
  381. break;
  382. case SCTP_LOWER_CWND_FAST_RTX:
  383. /* RFC 2960 7.2.4 Adjust the ssthresh and cwnd of the
  384. * destination address(es) to which the missing DATA chunks
  385. * were last sent, according to the formula described in
  386. * Section 7.2.3.
  387. *
  388. * RFC 2960 7.2.3, sctpimpguide Upon detection of packet
  389. * losses from SACK (see Section 7.2.4), An endpoint
  390. * should do the following:
  391. * ssthresh = max(cwnd/2, 4*MTU)
  392. * cwnd = ssthresh
  393. * partial_bytes_acked = 0
  394. */
  395. transport->ssthresh = max(transport->cwnd/2,
  396. 4*transport->asoc->pmtu);
  397. transport->cwnd = transport->ssthresh;
  398. break;
  399. case SCTP_LOWER_CWND_ECNE:
  400. /* RFC 2481 Section 6.1.2.
  401. * If the sender receives an ECN-Echo ACK packet
  402. * then the sender knows that congestion was encountered in the
  403. * network on the path from the sender to the receiver. The
  404. * indication of congestion should be treated just as a
  405. * congestion loss in non-ECN Capable TCP. That is, the TCP
  406. * source halves the congestion window "cwnd" and reduces the
  407. * slow start threshold "ssthresh".
  408. * A critical condition is that TCP does not react to
  409. * congestion indications more than once every window of
  410. * data (or more loosely more than once every round-trip time).
  411. */
  412. if ((jiffies - transport->last_time_ecne_reduced) >
  413. transport->rtt) {
  414. transport->ssthresh = max(transport->cwnd/2,
  415. 4*transport->asoc->pmtu);
  416. transport->cwnd = transport->ssthresh;
  417. transport->last_time_ecne_reduced = jiffies;
  418. }
  419. break;
  420. case SCTP_LOWER_CWND_INACTIVE:
  421. /* RFC 2960 Section 7.2.1, sctpimpguide
  422. * When the endpoint does not transmit data on a given
  423. * transport address, the cwnd of the transport address
  424. * should be adjusted to max(cwnd/2, 4*MTU) per RTO.
  425. * NOTE: Although the draft recommends that this check needs
  426. * to be done every RTO interval, we do it every hearbeat
  427. * interval.
  428. */
  429. if ((jiffies - transport->last_time_used) > transport->rto)
  430. transport->cwnd = max(transport->cwnd/2,
  431. 4*transport->asoc->pmtu);
  432. break;
  433. };
  434. transport->partial_bytes_acked = 0;
  435. SCTP_DEBUG_PRINTK("%s: transport: %p reason: %d cwnd: "
  436. "%d ssthresh: %d\n", __FUNCTION__,
  437. transport, reason,
  438. transport->cwnd, transport->ssthresh);
  439. }
  440. /* What is the next timeout value for this transport? */
  441. unsigned long sctp_transport_timeout(struct sctp_transport *t)
  442. {
  443. unsigned long timeout;
  444. timeout = t->hb_interval + t->rto + sctp_jitter(t->rto);
  445. timeout += jiffies;
  446. return timeout;
  447. }