transport.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523
  1. /* SCTP kernel reference Implementation
  2. * Copyright (c) 1999-2000 Cisco, Inc.
  3. * Copyright (c) 1999-2001 Motorola, Inc.
  4. * Copyright (c) 2001-2003 International Business Machines Corp.
  5. * Copyright (c) 2001 Intel Corp.
  6. * Copyright (c) 2001 La Monte H.P. Yarroll
  7. *
  8. * This file is part of the SCTP kernel reference Implementation
  9. *
  10. * This module provides the abstraction for an SCTP transport representing
  11. * a remote transport address. For local transport addresses, we just use
  12. * union sctp_addr.
  13. *
  14. * The SCTP reference implementation is free software;
  15. * you can redistribute it and/or modify it under the terms of
  16. * the GNU General Public License as published by
  17. * the Free Software Foundation; either version 2, or (at your option)
  18. * any later version.
  19. *
  20. * The SCTP reference implementation is distributed in the hope that it
  21. * will be useful, but WITHOUT ANY WARRANTY; without even the implied
  22. * ************************
  23. * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  24. * See the GNU General Public License for more details.
  25. *
  26. * You should have received a copy of the GNU General Public License
  27. * along with GNU CC; see the file COPYING. If not, write to
  28. * the Free Software Foundation, 59 Temple Place - Suite 330,
  29. * Boston, MA 02111-1307, USA.
  30. *
  31. * Please send any bug reports or fixes you make to the
  32. * email address(es):
  33. * lksctp developers <lksctp-developers@lists.sourceforge.net>
  34. *
  35. * Or submit a bug report through the following website:
  36. * http://www.sf.net/projects/lksctp
  37. *
  38. * Written or modified by:
  39. * La Monte H.P. Yarroll <piggy@acm.org>
  40. * Karl Knutson <karl@athena.chicago.il.us>
  41. * Jon Grimm <jgrimm@us.ibm.com>
  42. * Xingang Guo <xingang.guo@intel.com>
  43. * Hui Huang <hui.huang@nokia.com>
  44. * Sridhar Samudrala <sri@us.ibm.com>
  45. * Ardelle Fan <ardelle.fan@intel.com>
  46. *
  47. * Any bugs reported given to us we will try to fix... any fixes shared will
  48. * be incorporated into the next SCTP release.
  49. */
  50. #include <linux/types.h>
  51. #include <net/sctp/sctp.h>
  52. #include <net/sctp/sm.h>
  53. /* 1st Level Abstractions. */
  54. /* Initialize a new transport from provided memory. */
  55. static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
  56. const union sctp_addr *addr,
  57. gfp_t gfp)
  58. {
  59. /* Copy in the address. */
  60. peer->ipaddr = *addr;
  61. peer->af_specific = sctp_get_af_specific(addr->sa.sa_family);
  62. peer->asoc = NULL;
  63. peer->dst = NULL;
  64. memset(&peer->saddr, 0, sizeof(union sctp_addr));
  65. /* From 6.3.1 RTO Calculation:
  66. *
  67. * C1) Until an RTT measurement has been made for a packet sent to the
  68. * given destination transport address, set RTO to the protocol
  69. * parameter 'RTO.Initial'.
  70. */
  71. peer->rtt = 0;
  72. peer->rto = sctp_rto_initial;
  73. peer->rttvar = 0;
  74. peer->srtt = 0;
  75. peer->rto_pending = 0;
  76. peer->last_time_heard = jiffies;
  77. peer->last_time_used = jiffies;
  78. peer->last_time_ecne_reduced = jiffies;
  79. peer->init_sent_count = 0;
  80. peer->state = SCTP_ACTIVE;
  81. peer->param_flags = SPP_HB_DISABLE |
  82. SPP_PMTUD_ENABLE |
  83. SPP_SACKDELAY_ENABLE;
  84. peer->hbinterval = 0;
  85. /* Initialize the default path max_retrans. */
  86. peer->pathmaxrxt = sctp_max_retrans_path;
  87. peer->error_count = 0;
  88. INIT_LIST_HEAD(&peer->transmitted);
  89. INIT_LIST_HEAD(&peer->send_ready);
  90. INIT_LIST_HEAD(&peer->transports);
  91. /* Set up the retransmission timer. */
  92. init_timer(&peer->T3_rtx_timer);
  93. peer->T3_rtx_timer.function = sctp_generate_t3_rtx_event;
  94. peer->T3_rtx_timer.data = (unsigned long)peer;
  95. /* Set up the heartbeat timer. */
  96. init_timer(&peer->hb_timer);
  97. peer->hb_timer.function = sctp_generate_heartbeat_event;
  98. peer->hb_timer.data = (unsigned long)peer;
  99. atomic_set(&peer->refcnt, 1);
  100. peer->dead = 0;
  101. peer->malloced = 0;
  102. /* Initialize the state information for SFR-CACC */
  103. peer->cacc.changeover_active = 0;
  104. peer->cacc.cycling_changeover = 0;
  105. peer->cacc.next_tsn_at_change = 0;
  106. peer->cacc.cacc_saw_newack = 0;
  107. return peer;
  108. }
  109. /* Allocate and initialize a new transport. */
  110. struct sctp_transport *sctp_transport_new(const union sctp_addr *addr,
  111. gfp_t gfp)
  112. {
  113. struct sctp_transport *transport;
  114. transport = t_new(struct sctp_transport, gfp);
  115. if (!transport)
  116. goto fail;
  117. if (!sctp_transport_init(transport, addr, gfp))
  118. goto fail_init;
  119. transport->malloced = 1;
  120. SCTP_DBG_OBJCNT_INC(transport);
  121. return transport;
  122. fail_init:
  123. kfree(transport);
  124. fail:
  125. return NULL;
  126. }
  127. /* This transport is no longer needed. Free up if possible, or
  128. * delay until it last reference count.
  129. */
  130. void sctp_transport_free(struct sctp_transport *transport)
  131. {
  132. transport->dead = 1;
  133. /* Try to delete the heartbeat timer. */
  134. if (del_timer(&transport->hb_timer))
  135. sctp_transport_put(transport);
  136. /* Delete the T3_rtx timer if it's active.
  137. * There is no point in not doing this now and letting
  138. * structure hang around in memory since we know
  139. * the tranport is going away.
  140. */
  141. if (timer_pending(&transport->T3_rtx_timer) &&
  142. del_timer(&transport->T3_rtx_timer))
  143. sctp_transport_put(transport);
  144. sctp_transport_put(transport);
  145. }
  146. /* Destroy the transport data structure.
  147. * Assumes there are no more users of this structure.
  148. */
  149. static void sctp_transport_destroy(struct sctp_transport *transport)
  150. {
  151. SCTP_ASSERT(transport->dead, "Transport is not dead", return);
  152. if (transport->asoc)
  153. sctp_association_put(transport->asoc);
  154. sctp_packet_free(&transport->packet);
  155. dst_release(transport->dst);
  156. kfree(transport);
  157. SCTP_DBG_OBJCNT_DEC(transport);
  158. }
  159. /* Start T3_rtx timer if it is not already running and update the heartbeat
  160. * timer. This routine is called every time a DATA chunk is sent.
  161. */
  162. void sctp_transport_reset_timers(struct sctp_transport *transport)
  163. {
  164. /* RFC 2960 6.3.2 Retransmission Timer Rules
  165. *
  166. * R1) Every time a DATA chunk is sent to any address(including a
  167. * retransmission), if the T3-rtx timer of that address is not running
  168. * start it running so that it will expire after the RTO of that
  169. * address.
  170. */
  171. if (!timer_pending(&transport->T3_rtx_timer))
  172. if (!mod_timer(&transport->T3_rtx_timer,
  173. jiffies + transport->rto))
  174. sctp_transport_hold(transport);
  175. /* When a data chunk is sent, reset the heartbeat interval. */
  176. if (!mod_timer(&transport->hb_timer,
  177. sctp_transport_timeout(transport)))
  178. sctp_transport_hold(transport);
  179. }
  180. /* This transport has been assigned to an association.
  181. * Initialize fields from the association or from the sock itself.
  182. * Register the reference count in the association.
  183. */
  184. void sctp_transport_set_owner(struct sctp_transport *transport,
  185. struct sctp_association *asoc)
  186. {
  187. transport->asoc = asoc;
  188. sctp_association_hold(asoc);
  189. }
  190. /* Initialize the pmtu of a transport. */
  191. void sctp_transport_pmtu(struct sctp_transport *transport)
  192. {
  193. struct dst_entry *dst;
  194. dst = transport->af_specific->get_dst(NULL, &transport->ipaddr, NULL);
  195. if (dst) {
  196. transport->pathmtu = dst_mtu(dst);
  197. dst_release(dst);
  198. } else
  199. transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
  200. }
  201. /* Caches the dst entry and source address for a transport's destination
  202. * address.
  203. */
  204. void sctp_transport_route(struct sctp_transport *transport,
  205. union sctp_addr *saddr, struct sctp_sock *opt)
  206. {
  207. struct sctp_association *asoc = transport->asoc;
  208. struct sctp_af *af = transport->af_specific;
  209. union sctp_addr *daddr = &transport->ipaddr;
  210. struct dst_entry *dst;
  211. dst = af->get_dst(asoc, daddr, saddr);
  212. if (saddr)
  213. memcpy(&transport->saddr, saddr, sizeof(union sctp_addr));
  214. else
  215. af->get_saddr(asoc, dst, daddr, &transport->saddr);
  216. transport->dst = dst;
  217. if ((transport->param_flags & SPP_PMTUD_DISABLE) && transport->pathmtu) {
  218. return;
  219. }
  220. if (dst) {
  221. transport->pathmtu = dst_mtu(dst);
  222. /* Initialize sk->sk_rcv_saddr, if the transport is the
  223. * association's active path for getsockname().
  224. */
  225. if (asoc && (transport == asoc->peer.active_path))
  226. opt->pf->af->to_sk_saddr(&transport->saddr,
  227. asoc->base.sk);
  228. } else
  229. transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
  230. }
  231. /* Hold a reference to a transport. */
  232. void sctp_transport_hold(struct sctp_transport *transport)
  233. {
  234. atomic_inc(&transport->refcnt);
  235. }
  236. /* Release a reference to a transport and clean up
  237. * if there are no more references.
  238. */
  239. void sctp_transport_put(struct sctp_transport *transport)
  240. {
  241. if (atomic_dec_and_test(&transport->refcnt))
  242. sctp_transport_destroy(transport);
  243. }
  244. /* Update transport's RTO based on the newly calculated RTT. */
  245. void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt)
  246. {
  247. /* Check for valid transport. */
  248. SCTP_ASSERT(tp, "NULL transport", return);
  249. /* We should not be doing any RTO updates unless rto_pending is set. */
  250. SCTP_ASSERT(tp->rto_pending, "rto_pending not set", return);
  251. if (tp->rttvar || tp->srtt) {
  252. /* 6.3.1 C3) When a new RTT measurement R' is made, set
  253. * RTTVAR <- (1 - RTO.Beta) * RTTVAR + RTO.Beta * |SRTT - R'|
  254. * SRTT <- (1 - RTO.Alpha) * SRTT + RTO.Alpha * R'
  255. */
  256. /* Note: The above algorithm has been rewritten to
  257. * express rto_beta and rto_alpha as inverse powers
  258. * of two.
  259. * For example, assuming the default value of RTO.Alpha of
  260. * 1/8, rto_alpha would be expressed as 3.
  261. */
  262. tp->rttvar = tp->rttvar - (tp->rttvar >> sctp_rto_beta)
  263. + ((abs(tp->srtt - rtt)) >> sctp_rto_beta);
  264. tp->srtt = tp->srtt - (tp->srtt >> sctp_rto_alpha)
  265. + (rtt >> sctp_rto_alpha);
  266. } else {
  267. /* 6.3.1 C2) When the first RTT measurement R is made, set
  268. * SRTT <- R, RTTVAR <- R/2.
  269. */
  270. tp->srtt = rtt;
  271. tp->rttvar = rtt >> 1;
  272. }
  273. /* 6.3.1 G1) Whenever RTTVAR is computed, if RTTVAR = 0, then
  274. * adjust RTTVAR <- G, where G is the CLOCK GRANULARITY.
  275. */
  276. if (tp->rttvar == 0)
  277. tp->rttvar = SCTP_CLOCK_GRANULARITY;
  278. /* 6.3.1 C3) After the computation, update RTO <- SRTT + 4 * RTTVAR. */
  279. tp->rto = tp->srtt + (tp->rttvar << 2);
  280. /* 6.3.1 C6) Whenever RTO is computed, if it is less than RTO.Min
  281. * seconds then it is rounded up to RTO.Min seconds.
  282. */
  283. if (tp->rto < tp->asoc->rto_min)
  284. tp->rto = tp->asoc->rto_min;
  285. /* 6.3.1 C7) A maximum value may be placed on RTO provided it is
  286. * at least RTO.max seconds.
  287. */
  288. if (tp->rto > tp->asoc->rto_max)
  289. tp->rto = tp->asoc->rto_max;
  290. tp->rtt = rtt;
  291. /* Reset rto_pending so that a new RTT measurement is started when a
  292. * new data chunk is sent.
  293. */
  294. tp->rto_pending = 0;
  295. SCTP_DEBUG_PRINTK("%s: transport: %p, rtt: %d, srtt: %d "
  296. "rttvar: %d, rto: %ld\n", __FUNCTION__,
  297. tp, rtt, tp->srtt, tp->rttvar, tp->rto);
  298. }
  299. /* This routine updates the transport's cwnd and partial_bytes_acked
  300. * parameters based on the bytes acked in the received SACK.
  301. */
  302. void sctp_transport_raise_cwnd(struct sctp_transport *transport,
  303. __u32 sack_ctsn, __u32 bytes_acked)
  304. {
  305. __u32 cwnd, ssthresh, flight_size, pba, pmtu;
  306. cwnd = transport->cwnd;
  307. flight_size = transport->flight_size;
  308. /* The appropriate cwnd increase algorithm is performed if, and only
  309. * if the cumulative TSN has advanced and the congestion window is
  310. * being fully utilized.
  311. */
  312. if ((transport->asoc->ctsn_ack_point >= sack_ctsn) ||
  313. (flight_size < cwnd))
  314. return;
  315. ssthresh = transport->ssthresh;
  316. pba = transport->partial_bytes_acked;
  317. pmtu = transport->asoc->pathmtu;
  318. if (cwnd <= ssthresh) {
  319. /* RFC 2960 7.2.1, sctpimpguide-05 2.14.2 When cwnd is less
  320. * than or equal to ssthresh an SCTP endpoint MUST use the
  321. * slow start algorithm to increase cwnd only if the current
  322. * congestion window is being fully utilized and an incoming
  323. * SACK advances the Cumulative TSN Ack Point. Only when these
  324. * two conditions are met can the cwnd be increased otherwise
  325. * the cwnd MUST not be increased. If these conditions are met
  326. * then cwnd MUST be increased by at most the lesser of
  327. * 1) the total size of the previously outstanding DATA
  328. * chunk(s) acknowledged, and 2) the destination's path MTU.
  329. */
  330. if (bytes_acked > pmtu)
  331. cwnd += pmtu;
  332. else
  333. cwnd += bytes_acked;
  334. SCTP_DEBUG_PRINTK("%s: SLOW START: transport: %p, "
  335. "bytes_acked: %d, cwnd: %d, ssthresh: %d, "
  336. "flight_size: %d, pba: %d\n",
  337. __FUNCTION__,
  338. transport, bytes_acked, cwnd,
  339. ssthresh, flight_size, pba);
  340. } else {
  341. /* RFC 2960 7.2.2 Whenever cwnd is greater than ssthresh,
  342. * upon each SACK arrival that advances the Cumulative TSN Ack
  343. * Point, increase partial_bytes_acked by the total number of
  344. * bytes of all new chunks acknowledged in that SACK including
  345. * chunks acknowledged by the new Cumulative TSN Ack and by
  346. * Gap Ack Blocks.
  347. *
  348. * When partial_bytes_acked is equal to or greater than cwnd
  349. * and before the arrival of the SACK the sender had cwnd or
  350. * more bytes of data outstanding (i.e., before arrival of the
  351. * SACK, flightsize was greater than or equal to cwnd),
  352. * increase cwnd by MTU, and reset partial_bytes_acked to
  353. * (partial_bytes_acked - cwnd).
  354. */
  355. pba += bytes_acked;
  356. if (pba >= cwnd) {
  357. cwnd += pmtu;
  358. pba = ((cwnd < pba) ? (pba - cwnd) : 0);
  359. }
  360. SCTP_DEBUG_PRINTK("%s: CONGESTION AVOIDANCE: "
  361. "transport: %p, bytes_acked: %d, cwnd: %d, "
  362. "ssthresh: %d, flight_size: %d, pba: %d\n",
  363. __FUNCTION__,
  364. transport, bytes_acked, cwnd,
  365. ssthresh, flight_size, pba);
  366. }
  367. transport->cwnd = cwnd;
  368. transport->partial_bytes_acked = pba;
  369. }
  370. /* This routine is used to lower the transport's cwnd when congestion is
  371. * detected.
  372. */
  373. void sctp_transport_lower_cwnd(struct sctp_transport *transport,
  374. sctp_lower_cwnd_t reason)
  375. {
  376. switch (reason) {
  377. case SCTP_LOWER_CWND_T3_RTX:
  378. /* RFC 2960 Section 7.2.3, sctpimpguide
  379. * When the T3-rtx timer expires on an address, SCTP should
  380. * perform slow start by:
  381. * ssthresh = max(cwnd/2, 4*MTU)
  382. * cwnd = 1*MTU
  383. * partial_bytes_acked = 0
  384. */
  385. transport->ssthresh = max(transport->cwnd/2,
  386. 4*transport->asoc->pathmtu);
  387. transport->cwnd = transport->asoc->pathmtu;
  388. break;
  389. case SCTP_LOWER_CWND_FAST_RTX:
  390. /* RFC 2960 7.2.4 Adjust the ssthresh and cwnd of the
  391. * destination address(es) to which the missing DATA chunks
  392. * were last sent, according to the formula described in
  393. * Section 7.2.3.
  394. *
  395. * RFC 2960 7.2.3, sctpimpguide Upon detection of packet
  396. * losses from SACK (see Section 7.2.4), An endpoint
  397. * should do the following:
  398. * ssthresh = max(cwnd/2, 4*MTU)
  399. * cwnd = ssthresh
  400. * partial_bytes_acked = 0
  401. */
  402. transport->ssthresh = max(transport->cwnd/2,
  403. 4*transport->asoc->pathmtu);
  404. transport->cwnd = transport->ssthresh;
  405. break;
  406. case SCTP_LOWER_CWND_ECNE:
  407. /* RFC 2481 Section 6.1.2.
  408. * If the sender receives an ECN-Echo ACK packet
  409. * then the sender knows that congestion was encountered in the
  410. * network on the path from the sender to the receiver. The
  411. * indication of congestion should be treated just as a
  412. * congestion loss in non-ECN Capable TCP. That is, the TCP
  413. * source halves the congestion window "cwnd" and reduces the
  414. * slow start threshold "ssthresh".
  415. * A critical condition is that TCP does not react to
  416. * congestion indications more than once every window of
  417. * data (or more loosely more than once every round-trip time).
  418. */
  419. if ((jiffies - transport->last_time_ecne_reduced) >
  420. transport->rtt) {
  421. transport->ssthresh = max(transport->cwnd/2,
  422. 4*transport->asoc->pathmtu);
  423. transport->cwnd = transport->ssthresh;
  424. transport->last_time_ecne_reduced = jiffies;
  425. }
  426. break;
  427. case SCTP_LOWER_CWND_INACTIVE:
  428. /* RFC 2960 Section 7.2.1, sctpimpguide
  429. * When the endpoint does not transmit data on a given
  430. * transport address, the cwnd of the transport address
  431. * should be adjusted to max(cwnd/2, 4*MTU) per RTO.
  432. * NOTE: Although the draft recommends that this check needs
  433. * to be done every RTO interval, we do it every hearbeat
  434. * interval.
  435. */
  436. if ((jiffies - transport->last_time_used) > transport->rto)
  437. transport->cwnd = max(transport->cwnd/2,
  438. 4*transport->asoc->pathmtu);
  439. break;
  440. };
  441. transport->partial_bytes_acked = 0;
  442. SCTP_DEBUG_PRINTK("%s: transport: %p reason: %d cwnd: "
  443. "%d ssthresh: %d\n", __FUNCTION__,
  444. transport, reason,
  445. transport->cwnd, transport->ssthresh);
  446. }
  447. /* What is the next timeout value for this transport? */
  448. unsigned long sctp_transport_timeout(struct sctp_transport *t)
  449. {
  450. unsigned long timeout;
  451. timeout = t->hbinterval + t->rto + sctp_jitter(t->rto);
  452. timeout += jiffies;
  453. return timeout;
  454. }