tcp_yeah.c 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288
  /*
   *
   *   YeAH TCP
   *
   * For further details look at:
   *    http://wil.cs.caltech.edu/pfldnet2007/paper/YeAH_TCP.pdf
   *
   */
  9. #include "tcp_yeah.h"
  /*
   * Tunable constants for YeAH.
   *
   * Parameters tagged "lin" are used as plain values (thresholds or divisors).
   * Parameters tagged "log" are used as right-shift counts, i.e. the fraction
   * they describe is 1 / 2^value.
   */

  /* Fixed-point shift inherited from the Vegas parameter scheme (number of
   * bits to the right of the binary point). Not referenced in this file.
   */
  #define V_PARAM_SHIFT 1

  #define TCP_YEAH_ALPHA 80   /* lin: number of packets queued at the bottleneck */
  #define TCP_YEAH_GAMMA 1    /* lin: fraction of queue to be removed per RTT */
  #define TCP_YEAH_DELTA 3    /* log: minimum fraction of cwnd to be removed on loss */
  #define TCP_YEAH_EPSILON 1  /* log: maximum fraction to be removed on early decongestion */
  #define TCP_YEAH_PHY 8      /* lin: maximum delta from base (baseRTT is divided by this) */
  #define TCP_YEAH_RHO 16     /* lin: minimum number of consecutive RTTs to consider competition on loss */
  #define TCP_YEAH_ZETA 50    /* lin: minimum number of state switches to reset reno_count */

  /* Upper bound on the per-increment ACK count used in the Scalable branch. */
  #define TCP_SCALABLE_AI_CNT 100U
  22. /* YeAH variables */
  23. struct yeah {
  24. /* Vegas */
  25. u32 beg_snd_nxt; /* right edge during last RTT */
  26. u32 beg_snd_una; /* left edge during last RTT */
  27. u32 beg_snd_cwnd; /* saves the size of the cwnd */
  28. u8 doing_vegas_now;/* if true, do vegas for this RTT */
  29. u16 cntRTT; /* # of RTTs measured within last RTT */
  30. u32 minRTT; /* min of RTTs measured within last RTT (in usec) */
  31. u32 baseRTT; /* the min of all Vegas RTT measurements seen (in usec) */
  32. /* YeAH */
  33. u32 lastQ;
  34. u32 doing_reno_now;
  35. u32 reno_count;
  36. u32 fast_count;
  37. u32 pkts_acked;
  38. };
  39. static void tcp_yeah_init(struct sock *sk)
  40. {
  41. struct tcp_sock *tp = tcp_sk(sk);
  42. struct yeah *yeah = inet_csk_ca(sk);
  43. tcp_vegas_init(sk);
  44. yeah->doing_reno_now = 0;
  45. yeah->lastQ = 0;
  46. yeah->reno_count = 2;
  47. /* Ensure the MD arithmetic works. This is somewhat pedantic,
  48. * since I don't think we will see a cwnd this large. :) */
  49. tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128);
  50. }
  51. static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked)
  52. {
  53. const struct inet_connection_sock *icsk = inet_csk(sk);
  54. struct yeah *yeah = inet_csk_ca(sk);
  55. if (icsk->icsk_ca_state == TCP_CA_Open)
  56. yeah->pkts_acked = pkts_acked;
  57. }
  58. /* 64bit divisor, dividend and result. dynamic precision */
  59. static inline u64 div64_64(u64 dividend, u64 divisor)
  60. {
  61. u32 d = divisor;
  62. if (divisor > 0xffffffffULL) {
  63. unsigned int shift = fls(divisor >> 32);
  64. d = divisor >> shift;
  65. dividend >>= shift;
  66. }
  67. /* avoid 64 bit division if possible */
  68. if (dividend >> 32)
  69. do_div(dividend, d);
  70. else
  71. dividend = (u32) dividend / d;
  72. return dividend;
  73. }
  74. static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack,
  75. u32 seq_rtt, u32 in_flight, int flag)
  76. {
  77. struct tcp_sock *tp = tcp_sk(sk);
  78. struct yeah *yeah = inet_csk_ca(sk);
  79. if (!tcp_is_cwnd_limited(sk, in_flight))
  80. return;
  81. if (tp->snd_cwnd <= tp->snd_ssthresh) {
  82. tcp_slow_start(tp);
  83. } else if (!yeah->doing_reno_now) {
  84. /* Scalable */
  85. tp->snd_cwnd_cnt+=yeah->pkts_acked;
  86. if (tp->snd_cwnd_cnt > min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT)){
  87. if (tp->snd_cwnd < tp->snd_cwnd_clamp)
  88. tp->snd_cwnd++;
  89. tp->snd_cwnd_cnt = 0;
  90. }
  91. yeah->pkts_acked = 1;
  92. } else {
  93. /* Reno */
  94. if (tp->snd_cwnd_cnt < tp->snd_cwnd)
  95. tp->snd_cwnd_cnt++;
  96. if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
  97. tp->snd_cwnd++;
  98. tp->snd_cwnd_cnt = 0;
  99. }
  100. }
  101. /* The key players are v_beg_snd_una and v_beg_snd_nxt.
  102. *
  103. * These are so named because they represent the approximate values
  104. * of snd_una and snd_nxt at the beginning of the current RTT. More
  105. * precisely, they represent the amount of data sent during the RTT.
  106. * At the end of the RTT, when we receive an ACK for v_beg_snd_nxt,
  107. * we will calculate that (v_beg_snd_nxt - v_beg_snd_una) outstanding
  108. * bytes of data have been ACKed during the course of the RTT, giving
  109. * an "actual" rate of:
  110. *
  111. * (v_beg_snd_nxt - v_beg_snd_una) / (rtt duration)
  112. *
  113. * Unfortunately, v_beg_snd_una is not exactly equal to snd_una,
  114. * because delayed ACKs can cover more than one segment, so they
  115. * don't line up yeahly with the boundaries of RTTs.
  116. *
  117. * Another unfortunate fact of life is that delayed ACKs delay the
  118. * advance of the left edge of our send window, so that the number
  119. * of bytes we send in an RTT is often less than our cwnd will allow.
  120. * So we keep track of our cwnd separately, in v_beg_snd_cwnd.
  121. */
  122. if (after(ack, yeah->beg_snd_nxt)) {
  123. /* We do the Vegas calculations only if we got enough RTT
  124. * samples that we can be reasonably sure that we got
  125. * at least one RTT sample that wasn't from a delayed ACK.
  126. * If we only had 2 samples total,
  127. * then that means we're getting only 1 ACK per RTT, which
  128. * means they're almost certainly delayed ACKs.
  129. * If we have 3 samples, we should be OK.
  130. */
  131. if (yeah->cntRTT > 2) {
  132. u32 rtt;
  133. u32 queue, maxqueue;
  134. /* We have enough RTT samples, so, using the Vegas
  135. * algorithm, we determine if we should increase or
  136. * decrease cwnd, and by how much.
  137. */
  138. /* Pluck out the RTT we are using for the Vegas
  139. * calculations. This is the min RTT seen during the
  140. * last RTT. Taking the min filters out the effects
  141. * of delayed ACKs, at the cost of noticing congestion
  142. * a bit later.
  143. */
  144. rtt = yeah->minRTT;
  145. queue = (u32)div64_64((u64)tp->snd_cwnd * (rtt - yeah->baseRTT), rtt);
  146. maxqueue = TCP_YEAH_ALPHA;
  147. if (queue > maxqueue ||
  148. rtt - yeah->baseRTT > (yeah->baseRTT / TCP_YEAH_PHY)) {
  149. if (queue > maxqueue && tp->snd_cwnd > yeah->reno_count) {
  150. u32 reduction = min( queue / TCP_YEAH_GAMMA ,
  151. tp->snd_cwnd >> TCP_YEAH_EPSILON );
  152. tp->snd_cwnd -= reduction;
  153. tp->snd_cwnd = max( tp->snd_cwnd, yeah->reno_count);
  154. tp->snd_ssthresh = tp->snd_cwnd;
  155. }
  156. if (yeah->reno_count <= 2)
  157. yeah->reno_count = max( tp->snd_cwnd>>1, 2U);
  158. else
  159. yeah->reno_count++;
  160. yeah->doing_reno_now =
  161. min_t( u32, yeah->doing_reno_now + 1 , 0xffffff);
  162. } else {
  163. yeah->fast_count++;
  164. if (yeah->fast_count > TCP_YEAH_ZETA) {
  165. yeah->reno_count = 2;
  166. yeah->fast_count = 0;
  167. }
  168. yeah->doing_reno_now = 0;
  169. }
  170. yeah->lastQ = queue;
  171. }
  172. /* Save the extent of the current window so we can use this
  173. * at the end of the next RTT.
  174. */
  175. yeah->beg_snd_una = yeah->beg_snd_nxt;
  176. yeah->beg_snd_nxt = tp->snd_nxt;
  177. yeah->beg_snd_cwnd = tp->snd_cwnd;
  178. /* Wipe the slate clean for the next RTT. */
  179. yeah->cntRTT = 0;
  180. yeah->minRTT = 0x7fffffff;
  181. }
  182. }
  183. static u32 tcp_yeah_ssthresh(struct sock *sk) {
  184. const struct tcp_sock *tp = tcp_sk(sk);
  185. struct yeah *yeah = inet_csk_ca(sk);
  186. u32 reduction;
  187. if (yeah->doing_reno_now < TCP_YEAH_RHO) {
  188. reduction = yeah->lastQ;
  189. reduction = min( reduction, max(tp->snd_cwnd>>1, 2U) );
  190. reduction = max( reduction, tp->snd_cwnd >> TCP_YEAH_DELTA);
  191. } else
  192. reduction = max(tp->snd_cwnd>>1,2U);
  193. yeah->fast_count = 0;
  194. yeah->reno_count = max(yeah->reno_count>>1, 2U);
  195. return tp->snd_cwnd - reduction;
  196. }
  197. static struct tcp_congestion_ops tcp_yeah = {
  198. .init = tcp_yeah_init,
  199. .ssthresh = tcp_yeah_ssthresh,
  200. .cong_avoid = tcp_yeah_cong_avoid,
  201. .min_cwnd = tcp_reno_min_cwnd,
  202. .rtt_sample = tcp_vegas_rtt_calc,
  203. .set_state = tcp_vegas_state,
  204. .cwnd_event = tcp_vegas_cwnd_event,
  205. .get_info = tcp_vegas_get_info,
  206. .pkts_acked = tcp_yeah_pkts_acked,
  207. .owner = THIS_MODULE,
  208. .name = "yeah",
  209. };
  210. static int __init tcp_yeah_register(void)
  211. {
  212. BUG_ON(sizeof(struct yeah) > ICSK_CA_PRIV_SIZE);
  213. tcp_register_congestion_control(&tcp_yeah);
  214. return 0;
  215. }
  216. static void __exit tcp_yeah_unregister(void)
  217. {
  218. tcp_unregister_congestion_control(&tcp_yeah);
  219. }
  220. module_init(tcp_yeah_register);
  221. module_exit(tcp_yeah_unregister);
  222. MODULE_AUTHOR("Angelo P. Castellani");
  223. MODULE_LICENSE("GPL");
  224. MODULE_DESCRIPTION("YeAH TCP");