tcp_metrics.c

#include <linux/rcupdate.h>
#include <linux/spinlock.h>
#include <linux/jiffies.h>
#include <linux/bootmem.h>
#include <linux/module.h>
#include <linux/cache.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/tcp.h>

#include <net/inet_connection_sock.h>
#include <net/net_namespace.h>
#include <net/request_sock.h>
#include <net/inetpeer.h>
#include <net/sock.h>
#include <net/ipv6.h>
#include <net/dst.h>
#include <net/tcp.h>

int sysctl_tcp_nometrics_save __read_mostly;

enum tcp_metric_index {
	TCP_METRIC_RTT,
	TCP_METRIC_RTTVAR,
	TCP_METRIC_SSTHRESH,
	TCP_METRIC_CWND,
	TCP_METRIC_REORDERING,

	/* Always last. */
	TCP_METRIC_MAX,
};

struct tcp_fastopen_metrics {
	u16	mss;
	u16	syn_loss:10;		/* Recurring Fast Open SYN losses */
	unsigned long	last_syn_loss;	/* Last Fast Open SYN loss */
	struct	tcp_fastopen_cookie	cookie;
};
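
/* One tcp_metrics_block per remote peer address: metrics learned from past
 * connections (RTT, RTTVAR, ssthresh, cwnd, reordering), the last timestamp
 * state seen from that peer, and cached Fast Open data.  Blocks hang off a
 * per-netns hash table and are chained and looked up under RCU.
 */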
struct tcp_metrics_block {
	struct tcp_metrics_block __rcu	*tcpm_next;
	struct inetpeer_addr		tcpm_addr;
	unsigned long			tcpm_stamp;
	u32				tcpm_ts;
	u32				tcpm_ts_stamp;
	u32				tcpm_lock;
	u32				tcpm_vals[TCP_METRIC_MAX];
	struct tcp_fastopen_metrics	tcpm_fastopen;
};

static bool tcp_metric_locked(struct tcp_metrics_block *tm,
			      enum tcp_metric_index idx)
{
	return tm->tcpm_lock & (1 << idx);
}

static u32 tcp_metric_get(struct tcp_metrics_block *tm,
			  enum tcp_metric_index idx)
{
	return tm->tcpm_vals[idx];
}

static u32 tcp_metric_get_jiffies(struct tcp_metrics_block *tm,
				  enum tcp_metric_index idx)
{
	return msecs_to_jiffies(tm->tcpm_vals[idx]);
}

static void tcp_metric_set(struct tcp_metrics_block *tm,
			   enum tcp_metric_index idx,
			   u32 val)
{
	tm->tcpm_vals[idx] = val;
}

static void tcp_metric_set_msecs(struct tcp_metrics_block *tm,
				 enum tcp_metric_index idx,
				 u32 val)
{
	tm->tcpm_vals[idx] = jiffies_to_msecs(val);
}

static bool addr_same(const struct inetpeer_addr *a,
		      const struct inetpeer_addr *b)
{
	const struct in6_addr *a6, *b6;

	if (a->family != b->family)
		return false;
	if (a->family == AF_INET)
		return a->addr.a4 == b->addr.a4;

	a6 = (const struct in6_addr *) &a->addr.a6[0];
	b6 = (const struct in6_addr *) &b->addr.a6[0];

	return ipv6_addr_equal(a6, b6);
}

struct tcpm_hash_bucket {
	struct tcp_metrics_block __rcu	*chain;
};

static DEFINE_SPINLOCK(tcp_metrics_lock);
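
/* Re-initialise a metrics block from the route: record which metrics the
 * route locks, seed the cached values from the dst metrics, and clear the
 * timestamp and Fast Open state.
 */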
static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst)
{
	u32 val;

	val = 0;
	if (dst_metric_locked(dst, RTAX_RTT))
		val |= 1 << TCP_METRIC_RTT;
	if (dst_metric_locked(dst, RTAX_RTTVAR))
		val |= 1 << TCP_METRIC_RTTVAR;
	if (dst_metric_locked(dst, RTAX_SSTHRESH))
		val |= 1 << TCP_METRIC_SSTHRESH;
	if (dst_metric_locked(dst, RTAX_CWND))
		val |= 1 << TCP_METRIC_CWND;
	if (dst_metric_locked(dst, RTAX_REORDERING))
		val |= 1 << TCP_METRIC_REORDERING;
	tm->tcpm_lock = val;

	tm->tcpm_vals[TCP_METRIC_RTT] = dst_metric_raw(dst, RTAX_RTT);
	tm->tcpm_vals[TCP_METRIC_RTTVAR] = dst_metric_raw(dst, RTAX_RTTVAR);
	tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH);
	tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND);
	tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING);
	tm->tcpm_ts = 0;
	tm->tcpm_ts_stamp = 0;
	tm->tcpm_fastopen.mss = 0;
	tm->tcpm_fastopen.syn_loss = 0;
	tm->tcpm_fastopen.cookie.len = 0;
}
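
/* Create a metrics block for the given peer address.  If reclaim is set
 * (the hash chain already hit TCP_METRICS_RECLAIM_DEPTH), reuse the least
 * recently stamped block on the chain instead of allocating a new one.
 */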
static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
					  struct inetpeer_addr *addr,
					  unsigned int hash,
					  bool reclaim)
{
	struct tcp_metrics_block *tm;
	struct net *net;

	spin_lock_bh(&tcp_metrics_lock);
	net = dev_net(dst->dev);
	if (unlikely(reclaim)) {
		struct tcp_metrics_block *oldest;

		oldest = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain);
		for (tm = rcu_dereference(oldest->tcpm_next); tm;
		     tm = rcu_dereference(tm->tcpm_next)) {
			if (time_before(tm->tcpm_stamp, oldest->tcpm_stamp))
				oldest = tm;
		}
		tm = oldest;
	} else {
		tm = kmalloc(sizeof(*tm), GFP_ATOMIC);
		if (!tm)
			goto out_unlock;
	}
	tm->tcpm_addr = *addr;
	tm->tcpm_stamp = jiffies;

	tcpm_suck_dst(tm, dst);

	if (likely(!reclaim)) {
		tm->tcpm_next = net->ipv4.tcp_metrics_hash[hash].chain;
		rcu_assign_pointer(net->ipv4.tcp_metrics_hash[hash].chain, tm);
	}

out_unlock:
	spin_unlock_bh(&tcp_metrics_lock);
	return tm;
}

#define TCP_METRICS_TIMEOUT		(60 * 60 * HZ)

static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst)
{
	if (tm && unlikely(time_after(jiffies, tm->tcpm_stamp + TCP_METRICS_TIMEOUT)))
		tcpm_suck_dst(tm, dst);
}

#define TCP_METRICS_RECLAIM_DEPTH	5
#define TCP_METRICS_RECLAIM_PTR		(struct tcp_metrics_block *) 0x1UL
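
/* Encode the result of a chain walk: return the block if one was found,
 * the TCP_METRICS_RECLAIM_PTR sentinel if the chain is deep enough that
 * the caller should reclaim an existing entry, or NULL if it may allocate.
 */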
static struct tcp_metrics_block *tcp_get_encode(struct tcp_metrics_block *tm, int depth)
{
	if (tm)
		return tm;
	if (depth > TCP_METRICS_RECLAIM_DEPTH)
		return TCP_METRICS_RECLAIM_PTR;
	return NULL;
}

static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *addr,
						   struct net *net, unsigned int hash)
{
	struct tcp_metrics_block *tm;
	int depth = 0;

	for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm;
	     tm = rcu_dereference(tm->tcpm_next)) {
		if (addr_same(&tm->tcpm_addr, addr))
			break;
		depth++;
	}
	return tcp_get_encode(tm, depth);
}

static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req,
						       struct dst_entry *dst)
{
	struct tcp_metrics_block *tm;
	struct inetpeer_addr addr;
	unsigned int hash;
	struct net *net;

	addr.family = req->rsk_ops->family;
	switch (addr.family) {
	case AF_INET:
		addr.addr.a4 = inet_rsk(req)->rmt_addr;
		hash = (__force unsigned int) addr.addr.a4;
		break;
	case AF_INET6:
		*(struct in6_addr *)addr.addr.a6 = inet6_rsk(req)->rmt_addr;
		hash = ipv6_addr_hash(&inet6_rsk(req)->rmt_addr);
		break;
	default:
		return NULL;
	}

	hash ^= (hash >> 24) ^ (hash >> 16) ^ (hash >> 8);
	net = dev_net(dst->dev);
	hash &= net->ipv4.tcp_metrics_hash_mask;

	for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm;
	     tm = rcu_dereference(tm->tcpm_next)) {
		if (addr_same(&tm->tcpm_addr, &addr))
			break;
	}
	tcpm_check_stamp(tm, dst);
	return tm;
}

static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock *tw)
{
	struct inet6_timewait_sock *tw6;
	struct tcp_metrics_block *tm;
	struct inetpeer_addr addr;
	unsigned int hash;
	struct net *net;

	addr.family = tw->tw_family;
	switch (addr.family) {
	case AF_INET:
		addr.addr.a4 = tw->tw_daddr;
		hash = (__force unsigned int) addr.addr.a4;
		break;
	case AF_INET6:
		tw6 = inet6_twsk((struct sock *)tw);
		*(struct in6_addr *)addr.addr.a6 = tw6->tw_v6_daddr;
		hash = ipv6_addr_hash(&tw6->tw_v6_daddr);
		break;
	default:
		return NULL;
	}

	hash ^= (hash >> 24) ^ (hash >> 16) ^ (hash >> 8);
	net = twsk_net(tw);
	hash &= net->ipv4.tcp_metrics_hash_mask;

	for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm;
	     tm = rcu_dereference(tm->tcpm_next)) {
		if (addr_same(&tm->tcpm_addr, &addr))
			break;
	}
	return tm;
}
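
/* Main lookup routine: find the metrics block for the socket's peer
 * address, optionally creating (or reclaiming) one when none exists.
 * Stale entries are refreshed from the route via tcpm_check_stamp().
 */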
static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk,
						 struct dst_entry *dst,
						 bool create)
{
	struct tcp_metrics_block *tm;
	struct inetpeer_addr addr;
	unsigned int hash;
	struct net *net;
	bool reclaim;

	addr.family = sk->sk_family;
	switch (addr.family) {
	case AF_INET:
		addr.addr.a4 = inet_sk(sk)->inet_daddr;
		hash = (__force unsigned int) addr.addr.a4;
		break;
	case AF_INET6:
		*(struct in6_addr *)addr.addr.a6 = inet6_sk(sk)->daddr;
		hash = ipv6_addr_hash(&inet6_sk(sk)->daddr);
		break;
	default:
		return NULL;
	}

	hash ^= (hash >> 24) ^ (hash >> 16) ^ (hash >> 8);
	net = dev_net(dst->dev);
	hash &= net->ipv4.tcp_metrics_hash_mask;

	tm = __tcp_get_metrics(&addr, net, hash);
	reclaim = false;
	if (tm == TCP_METRICS_RECLAIM_PTR) {
		reclaim = true;
		tm = NULL;
	}
	if (!tm && create)
		tm = tcpm_new(dst, &addr, hash, reclaim);
	else
		tcpm_check_stamp(tm, dst);

	return tm;
}

/* Save metrics learned by this TCP session.  This function is called
 * only when TCP finishes successfully, i.e. when it enters TIME-WAIT
 * or goes from LAST-ACK to CLOSE.
 */
void tcp_update_metrics(struct sock *sk)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct dst_entry *dst = __sk_dst_get(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_metrics_block *tm;
	unsigned long rtt;
	u32 val;
	int m;

	if (sysctl_tcp_nometrics_save || !dst)
		return;

	if (dst->flags & DST_HOST)
		dst_confirm(dst);

	rcu_read_lock();
	if (icsk->icsk_backoff || !tp->srtt) {
		/* This session failed to estimate rtt.  Why?
		 * Probably, no packets returned in time.  Reset our
		 * results.
		 */
		tm = tcp_get_metrics(sk, dst, false);
		if (tm && !tcp_metric_locked(tm, TCP_METRIC_RTT))
			tcp_metric_set(tm, TCP_METRIC_RTT, 0);
		goto out_unlock;
	} else
		tm = tcp_get_metrics(sk, dst, true);

	if (!tm)
		goto out_unlock;

	rtt = tcp_metric_get_jiffies(tm, TCP_METRIC_RTT);
	m = rtt - tp->srtt;

	/* If the newly calculated rtt is larger than the stored one,
	 * store the new one.  Otherwise, use EWMA.  Remember, rtt
	 * overestimation is always better than underestimation.
	 */
	if (!tcp_metric_locked(tm, TCP_METRIC_RTT)) {
		if (m <= 0)
			rtt = tp->srtt;
		else
			rtt -= (m >> 3);
		tcp_metric_set_msecs(tm, TCP_METRIC_RTT, rtt);
	}

	if (!tcp_metric_locked(tm, TCP_METRIC_RTTVAR)) {
		unsigned long var;

		if (m < 0)
			m = -m;

		/* Scale deviation to rttvar fixed point */
		m >>= 1;
		if (m < tp->mdev)
			m = tp->mdev;

		var = tcp_metric_get_jiffies(tm, TCP_METRIC_RTTVAR);
		if (m >= var)
			var = m;
		else
			var -= (var - m) >> 2;

		tcp_metric_set_msecs(tm, TCP_METRIC_RTTVAR, var);
	}

	if (tcp_in_initial_slowstart(tp)) {
		/* Slow start still did not finish. */
		if (!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
			val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
			if (val && (tp->snd_cwnd >> 1) > val)
				tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
					       tp->snd_cwnd >> 1);
		}
		if (!tcp_metric_locked(tm, TCP_METRIC_CWND)) {
			val = tcp_metric_get(tm, TCP_METRIC_CWND);
			if (tp->snd_cwnd > val)
				tcp_metric_set(tm, TCP_METRIC_CWND,
					       tp->snd_cwnd);
		}
	} else if (tp->snd_cwnd > tp->snd_ssthresh &&
		   icsk->icsk_ca_state == TCP_CA_Open) {
		/* Cong. avoidance phase, cwnd is reliable. */
		if (!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH))
			tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
				       max(tp->snd_cwnd >> 1, tp->snd_ssthresh));
		if (!tcp_metric_locked(tm, TCP_METRIC_CWND)) {
			val = tcp_metric_get(tm, TCP_METRIC_CWND);
			tcp_metric_set(tm, TCP_METRIC_CWND, (val + tp->snd_cwnd) >> 1);
		}
	} else {
		/* Else slow start did not finish, cwnd is non-sense,
		 * ssthresh may also be invalid.
		 */
		if (!tcp_metric_locked(tm, TCP_METRIC_CWND)) {
			val = tcp_metric_get(tm, TCP_METRIC_CWND);
			tcp_metric_set(tm, TCP_METRIC_CWND,
				       (val + tp->snd_ssthresh) >> 1);
		}
		if (!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
			val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
			if (val && tp->snd_ssthresh > val)
				tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
					       tp->snd_ssthresh);
		}
		if (!tcp_metric_locked(tm, TCP_METRIC_REORDERING)) {
			val = tcp_metric_get(tm, TCP_METRIC_REORDERING);
			if (val < tp->reordering &&
			    tp->reordering != sysctl_tcp_reordering)
				tcp_metric_set(tm, TCP_METRIC_REORDERING,
					       tp->reordering);
		}
	}
	tm->tcpm_stamp = jiffies;
out_unlock:
	rcu_read_unlock();
}

/* Initialize metrics on socket. */
void tcp_init_metrics(struct sock *sk)
{
	struct dst_entry *dst = __sk_dst_get(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_metrics_block *tm;
	u32 val;

	if (dst == NULL)
		goto reset;

	dst_confirm(dst);

	rcu_read_lock();
	tm = tcp_get_metrics(sk, dst, true);
	if (!tm) {
		rcu_read_unlock();
		goto reset;
	}

	if (tcp_metric_locked(tm, TCP_METRIC_CWND))
		tp->snd_cwnd_clamp = tcp_metric_get(tm, TCP_METRIC_CWND);

	val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
	if (val) {
		tp->snd_ssthresh = val;
		if (tp->snd_ssthresh > tp->snd_cwnd_clamp)
			tp->snd_ssthresh = tp->snd_cwnd_clamp;
	} else {
		/* ssthresh may have been reduced unnecessarily during
		 * the 3WHS.  Restore it back to its initial default.
		 */
		tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
	}
	val = tcp_metric_get(tm, TCP_METRIC_REORDERING);
	if (val && tp->reordering != val) {
		tcp_disable_fack(tp);
		tcp_disable_early_retrans(tp);
		tp->reordering = val;
	}

	val = tcp_metric_get(tm, TCP_METRIC_RTT);
	if (val == 0 || tp->srtt == 0) {
		rcu_read_unlock();
		goto reset;
	}
	/* Initial rtt is determined from SYN,SYN-ACK.
	 * The segment is small and rtt may appear much
	 * less than the real one.  Use per-dst memory
	 * to make it more realistic.
	 *
	 * A bit of theory.  RTT is the time passed after a "normal" sized
	 * packet is sent until it is ACKed.  In normal circumstances sending
	 * small packets forces the peer to delay ACKs and the calculation is
	 * still correct.  The algorithm is adaptive and, provided we follow
	 * the specs, it NEVER underestimates RTT.  BUT! If the peer tries
	 * clever tricks, sort of "quick acks", for long enough to decrease
	 * RTT to a low value, and then abruptly stops doing so and starts to
	 * delay ACKs, expect trouble.
	 */
	val = msecs_to_jiffies(val);
	if (val > tp->srtt) {
		tp->srtt = val;
		tp->rtt_seq = tp->snd_nxt;
	}
	val = tcp_metric_get_jiffies(tm, TCP_METRIC_RTTVAR);
	if (val > tp->mdev) {
		tp->mdev = val;
		tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
	}
	rcu_read_unlock();

	tcp_set_rto(sk);
reset:
	if (tp->srtt == 0) {
		/* RFC6298: 5.7 We've failed to get a valid RTT sample from
		 * the 3WHS.  This is most likely due to retransmission,
		 * including a spurious one.  Reset the RTO back to 3 secs
		 * from the more aggressive 1 sec to avoid more spurious
		 * retransmission.
		 */
		tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_FALLBACK;
		inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK;
	}
	/* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been
	 * retransmitted.  In light of RFC6298's more aggressive 1 sec
	 * initRTO, we only reset cwnd when more than 1 SYN/SYN-ACK
	 * retransmission has occurred.
	 */
	if (tp->total_retrans > 1)
		tp->snd_cwnd = 1;
	else
		tp->snd_cwnd = tcp_init_cwnd(tp, dst);
	tp->snd_cwnd_stamp = tcp_time_stamp;
}
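
/* Two uses: with paws_check, veto a connection request whose timestamp
 * conflicts with the recently cached timestamp for this peer (PAWS);
 * without it, report whether we have an RTT sample and a timestamp
 * cached for the peer at all.
 */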
bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, bool paws_check)
{
	struct tcp_metrics_block *tm;
	bool ret;

	if (!dst)
		return false;

	rcu_read_lock();
	tm = __tcp_get_metrics_req(req, dst);
	if (paws_check) {
		if (tm &&
		    (u32)get_seconds() - tm->tcpm_ts_stamp < TCP_PAWS_MSL &&
		    (s32)(tm->tcpm_ts - req->ts_recent) > TCP_PAWS_WINDOW)
			ret = false;
		else
			ret = true;
	} else {
		if (tm && tcp_metric_get(tm, TCP_METRIC_RTT) && tm->tcpm_ts_stamp)
			ret = true;
		else
			ret = false;
	}
	rcu_read_unlock();

	return ret;
}
EXPORT_SYMBOL_GPL(tcp_peer_is_proven);
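
/* Seed a new connection's PAWS state from the timestamp cached for the
 * destination, provided the cached entry is younger than TCP_PAWS_MSL.
 */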
void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst)
{
	struct tcp_metrics_block *tm;

	rcu_read_lock();
	tm = tcp_get_metrics(sk, dst, true);
	if (tm) {
		struct tcp_sock *tp = tcp_sk(sk);

		if ((u32)get_seconds() - tm->tcpm_ts_stamp <= TCP_PAWS_MSL) {
			tp->rx_opt.ts_recent_stamp = tm->tcpm_ts_stamp;
			tp->rx_opt.ts_recent = tm->tcpm_ts;
		}
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(tcp_fetch_timewait_stamp);

/* VJ's idea.  Save the last timestamp seen from this destination and hold
 * it at least for the normal timewait interval, to use for duplicate
 * segment detection in subsequent connections, before they enter
 * synchronized state.
 */
bool tcp_remember_stamp(struct sock *sk)
{
	struct dst_entry *dst = __sk_dst_get(sk);
	bool ret = false;

	if (dst) {
		struct tcp_metrics_block *tm;

		rcu_read_lock();
		tm = tcp_get_metrics(sk, dst, true);
		if (tm) {
			struct tcp_sock *tp = tcp_sk(sk);

			if ((s32)(tm->tcpm_ts - tp->rx_opt.ts_recent) <= 0 ||
			    ((u32)get_seconds() - tm->tcpm_ts_stamp > TCP_PAWS_MSL &&
			     tm->tcpm_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) {
				tm->tcpm_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp;
				tm->tcpm_ts = tp->rx_opt.ts_recent;
			}
			ret = true;
		}
		rcu_read_unlock();
	}
	return ret;
}

bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw)
{
	struct tcp_metrics_block *tm;
	bool ret = false;

	rcu_read_lock();
	tm = __tcp_get_metrics_tw(tw);
	if (tm) {
		const struct tcp_timewait_sock *tcptw;
		struct sock *sk = (struct sock *) tw;

		tcptw = tcp_twsk(sk);
		if ((s32)(tm->tcpm_ts - tcptw->tw_ts_recent) <= 0 ||
		    ((u32)get_seconds() - tm->tcpm_ts_stamp > TCP_PAWS_MSL &&
		     tm->tcpm_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) {
			tm->tcpm_ts_stamp = (u32)tcptw->tw_ts_recent_stamp;
			tm->tcpm_ts = tcptw->tw_ts_recent;
		}
		ret = true;
	}
	rcu_read_unlock();

	return ret;
}

static DEFINE_SEQLOCK(fastopen_seqlock);
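
/* Read the cached Fast Open state (MSS, cookie, SYN loss history) for the
 * socket's destination under the fastopen seqlock.
 */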
void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
			    struct tcp_fastopen_cookie *cookie,
			    int *syn_loss, unsigned long *last_syn_loss)
{
	struct tcp_metrics_block *tm;

	rcu_read_lock();
	tm = tcp_get_metrics(sk, __sk_dst_get(sk), false);
	if (tm) {
		struct tcp_fastopen_metrics *tfom = &tm->tcpm_fastopen;
		unsigned int seq;

		do {
			seq = read_seqbegin(&fastopen_seqlock);
			if (tfom->mss)
				*mss = tfom->mss;
			*cookie = tfom->cookie;
			*syn_loss = tfom->syn_loss;
			*last_syn_loss = *syn_loss ? tfom->last_syn_loss : 0;
		} while (read_seqretry(&fastopen_seqlock, seq));
	}
	rcu_read_unlock();
}

void tcp_fastopen_cache_set(struct sock *sk, u16 mss,
			    struct tcp_fastopen_cookie *cookie, bool syn_lost)
{
	struct tcp_metrics_block *tm;

	rcu_read_lock();
	tm = tcp_get_metrics(sk, __sk_dst_get(sk), true);
	if (tm) {
		struct tcp_fastopen_metrics *tfom = &tm->tcpm_fastopen;

		write_seqlock_bh(&fastopen_seqlock);
		tfom->mss = mss;
		if (cookie->len > 0)
			tfom->cookie = *cookie;
		if (syn_lost) {
			++tfom->syn_loss;
			tfom->last_syn_loss = jiffies;
		} else
			tfom->syn_loss = 0;
		write_sequnlock_bh(&fastopen_seqlock);
	}
	rcu_read_unlock();
}
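
/* "tcpmhash_entries=N" on the kernel command line overrides the default
 * number of hash buckets chosen in tcp_net_metrics_init() below.
 */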
static unsigned long tcpmhash_entries;
static int __init set_tcpmhash_entries(char *str)
{
	ssize_t ret;

	if (!str)
		return 0;

	ret = kstrtoul(str, 0, &tcpmhash_entries);
	if (ret)
		return 0;

	return 1;
}
__setup("tcpmhash_entries=", set_tcpmhash_entries);
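
/* Per-netns setup: size the bucket array (16K slots on machines with at
 * least 128K pages of RAM, i.e. 512 MB with 4 KB pages, otherwise 8K)
 * unless overridden by tcpmhash_entries, allocate it, and record the mask.
 */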
static int __net_init tcp_net_metrics_init(struct net *net)
{
	int slots, size;

	slots = tcpmhash_entries;
	if (!slots) {
		if (totalram_pages >= 128 * 1024)
			slots = 16 * 1024;
		else
			slots = 8 * 1024;
	}

	size = slots * sizeof(struct tcpm_hash_bucket);

	net->ipv4.tcp_metrics_hash = kzalloc(size, GFP_KERNEL);
	if (!net->ipv4.tcp_metrics_hash)
		return -ENOMEM;

	net->ipv4.tcp_metrics_hash_mask = (slots - 1);

	return 0;
}

static void __net_exit tcp_net_metrics_exit(struct net *net)
{
	kfree(net->ipv4.tcp_metrics_hash);
}

static __net_initdata struct pernet_operations tcp_net_metrics_ops = {
	.init	=	tcp_net_metrics_init,
	.exit	=	tcp_net_metrics_exit,
};

void __init tcp_metrics_init(void)
{
	register_pernet_subsys(&tcp_net_metrics_ops);
}