|
@@ -355,6 +355,12 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
|
|
|
rcvmem = 2 * SKB_TRUESIZE(mss + MAX_TCP_HEADER) *
|
|
|
tcp_default_init_rwnd(mss);
|
|
|
|
|
|
+ /* Dynamic Right Sizing (DRS) has 2 to 3 RTT latency.
|
|
|
+ * Allow enough cushion so that sender is not limited by our window
|
|
|
+ */
|
|
|
+ if (sysctl_tcp_moderate_rcvbuf)
|
|
|
+ rcvmem <<= 2;
|
|
|
+
|
|
|
if (sk->sk_rcvbuf < rcvmem)
|
|
|
sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]);
|
|
|
}
|
|
@@ -373,6 +379,8 @@ void tcp_init_buffer_space(struct sock *sk)
|
|
|
tcp_fixup_sndbuf(sk);
|
|
|
|
|
|
tp->rcvq_space.space = tp->rcv_wnd;
|
|
|
+ tp->rcvq_space.time = tcp_time_stamp;
|
|
|
+ tp->rcvq_space.seq = tp->copied_seq;
|
|
|
|
|
|
maxwin = tcp_full_space(sk);
|
|
|
|
|
@@ -512,48 +520,62 @@ void tcp_rcv_space_adjust(struct sock *sk)
|
|
|
{
|
|
|
struct tcp_sock *tp = tcp_sk(sk);
|
|
|
int time;
|
|
|
- int space;
|
|
|
-
|
|
|
- if (tp->rcvq_space.time == 0)
|
|
|
- goto new_measure;
|
|
|
+ int copied;
|
|
|
|
|
|
time = tcp_time_stamp - tp->rcvq_space.time;
|
|
|
if (time < (tp->rcv_rtt_est.rtt >> 3) || tp->rcv_rtt_est.rtt == 0)
|
|
|
return;
|
|
|
|
|
|
- space = 2 * (tp->copied_seq - tp->rcvq_space.seq);
|
|
|
+ /* Number of bytes copied to user in last RTT */
|
|
|
+ copied = tp->copied_seq - tp->rcvq_space.seq;
|
|
|
+ if (copied <= tp->rcvq_space.space)
|
|
|
+ goto new_measure;
|
|
|
+
|
|
|
+ /* A bit of theory :
|
|
|
+ * copied = bytes copied to user in previous RTT, our base window
|
|
|
+ * To cope with packet losses, we need a 2x factor
|
|
|
+ * To cope with slow start, and sender growing its cwnd by 100 %
|
|
|
+ * every RTT, we need a 4x factor, because the ACK we are sending
|
|
|
+ * now is for the next RTT, not the current one :
|
|
|
+ * <prev RTT . ><current RTT .. ><next RTT .... >
|
|
|
+ */
|
|
|
+
|
|
|
+ if (sysctl_tcp_moderate_rcvbuf &&
|
|
|
+ !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
|
|
|
+ int rcvwin, rcvmem, rcvbuf;
|
|
|
|
|
|
- space = max(tp->rcvq_space.space, space);
|
|
|
+ /* minimal window to cope with packet losses, assuming
|
|
|
+ * steady state. Add some cushion because of small variations.
|
|
|
+ */
|
|
|
+ rcvwin = (copied << 1) + 16 * tp->advmss;
|
|
|
|
|
|
- if (tp->rcvq_space.space != space) {
|
|
|
- int rcvmem;
|
|
|
+ /* If rate increased by 25%,
|
|
|
+ * assume slow start, rcvwin = 3 * copied
|
|
|
+ * If rate increased by 50%,
|
|
|
+ * assume sender can use 2x growth, rcvwin = 4 * copied
|
|
|
+ */
|
|
|
+ if (copied >=
|
|
|
+ tp->rcvq_space.space + (tp->rcvq_space.space >> 2)) {
|
|
|
+ if (copied >=
|
|
|
+ tp->rcvq_space.space + (tp->rcvq_space.space >> 1))
|
|
|
+ rcvwin <<= 1;
|
|
|
+ else
|
|
|
+ rcvwin += (rcvwin >> 1);
|
|
|
+ }
|
|
|
|
|
|
- tp->rcvq_space.space = space;
|
|
|
+ rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
|
|
|
+ while (tcp_win_from_space(rcvmem) < tp->advmss)
|
|
|
+ rcvmem += 128;
|
|
|
|
|
|
- if (sysctl_tcp_moderate_rcvbuf &&
|
|
|
- !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
|
|
|
- int new_clamp = space;
|
|
|
+ rcvbuf = min(rcvwin / tp->advmss * rcvmem, sysctl_tcp_rmem[2]);
|
|
|
+ if (rcvbuf > sk->sk_rcvbuf) {
|
|
|
+ sk->sk_rcvbuf = rcvbuf;
|
|
|
|
|
|
- /* Receive space grows, normalize in order to
|
|
|
- * take into account packet headers and sk_buff
|
|
|
- * structure overhead.
|
|
|
- */
|
|
|
- space /= tp->advmss;
|
|
|
- if (!space)
|
|
|
- space = 1;
|
|
|
- rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
|
|
|
- while (tcp_win_from_space(rcvmem) < tp->advmss)
|
|
|
- rcvmem += 128;
|
|
|
- space *= rcvmem;
|
|
|
- space = min(space, sysctl_tcp_rmem[2]);
|
|
|
- if (space > sk->sk_rcvbuf) {
|
|
|
- sk->sk_rcvbuf = space;
|
|
|
-
|
|
|
- /* Make the window clamp follow along. */
|
|
|
- tp->window_clamp = new_clamp;
|
|
|
- }
|
|
|
+ /* Make the window clamp follow along. */
|
|
|
+ tp->window_clamp = rcvwin;
|
|
|
}
|
|
|
}
|
|
|
+ tp->rcvq_space.space = copied;
|
|
|
|
|
|
new_measure:
|
|
|
tp->rcvq_space.seq = tp->copied_seq;
|
|
@@ -5674,8 +5696,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
|
|
|
tcp_init_congestion_control(sk);
|
|
|
|
|
|
tcp_mtup_init(sk);
|
|
|
- tcp_init_buffer_space(sk);
|
|
|
tp->copied_seq = tp->rcv_nxt;
|
|
|
+ tcp_init_buffer_space(sk);
|
|
|
}
|
|
|
smp_mb();
|
|
|
tcp_set_state(sk, TCP_ESTABLISHED);
|