19 years ago · 79ffeeb9e6
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -78,6 +78,11 @@ inet_peer_gc_maxtime - INTEGER
 
				 
			
 
				 TCP variables: 
			
 
				 
			
 
				+tcp_abc - INTEGER
			
 
				+	Controls Appropriate Byte Count defined in RFC3465. If set to
			
 
				+	0 then does congestion avoidance once per ack. 1 is conservative
			
 
				+	value, and 2 is more aggressive.
			
 
				+
			
 
				 tcp_syn_retries - INTEGER
			
 
				 	Number of times initial SYNs for an active TCP connection attempt
			
 
				 	will be retransmitted. Should not be higher than 255. Default value
			
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -390,6 +390,7 @@ enum
 
				 	NET_TCP_BIC_BETA=108,
			
 
				 	NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR=109,
			
 
				 	NET_TCP_CONG_CONTROL=110,
			
 
				+	NET_TCP_ABC=111,
			
 
				 };
			
 
				 
			
 
				 enum {
			
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -307,6 +307,21 @@ struct tcp_sock {
 
				 	struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */
			
 
				 	struct tcp_sack_block selective_acks[4]; /* The SACKS themselves*/
			
 
				 
			
 
				+	struct tcp_sack_block recv_sack_cache[4];
			
 
				+
			
 
				+	/* from STCP, retrans queue hinting */
			
 
				+	struct sk_buff* lost_skb_hint;
			
 
				+
			
 
				+	struct sk_buff *scoreboard_skb_hint;
			
 
				+	struct sk_buff *retransmit_skb_hint;
			
 
				+	struct sk_buff *forward_skb_hint;
			
 
				+	struct sk_buff *fastpath_skb_hint;
			
 
				+
			
 
				+	int     fastpath_cnt_hint;
			
 
				+	int     lost_cnt_hint;
			
 
				+	int     retransmit_cnt_hint;
			
 
				+	int     forward_cnt_hint;
			
 
				+
			
 
				 	__u16	advmss;		/* Advertised MSS			*/
			
 
				 	__u16	prior_ssthresh; /* ssthresh saved at recovery start	*/
			
 
				 	__u32	lost_out;	/* Lost packets			*/
			
@@ -326,6 +341,7 @@ struct tcp_sock {
 
				 	__u32	snd_up;		/* Urgent pointer		*/
			
 
				 
			
 
				 	__u32	total_retrans;	/* Total retransmits for entire connection */
			
 
				+	__u32	bytes_acked;	/* Appropriate Byte Counting - RFC3465 */
			
 
				 
			
 
				 	unsigned int		keepalive_time;	  /* time before keep alive takes place */
			
 
				 	unsigned int		keepalive_intvl;  /* time interval between keep alive probes */
			
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1247,6 +1247,12 @@ static inline struct page *sk_stream_alloc_page(struct sock *sk)
 
				 		     (skb != (struct sk_buff *)&(sk)->sk_write_queue);	\
			
 
				 		     skb = skb->next)
			
 
				 
			
 
				+/*from STCP for fast SACK Process*/
			
 
				+#define sk_stream_for_retrans_queue_from(skb, sk)			\
			
 
				+		for (; (skb != (sk)->sk_send_head) &&                   \
			
 
				+		     (skb != (struct sk_buff *)&(sk)->sk_write_queue);	\
			
 
				+		     skb = skb->next)
			
 
				+
			
 
				 /*
			
 
				  *	Default write policy as shown to user space via poll/select/SIGIO
			
 
				  */
			
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -89,10 +89,10 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
 
				 				 */
			
 
				 
			
 
				 #define TCP_SYN_RETRIES	 5	/* number of times to retry active opening a
			
 
				-				 * connection: ~180sec is RFC minumum	*/
			
 
				+				 * connection: ~180sec is RFC minimum	*/
			
 
				 
			
 
				 #define TCP_SYNACK_RETRIES 5	/* number of times to retry passive opening a
			
 
				-				 * connection: ~180sec is RFC minumum	*/
			
 
				+				 * connection: ~180sec is RFC minimum	*/
			
 
				 
			
 
				 
			
 
				 #define TCP_ORPHAN_RETRIES 7	/* number of times to retry on an orphaned
			
@@ -180,7 +180,7 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
 
				 /* Flags in tp->nonagle */
			
 
				 #define TCP_NAGLE_OFF		1	/* Nagle's algo is disabled */
			
 
				 #define TCP_NAGLE_CORK		2	/* Socket is corked	    */
			
 
				-#define TCP_NAGLE_PUSH		4	/* Cork is overriden for already queued data */
			
 
				+#define TCP_NAGLE_PUSH		4	/* Cork is overridden for already queued data */
			
 
				 
			
 
				 extern struct inet_timewait_death_row tcp_death_row;
			
 
				 
			
@@ -218,6 +218,7 @@ extern int sysctl_tcp_low_latency;
 
				 extern int sysctl_tcp_nometrics_save;
			
 
				 extern int sysctl_tcp_moderate_rcvbuf;
			
 
				 extern int sysctl_tcp_tso_win_divisor;
			
 
				+extern int sysctl_tcp_abc;
			
 
				 
			
 
				 extern atomic_t tcp_memory_allocated;
			
 
				 extern atomic_t tcp_sockets_allocated;
			
@@ -551,13 +552,13 @@ extern u32	__tcp_select_window(struct sock *sk);
 
				 
			
 
				 /* TCP timestamps are only 32-bits, this causes a slight
			
 
				  * complication on 64-bit systems since we store a snapshot
			
 
				- * of jiffies in the buffer control blocks below.  We decidely
			
 
				+ * of jiffies in the buffer control blocks below.  We decidedly
			
 
				  * only use of the low 32-bits of jiffies and hide the ugly
			
 
				  * casts with the following macro.
			
 
				  */
			
 
				 #define tcp_time_stamp		((__u32)(jiffies))
			
 
				 
			
 
				-/* This is what the send packet queueing engine uses to pass
			
 
				+/* This is what the send packet queuing engine uses to pass
			
 
				  * TCP per-packet control information to the transmission
			
 
				  * code.  We also store the host-order sequence numbers in
			
 
				  * here too.  This is 36 bytes on 32-bit architectures,
			
@@ -597,7 +598,7 @@ struct tcp_skb_cb {
 
				 #define TCPCB_EVER_RETRANS	0x80	/* Ever retransmitted frame	*/
			
 
				 #define TCPCB_RETRANS		(TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS)
			
 
				 
			
 
				-#define TCPCB_URG		0x20	/* Urgent pointer advenced here	*/
			
 
				+#define TCPCB_URG		0x20	/* Urgent pointer advanced here	*/
			
 
				 
			
 
				 #define TCPCB_AT_TAIL		(TCPCB_URG)
			
 
				 
			
@@ -765,6 +766,33 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk)
 
				 			    (tp->snd_cwnd >> 2)));
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * Linear increase during slow start
			
 
				+ */
			
 
				+static inline void tcp_slow_start(struct tcp_sock *tp)
			
 
				+{
			
 
				+	if (sysctl_tcp_abc) {
			
 
				+		/* RFC3465: Slow Start
			
 
				+		 * TCP sender SHOULD increase cwnd by the number of
			
 
				+		 * previously unacknowledged bytes ACKed by each incoming
			
 
				+		 * acknowledgment, provided the increase is not more than L
			
 
				+		 */
			
 
				+		if (tp->bytes_acked < tp->mss_cache)
			
 
				+			return;
			
 
				+
			
 
				+		/* We MAY increase by 2 if discovered delayed ack */
			
 
				+		if (sysctl_tcp_abc > 1 && tp->bytes_acked > 2*tp->mss_cache) {
			
 
				+			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
			
 
				+				tp->snd_cwnd++;
			
 
				+		}
			
 
				+	}
			
 
				+	tp->bytes_acked = 0;
			
 
				+
			
 
				+	if (tp->snd_cwnd < tp->snd_cwnd_clamp)
			
 
				+		tp->snd_cwnd++;
			
 
				+}
			
 
				+
			
 
				+
			
 
				 static inline void tcp_sync_left_out(struct tcp_sock *tp)
			
 
				 {
			
 
				 	if (tp->rx_opt.sack_ok &&
			
@@ -794,6 +822,7 @@ static inline void tcp_enter_cwr(struct sock *sk)
 
				 	struct tcp_sock *tp = tcp_sk(sk);
			
 
				 
			
 
				 	tp->prior_ssthresh = 0;
			
 
				+	tp->bytes_acked = 0;
			
 
				 	if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
			
 
				 		__tcp_enter_cwr(sk);
			
 
				 		tcp_set_ca_state(sk, TCP_CA_CWR);
			
@@ -810,6 +839,27 @@ static __inline__ __u32 tcp_max_burst(const struct tcp_sock *tp)
 
				 	return 3;
			
 
				 }
			
 
				 
			
 
				+/* RFC2861 Check whether we are limited by application or congestion window
			
 
				+ * This is the inverse of cwnd check in tcp_tso_should_defer
			
 
				+ */
			
 
				+static inline int tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight)
			
 
				+{
			
 
				+	const struct tcp_sock *tp = tcp_sk(sk);
			
 
				+	u32 left;
			
 
				+
			
 
				+	if (in_flight >= tp->snd_cwnd)
			
 
				+		return 1;
			
 
				+
			
 
				+	if (!(sk->sk_route_caps & NETIF_F_TSO))
			
 
				+		return 0;
			
 
				+
			
 
				+	left = tp->snd_cwnd - in_flight;
			
 
				+	if (sysctl_tcp_tso_win_divisor)
			
 
				+		return left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd;
			
 
				+	else
			
 
				+		return left <= tcp_max_burst(tp);
			
 
				+}
			
 
				+
			
 
				 static __inline__ void tcp_minshall_update(struct tcp_sock *tp, int mss, 
			
 
				 					   const struct sk_buff *skb)
			
 
				 {
			
@@ -1157,6 +1207,15 @@ static inline void tcp_mib_init(void)
 
				 	TCP_ADD_STATS_USER(TCP_MIB_MAXCONN, -1);
			
 
				 }
			
 
				 
			
 
				+/*from STCP */
			
 
				+static inline void clear_all_retrans_hints(struct tcp_sock *tp){
			
 
				+	tp->lost_skb_hint = NULL;
			
 
				+	tp->scoreboard_skb_hint = NULL;
			
 
				+	tp->retransmit_skb_hint = NULL;
			
 
				+	tp->forward_skb_hint = NULL;
			
 
				+	tp->fastpath_skb_hint = NULL;
			
 
				+}
			
 
				+
			
 
				 /* /proc */
			
 
				 enum tcp_seq_states {
			
 
				 	TCP_SEQ_STATE_LISTENING,
			
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -645,6 +645,14 @@ ctl_table ipv4_table[] = {
 
				 		.proc_handler	= &proc_tcp_congestion_control,
			
 
				 		.strategy	= &sysctl_tcp_congestion_control,
			
 
				 	},
			
 
				+	{
			
 
				+		.ctl_name	= NET_TCP_ABC,
			
 
				+		.procname	= "tcp_abc",
			
 
				+		.data		= &sysctl_tcp_abc,
			
 
				+		.maxlen		= sizeof(int),
			
 
				+		.mode		= 0644,
			
 
				+		.proc_handler	= &proc_dointvec,
			
 
				+	},
			
 
				 
			
 
				 	{ .ctl_name = 0 }
			
 
				 };
			
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1640,7 +1640,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 
				 	} else if (tcp_need_reset(old_state) ||
			
 
				 		   (tp->snd_nxt != tp->write_seq &&
			
 
				 		    (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) {
			
 
				-		/* The last check adjusts for discrepance of Linux wrt. RFC
			
 
				+		/* The last check adjusts for discrepancy of Linux wrt. RFC
			
 
				 		 * states
			
 
				 		 */
			
 
				 		tcp_send_active_reset(sk, gfp_any());
			
@@ -1669,6 +1669,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 
				 	tp->packets_out = 0;
			
 
				 	tp->snd_ssthresh = 0x7fffffff;
			
 
				 	tp->snd_cwnd_cnt = 0;
			
 
				+	tp->bytes_acked = 0;
			
 
				 	tcp_set_ca_state(sk, TCP_CA_Open);
			
 
				 	tcp_clear_retrans(tp);
			
 
				 	inet_csk_delack_init(sk);
			
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -217,17 +217,15 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack,
 
				 
			
 
				 	bictcp_low_utilization(sk, data_acked);
			
 
				 
			
 
				-	if (in_flight < tp->snd_cwnd)
			
 
				+	if (!tcp_is_cwnd_limited(sk, in_flight))
			
 
				 		return;
			
 
				 
			
 
				-	if (tp->snd_cwnd <= tp->snd_ssthresh) {
			
 
				-		/* In "safe" area, increase. */
			
 
				-		if (tp->snd_cwnd < tp->snd_cwnd_clamp)
			
 
				-			tp->snd_cwnd++;
			
 
				-	} else {
			
 
				+	if (tp->snd_cwnd <= tp->snd_ssthresh)
			
 
				+		tcp_slow_start(tp);
			
 
				+	else {
			
 
				 		bictcp_update(ca, tp->snd_cwnd);
			
 
				 
			
 
				-                /* In dangerous area, increase slowly.
			
 
				+		/* In dangerous area, increase slowly.
			
 
				 		 * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd
			
 
				 		 */
			
 
				 		if (tp->snd_cwnd_cnt >= ca->cnt) {
			
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -186,24 +186,32 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight,
 
				 {
			
 
				 	struct tcp_sock *tp = tcp_sk(sk);
			
 
				 
			
 
				-	if (in_flight < tp->snd_cwnd)
			
 
				+	if (!tcp_is_cwnd_limited(sk, in_flight))
			
 
				 		return;
			
 
				 
			
 
				-        if (tp->snd_cwnd <= tp->snd_ssthresh) {
			
 
				-                /* In "safe" area, increase. */
			
 
				-		if (tp->snd_cwnd < tp->snd_cwnd_clamp)
			
 
				-			tp->snd_cwnd++;
			
 
				-	} else {
			
 
				-                /* In dangerous area, increase slowly.
			
 
				-		 * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd
			
 
				-		 */
			
 
				-		if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
			
 
				-			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
			
 
				-				tp->snd_cwnd++;
			
 
				-			tp->snd_cwnd_cnt = 0;
			
 
				-		} else
			
 
				-			tp->snd_cwnd_cnt++;
			
 
				-	}
			
 
				+	/* In "safe" area, increase. */
			
 
				+        if (tp->snd_cwnd <= tp->snd_ssthresh)
			
 
				+		tcp_slow_start(tp);
			
 
				+
			
 
				+ 	/* In dangerous area, increase slowly. */
			
 
				+	else if (sysctl_tcp_abc) {
			
 
				+ 		/* RFC3465: Appropriate Byte Count
			
 
				+ 		 * increase once for each full cwnd acked
			
 
				+ 		 */
			
 
				+ 		if (tp->bytes_acked >= tp->snd_cwnd*tp->mss_cache) {
			
 
				+ 			tp->bytes_acked -= tp->snd_cwnd*tp->mss_cache;
			
 
				+ 			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
			
 
				+ 				tp->snd_cwnd++;
			
 
				+ 		}
			
 
				+ 	} else {
			
 
				+ 		/* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd */
			
 
				+ 		if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
			
 
				+ 			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
			
 
				+ 				tp->snd_cwnd++;
			
 
				+ 			tp->snd_cwnd_cnt = 0;
			
 
				+ 		} else
			
 
				+ 			tp->snd_cwnd_cnt++;
			
 
				+ 	}
			
 
				 }
			
 
				 EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid);
			
 
				 
			
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -111,18 +111,17 @@ static void hstcp_init(struct sock *sk)
 
				 }
			
 
				 
			
 
				 static void hstcp_cong_avoid(struct sock *sk, u32 adk, u32 rtt,
			
 
				-			     u32 in_flight, int good)
			
 
				+			     u32 in_flight, u32 pkts_acked)
			
 
				 {
			
 
				 	struct tcp_sock *tp = tcp_sk(sk);
			
 
				 	struct hstcp *ca = inet_csk_ca(sk);
			
 
				 
			
 
				-	if (in_flight < tp->snd_cwnd)
			
 
				+	if (!tcp_is_cwnd_limited(sk, in_flight))
			
 
				 		return;
			
 
				 
			
 
				-	if (tp->snd_cwnd <= tp->snd_ssthresh) {
			
 
				-		if (tp->snd_cwnd < tp->snd_cwnd_clamp)
			
 
				-			tp->snd_cwnd++;
			
 
				-	} else {
			
 
				+	if (tp->snd_cwnd <= tp->snd_ssthresh)
			
 
				+		tcp_slow_start(tp);
			
 
				+	else {
			
 
				 		/* Update AIMD parameters */
			
 
				 		if (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd) {
			
 
				 			while (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd &&
			
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -207,14 +207,13 @@ static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
 
				 	struct tcp_sock *tp = tcp_sk(sk);
			
 
				 	struct htcp *ca = inet_csk_ca(sk);
			
 
				 
			
 
				-	if (in_flight < tp->snd_cwnd)
			
 
				+	if (!tcp_is_cwnd_limited(sk, in_flight))
			
 
				 		return;
			
 
				 
			
 
				-        if (tp->snd_cwnd <= tp->snd_ssthresh) {
			
 
				-                /* In "safe" area, increase. */
			
 
				-		if (tp->snd_cwnd < tp->snd_cwnd_clamp)
			
 
				-			tp->snd_cwnd++;
			
 
				-	} else {
			
 
				+        if (tp->snd_cwnd <= tp->snd_ssthresh)
			
 
				+		tcp_slow_start(tp);
			
 
				+	else {
			
 
				+
			
 
				 		measure_rtt(sk);
			
 
				 
			
 
				 		/* keep track of number of round-trip times since last backoff event */
			
@@ -224,7 +223,7 @@ static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
 
				 			htcp_alpha_update(ca);
			
 
				 		}
			
 
				 
			
 
				-                /* In dangerous area, increase slowly.
			
 
				+		/* In dangerous area, increase slowly.
			
 
				 		 * In theory this is tp->snd_cwnd += alpha / tp->snd_cwnd
			
 
				 		 */
			
 
				 		if ((tp->snd_cwnd_cnt++ * ca->alpha)>>7 >= tp->snd_cwnd) {
			
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -100,12 +100,12 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
 
				 		ca->minrtt = tp->srtt;
			
 
				 	}
			
 
				 
			
 
				+	if (!tcp_is_cwnd_limited(sk, in_flight))
			
 
				+		return;
			
 
				+
			
 
				 	if (!ca->hybla_en)
			
 
				 		return tcp_reno_cong_avoid(sk, ack, rtt, in_flight, flag);
			
 
				 
			
 
				-	if (in_flight < tp->snd_cwnd)
			
 
				-		return;
			
 
				-
			
 
				 	if (ca->rho == 0)
			
 
				 		hybla_recalc_param(sk);
			
 
				 
			
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -42,7 +42,7 @@
 
				  *		Andi Kleen	:	Moved open_request checking here
			
 
				  *					and process RSTs for open_requests.
			
 
				  *		Andi Kleen	:	Better prune_queue, and other fixes.
			
 
				- *		Andrey Savochkin:	Fix RTT measurements in the presnce of
			
 
				+ *		Andrey Savochkin:	Fix RTT measurements in the presence of
			
 
				  *					timestamps.
			
 
				  *		Andrey Savochkin:	Check sequence numbers correctly when
			
 
				  *					removing SACKs due to in sequence incoming
			
@@ -89,6 +89,7 @@ int sysctl_tcp_frto;
 
				 int sysctl_tcp_nometrics_save;
			
 
				 
			
 
				 int sysctl_tcp_moderate_rcvbuf = 1;
			
 
				+int sysctl_tcp_abc = 1;
			
 
				 
			
 
				 #define FLAG_DATA		0x01 /* Incoming frame contained data.		*/
			
 
				 #define FLAG_WIN_UPDATE		0x02 /* Incoming ACK was a window update.	*/
			
@@ -223,7 +224,7 @@ static void tcp_fixup_sndbuf(struct sock *sk)
 
				  *   of receiver window. Check #2.
			
 
				  *
			
 
				  * The scheme does not work when sender sends good segments opening
			
 
				- * window and then starts to feed us spagetti. But it should work
			
 
				+ * window and then starts to feed us spaghetti. But it should work
			
 
				  * in common situations. Otherwise, we have to rely on queue collapsing.
			
 
				  */
			
 
				 
			
@@ -233,7 +234,7 @@ static int __tcp_grow_window(const struct sock *sk, struct tcp_sock *tp,
 
				 {
			
 
				 	/* Optimize this! */
			
 
				 	int truesize = tcp_win_from_space(skb->truesize)/2;
			
 
				-	int window = tcp_full_space(sk)/2;
			
 
				+	int window = tcp_win_from_space(sysctl_tcp_rmem[2])/2;
			
 
				 
			
 
				 	while (tp->rcv_ssthresh <= window) {
			
 
				 		if (truesize <= skb->len)
			
@@ -277,7 +278,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
 
				 	int rcvmem = tp->advmss + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff);
			
 
				 
			
 
				 	/* Try to select rcvbuf so that 4 mss-sized segments
			
 
				-	 * will fit to window and correspoding skbs will fit to our rcvbuf.
			
 
				+	 * will fit to window and corresponding skbs will fit to our rcvbuf.
			
 
				 	 * (was 3; 4 is minimum to allow fast retransmit to work.)
			
 
				 	 */
			
 
				 	while (tcp_win_from_space(rcvmem) < tp->advmss)
			
@@ -286,7 +287,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
 
				 		sk->sk_rcvbuf = min(4 * rcvmem, sysctl_tcp_rmem[2]);
			
 
				 }
			
 
				 
			
 
				-/* 4. Try to fixup all. It is made iimediately after connection enters
			
 
				+/* 4. Try to fixup all. It is made immediately after connection enters
			
 
				  *    established state.
			
 
				  */
			
 
				 static void tcp_init_buffer_space(struct sock *sk)
			
@@ -326,37 +327,18 @@ static void tcp_init_buffer_space(struct sock *sk)
 
				 static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp)
			
 
				 {
			
 
				 	struct inet_connection_sock *icsk = inet_csk(sk);
			
 
				-	struct sk_buff *skb;
			
 
				-	unsigned int app_win = tp->rcv_nxt - tp->copied_seq;
			
 
				-	int ofo_win = 0;
			
 
				 
			
 
				 	icsk->icsk_ack.quick = 0;
			
 
				 
			
 
				-	skb_queue_walk(&tp->out_of_order_queue, skb) {
			
 
				-		ofo_win += skb->len;
			
 
				-	}
			
 
				-
			
 
				-	/* If overcommit is due to out of order segments,
			
 
				-	 * do not clamp window. Try to expand rcvbuf instead.
			
 
				-	 */
			
 
				-	if (ofo_win) {
			
 
				-		if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
			
 
				-		    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
			
 
				-		    !tcp_memory_pressure &&
			
 
				-		    atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0])
			
 
				-			sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
			
 
				-					    sysctl_tcp_rmem[2]);
			
 
				+	if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
			
 
				+	    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
			
 
				+	    !tcp_memory_pressure &&
			
 
				+	    atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
			
 
				+		sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
			
 
				+				    sysctl_tcp_rmem[2]);
			
 
				 	}
			
 
				-	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) {
			
 
				-		app_win += ofo_win;
			
 
				-		if (atomic_read(&sk->sk_rmem_alloc) >= 2 * sk->sk_rcvbuf)
			
 
				-			app_win >>= 1;
			
 
				-		if (app_win > icsk->icsk_ack.rcv_mss)
			
 
				-			app_win -= icsk->icsk_ack.rcv_mss;
			
 
				-		app_win = max(app_win, 2U*tp->advmss);
			
 
				-
			
 
				+	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
			
 
				 		tp->rcv_ssthresh = min(tp->window_clamp, 2U*tp->advmss);
			
 
				-	}
			
 
				 }
			
 
				 
			
 
				 /* Receiver "autotuning" code.
			
@@ -385,8 +367,8 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
 
				 		 * are stalled on filesystem I/O.
			
 
				 		 *
			
 
				 		 * Also, since we are only going for a minimum in the
			
 
				-		 * non-timestamp case, we do not smoothe things out
			
 
				-		 * else with timestamps disabled convergance takes too
			
 
				+		 * non-timestamp case, we do not smooth things out
			
 
				+		 * else with timestamps disabled convergence takes too
			
 
				 		 * long.
			
 
				 		 */
			
 
				 		if (!win_dep) {
			
@@ -395,7 +377,7 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
 
				 		} else if (m < new_sample)
			
 
				 			new_sample = m << 3;
			
 
				 	} else {
			
 
				-		/* No previous mesaure. */
			
 
				+		/* No previous measure. */
			
 
				 		new_sample = m << 3;
			
 
				 	}
			
 
				 
			
@@ -524,7 +506,7 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_
 
				 			if (icsk->icsk_ack.ato > icsk->icsk_rto)
			
 
				 				icsk->icsk_ack.ato = icsk->icsk_rto;
			
 
				 		} else if (m > icsk->icsk_rto) {
			
 
				-			/* Too long gap. Apparently sender falled to
			
 
				+			/* Too long gap. Apparently sender failed to
			
 
				 			 * restart window, so that we send ACKs quickly.
			
 
				 			 */
			
 
				 			tcp_incr_quickack(sk);
			
@@ -548,10 +530,9 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_
 
				  * To save cycles in the RFC 1323 implementation it was better to break
			
 
				  * it up into three procedures. -- erics
			
 
				  */
			
 
				-static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt, u32 *usrtt)
			
 
				+static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
			
 
				 {
			
 
				 	struct tcp_sock *tp = tcp_sk(sk);
			
 
				-	const struct inet_connection_sock *icsk = inet_csk(sk);
			
 
				 	long m = mrtt; /* RTT */
			
 
				 
			
 
				 	/*	The following amusing code comes from Jacobson's
			
@@ -565,7 +546,7 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt, u32 *usrtt)
 
				 	 *
			
 
				 	 * Funny. This algorithm seems to be very broken.
			
 
				 	 * These formulae increase RTO, when it should be decreased, increase
			
 
				-	 * too slowly, when it should be incresed fastly, decrease too fastly
			
 
				+	 * too slowly, when it should be increased fastly, decrease too fastly
			
 
				 	 * etc. I guess in BSD RTO takes ONE value, so that it is absolutely
			
 
				 	 * does not matter how to _calculate_ it. Seems, it was trap
			
 
				 	 * that VJ failed to avoid. 8)
			
@@ -610,9 +591,6 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt, u32 *usrtt)
 
				 		tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN);
			
 
				 		tp->rtt_seq = tp->snd_nxt;
			
 
				 	}
			
 
				-
			
 
				-	if (icsk->icsk_ca_ops->rtt_sample)
			
 
				-		icsk->icsk_ca_ops->rtt_sample(sk, *usrtt);
			
 
				 }
			
 
				 
			
 
				 /* Calculate rto without backoff.  This is the second half of Van Jacobson's
			
@@ -629,14 +607,14 @@ static inline void tcp_set_rto(struct sock *sk)
 
				 	 *    at least by solaris and freebsd. "Erratic ACKs" has _nothing_
			
 
				 	 *    to do with delayed acks, because at cwnd>2 true delack timeout
			
 
				 	 *    is invisible. Actually, Linux-2.4 also generates erratic
			
 
				-	 *    ACKs in some curcumstances.
			
 
				+	 *    ACKs in some circumstances.
			
 
				 	 */
			
 
				 	inet_csk(sk)->icsk_rto = (tp->srtt >> 3) + tp->rttvar;
			
 
				 
			
 
				 	/* 2. Fixups made earlier cannot be right.
			
 
				 	 *    If we do not estimate RTO correctly without them,
			
 
				 	 *    all the algo is pure shit and should be replaced
			
 
				-	 *    with correct one. It is exaclty, which we pretend to do.
			
 
				+	 *    with correct one. It is exactly, which we pretend to do.
			
 
				 	 */
			
 
				 }
			
 
				 
			
@@ -794,7 +772,7 @@ static void tcp_init_metrics(struct sock *sk)
 
				 	 * to make it more realistic.
			
 
				 	 *
			
 
				 	 * A bit of theory. RTT is time passed after "normal" sized packet
			
 
				-	 * is sent until it is ACKed. In normal curcumstances sending small
			
 
				+	 * is sent until it is ACKed. In normal circumstances sending small
			
 
				 	 * packets force peer to delay ACKs and calculation is correct too.
			
 
				 	 * The algorithm is adaptive and, provided we follow specs, it
			
 
				 	 * NEVER underestimate RTT. BUT! If peer tries to make some clever
			
@@ -919,18 +897,32 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 
				 	int prior_fackets;
			
 
				 	u32 lost_retrans = 0;
			
 
				 	int flag = 0;
			
 
				+	int dup_sack = 0;
			
 
				 	int i;
			
 
				 
			
 
				 	if (!tp->sacked_out)
			
 
				 		tp->fackets_out = 0;
			
 
				 	prior_fackets = tp->fackets_out;
			
 
				 
			
 
				-	for (i=0; i<num_sacks; i++, sp++) {
			
 
				-		struct sk_buff *skb;
			
 
				-		__u32 start_seq = ntohl(sp->start_seq);
			
 
				-		__u32 end_seq = ntohl(sp->end_seq);
			
 
				-		int fack_count = 0;
			
 
				-		int dup_sack = 0;
			
 
				+	/* SACK fastpath:
			
 
				+	 * if the only SACK change is the increase of the end_seq of
			
 
				+	 * the first block then only apply that SACK block
			
 
				+	 * and use retrans queue hinting otherwise slowpath */
			
 
				+	flag = 1;
			
 
				+	for (i = 0; i< num_sacks; i++) {
			
 
				+		__u32 start_seq = ntohl(sp[i].start_seq);
			
 
				+		__u32 end_seq =	 ntohl(sp[i].end_seq);
			
 
				+
			
 
				+		if (i == 0){
			
 
				+			if (tp->recv_sack_cache[i].start_seq != start_seq)
			
 
				+				flag = 0;
			
 
				+		} else {
			
 
				+			if ((tp->recv_sack_cache[i].start_seq != start_seq) ||
			
 
				+			    (tp->recv_sack_cache[i].end_seq != end_seq))
			
 
				+				flag = 0;
			
 
				+		}
			
 
				+		tp->recv_sack_cache[i].start_seq = start_seq;
			
 
				+		tp->recv_sack_cache[i].end_seq = end_seq;
			
 
				 
			
 
				 		/* Check for D-SACK. */
			
 
				 		if (i == 0) {
			
@@ -962,15 +954,58 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 
				 			if (before(ack, prior_snd_una - tp->max_window))
			
 
				 				return 0;
			
 
				 		}
			
 
				+	}
			
 
				+
			
 
				+	if (flag)
			
 
				+		num_sacks = 1;
			
 
				+	else {
			
 
				+		int j;
			
 
				+		tp->fastpath_skb_hint = NULL;
			
 
				+
			
 
				+		/* order SACK blocks to allow in order walk of the retrans queue */
			
 
				+		for (i = num_sacks-1; i > 0; i--) {
			
 
				+			for (j = 0; j < i; j++){
			
 
				+				if (after(ntohl(sp[j].start_seq),
			
 
				+					  ntohl(sp[j+1].start_seq))){
			
 
				+					sp[j].start_seq = htonl(tp->recv_sack_cache[j+1].start_seq);
			
 
				+					sp[j].end_seq = htonl(tp->recv_sack_cache[j+1].end_seq);
			
 
				+					sp[j+1].start_seq = htonl(tp->recv_sack_cache[j].start_seq);
			
 
				+					sp[j+1].end_seq = htonl(tp->recv_sack_cache[j].end_seq);
			
 
				+				}
			
 
				+
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	/* clear flag as used for different purpose in following code */
			
 
				+	flag = 0;
			
 
				+
			
 
				+	for (i=0; i<num_sacks; i++, sp++) {
			
 
				+		struct sk_buff *skb;
			
 
				+		__u32 start_seq = ntohl(sp->start_seq);
			
 
				+		__u32 end_seq = ntohl(sp->end_seq);
			
 
				+		int fack_count;
			
 
				+
			
 
				+		/* Use SACK fastpath hint if valid */
			
 
				+		if (tp->fastpath_skb_hint) {
			
 
				+			skb = tp->fastpath_skb_hint;
			
 
				+			fack_count = tp->fastpath_cnt_hint;
			
 
				+		} else {
			
 
				+			skb = sk->sk_write_queue.next;
			
 
				+			fack_count = 0;
			
 
				+		}
			
 
				 
			
 
				 		/* Event "B" in the comment above. */
			
 
				 		if (after(end_seq, tp->high_seq))
			
 
				 			flag |= FLAG_DATA_LOST;
			
 
				 
			
 
				-		sk_stream_for_retrans_queue(skb, sk) {
			
 
				+		sk_stream_for_retrans_queue_from(skb, sk) {
			
 
				 			int in_sack, pcount;
			
 
				 			u8 sacked;
			
 
				 
			
 
				+			tp->fastpath_skb_hint = skb;
			
 
				+			tp->fastpath_cnt_hint = fack_count;
			
 
				+
			
 
				 			/* The retransmission queue is always in order, so
			
 
				 			 * we can short-circuit the walk early.
			
 
				 			 */
			
@@ -1045,6 +1080,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 
				 						TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
			
 
				 						tp->lost_out -= tcp_skb_pcount(skb);
			
 
				 						tp->retrans_out -= tcp_skb_pcount(skb);
			
 
				+
			
 
				+						/* clear lost hint */
			
 
				+						tp->retransmit_skb_hint = NULL;
			
 
				 					}
			
 
				 				} else {
			
 
				 					/* New sack for not retransmitted frame,
			
@@ -1057,6 +1095,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 
				 					if (sacked & TCPCB_LOST) {
			
 
				 						TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
			
 
				 						tp->lost_out -= tcp_skb_pcount(skb);
			
 
				+
			
 
				+						/* clear lost hint */
			
 
				+						tp->retransmit_skb_hint = NULL;
			
 
				 					}
			
 
				 				}
			
 
				 
			
@@ -1080,6 +1121,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 
				 			    (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS)) {
			
 
				 				TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
			
 
				 				tp->retrans_out -= tcp_skb_pcount(skb);
			
 
				+				tp->retransmit_skb_hint = NULL;
			
 
				 			}
			
 
				 		}
			
 
				 	}
			
@@ -1107,6 +1149,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 
				 				TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
			
 
				 				tp->retrans_out -= tcp_skb_pcount(skb);
			
 
				 
			
 
				+				/* clear lost hint */
			
 
				+				tp->retransmit_skb_hint = NULL;
			
 
				+
			
 
				 				if (!(TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_SACKED_ACKED))) {
			
 
				 					tp->lost_out += tcp_skb_pcount(skb);
			
 
				 					TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
			
@@ -1214,6 +1259,8 @@ static void tcp_enter_frto_loss(struct sock *sk)
 
				 	tcp_set_ca_state(sk, TCP_CA_Loss);
			
 
				 	tp->high_seq = tp->frto_highmark;
			
 
				 	TCP_ECN_queue_cwr(tp);
			
 
				+
			
 
				+	clear_all_retrans_hints(tp);
			
 
				 }
			
 
				 
			
 
				 void tcp_clear_retrans(struct tcp_sock *tp)
			
@@ -1251,6 +1298,7 @@ void tcp_enter_loss(struct sock *sk, int how)
 
				 	tp->snd_cwnd_cnt   = 0;
			
 
				 	tp->snd_cwnd_stamp = tcp_time_stamp;
			
 
				 
			
 
				+	tp->bytes_acked = 0;
			
 
				 	tcp_clear_retrans(tp);
			
 
				 
			
 
				 	/* Push undo marker, if it was plain RTO and nothing
			
@@ -1279,6 +1327,8 @@ void tcp_enter_loss(struct sock *sk, int how)
 
				 	tcp_set_ca_state(sk, TCP_CA_Loss);
			
 
				 	tp->high_seq = tp->snd_nxt;
			
 
				 	TCP_ECN_queue_cwr(tp);
			
 
				+
			
 
				+	clear_all_retrans_hints(tp);
			
 
				 }
			
 
				 
			
 
				 static int tcp_check_sack_reneging(struct sock *sk)
			
@@ -1503,17 +1553,37 @@ static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp,
 
				 			       int packets, u32 high_seq)
			
 
				 {
			
 
				 	struct sk_buff *skb;
			
 
				-	int cnt = packets;
			
 
				+	int cnt;
			
 
				 
			
 
				-	BUG_TRAP(cnt <= tp->packets_out);
			
 
				+	BUG_TRAP(packets <= tp->packets_out);
			
 
				+	if (tp->lost_skb_hint) {
			
 
				+		skb = tp->lost_skb_hint;
			
 
				+		cnt = tp->lost_cnt_hint;
			
 
				+	} else {
			
 
				+		skb = sk->sk_write_queue.next;
			
 
				+		cnt = 0;
			
 
				+	}
			
 
				 
			
 
				-	sk_stream_for_retrans_queue(skb, sk) {
			
 
				-		cnt -= tcp_skb_pcount(skb);
			
 
				-		if (cnt < 0 || after(TCP_SKB_CB(skb)->end_seq, high_seq))
			
 
				+	sk_stream_for_retrans_queue_from(skb, sk) {
			
 
				+		/* TODO: do this better */
			
 
				+		/* this is not the most efficient way to do this... */
			
 
				+		tp->lost_skb_hint = skb;
			
 
				+		tp->lost_cnt_hint = cnt;
			
 
				+		cnt += tcp_skb_pcount(skb);
			
 
				+		if (cnt > packets || after(TCP_SKB_CB(skb)->end_seq, high_seq))
			
 
				 			break;
			
 
				 		if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) {
			
 
				 			TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
			
 
				 			tp->lost_out += tcp_skb_pcount(skb);
			
 
				+
			
 
				+			/* clear xmit_retransmit_queue hints
			
 
				+			 *  if this is beyond hint */
			
 
				+			if(tp->retransmit_skb_hint != NULL &&
			
 
				+			   before(TCP_SKB_CB(skb)->seq,
			
 
				+				  TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) {
			
 
				+
			
 
				+				tp->retransmit_skb_hint = NULL;
			
 
				+			}
			
 
				 		}
			
 
				 	}
			
 
				 	tcp_sync_left_out(tp);
			
@@ -1540,13 +1610,28 @@ static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp)
 
				 	if (tcp_head_timedout(sk, tp)) {
			
 
				 		struct sk_buff *skb;
			
 
				 
			
 
				-		sk_stream_for_retrans_queue(skb, sk) {
			
 
				-			if (tcp_skb_timedout(sk, skb) &&
			
 
				-			    !(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) {
			
 
				+		skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint
			
 
				+			: sk->sk_write_queue.next;
			
 
				+
			
 
				+		sk_stream_for_retrans_queue_from(skb, sk) {
			
 
				+			if (!tcp_skb_timedout(sk, skb))
			
 
				+				break;
			
 
				+
			
 
				+			if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) {
			
 
				 				TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
			
 
				 				tp->lost_out += tcp_skb_pcount(skb);
			
 
				+
			
 
				+				/* clear xmit_retrans hint */
			
 
				+				if (tp->retransmit_skb_hint &&
			
 
				+				    before(TCP_SKB_CB(skb)->seq,
			
 
				+					   TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
			
 
				+
			
 
				+					tp->retransmit_skb_hint = NULL;
			
 
				 			}
			
 
				 		}
			
 
				+
			
 
				+		tp->scoreboard_skb_hint = skb;
			
 
				+
			
 
				 		tcp_sync_left_out(tp);
			
 
				 	}
			
 
				 }
			
@@ -1626,6 +1711,10 @@ static void tcp_undo_cwr(struct sock *sk, const int undo)
 
				 	}
			
 
				 	tcp_moderate_cwnd(tp);
			
 
				 	tp->snd_cwnd_stamp = tcp_time_stamp;
			
 
				+
			
 
				+	/* There is something screwy going on with the retrans hints after
			
 
				+	   an undo */
			
 
				+	clear_all_retrans_hints(tp);
			
 
				 }
			
 
				 
			
 
				 static inline int tcp_may_undo(struct tcp_sock *tp)
			
@@ -1709,6 +1798,9 @@ static int tcp_try_undo_loss(struct sock *sk, struct tcp_sock *tp)
 
				 		sk_stream_for_retrans_queue(skb, sk) {
			
 
				 			TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
			
 
				 		}
			
 
				+
			
 
				+		clear_all_retrans_hints(tp);
			
 
				+
			
 
				 		DBGUNDO(sk, tp, "partial loss");
			
 
				 		tp->lost_out = 0;
			
 
				 		tp->left_out = tp->sacked_out;
			
@@ -1908,6 +2000,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
 
				 			TCP_ECN_queue_cwr(tp);
			
 
				 		}
			
 
				 
			
 
				+		tp->bytes_acked = 0;
			
 
				 		tp->snd_cwnd_cnt = 0;
			
 
				 		tcp_set_ca_state(sk, TCP_CA_Recovery);
			
 
				 	}
			
@@ -1919,9 +2012,9 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
 
				 }
			
 
				 
			
 
				 /* Read draft-ietf-tcplw-high-performance before mucking
			
 
				- * with this code. (Superceeds RFC1323)
			
 
				+ * with this code. (Supersedes RFC1323)
			
 
				  */
			
 
				-static void tcp_ack_saw_tstamp(struct sock *sk, u32 *usrtt, int flag)
			
 
				+static void tcp_ack_saw_tstamp(struct sock *sk, int flag)
			
 
				 {
			
 
				 	/* RTTM Rule: A TSecr value received in a segment is used to
			
 
				 	 * update the averaged RTT measurement only if the segment
			
@@ -1932,7 +2025,7 @@ static void tcp_ack_saw_tstamp(struct sock *sk, u32 *usrtt, int flag)
 
				 	 * 1998/04/10 Andrey V. Savochkin <saw@msu.ru>
			
 
				 	 *
			
 
				 	 * Changed: reset backoff as soon as we see the first valid sample.
			
 
				-	 * If we do not, we get strongly overstimated rto. With timestamps
			
 
				+	 * If we do not, we get strongly overestimated rto. With timestamps
			
 
				 	 * samples are accepted even from very old segments: f.e., when rtt=1
			
 
				 	 * increases to 8, we retransmit 5 times and after 8 seconds delayed
			
 
				 	 * answer arrives rto becomes 120 seconds! If at least one of segments
			
@@ -1940,13 +2033,13 @@ static void tcp_ack_saw_tstamp(struct sock *sk, u32 *usrtt, int flag)
 
				 	 */
			
 
				 	struct tcp_sock *tp = tcp_sk(sk);
			
 
				 	const __u32 seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
			
 
				-	tcp_rtt_estimator(sk, seq_rtt, usrtt);
			
 
				+	tcp_rtt_estimator(sk, seq_rtt);
			
 
				 	tcp_set_rto(sk);
			
 
				 	inet_csk(sk)->icsk_backoff = 0;
			
 
				 	tcp_bound_rto(sk);
			
 
				 }
			
 
				 
			
 
				-static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, u32 *usrtt, int flag)
			
 
				+static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, int flag)
			
 
				 {
			
 
				 	/* We don't have a timestamp. Can only use
			
 
				 	 * packets that are not retransmitted to determine
			
@@ -1960,21 +2053,21 @@ static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, u32 *usrtt, int flag
 
				 	if (flag & FLAG_RETRANS_DATA_ACKED)
			
 
				 		return;
			
 
				 
			
 
				-	tcp_rtt_estimator(sk, seq_rtt, usrtt);
			
 
				+	tcp_rtt_estimator(sk, seq_rtt);
			
 
				 	tcp_set_rto(sk);
			
 
				 	inet_csk(sk)->icsk_backoff = 0;
			
 
				 	tcp_bound_rto(sk);
			
 
				 }
			
 
				 
			
 
				 static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
			
 
				-				      const s32 seq_rtt, u32 *usrtt)
			
 
				+				      const s32 seq_rtt)
			
 
				 {
			
 
				 	const struct tcp_sock *tp = tcp_sk(sk);
			
 
				 	/* Note that peer MAY send zero echo. In this case it is ignored. (rfc1323) */
			
 
				 	if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
			
 
				-		tcp_ack_saw_tstamp(sk, usrtt, flag);
			
 
				+		tcp_ack_saw_tstamp(sk, flag);
			
 
				 	else if (seq_rtt >= 0)
			
 
				-		tcp_ack_no_tstamp(sk, seq_rtt, usrtt, flag);
			
 
				+		tcp_ack_no_tstamp(sk, seq_rtt, flag);
			
 
				 }
			
 
				 
			
 
				 static inline void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
			
@@ -2054,20 +2147,27 @@ static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb,
 
				 	return acked;
			
 
				 }
			
 
				 
			
 
				+static inline u32 tcp_usrtt(const struct sk_buff *skb)
			
 
				+{
			
 
				+	struct timeval tv, now;
			
 
				+
			
 
				+	do_gettimeofday(&now);
			
 
				+	skb_get_timestamp(skb, &tv);
			
 
				+	return (now.tv_sec - tv.tv_sec) * 1000000 + (now.tv_usec - tv.tv_usec);
			
 
				+}
			
 
				 
			
 
				 /* Remove acknowledged frames from the retransmission queue. */
			
 
				-static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt)
			
 
				+static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
			
 
				 {
			
 
				 	struct tcp_sock *tp = tcp_sk(sk);
			
 
				+	const struct inet_connection_sock *icsk = inet_csk(sk);
			
 
				 	struct sk_buff *skb;
			
 
				 	__u32 now = tcp_time_stamp;
			
 
				 	int acked = 0;
			
 
				 	__s32 seq_rtt = -1;
			
 
				-	struct timeval usnow;
			
 
				 	u32 pkts_acked = 0;
			
 
				-
			
 
				-	if (seq_usrtt)
			
 
				-		do_gettimeofday(&usnow);
			
 
				+	void (*rtt_sample)(struct sock *sk, u32 usrtt)
			
 
				+		= icsk->icsk_ca_ops->rtt_sample;
			
 
				 
			
 
				 	while ((skb = skb_peek(&sk->sk_write_queue)) &&
			
 
				 	       skb != sk->sk_send_head) {
			
@@ -2107,16 +2207,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt
 
				 					tp->retrans_out -= tcp_skb_pcount(skb);
			
 
				 				acked |= FLAG_RETRANS_DATA_ACKED;
			
 
				 				seq_rtt = -1;
			
 
				-			} else if (seq_rtt < 0)
			
 
				+			} else if (seq_rtt < 0) {
			
 
				 				seq_rtt = now - scb->when;
			
 
				-			if (seq_usrtt) {
			
 
				-				struct timeval tv;
			
 
				-			
			
 
				-				skb_get_timestamp(skb, &tv);
			
 
				-				*seq_usrtt = (usnow.tv_sec - tv.tv_sec) * 1000000
			
 
				-					+ (usnow.tv_usec - tv.tv_usec);
			
 
				+				if (rtt_sample)
			
 
				+					(*rtt_sample)(sk, tcp_usrtt(skb));
			
 
				 			}
			
 
				-
			
 
				 			if (sacked & TCPCB_SACKED_ACKED)
			
 
				 				tp->sacked_out -= tcp_skb_pcount(skb);
			
 
				 			if (sacked & TCPCB_LOST)
			
@@ -2126,17 +2221,20 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt
 
				 				    !before(scb->end_seq, tp->snd_up))
			
 
				 					tp->urg_mode = 0;
			
 
				 			}
			
 
				-		} else if (seq_rtt < 0)
			
 
				+		} else if (seq_rtt < 0) {
			
 
				 			seq_rtt = now - scb->when;
			
 
				+			if (rtt_sample)
			
 
				+				(*rtt_sample)(sk, tcp_usrtt(skb));
			
 
				+		}
			
 
				 		tcp_dec_pcount_approx(&tp->fackets_out, skb);
			
 
				 		tcp_packets_out_dec(tp, skb);
			
 
				 		__skb_unlink(skb, &sk->sk_write_queue);
			
 
				 		sk_stream_free_skb(sk, skb);
			
 
				+		clear_all_retrans_hints(tp);
			
 
				 	}
			
 
				 
			
 
				 	if (acked&FLAG_ACKED) {
			
 
				-		const struct inet_connection_sock *icsk = inet_csk(sk);
			
 
				-		tcp_ack_update_rtt(sk, acked, seq_rtt, seq_usrtt);
			
 
				+		tcp_ack_update_rtt(sk, acked, seq_rtt);
			
 
				 		tcp_ack_packets_out(sk, tp);
			
 
				 
			
 
				 		if (icsk->icsk_ca_ops->pkts_acked)
			
@@ -2284,7 +2382,7 @@ static void tcp_process_frto(struct sock *sk, u32 prior_snd_una)
 
				 	}
			
 
				 
			
 
				 	/* F-RTO affects on two new ACKs following RTO.
			
 
				-	 * At latest on third ACK the TCP behavor is back to normal.
			
 
				+	 * At latest on third ACK the TCP behavior is back to normal.
			
 
				 	 */
			
 
				 	tp->frto_counter = (tp->frto_counter + 1) % 3;
			
 
				 }
			
@@ -2299,7 +2397,6 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 
				 	u32 ack = TCP_SKB_CB(skb)->ack_seq;
			
 
				 	u32 prior_in_flight;
			
 
				 	s32 seq_rtt;
			
 
				-	s32 seq_usrtt = 0;
			
 
				 	int prior_packets;
			
 
				 
			
 
				 	/* If the ack is newer than sent or older than previous acks
			
@@ -2311,6 +2408,9 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 
				 	if (before(ack, prior_snd_una))
			
 
				 		goto old_ack;
			
 
				 
			
 
				+	if (sysctl_tcp_abc && icsk->icsk_ca_state < TCP_CA_CWR)
			
 
				+		tp->bytes_acked += ack - prior_snd_una;
			
 
				+
			
 
				 	if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
			
 
				 		/* Window is constant, pure forward advance.
			
 
				 		 * No more checks are required.
			
@@ -2352,14 +2452,13 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 
				 	prior_in_flight = tcp_packets_in_flight(tp);
			
 
				 
			
 
				 	/* See if we can take anything off of the retransmit queue. */
			
 
				-	flag |= tcp_clean_rtx_queue(sk, &seq_rtt,
			
 
				-				    icsk->icsk_ca_ops->rtt_sample ? &seq_usrtt : NULL);
			
 
				+	flag |= tcp_clean_rtx_queue(sk, &seq_rtt);
			
 
				 
			
 
				 	if (tp->frto_counter)
			
 
				 		tcp_process_frto(sk, prior_snd_una);
			
 
				 
			
 
				 	if (tcp_ack_is_dubious(sk, flag)) {
			
 
				-		/* Advanve CWND, if state allows this. */
			
 
				+		/* Advance CWND, if state allows this. */
			
 
				 		if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag))
			
 
				 			tcp_cong_avoid(sk, ack,  seq_rtt, prior_in_flight, 0);
			
 
				 		tcp_fastretrans_alert(sk, prior_snd_una, prior_packets, flag);
			
@@ -3148,7 +3247,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 
				 {
			
 
				 	struct sk_buff *skb;
			
 
				 
			
 
				-	/* First, check that queue is collapsable and find
			
 
				+	/* First, check that queue is collapsible and find
			
 
				 	 * the point where collapsing can be useful. */
			
 
				 	for (skb = head; skb != tail; ) {
			
 
				 		/* No new bits? It is possible on ofo queue. */
			
@@ -3456,7 +3555,7 @@ static __inline__ void tcp_ack_snd_check(struct sock *sk)
 
				 
			
 
				 /*
			
 
				  *	This routine is only called when we have urgent data
			
 
				- *	signalled. Its the 'slow' part of tcp_urg. It could be
			
 
				+ *	signaled. It's the 'slow' part of tcp_urg. It could be
			
 
				  *	moved inline now as tcp_urg is only called from one
			
 
				  *	place. We handle URGent data wrong. We have to - as
			
 
				  *	BSD still doesn't use the correction from RFC961.
			
@@ -3501,7 +3600,7 @@ static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
 
				 	 * urgent. To do this requires some care. We cannot just ignore
			
 
				 	 * tp->copied_seq since we would read the last urgent byte again
			
 
				 	 * as data, nor can we alter copied_seq until this data arrives
			
 
				-	 * or we break the sematics of SIOCATMARK (and thus sockatmark())
			
 
				+	 * or we break the semantics of SIOCATMARK (and thus sockatmark())
			
 
				 	 *
			
 
				 	 * NOTE. Double Dutch. Rendering to plain English: author of comment
			
 
				 	 * above did something sort of 	send("A", MSG_OOB); send("B", MSG_OOB);
			
@@ -3646,7 +3745,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 
				 	tp->rx_opt.saw_tstamp = 0;
			
 
				 
			
 
				 	/*	pred_flags is 0xS?10 << 16 + snd_wnd
			
 
				-	 *	if header_predition is to be made
			
 
				+	 *	if header_prediction is to be made
			
 
				 	 *	'S' will always be tp->tcp_header_len >> 2
			
 
				 	 *	'?' will be 0 for the fast path, otherwise pred_flags is 0 to
			
 
				 	 *  turn it off	(when there are holes in the receive 
			
@@ -4242,7 +4341,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 
				 				 */
			
 
				 				if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
			
 
				 				    !tp->srtt)
			
 
				-					tcp_ack_saw_tstamp(sk, NULL, 0);
			
 
				+					tcp_ack_saw_tstamp(sk, 0);
			
 
				 
			
 
				 				if (tp->rx_opt.tstamp_ok)
			
 
				 					tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
			
@@ -4372,6 +4471,7 @@ discard:
 
				 
			
 
				 EXPORT_SYMBOL(sysctl_tcp_ecn);
			
 
				 EXPORT_SYMBOL(sysctl_tcp_reordering);
			
 
				+EXPORT_SYMBOL(sysctl_tcp_abc);
			
 
				 EXPORT_SYMBOL(tcp_parse_options);
			
 
				 EXPORT_SYMBOL(tcp_rcv_established);
			
 
				 EXPORT_SYMBOL(tcp_rcv_state_process);
			
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -39,7 +39,7 @@
 
				  *					request_sock handling and moved
			
 
				  *					most of it into the af independent code.
			
 
				  *					Added tail drop and some other bugfixes.
			
 
				- *					Added new listen sematics.
			
 
				+ *					Added new listen semantics.
			
 
				  *		Mike McLagan	:	Routing by source
			
 
				  *	Juan Jose Ciarlante:		ip_dynaddr bits
			
 
				  *		Andi Kleen:		various fixes.
			
@@ -1210,7 +1210,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
 
				 
			
 
				 	/* An explanation is required here, I think.
			
 
				 	 * Packet length and doff are validated by header prediction,
			
 
				-	 * provided case of th->doff==0 is elimineted.
			
 
				+	 * provided case of th->doff==0 is eliminated.
			
 
				 	 * So, we defer the checks. */
			
 
				 	if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
			
 
				 	     tcp_v4_checksum_init(skb)))
			
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -158,7 +158,7 @@ kill_with_rst:
 
				 		/* I am shamed, but failed to make it more elegant.
			
 
				 		 * Yes, it is direct reference to IP, which is impossible
			
 
				 		 * to generalize to IPv6. Taking into account that IPv6
			
 
				-		 * do not undertsnad recycling in any case, it not
			
 
				+		 * do not understand recycling in any case, it not
			
 
				 		 * a big problem in practice. --ANK */
			
 
				 		if (tw->tw_family == AF_INET &&
			
 
				 		    tcp_death_row.sysctl_tw_recycle && tcptw->tw_ts_recent_stamp &&
			
@@ -194,7 +194,7 @@ kill_with_rst:
 
				 		/* In window segment, it may be only reset or bare ack. */
			
 
				 
			
 
				 		if (th->rst) {
			
 
				-			/* This is TIME_WAIT assasination, in two flavors.
			
 
				+			/* This is TIME_WAIT assassination, in two flavors.
			
 
				 			 * Oh well... nobody has a sufficient solution to this
			
 
				 			 * protocol bug yet.
			
 
				 			 */
			
@@ -380,6 +380,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 
				 		 */
			
 
				 		newtp->snd_cwnd = 2;
			
 
				 		newtp->snd_cwnd_cnt = 0;
			
 
				+		newtp->bytes_acked = 0;
			
 
				 
			
 
				 		newtp->frto_counter = 0;
			
 
				 		newtp->frto_highmark = 0;
			
@@ -550,7 +551,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
 
				 
			
 
				 	/* RFC793 page 36: "If the connection is in any non-synchronized state ...
			
 
				 	 *                  and the incoming segment acknowledges something not yet
			
 
				-	 *                  sent (the segment carries an unaccaptable ACK) ...
			
 
				+	 *                  sent (the segment carries an unacceptable ACK) ...
			
 
				 	 *                  a reset is sent."
			
 
				 	 *
			
 
				 	 * Invalid ACK: reset will be sent by listening socket
			
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -436,6 +436,8 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
 
				 	u16 flags;
			
 
				 
			
 
				 	BUG_ON(len > skb->len);
			
 
				+
			
 
				+ 	clear_all_retrans_hints(tp);
			
 
				 	nsize = skb_headlen(skb) - len;
			
 
				 	if (nsize < 0)
			
 
				 		nsize = 0;
			
@@ -599,7 +601,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
 
				    for TCP options, but includes only bare TCP header.
			
 
				 
			
 
				    tp->rx_opt.mss_clamp is mss negotiated at connection setup.
			
 
				-   It is minumum of user_mss and mss received with SYN.
			
 
				+   It is minimum of user_mss and mss received with SYN.
			
 
				    It also does not include TCP options.
			
 
				 
			
 
				    tp->pmtu_cookie is last pmtu, seen by this function.
			
@@ -1171,7 +1173,7 @@ u32 __tcp_select_window(struct sock *sk)
 
				 {
			
 
				 	struct inet_connection_sock *icsk = inet_csk(sk);
			
 
				 	struct tcp_sock *tp = tcp_sk(sk);
			
 
				-	/* MSS for the peer's data.  Previous verions used mss_clamp
			
 
				+	/* MSS for the peer's data.  Previous versions used mss_clamp
			
 
				 	 * here.  I don't know if the value based on our guesses
			
 
				 	 * of peer's MSS is better for the performance.  It's more correct
			
 
				 	 * but may be worse for the performance because of rcv_mss
			
@@ -1260,7 +1262,10 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
 
				 		BUG_ON(tcp_skb_pcount(skb) != 1 ||
			
 
				 		       tcp_skb_pcount(next_skb) != 1);
			
 
				 
			
 
				-		/* Ok.  We will be able to collapse the packet. */
			
 
				+		/* changing transmit queue under us so clear hints */
			
 
				+		clear_all_retrans_hints(tp);
			
 
				+
			
 
				+		/* Ok.	We will be able to collapse the packet. */
			
 
				 		__skb_unlink(next_skb, &sk->sk_write_queue);
			
 
				 
			
 
				 		memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size);
			
@@ -1330,6 +1335,8 @@ void tcp_simple_retransmit(struct sock *sk)
 
				 		}
			
 
				 	}
			
 
				 
			
 
				+	clear_all_retrans_hints(tp);
			
 
				+
			
 
				 	if (!lost)
			
 
				 		return;
			
 
				 
			
@@ -1361,7 +1368,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 
				 	int err;
			
 
				 
			
 
				 	/* Do not sent more than we queued. 1/4 is reserved for possible
			
 
				-	 * copying overhead: frgagmentation, tunneling, mangling etc.
			
 
				+	 * copying overhead: fragmentation, tunneling, mangling etc.
			
 
				 	 */
			
 
				 	if (atomic_read(&sk->sk_wmem_alloc) >
			
 
				 	    min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf))
			
@@ -1468,13 +1475,25 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 
				 	const struct inet_connection_sock *icsk = inet_csk(sk);
			
 
				 	struct tcp_sock *tp = tcp_sk(sk);
			
 
				 	struct sk_buff *skb;
			
 
				-	int packet_cnt = tp->lost_out;
			
 
				+	int packet_cnt;
			
 
				+
			
 
				+	if (tp->retransmit_skb_hint) {
			
 
				+		skb = tp->retransmit_skb_hint;
			
 
				+		packet_cnt = tp->retransmit_cnt_hint;
			
 
				+	}else{
			
 
				+		skb = sk->sk_write_queue.next;
			
 
				+		packet_cnt = 0;
			
 
				+	}
			
 
				 
			
 
				 	/* First pass: retransmit lost packets. */
			
 
				-	if (packet_cnt) {
			
 
				-		sk_stream_for_retrans_queue(skb, sk) {
			
 
				+	if (tp->lost_out) {
			
 
				+		sk_stream_for_retrans_queue_from(skb, sk) {
			
 
				 			__u8 sacked = TCP_SKB_CB(skb)->sacked;
			
 
				 
			
 
				+			/* we could do better than to assign each time */
			
 
				+			tp->retransmit_skb_hint = skb;
			
 
				+			tp->retransmit_cnt_hint = packet_cnt;
			
 
				+
			
 
				 			/* Assume this retransmit will generate
			
 
				 			 * only one packet for congestion window
			
 
				 			 * calculation purposes.  This works because
			
@@ -1485,10 +1504,12 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 
				 			if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
			
 
				 				return;
			
 
				 
			
 
				-			if (sacked&TCPCB_LOST) {
			
 
				+			if (sacked & TCPCB_LOST) {
			
 
				 				if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) {
			
 
				-					if (tcp_retransmit_skb(sk, skb))
			
 
				+					if (tcp_retransmit_skb(sk, skb)) {
			
 
				+						tp->retransmit_skb_hint = NULL;
			
 
				 						return;
			
 
				+					}
			
 
				 					if (icsk->icsk_ca_state != TCP_CA_Loss)
			
 
				 						NET_INC_STATS_BH(LINUX_MIB_TCPFASTRETRANS);
			
 
				 					else
			
@@ -1501,8 +1522,8 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 
				 									  TCP_RTO_MAX);
			
 
				 				}
			
 
				 
			
 
				-				packet_cnt -= tcp_skb_pcount(skb);
			
 
				-				if (packet_cnt <= 0)
			
 
				+				packet_cnt += tcp_skb_pcount(skb);
			
 
				+				if (packet_cnt >= tp->lost_out)
			
 
				 					break;
			
 
				 			}
			
 
				 		}
			
@@ -1528,9 +1549,18 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 
				 	if (tcp_may_send_now(sk, tp))
			
 
				 		return;
			
 
				 
			
 
				-	packet_cnt = 0;
			
 
				+	if (tp->forward_skb_hint) {
			
 
				+		skb = tp->forward_skb_hint;
			
 
				+		packet_cnt = tp->forward_cnt_hint;
			
 
				+	} else{
			
 
				+		skb = sk->sk_write_queue.next;
			
 
				+		packet_cnt = 0;
			
 
				+	}
			
 
				+
			
 
				+	sk_stream_for_retrans_queue_from(skb, sk) {
			
 
				+		tp->forward_cnt_hint = packet_cnt;
			
 
				+		tp->forward_skb_hint = skb;
			
 
				 
			
 
				-	sk_stream_for_retrans_queue(skb, sk) {
			
 
				 		/* Similar to the retransmit loop above we
			
 
				 		 * can pretend that the retransmitted SKB
			
 
				 		 * we send out here will be composed of one
			
@@ -1547,8 +1577,10 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 
				 			continue;
			
 
				 
			
 
				 		/* Ok, retransmit it. */
			
 
				-		if (tcp_retransmit_skb(sk, skb))
			
 
				+		if (tcp_retransmit_skb(sk, skb)) {
			
 
				+			tp->forward_skb_hint = NULL;
			
 
				 			break;
			
 
				+		}
			
 
				 
			
 
				 		if (skb == skb_peek(&sk->sk_write_queue))
			
 
				 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
			
@@ -2058,3 +2090,4 @@ EXPORT_SYMBOL(tcp_connect);
 
				 EXPORT_SYMBOL(tcp_make_synack);
			
 
				 EXPORT_SYMBOL(tcp_simple_retransmit);
			
 
				 EXPORT_SYMBOL(tcp_sync_mss);
			
 
				+EXPORT_SYMBOL(sysctl_tcp_tso_win_divisor);
			
--- a/net/ipv4/tcp_scalable.c
+++ b/net/ipv4/tcp_scalable.c
@@ -20,20 +20,20 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
 
				 				    u32 in_flight, int flag)
			
 
				 {
			
 
				 	struct tcp_sock *tp = tcp_sk(sk);
			
 
				-	if (in_flight < tp->snd_cwnd)
			
 
				+
			
 
				+	if (!tcp_is_cwnd_limited(sk, in_flight))
			
 
				 		return;
			
 
				 
			
 
				-	if (tp->snd_cwnd <= tp->snd_ssthresh) {
			
 
				-		tp->snd_cwnd++;
			
 
				-	} else {
			
 
				+	if (tp->snd_cwnd <= tp->snd_ssthresh)
			
 
				+		tcp_slow_start(tp);
			
 
				+	else {
			
 
				 		tp->snd_cwnd_cnt++;
			
 
				 		if (tp->snd_cwnd_cnt > min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT)){
			
 
				-			tp->snd_cwnd++;
			
 
				+			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
			
 
				+				tp->snd_cwnd++;
			
 
				 			tp->snd_cwnd_cnt = 0;
			
 
				 		}
			
 
				 	}
			
 
				-	tp->snd_cwnd = min_t(u32, tp->snd_cwnd, tp->snd_cwnd_clamp);
			
 
				-	tp->snd_cwnd_stamp = tcp_time_stamp;
			
 
				 }
			
 
				 
			
 
				 static u32 tcp_scalable_ssthresh(struct sock *sk)
			
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -58,7 +58,7 @@ static void tcp_write_err(struct sock *sk)
 
				  * to prevent DoS attacks. It is called when a retransmission timeout
			
 
				  * or zero probe timeout occurs on orphaned socket.
			
 
				  *
			
 
				- * Criterium is still not confirmed experimentally and may change.
			
 
				+ * Criterion is still not confirmed experimentally and may change.
			
 
				  * We kill the socket, if:
			
 
				  * 1. If number of orphaned sockets exceeds an administratively configured
			
 
				  *    limit.
			
@@ -132,7 +132,7 @@ static int tcp_write_timeout(struct sock *sk)
 
				 			   hole detection. :-(
			
 
				 
			
 
				 			   It is place to make it. It is not made. I do not want
			
 
				-			   to make it. It is disguisting. It does not work in any
			
 
				+			   to make it. It is disgusting. It does not work in any
			
 
				 			   case. Let me to cite the same draft, which requires for
			
 
				 			   us to implement this:
			
 
				 
			
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -236,8 +236,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
 
				 			/* We don't have enough RTT samples to do the Vegas
			
 
				 			 * calculation, so we'll behave like Reno.
			
 
				 			 */
			
 
				-			if (tp->snd_cwnd > tp->snd_ssthresh)
			
 
				-				tp->snd_cwnd++;
			
 
				+			tcp_reno_cong_avoid(sk, ack, seq_rtt, in_flight, cnt);
			
 
				 		} else {
			
 
				 			u32 rtt, target_cwnd, diff;
			
 
				 
			
@@ -275,7 +274,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
 
				 			 */
			
 
				 			diff = (old_wnd << V_PARAM_SHIFT) - target_cwnd;
			
 
				 
			
 
				-			if (tp->snd_cwnd < tp->snd_ssthresh) {
			
 
				+			if (tp->snd_cwnd <= tp->snd_ssthresh) {
			
 
				 				/* Slow start.  */
			
 
				 				if (diff > gamma) {
			
 
				 					/* Going too fast. Time to slow down
			
@@ -295,6 +294,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
 
				 							    V_PARAM_SHIFT)+1);
			
 
				 
			
 
				 				}
			
 
				+				tcp_slow_start(tp);
			
 
				 			} else {
			
 
				 				/* Congestion avoidance. */
			
 
				 				u32 next_snd_cwnd;
			
@@ -327,37 +327,17 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
 
				 				else if (next_snd_cwnd < tp->snd_cwnd)
			
 
				 					tp->snd_cwnd--;
			
 
				 			}
			
 
				-		}
			
 
				 
			
 
				-		/* Wipe the slate clean for the next RTT. */
			
 
				-		vegas->cntRTT = 0;
			
 
				-		vegas->minRTT = 0x7fffffff;
			
 
				+			if (tp->snd_cwnd < 2)
			
 
				+				tp->snd_cwnd = 2;
			
 
				+			else if (tp->snd_cwnd > tp->snd_cwnd_clamp)
			
 
				+				tp->snd_cwnd = tp->snd_cwnd_clamp;
			
 
				+		}
			
 
				 	}
			
 
				 
			
 
				-	/* The following code is executed for every ack we receive,
			
 
				-	 * except for conditions checked in should_advance_cwnd()
			
 
				-	 * before the call to tcp_cong_avoid(). Mainly this means that
			
 
				-	 * we only execute this code if the ack actually acked some
			
 
				-	 * data.
			
 
				-	 */
			
 
				-
			
 
				-	/* If we are in slow start, increase our cwnd in response to this ACK.
			
 
				-	 * (If we are not in slow start then we are in congestion avoidance,
			
 
				-	 * and adjust our congestion window only once per RTT. See the code
			
 
				-	 * above.)
			
 
				-	 */
			
 
				-	if (tp->snd_cwnd <= tp->snd_ssthresh)
			
 
				-		tp->snd_cwnd++;
			
 
				-
			
 
				-	/* to keep cwnd from growing without bound */
			
 
				-	tp->snd_cwnd = min_t(u32, tp->snd_cwnd, tp->snd_cwnd_clamp);
			
 
				-
			
 
				-	/* Make sure that we are never so timid as to reduce our cwnd below
			
 
				-	 * 2 MSS.
			
 
				-	 *
			
 
				-	 * Going below 2 MSS would risk huge delayed ACKs from our receiver.
			
 
				-	 */
			
 
				-	tp->snd_cwnd = max(tp->snd_cwnd, 2U);
			
 
				+	/* Wipe the slate clean for the next RTT. */
			
 
				+	vegas->cntRTT = 0;
			
 
				+	vegas->minRTT = 0x7fffffff;
			
 
				 }
			
 
				 
			
 
				 /* Extract info for Tcp socket info provided via netlink. */