@@ -352,6 +352,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 	const int code = icmp_hdr(icmp_skb)->code;
 	struct sock *sk;
 	struct sk_buff *skb;
+	struct request_sock *req;
 	__u32 seq;
 	__u32 remaining;
 	int err;
@@ -394,9 +395,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 
 	icsk = inet_csk(sk);
 	tp = tcp_sk(sk);
+	req = tp->fastopen_rsk;
 	seq = ntohl(th->seq);
 	if (sk->sk_state != TCP_LISTEN &&
-	    !between(seq, tp->snd_una, tp->snd_nxt)) {
+	    !between(seq, tp->snd_una, tp->snd_nxt) &&
+	    (req == NULL || seq != tcp_rsk(req)->snt_isn)) {
+		/* For a Fast Open socket, allow seq to be snt_isn. */
 		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
 		goto out;
 	}
@@ -435,6 +439,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 		    !icsk->icsk_backoff)
 			break;
 
+		/* XXX (TFO) - revisit the following logic for TFO */
+
 		if (sock_owned_by_user(sk))
 			break;
 
@@ -466,6 +472,14 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 		goto out;
 	}
 
+	/* XXX (TFO) - if it's a TFO socket and has been accepted, rather
+	 * than following the TCP_SYN_RECV case and closing the socket,
+	 * we ignore the ICMP error and keep trying like a fully established
+	 * socket. Is this the right thing to do?
+	 */
+	if (req && req->sk == NULL)
+		goto out;
+
 	switch (sk->sk_state) {
 		struct request_sock *req, **prev;
 	case TCP_LISTEN:
@@ -498,7 +512,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 
 	case TCP_SYN_SENT:
 	case TCP_SYN_RECV:  /* Cannot happen.
-			       It can f.e. if SYNs crossed.
+			       It can f.e. if SYNs crossed,
+			       or Fast Open.
 			     */
 		if (!sock_owned_by_user(sk)) {
 			sk->sk_err = err;
@@ -809,8 +824,12 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
 static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
 				  struct request_sock *req)
 {
-	tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
-			tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
+	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
+	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
+	 */
+	tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
+			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
+			tcp_rsk(req)->rcv_nxt, req->rcv_wnd,
 			req->ts_recent,
 			0,
 			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
@@ -1272,6 +1291,178 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
 };
 #endif
 
+static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb,
+			       struct request_sock *req,
+			       struct tcp_fastopen_cookie *foc,
+			       struct tcp_fastopen_cookie *valid_foc)
+{
+	bool skip_cookie = false;
+	struct fastopen_queue *fastopenq;
+
+	if (likely(!fastopen_cookie_present(foc))) {
+		/* See include/net/tcp.h for the meaning of these knobs */
+		if ((sysctl_tcp_fastopen & TFO_SERVER_ALWAYS) ||
+		    ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD) &&
+		     (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1)))
+			skip_cookie = true; /* no cookie to validate */
+		else
+			return false;
+	}
+	fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq;
+	/* A FO option is present; bump the counter. */
+	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE);
+
+	/* Make sure the listener has enabled fastopen, and we don't
+	 * exceed the max # of pending TFO requests allowed before trying
+	 * to validate the cookie, in order to avoid burning CPU cycles
+	 * unnecessarily.
+	 *
+	 * XXX (TFO) - The implication of checking the max_qlen before
+	 * processing a cookie request is that clients can't differentiate
+	 * between qlen overflow causing Fast Open to be disabled
+	 * temporarily vs a server not supporting Fast Open at all.
+	 */
+	if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) == 0 ||
+	    fastopenq == NULL || fastopenq->max_qlen == 0)
+		return false;
+
+	if (fastopenq->qlen >= fastopenq->max_qlen) {
+		struct request_sock *req1;
+		spin_lock(&fastopenq->lock);
+		req1 = fastopenq->rskq_rst_head;
+		if ((req1 == NULL) || time_after(req1->expires, jiffies)) {
+			spin_unlock(&fastopenq->lock);
+			NET_INC_STATS_BH(sock_net(sk),
+					 LINUX_MIB_TCPFASTOPENLISTENOVERFLOW);
+			/* Avoid bumping LINUX_MIB_TCPFASTOPENPASSIVEFAIL */
+			foc->len = -1;
+			return false;
+		}
+		fastopenq->rskq_rst_head = req1->dl_next;
+		fastopenq->qlen--;
+		spin_unlock(&fastopenq->lock);
+		reqsk_free(req1);
+	}
+	if (skip_cookie) {
+		tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+		return true;
+	}
+	if (foc->len == TCP_FASTOPEN_COOKIE_SIZE) {
+		if ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_CHKED) == 0) {
+			tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);
+			if ((valid_foc->len != TCP_FASTOPEN_COOKIE_SIZE) ||
+			    memcmp(&foc->val[0], &valid_foc->val[0],
+				   TCP_FASTOPEN_COOKIE_SIZE) != 0)
+				return false;
+			valid_foc->len = -1;
+		}
+		/* Acknowledge the data received from the peer. */
+		tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+		return true;
+	} else if (foc->len == 0) { /* Client requesting a cookie */
+		tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);
+		NET_INC_STATS_BH(sock_net(sk),
+				 LINUX_MIB_TCPFASTOPENCOOKIEREQD);
+	} else {
+		/* Client sent a cookie with wrong size. Treat it
+		 * the same as invalid and return a valid one.
+		 */
+		tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);
+	}
+	return false;
+}
+
+static int tcp_v4_conn_req_fastopen(struct sock *sk,
+				    struct sk_buff *skb,
+				    struct sk_buff *skb_synack,
+				    struct request_sock *req,
+				    struct request_values *rvp)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
+	const struct inet_request_sock *ireq = inet_rsk(req);
+	struct sock *child;
+
+	req->retrans = 0;
+	req->sk = NULL;
+
+	child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
+	if (child == NULL) {
+		NET_INC_STATS_BH(sock_net(sk),
+				 LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
+		kfree_skb(skb_synack);
+		return -1;
+	}
+	ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr,
+			      ireq->rmt_addr, ireq->opt);
+	/* XXX (TFO) - is it ok to ignore error and continue? */
+
+	spin_lock(&queue->fastopenq->lock);
+	queue->fastopenq->qlen++;
+	spin_unlock(&queue->fastopenq->lock);
+
+	/* Initialize the child socket. Have to fix some values to take
+	 * into account the child is a Fast Open socket and is created
+	 * only out of the bits carried in the SYN packet.
+	 */
+	tp = tcp_sk(child);
+
+	tp->fastopen_rsk = req;
+	/* Do a hold on the listener sk so that if the listener is being
+	 * closed, the child that has been accepted can live on and still
+	 * access listen_lock.
+	 */
+	sock_hold(sk);
+	tcp_rsk(req)->listener = sk;
+
+	/* RFC1323: The window in SYN & SYN/ACK segments is never
+	 * scaled. So correct it appropriately.
+	 */
+	tp->snd_wnd = ntohs(tcp_hdr(skb)->window);
+
+	/* Activate the retrans timer so that SYNACK can be retransmitted.
+	 * The request socket is not added to the SYN table of the parent
+	 * because it's been added to the accept queue directly.
+	 */
+	inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
+				  TCP_TIMEOUT_INIT, TCP_RTO_MAX);
+
+	/* Add the child socket directly into the accept queue */
+	inet_csk_reqsk_queue_add(sk, req, child);
+
+	/* Now finish processing the fastopen child socket. */
+	inet_csk(child)->icsk_af_ops->rebuild_header(child);
+	tcp_init_congestion_control(child);
+	tcp_mtup_init(child);
+	tcp_init_buffer_space(child);
+	tcp_init_metrics(child);
+
+	/* Queue the data carried in the SYN packet. We need to first
+	 * bump skb's refcnt because the caller will attempt to free it.
+	 *
+	 * XXX (TFO) - we honor a zero-payload TFO request for now.
+	 * (Any reason not to?)
+	 */
+	if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq + 1) {
+		/* Don't queue the skb if there is no payload in SYN.
+		 * XXX (TFO) - How about SYN+FIN?
+		 */
+		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+	} else {
+		skb = skb_get(skb);
+		skb_dst_drop(skb);
+		__skb_pull(skb, tcp_hdr(skb)->doff * 4);
+		skb_set_owner_r(skb, child);
+		__skb_queue_tail(&child->sk_receive_queue, skb);
+		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+	}
+	sk->sk_data_ready(sk, 0);
+	bh_unlock_sock(child);
+	sock_put(child);
+	WARN_ON(req->sk == NULL);
+	return 0;
+}
+
 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_extend_values tmp_ext;
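
Note on the gating in tcp_fastopen_check() above: the TFO path is taken only when the server bit is set in the net.ipv4.tcp_fastopen sysctl and the listener has a non-zero fastopenq->max_qlen. The following is a minimal user-space sketch of a listener opting in, not part of this patch; it assumes the TCP_FASTOPEN socket option from the companion changes (the fallback define of 23 below is an assumption for older headers), and the queue length passed to setsockopt() is what is intended to become max_qlen.

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef TCP_FASTOPEN
#define TCP_FASTOPEN	23	/* assumed value from the companion uapi change */
#endif

/* Create a TFO-enabled IPv4 listener; returns the fd or -1 on error. */
static int tfo_listen(unsigned short port)
{
	struct sockaddr_in addr;
	int qlen = 16;	/* max pending TFO requests, i.e. fastopenq->max_qlen */
	int fd = socket(AF_INET, SOCK_STREAM, 0);

	if (fd < 0)
		return -1;
	memset(&addr, 0, sizeof(addr));
	addr.sin_family = AF_INET;
	addr.sin_port = htons(port);
	addr.sin_addr.s_addr = htonl(INADDR_ANY);
	if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0 ||
	    setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)) < 0 ||
	    listen(fd, 128) < 0) {
		close(fd);
		return -1;
	}
	return fd;	/* accept() then hands out TFO children as usual */
}

Children accepted from such a listener may already have data queued from the SYN, as set up by tcp_v4_conn_req_fastopen() above.
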
@@ -1285,6 +1476,11 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	__be32 daddr = ip_hdr(skb)->daddr;
 	__u32 isn = TCP_SKB_CB(skb)->when;
 	bool want_cookie = false;
+	struct flowi4 fl4;
+	struct tcp_fastopen_cookie foc = { .len = -1 };
+	struct tcp_fastopen_cookie valid_foc = { .len = -1 };
+	struct sk_buff *skb_synack;
+	int do_fastopen;
 
 	/* Never answer to SYNs send to broadcast or multicast */
 	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
@@ -1319,7 +1515,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	tcp_clear_options(&tmp_opt);
 	tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
 	tmp_opt.user_mss = tp->rx_opt.user_mss;
-	tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);
+	tcp_parse_options(skb, &tmp_opt, &hash_location, 0,
+			  want_cookie ? NULL : &foc);
 
 	if (tmp_opt.cookie_plus > 0 &&
 	    tmp_opt.saw_tstamp &&
@@ -1377,8 +1574,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
 		req->cookie_ts = tmp_opt.tstamp_ok;
 	} else if (!isn) {
-		struct flowi4 fl4;
-
 		/* VJ's idea. We save last timestamp seen
 		 * from the destination in peer table, when entering
 		 * state TIME-WAIT, and check against it before
@@ -1419,14 +1614,52 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	tcp_rsk(req)->snt_isn = isn;
 	tcp_rsk(req)->snt_synack = tcp_time_stamp;
 
-	if (tcp_v4_send_synack(sk, dst, req,
-			       (struct request_values *)&tmp_ext,
-			       skb_get_queue_mapping(skb),
-			       want_cookie) ||
-	    want_cookie)
+	if (dst == NULL) {
+		dst = inet_csk_route_req(sk, &fl4, req);
+		if (dst == NULL)
+			goto drop_and_free;
+	}
+	do_fastopen = tcp_fastopen_check(sk, skb, req, &foc, &valid_foc);
+
+	/* We don't call tcp_v4_send_synack() directly because we need
+	 * to make sure a child socket can be created successfully before
+	 * sending back synack!
+	 *
+	 * XXX (TFO) - Ideally one would simply call tcp_v4_send_synack()
+	 * (or better yet, call tcp_send_synack() in the child context
+	 * directly, but will have to fix a bunch of other code first)
+	 * after syn_recv_sock() except one will need to first fix the
+	 * latter to remove its dependency on the current implementation
+	 * of tcp_v4_send_synack()->tcp_select_initial_window().
+	 */
+	skb_synack = tcp_make_synack(sk, dst, req,
+				     (struct request_values *)&tmp_ext,
+				     fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL);
+
+	if (skb_synack) {
+		__tcp_v4_send_check(skb_synack, ireq->loc_addr, ireq->rmt_addr);
+		skb_set_queue_mapping(skb_synack, skb_get_queue_mapping(skb));
+	} else
+		goto drop_and_free;
+
+	if (likely(!do_fastopen)) {
+		int err;
+		err = ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr,
+					    ireq->rmt_addr, ireq->opt);
+		err = net_xmit_eval(err);
+		if (err || want_cookie)
+			goto drop_and_free;
+
+		tcp_rsk(req)->listener = NULL;
+		/* Add the request_sock to the SYN table */
+		inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+		if (fastopen_cookie_present(&foc) && foc.len != 0)
+			NET_INC_STATS_BH(sock_net(sk),
+					 LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
+	} else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req,
+					    (struct request_values *)&tmp_ext))
 		goto drop_and_free;
 
-	inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
 	return 0;
 
 drop_and_release:
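
The do_fastopen branch above is only exercised when a client actually carries data in the SYN. For reference, the existing client-side TFO support (merged earlier, not part of this patch) does that via sendto()/sendmsg() with MSG_FASTOPEN in place of connect(); the sketch below is illustrative only, and the helper name is made up for this example.

#include <netinet/in.h>
#include <sys/socket.h>

#ifndef MSG_FASTOPEN
#define MSG_FASTOPEN	0x20000000	/* carry the payload in the SYN */
#endif

/* Send the first request on a fresh, unconnected SOCK_STREAM socket;
 * this replaces connect() followed by send().
 */
static ssize_t tfo_send(int fd, const struct sockaddr_in *srv,
			const void *buf, size_t len)
{
	/* Without a cached cookie the kernel sends a cookie-requesting SYN
	 * and transmits buf once the handshake completes; on subsequent
	 * connections buf rides in the SYN and lands in the server's
	 * tcp_v4_conn_req_fastopen() path above.
	 */
	return sendto(fd, buf, len, MSG_FASTOPEN,
		      (const struct sockaddr *)srv, sizeof(*srv));
}
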
@@ -1977,6 +2210,7 @@ void tcp_v4_destroy_sock(struct sock *sk)
 				  tcp_cookie_values_release);
 		tp->cookie_values = NULL;
 	}
+	BUG_ON(tp->fastopen_rsk != NULL);
 
 	/* If socket is aborted during connect operation */
 	tcp_free_fastopen_req(tp);
@@ -2425,6 +2659,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
 	const struct tcp_sock *tp = tcp_sk(sk);
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	const struct inet_sock *inet = inet_sk(sk);
+	struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq;
 	__be32 dest = inet->inet_daddr;
 	__be32 src = inet->inet_rcv_saddr;
 	__u16 destp = ntohs(inet->inet_dport);
@@ -2469,7 +2704,9 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
 		jiffies_to_clock_t(icsk->icsk_ack.ato),
 		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
 		tp->snd_cwnd,
-		tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
+		sk->sk_state == TCP_LISTEN ?
+		    (fastopenq ? fastopenq->max_qlen : 0) :
+		    (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh),
 		len);
 }
 