From 0e45f4da5981895e885dd72fe912a3f8e32bae73 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 18 Nov 2015 18:17:30 -0800 Subject: tcp: disable Fast Open on timeouts after handshake Some middle-boxes black-hole the data after the Fast Open handshake (https://www.ietf.org/proceedings/94/slides/slides-94-tcpm-13.pdf). The exact reason is unknown. The work-around is to disable Fast Open temporarily after multiple recurring timeouts with little or no data delivered in the established state. Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Reported-by: Christoph Paasch Signed-off-by: David S. Miller --- net/ipv4/tcp_timer.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'net/ipv4/tcp_timer.c') diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index c9c716a483e4..448603a81966 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -176,6 +176,18 @@ static int tcp_write_timeout(struct sock *sk) syn_set = true; } else { if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0, 0)) { + /* Some middle-boxes may black-hole Fast Open _after_ + * the handshake. Therefore we conservatively disable + * Fast Open on this path on recurring timeouts with + * few or zero bytes acked after Fast Open. + */ + if (tp->syn_data_acked && + tp->bytes_acked <= tp->rx_opt.mss_clamp) { + tcp_fastopen_cache_set(sk, 0, NULL, true, 0); + if (icsk->icsk_retransmits == sysctl_tcp_retries1) + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPFASTOPENACTIVEFAIL); + } /* Black hole detection */ tcp_mtu_probing(icsk, sk); -- cgit v1.2.3 From dd52bc2b4ed16db66f9347aa263d8f1dc889b4b6 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 18 Nov 2015 18:17:31 -0800 Subject: tcp: fix Fast Open snmp over-counting bug Fix incrementing TCPFastOpenActiveFailed snmp stats multiple times when the handshake experiences multiple SYN timeouts. Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv4/tcp_timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4/tcp_timer.c') diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 448603a81966..193ba1fa8a9a 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -168,7 +168,7 @@ static int tcp_write_timeout(struct sock *sk) dst_negative_advice(sk); if (tp->syn_fastopen || tp->syn_data) tcp_fastopen_cache_set(sk, 0, NULL, true, 0); - if (tp->syn_data) + if (tp->syn_data && icsk->icsk_retransmits == 1) NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVEFAIL); } -- cgit v1.2.3 From 13b287e8d1cad951634389f85b8c9b816bd3bb1e Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Thu, 7 Jan 2016 16:38:43 +0200 Subject: ipv4: Namespaceify tcp_keepalive_time sysctl knob Different net namespaces might have different requirements as to the keepalive time of tcp sockets. This might be required in cases where different firewall rules are in place which require tcp socket timeouts to be increased/decreased independently of the host. Signed-off-by: Nikolay Borisov Signed-off-by: David S. 
Miller --- net/ipv4/tcp_timer.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net/ipv4/tcp_timer.c') diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 193ba1fa8a9a..166f27b43cc0 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -24,7 +24,6 @@ int sysctl_tcp_syn_retries __read_mostly = TCP_SYN_RETRIES; int sysctl_tcp_synack_retries __read_mostly = TCP_SYNACK_RETRIES; -int sysctl_tcp_keepalive_time __read_mostly = TCP_KEEPALIVE_TIME; int sysctl_tcp_keepalive_probes __read_mostly = TCP_KEEPALIVE_PROBES; int sysctl_tcp_keepalive_intvl __read_mostly = TCP_KEEPALIVE_INTVL; int sysctl_tcp_retries1 __read_mostly = TCP_RETR1; -- cgit v1.2.3 From 9bd6861bd4326e3afd3f14a9ec8a723771fb20bb Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Thu, 7 Jan 2016 16:38:44 +0200 Subject: ipv4: Namespecify tcp_keepalive_probes sysctl knob This is required to have full tcp keepalive mechanism namespace support. Signed-off-by: Nikolay Borisov Signed-off-by: David S. Miller --- net/ipv4/tcp_timer.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net/ipv4/tcp_timer.c') diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 166f27b43cc0..0ccb120d591a 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -24,7 +24,6 @@ int sysctl_tcp_syn_retries __read_mostly = TCP_SYN_RETRIES; int sysctl_tcp_synack_retries __read_mostly = TCP_SYNACK_RETRIES; -int sysctl_tcp_keepalive_probes __read_mostly = TCP_KEEPALIVE_PROBES; int sysctl_tcp_keepalive_intvl __read_mostly = TCP_KEEPALIVE_INTVL; int sysctl_tcp_retries1 __read_mostly = TCP_RETR1; int sysctl_tcp_retries2 __read_mostly = TCP_RETR2; -- cgit v1.2.3 From b840d15d39128d08ed4486085e5507d2617b9ae1 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Thu, 7 Jan 2016 16:38:45 +0200 Subject: ipv4: Namespecify the tcp_keepalive_intvl sysctl knob This is the final part required to namespaceify the tcp keep alive mechanism. Signed-off-by: Nikolay Borisov Signed-off-by: David S. 
Miller --- net/ipv4/tcp_timer.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net/ipv4/tcp_timer.c') diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 0ccb120d591a..a4730a28b220 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -24,7 +24,6 @@ int sysctl_tcp_syn_retries __read_mostly = TCP_SYN_RETRIES; int sysctl_tcp_synack_retries __read_mostly = TCP_SYNACK_RETRIES; -int sysctl_tcp_keepalive_intvl __read_mostly = TCP_KEEPALIVE_INTVL; int sysctl_tcp_retries1 __read_mostly = TCP_RETR1; int sysctl_tcp_retries2 __read_mostly = TCP_RETR2; int sysctl_tcp_orphan_retries __read_mostly; -- cgit v1.2.3