From 3e59cb0ddfd2c59991f38e89352ad8a3c71b2374 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Fri, 17 May 2013 13:45:05 +0000 Subject: tcp: remove bad timeout logic in fast recovery tcp_timeout_skb() was intended to trigger fast recovery on timeout, unfortunately in reality it often causes spurious retransmission storms during fast recovery. The particular sign is a fast retransmit over the highest sacked sequence (SND.FACK). Currently the RTO timer re-arming (as in RFC6298) offers a nice cushion to avoid spurious timeout: when SND.UNA advances the sender re-arms RTO and extends the timeout by icsk_rto. The sender does not offset the time elapsed since the packet at SND.UNA was sent. But if the next (DUP)ACK arrives later than ~RTTVAR and triggers tcp_fastretrans_alert(), then tcp_timeout_skb() will mark any packet sent before the icsk_rto interval lost, including one that's above the highest sacked sequence. Most likely a large part of scorebard will be marked. If most packets are not lost then the subsequent DUPACKs with new SACK blocks will cause the sender to continue to retransmit packets beyond SND.FACK spuriously. Even if only one packet is lost the sender may falsely retransmit almost the entire window. The situation becomes common in the world of bufferbloat: the RTT continues to grow as the queue builds up but RTTVAR remains small and close to the minimum 200ms. If a data packet is lost and the DUPACK triggered by the next data packet is slightly delayed, then a spurious retransmission storm forms. As the original comment on tcp_timeout_skb() suggests: the usefulness of this feature is questionable. It also wastes cycles walking the sack scoreboard and is actually harmful because of false recovery. It's time to remove this. Signed-off-by: Yuchung Cheng Acked-by: Eric Dumazet Acked-by: Neal Cardwell Acked-by: Nandita Dukkipati Signed-off-by: David S. Miller --- include/net/tcp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net/tcp.h') diff --git a/include/net/tcp.h b/include/net/tcp.h index 5bba80fbd1d9..e1c3723f161f 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1193,7 +1193,6 @@ static inline void tcp_mib_init(struct net *net) static inline void tcp_clear_retrans_hints_partial(struct tcp_sock *tp) { tp->lost_skb_hint = NULL; - tp->scoreboard_skb_hint = NULL; } static inline void tcp_clear_all_retrans_hints(struct tcp_sock *tp) -- cgit v1.2.3 From 71cea17ed39fdf1c0634f530ddc6a2c2fc601c2b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 20 May 2013 06:52:26 +0000 Subject: tcp: md5: remove spinlock usage in fast path TCP md5 code uses per cpu variables but protects access to them with a shared spinlock, which is a contention point. [ tcp_md5sig_pool_lock is locked twice per incoming packet ] Makes things much simpler, by allocating crypto structures once, first time a socket needs md5 keys, and not deallocating them as they are really small. Next step would be to allow crypto allocations being done in a NUMA aware way. Signed-off-by: Eric Dumazet Cc: Herbert Xu Signed-off-by: David S. Miller --- include/net/tcp.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/net/tcp.h') diff --git a/include/net/tcp.h b/include/net/tcp.h index e1c3723f161f..bf1cc3dced5e 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1283,11 +1283,13 @@ static inline struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk, #define tcp_twsk_md5_key(twsk) NULL #endif -extern struct tcp_md5sig_pool __percpu *tcp_alloc_md5sig_pool(struct sock *); -extern void tcp_free_md5sig_pool(void); +extern bool tcp_alloc_md5sig_pool(void); extern struct tcp_md5sig_pool *tcp_get_md5sig_pool(void); -extern void tcp_put_md5sig_pool(void); +static inline void tcp_put_md5sig_pool(void) +{ + local_bh_enable(); +} extern int tcp_md5_hash_header(struct tcp_md5sig_pool *, const struct tcphdr *); extern int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, const struct sk_buff *, -- cgit v1.2.3 From 28850dc7c71da9d0c0e39246e9ff6913f41f8d0a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 7 Jun 2013 05:11:46 +0000 Subject: net: tcp: move GRO/GSO functions to tcp_offload Would be good to make things explicit and move those functions to a new file called tcp_offload.c, thus make this similar to tcpv6_offload.c. While moving all related functions into tcp_offload.c, we can also make some of them static, since they are only used there. Also, add an explicit registration function. Suggested-by: Eric Dumazet Signed-off-by: Daniel Borkmann Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/net/tcp.h') diff --git a/include/net/tcp.h b/include/net/tcp.h index bf1cc3dced5e..0d637e9403a5 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1541,15 +1541,14 @@ extern struct request_sock_ops tcp6_request_sock_ops; extern void tcp_v4_destroy_sock(struct sock *sk); -extern int tcp_v4_gso_send_check(struct sk_buff *skb); extern struct sk_buff *tcp_tso_segment(struct sk_buff *skb, netdev_features_t features); extern struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb); -extern struct sk_buff **tcp4_gro_receive(struct sk_buff **head, - struct sk_buff *skb); extern int tcp_gro_complete(struct sk_buff *skb); -extern int tcp4_gro_complete(struct sk_buff *skb); + +extern void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, + __be32 daddr); #ifdef CONFIG_PROC_FS extern int tcp4_proc_init(void); @@ -1584,6 +1583,8 @@ struct tcp_request_sock_ops { #endif }; +extern int tcpv4_offload_init(void); + extern void tcp_v4_init(void); extern void tcp_init(void); -- cgit v1.2.3 From 85f16525a2eb66e6092cbd8dcf42371df8334ed0 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Tue, 11 Jun 2013 15:35:32 -0700 Subject: tcp: properly send new data in fast recovery in first RTT Linux sends new unset data during disorder and recovery state if all (suspected) lost packets have been retransmitted ( RFC5681, section 3.2 step 1 & 2, RFC3517 section 4, NexSeg() Rule 2). One requirement is to keep the receive window about twice the estimated sender's congestion window (tcp_rcv_space_adjust()), assuming the fast retransmits repair the losses in the next round trip. But currently it's not the case on the first round trip in either normal or Fast Open connection, beucase the initial receive window is identical to (expected) sender's initial congestion window. The fix is to double it. Signed-off-by: Yuchung Cheng Acked-by: Neal Cardwell Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/net/tcp.h') diff --git a/include/net/tcp.h b/include/net/tcp.h index 0d637e9403a5..6fa80831dc40 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -61,9 +61,6 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); */ #define MAX_TCP_WINDOW 32767U -/* Offer an initial receive window of 10 mss. */ -#define TCP_DEFAULT_INIT_RCVWND 10 - /* Minimal accepted MSS. It is (60+60+8) - (20+20). */ #define TCP_MIN_MSS 88U @@ -1047,6 +1044,8 @@ static inline void tcp_sack_reset(struct tcp_options_received *rx_opt) rx_opt->num_sacks = 0; } +extern u32 tcp_default_init_rwnd(u32 mss); + /* Determine a window scaling and initial window to offer. */ extern void tcp_select_initial_window(int __space, __u32 mss, __u32 *rcv_wnd, __u32 *window_clamp, -- cgit v1.2.3 From 7ae8639c9d6d5aba7990b4fad3506ff7b4667a45 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 25 Jun 2013 01:21:06 -0700 Subject: tcp: remove invalid __rcu annotation struct tcp_fastopen_context has a field named tfm, which is a pointer to a crypto_cipher structure. It currently has a __rcu annotation, which is not needed at all. tcp_fastopen_ctx is the pointer fetched by rcu_dereference(), but once we have a pointer to current tcp_fastopen_context, we do not use/need rcu_dereference() to access tfm. This fixes a lot of sparse errors like the following : net/ipv4/tcp_fastopen.c:21:31: warning: incorrect type in argument 1 (different address spaces) net/ipv4/tcp_fastopen.c:21:31: expected struct crypto_cipher *tfm net/ipv4/tcp_fastopen.c:21:31: got struct crypto_cipher [noderef] *tfm Signed-off-by: Eric Dumazet Cc: Jerry Chu Signed-off-by: David S. Miller --- include/net/tcp.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/net/tcp.h') diff --git a/include/net/tcp.h b/include/net/tcp.h index 6fa80831dc40..d1980054ec75 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1319,9 +1319,9 @@ void tcp_fastopen_cookie_gen(__be32 addr, struct tcp_fastopen_cookie *foc); /* Fastopen key context */ struct tcp_fastopen_context { - struct crypto_cipher __rcu *tfm; - __u8 key[TCP_FASTOPEN_KEY_LENGTH]; - struct rcu_head rcu; + struct crypto_cipher *tfm; + __u8 key[TCP_FASTOPEN_KEY_LENGTH]; + struct rcu_head rcu; }; /* write queue abstraction */ -- cgit v1.2.3