From 6c9ff979d1921e9fd05d89e1383121c2503759b9 Mon Sep 17 00:00:00 2001 From: Alex Bergmann Date: Fri, 31 Aug 2012 02:48:31 +0000 Subject: tcp: Increase timeout for SYN segments Commit 9ad7c049 ("tcp: RFC2988bis + taking RTT sample from 3WHS for the passive open side") changed the initRTO from 3secs to 1sec in accordance to RFC6298 (former RFC2988bis). This reduced the time till the last SYN retransmission packet gets sent from 93secs to 31secs. RFC1122 is stating that the retransmission should be done for at least 3 minutes, but this seems to be quite high. "However, the values of R1 and R2 may be different for SYN and data segments. In particular, R2 for a SYN segment MUST be set large enough to provide retransmission of the segment for at least 3 minutes. The application can close the connection (i.e., give up on the open attempt) sooner, of course." This patch increases the value of TCP_SYN_RETRIES to the value of 6, providing a retransmission window of 63secs. The comments for SYN and SYNACK retries have also been updated to describe the current settings. The same goes for the documentation file "Documentation/networking/ip-sysctl.txt". Signed-off-by: Alexander Bergmann Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'include/net/tcp.h') diff --git a/include/net/tcp.h b/include/net/tcp.h index 9a0021d16d91..0fca06f16463 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -98,11 +98,21 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); * 15 is ~13-30min depending on RTO. */ -#define TCP_SYN_RETRIES 5 /* number of times to retry active opening a - * connection: ~180sec is RFC minimum */ +#define TCP_SYN_RETRIES 6 /* This is how many retries are done + * when active opening a connection. + * RFC1122 says the minimum retry MUST + * be at least 180secs. Nevertheless + * this value is corresponding to + * 63secs of retransmission with the + * current initial RTO. + */ -#define TCP_SYNACK_RETRIES 5 /* number of times to retry passive opening a - * connection: ~180sec is RFC minimum */ +#define TCP_SYNACK_RETRIES 5 /* This is how may retries are done + * when passive opening a connection. + * This is corresponding to 31secs of + * retransmission with the current + * initial RTO. + */ #define TCP_TIMEWAIT_LEN (60*HZ) /* how long to wait to destroy TIME-WAIT * state, about 60 seconds */ -- cgit v1.2.3 From 1046716368979dee857a2b8a91c4a8833f21b9cb Mon Sep 17 00:00:00 2001 From: Jerry Chu Date: Fri, 31 Aug 2012 12:29:11 +0000 Subject: tcp: TCP Fast Open Server - header & support functions This patch adds all the necessary data structure and support functions to implement TFO server side. It also documents a number of flags for the sysctl_tcp_fastopen knob, and adds a few Linux extension MIBs. In addition, it includes the following: 1. a new TCP_FASTOPEN socket option an application must call to supply a max backlog allowed in order to enable TFO on its listener. 2. A number of key data structures: "fastopen_rsk" in tcp_sock - for a big socket to access its request_sock for retransmission and ack processing purpose. It is non-NULL iff 3WHS not completed. "fastopenq" in request_sock_queue - points to a per Fast Open listener data structure "fastopen_queue" to keep track of qlen (# of outstanding Fast Open requests) and max_qlen, among other things. "listener" in tcp_request_sock - to point to the original listener for book-keeping purpose, i.e., to maintain qlen against max_qlen as part of defense against IP spoofing attack. 3. various data structure and functions, many in tcp_fastopen.c, to support server side Fast Open cookie operations, including /proc/sys/net/ipv4/tcp_fastopen_key to allow manual rekeying. Signed-off-by: H.K. Jerry Chu Cc: Yuchung Cheng Cc: Neal Cardwell Cc: Eric Dumazet Cc: Tom Herbert Signed-off-by: David S. Miller --- include/net/tcp.h | 46 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 39 insertions(+), 7 deletions(-) (limited to 'include/net/tcp.h') diff --git a/include/net/tcp.h b/include/net/tcp.h index 0fca06f16463..9f8821e3293a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -224,8 +224,24 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); /* Bit Flags for sysctl_tcp_fastopen */ #define TFO_CLIENT_ENABLE 1 +#define TFO_SERVER_ENABLE 2 #define TFO_CLIENT_NO_COOKIE 4 /* Data in SYN w/o cookie option */ +/* Process SYN data but skip cookie validation */ +#define TFO_SERVER_COOKIE_NOT_CHKED 0x100 +/* Accept SYN data w/o any cookie option */ +#define TFO_SERVER_COOKIE_NOT_REQD 0x200 + +/* Force enable TFO on all listeners, i.e., not requiring the + * TCP_FASTOPEN socket option. SOCKOPT1/2 determine how to set max_qlen. + */ +#define TFO_SERVER_WO_SOCKOPT1 0x400 +#define TFO_SERVER_WO_SOCKOPT2 0x800 +/* Always create TFO child sockets on a TFO listener even when + * cookie/data not present. (For testing purpose!) + */ +#define TFO_SERVER_ALWAYS 0x1000 + extern struct inet_timewait_death_row tcp_death_row; /* sysctl variables for tcp */ @@ -421,12 +437,6 @@ extern void tcp_metrics_init(void); extern bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, bool paws_check); extern bool tcp_remember_stamp(struct sock *sk); extern bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw); -extern void tcp_fastopen_cache_get(struct sock *sk, u16 *mss, - struct tcp_fastopen_cookie *cookie, - int *syn_loss, unsigned long *last_syn_loss); -extern void tcp_fastopen_cache_set(struct sock *sk, u16 mss, - struct tcp_fastopen_cookie *cookie, - bool syn_lost); extern void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst); extern void tcp_disable_fack(struct tcp_sock *tp); extern void tcp_close(struct sock *sk, long timeout); @@ -537,6 +547,7 @@ extern void tcp_send_delayed_ack(struct sock *sk); extern void tcp_cwnd_application_limited(struct sock *sk); extern void tcp_resume_early_retransmit(struct sock *sk); extern void tcp_rearm_rto(struct sock *sk); +extern void tcp_reset(struct sock *sk); /* tcp_timer.c */ extern void tcp_init_xmit_timers(struct sock *); @@ -586,6 +597,7 @@ extern int tcp_mtu_to_mss(struct sock *sk, int pmtu); extern int tcp_mss_to_mtu(struct sock *sk, int mss); extern void tcp_mtup_init(struct sock *sk); extern void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt); +extern void tcp_init_buffer_space(struct sock *sk); static inline void tcp_bound_rto(const struct sock *sk) { @@ -1104,6 +1116,7 @@ static inline void tcp_openreq_init(struct request_sock *req, req->rcv_wnd = 0; /* So that tcp_send_synack() knows! */ req->cookie_ts = 0; tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq; + tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; req->mss = rx_opt->mss_clamp; req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0; ireq->tstamp_ok = rx_opt->tstamp_ok; @@ -1308,15 +1321,34 @@ extern int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, const struct sk_buff extern int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, const struct tcp_md5sig_key *key); +/* From tcp_fastopen.c */ +extern void tcp_fastopen_cache_get(struct sock *sk, u16 *mss, + struct tcp_fastopen_cookie *cookie, + int *syn_loss, unsigned long *last_syn_loss); +extern void tcp_fastopen_cache_set(struct sock *sk, u16 mss, + struct tcp_fastopen_cookie *cookie, + bool syn_lost); struct tcp_fastopen_request { /* Fast Open cookie. Size 0 means a cookie request */ struct tcp_fastopen_cookie cookie; struct msghdr *data; /* data in MSG_FASTOPEN */ u16 copied; /* queued in tcp_connect() */ }; - void tcp_free_fastopen_req(struct tcp_sock *tp); +extern struct tcp_fastopen_context __rcu *tcp_fastopen_ctx; +int tcp_fastopen_reset_cipher(void *key, unsigned int len); +void tcp_fastopen_cookie_gen(__be32 addr, struct tcp_fastopen_cookie *foc); + +#define TCP_FASTOPEN_KEY_LENGTH 16 + +/* Fastopen key context */ +struct tcp_fastopen_context { + struct crypto_cipher __rcu *tfm; + __u8 key[TCP_FASTOPEN_KEY_LENGTH]; + struct rcu_head rcu; +}; + /* write queue abstraction */ static inline void tcp_write_queue_purge(struct sock *sk) { -- cgit v1.2.3 From 8336886f786fdacbc19b719c1f7ea91eb70706d4 Mon Sep 17 00:00:00 2001 From: Jerry Chu Date: Fri, 31 Aug 2012 12:29:12 +0000 Subject: tcp: TCP Fast Open Server - support TFO listeners This patch builds on top of the previous patch to add the support for TFO listeners. This includes - 1. allocating, properly initializing, and managing the per listener fastopen_queue structure when TFO is enabled 2. changes to the inet_csk_accept code to support TFO. E.g., the request_sock can no longer be freed upon accept(), not until 3WHS finishes 3. allowing a TCP_SYN_RECV socket to properly poll() and sendmsg() if it's a TFO socket 4. properly closing a TFO listener, and a TFO socket before 3WHS finishes 5. supporting TCP_FASTOPEN socket option 6. modifying tcp_check_req() to use to check a TFO socket as well as request_sock 7. supporting TCP's TFO cookie option 8. adding a new SYN-ACK retransmit handler to use the timer directly off the TFO socket rather than the listener socket. Note that TFO server side will not retransmit anything other than SYN-ACK until the 3WHS is completed. The patch also contains an important function "reqsk_fastopen_remove()" to manage the somewhat complex relation between a listener, its request_sock, and the corresponding child socket. See the comment above the function for the detail. Signed-off-by: H.K. Jerry Chu Cc: Yuchung Cheng Cc: Neal Cardwell Cc: Eric Dumazet Cc: Tom Herbert Signed-off-by: David S. Miller --- include/net/tcp.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/net/tcp.h') diff --git a/include/net/tcp.h b/include/net/tcp.h index 9f8821e3293a..1421b02a7905 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -424,7 +424,8 @@ extern enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock * const struct tcphdr *th); extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb, struct request_sock *req, - struct request_sock **prev); + struct request_sock **prev, + bool fastopen); extern int tcp_child_process(struct sock *parent, struct sock *child, struct sk_buff *skb); extern bool tcp_use_frto(struct sock *sk); @@ -478,7 +479,8 @@ extern int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, extern int tcp_connect(struct sock *sk); extern struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, struct request_sock *req, - struct request_values *rvp); + struct request_values *rvp, + struct tcp_fastopen_cookie *foc); extern int tcp_disconnect(struct sock *sk, int flags); void tcp_connect_init(struct sock *sk); -- cgit v1.2.3 From 684bad1107571d35610a674c61b3544efb5a5b13 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Sun, 2 Sep 2012 17:38:04 +0000 Subject: tcp: use PRR to reduce cwin in CWR state Use proportional rate reduction (PRR) algorithm to reduce cwnd in CWR state, in addition to Recovery state. Retire the current rate-halving in CWR. When losses are detected via ACKs in CWR state, the sender enters Recovery state but the cwnd reduction continues and does not restart. Rename and refactor cwnd reduction functions since both CWR and Recovery use the same algorithm: tcp_init_cwnd_reduction() is new and initiates reduction state variables. tcp_cwnd_reduction() is previously tcp_update_cwnd_in_recovery(). tcp_ends_cwnd_reduction() is previously tcp_complete_cwr(). The rate halving functions and logic such as tcp_cwnd_down(), tcp_min_cwnd(), and the cwnd moderation inside tcp_enter_cwr() are removed. The unused parameter, flag, in tcp_cwnd_reduction() is also removed. Signed-off-by: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- include/net/tcp.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/net/tcp.h') diff --git a/include/net/tcp.h b/include/net/tcp.h index 1421b02a7905..a8cb00c0c6d9 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -913,15 +913,21 @@ static inline bool tcp_in_initial_slowstart(const struct tcp_sock *tp) return tp->snd_ssthresh >= TCP_INFINITE_SSTHRESH; } +static inline bool tcp_in_cwnd_reduction(const struct sock *sk) +{ + return (TCPF_CA_CWR | TCPF_CA_Recovery) & + (1 << inet_csk(sk)->icsk_ca_state); +} + /* If cwnd > ssthresh, we may raise ssthresh to be half-way to cwnd. - * The exception is rate halving phase, when cwnd is decreasing towards + * The exception is cwnd reduction phase, when cwnd is decreasing towards * ssthresh. */ static inline __u32 tcp_current_ssthresh(const struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); - if ((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_CWR | TCPF_CA_Recovery)) + if (tcp_in_cwnd_reduction(sk)) return tp->snd_ssthresh; else return max(tp->snd_ssthresh, -- cgit v1.2.3 From 623df484a777f3c00c1ea3d6a7565b8d8ac688a1 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Sat, 22 Sep 2012 04:18:54 +0000 Subject: tcp: extract code to compute SYNACK RTT In preparation for adding another spot where we compute the SYNACK RTT, extract this code so that it can be shared. Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- include/net/tcp.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/net/tcp.h') diff --git a/include/net/tcp.h b/include/net/tcp.h index a8cb00c0c6d9..a718d0e3d8e7 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1137,6 +1137,15 @@ static inline void tcp_openreq_init(struct request_sock *req, ireq->loc_port = tcp_hdr(skb)->dest; } +/* Compute time elapsed between SYNACK and the ACK completing 3WHS */ +static inline void tcp_synack_rtt_meas(struct sock *sk, + struct request_sock *req) +{ + if (tcp_rsk(req)->snt_synack) + tcp_valid_rtt_meas(sk, + tcp_time_stamp - tcp_rsk(req)->snt_synack); +} + extern void tcp_enter_memory_pressure(struct sock *sk); static inline int keepalive_intvl_when(const struct tcp_sock *tp) -- cgit v1.2.3 From 016818d076871c4ee34db1e8d74dc17ac1de626a Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Sat, 22 Sep 2012 04:18:55 +0000 Subject: tcp: TCP Fast Open Server - take SYNACK RTT after completing 3WHS When taking SYNACK RTT samples for servers using TCP Fast Open, fix the code to ensure that we only call tcp_valid_rtt_meas() after we receive the ACK that completes the 3-way handshake. Previously we were always taking an RTT sample in tcp_v4_syn_recv_sock(). However, for TCP Fast Open connections tcp_v4_conn_req_fastopen() calls tcp_v4_syn_recv_sock() at the time we receive the SYN. So for TFO we must wait until tcp_rcv_state_process() to take the RTT sample. To fix this, we wait until after TFO calls tcp_v4_syn_recv_sock() before we set the snt_synack timestamp, since tcp_synack_rtt_meas() already ensures that we only take a SYNACK RTT sample if snt_synack is non-zero. To be careful, we only take a snt_synack timestamp when a SYNACK transmit or retransmit succeeds. Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- include/net/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net/tcp.h') diff --git a/include/net/tcp.h b/include/net/tcp.h index a718d0e3d8e7..6feeccd83dd7 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1125,6 +1125,7 @@ static inline void tcp_openreq_init(struct request_sock *req, req->cookie_ts = 0; tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq; tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; + tcp_rsk(req)->snt_synack = 0; req->mss = rx_opt->mss_clamp; req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0; ireq->tstamp_ok = rx_opt->tstamp_ok; -- cgit v1.2.3