From 5d134f1c1f36166e8a738de92c4d2f4c262ff91b Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sat, 5 Jan 2013 16:10:48 +0000 Subject: tcp: make sysctl_tcp_ecn namespace aware As per suggestion from Eric Dumazet this patch makes tcp_ecn sysctl namespace aware. The reason behind this patch is to ease the testing of ecn problems on the internet and allows applications to tune their own use of ecn. Cc: Eric Dumazet Cc: David Miller Cc: Stephen Hemminger Signed-off-by: Hannes Frederic Sowa Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6/tcp_ipv6.c') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 93825dd3a7c0..3164ad272a74 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1027,7 +1027,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) treq->rmt_addr = ipv6_hdr(skb)->saddr; treq->loc_addr = ipv6_hdr(skb)->daddr; if (!want_cookie || tmp_opt.tstamp_ok) - TCP_ECN_create_request(req, skb); + TCP_ECN_create_request(req, skb, sock_net(sk)); treq->iif = sk->sk_bound_dev_if; -- cgit v1.2.3 From e7219858ac1f98213a4714d0e24e7a003e1bf6a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?YOSHIFUJI=20Hideaki=20/=20=E5=90=89=E8=97=A4=E8=8B=B1?= =?UTF-8?q?=E6=98=8E?= Date: Sun, 13 Jan 2013 05:02:01 +0000 Subject: ipv6: Use ipv6_get_dsfield() instead of ipv6_tclass(). Commit 7a3198a8 ("ipv6: helper function to get tclass") introduced ipv6_tclass(), but similar function is already available as ipv6_get_dsfield(). We might be able to call ipv6_tclass() from ipv6_get_dsfield(), but it is confusing to have two versions. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/ipv6/tcp_ipv6.c') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 3164ad272a74..3701c3c6e2eb 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1163,7 +1163,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newnp->opt = NULL; newnp->mcast_oif = inet6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; - newnp->rcv_tclass = ipv6_tclass(ipv6_hdr(skb)); + newnp->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb)); /* * No need to charge this sock to the relevant IPv6 refcnt debug socks count @@ -1243,7 +1243,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newnp->opt = NULL; newnp->mcast_oif = inet6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; - newnp->rcv_tclass = ipv6_tclass(ipv6_hdr(skb)); + newnp->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb)); /* Clone native IPv6 options from listening socket (if any) @@ -1456,7 +1456,7 @@ ipv6_pktoptions: if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit; if (np->rxopt.bits.rxtclass) - np->rcv_tclass = ipv6_tclass(ipv6_hdr(skb)); + np->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb)); if (ipv6_opt_accepted(sk, opt_skb)) { skb_set_owner_r(opt_skb, sk); opt_skb = xchg(&np->pktoptions, opt_skb); -- cgit v1.2.3 From 5ba24953e9707387cce87b07f0d5fbdd03c5c11b Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 22 Jan 2013 09:50:39 +0000 Subject: soreuseport: TCP/IPv6 implementation Motivation for soreuseport would be something like a web server binding to port 80 running with multiple threads, where each thread might have it's own listener socket. This could be done as an alternative to other models: 1) have one listener thread which dispatches completed connections to workers. 2) accept on a single listener socket from multiple threads. In case #1 the listener thread can easily become the bottleneck with high connection turn-over rate. In case #2, the proportion of connections accepted per thread tends to be uneven under high connection load (assuming simple event loop: while (1) { accept(); process() }, wakeup does not promote fairness among the sockets. We have seen the disproportion to be as high as 3:1 ratio between thread accepting most connections and the one accepting the fewest. With so_reusport the distribution is uniform. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net/ipv6/tcp_ipv6.c') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 3701c3c6e2eb..06087e58738a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -834,7 +834,8 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) * no RST generated if md5 hash doesn't match. */ sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev), - &tcp_hashinfo, &ipv6h->daddr, + &tcp_hashinfo, &ipv6h->saddr, + th->source, &ipv6h->daddr, ntohs(th->source), inet6_iif(skb)); if (!sk1) return; @@ -1598,6 +1599,7 @@ do_time_wait: struct sock *sk2; sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo, + &ipv6_hdr(skb)->saddr, th->source, &ipv6_hdr(skb)->daddr, ntohs(th->dest), inet6_iif(skb)); if (sk2 != NULL) { -- cgit v1.2.3 From 5f1e942cb45d06968b0ce94472d97014e0e1fdc9 Mon Sep 17 00:00:00 2001 From: Vijay Subramanian Date: Thu, 31 Jan 2013 08:24:19 +0000 Subject: tcp: ipv6: Update MIB counters for drops This patch updates LINUX_MIB_LISTENDROPS and LINUX_MIB_LISTENOVERFLOWS in tcp_v6_conn_request() and tcp_v6_err(). tcp_v6_conn_request() in particular can drop SYNs for various reasons which are not currently tracked. Signed-off-by: Vijay Subramanian Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net/ipv6/tcp_ipv6.c') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 93825dd3a7c0..4f43537197ef 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -423,6 +423,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } inet_csk_reqsk_queue_drop(sk, req, prev); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); goto out; case TCP_SYN_SENT: @@ -958,8 +959,10 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) goto drop; } - if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) + if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) { + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); goto drop; + } req = inet6_reqsk_alloc(&tcp6_request_sock_ops); if (req == NULL) @@ -1108,6 +1111,7 @@ drop_and_release: drop_and_free: reqsk_free(req); drop: + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); return 0; /* don't send reset */ } -- cgit v1.2.3 From ee684b6f2830047d19877e5547989740f18b1a5d Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Mon, 11 Feb 2013 05:50:19 +0000 Subject: tcp: send packets with a socket timestamp A socket timestamp is a sum of the global tcp_time_stamp and a per-socket offset. A socket offset is added in places where externally visible tcp timestamp option is parsed/initialized. Connections in the SYN_RECV state are not supported, global tcp_time_stamp is used for them, because repair mode doesn't support this state. In a future it can be implemented by the similar way as for TIME_WAIT sockets. Cc: "David S. Miller" Cc: Alexey Kuznetsov Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Cc: Eric Dumazet Cc: Pavel Emelyanov Signed-off-by: Andrey Vagin Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'net/ipv6/tcp_ipv6.c') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index bbb28ae7e5f3..9b6460055df5 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -713,7 +713,8 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { #endif static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, - u32 ts, struct tcp_md5sig_key *key, int rst, u8 tclass) + u32 tsval, u32 tsecr, + struct tcp_md5sig_key *key, int rst, u8 tclass) { const struct tcphdr *th = tcp_hdr(skb); struct tcphdr *t1; @@ -725,7 +726,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, struct dst_entry *dst; __be32 *topt; - if (ts) + if (tsecr) tot_len += TCPOLEN_TSTAMP_ALIGNED; #ifdef CONFIG_TCP_MD5SIG if (key) @@ -755,11 +756,11 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, topt = (__be32 *)(t1 + 1); - if (ts) { + if (tsecr) { *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); - *topt++ = htonl(tcp_time_stamp); - *topt++ = htonl(ts); + *topt++ = htonl(tsval); + *topt++ = htonl(tsecr); } #ifdef CONFIG_TCP_MD5SIG @@ -860,7 +861,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len - (th->doff << 2); - tcp_v6_send_response(skb, seq, ack_seq, 0, 0, key, 1, 0); + tcp_v6_send_response(skb, seq, ack_seq, 0, 0, 0, key, 1, 0); #ifdef CONFIG_TCP_MD5SIG release_sk1: @@ -871,10 +872,11 @@ release_sk1: #endif } -static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts, +static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, + u32 win, u32 tsval, u32 tsecr, struct tcp_md5sig_key *key, u8 tclass) { - tcp_v6_send_response(skb, seq, ack, win, ts, key, 0, tclass); + tcp_v6_send_response(skb, seq, ack, win, tsval, tsecr, key, 0, tclass); } static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) @@ -884,6 +886,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, + tcp_time_stamp + tcptw->tw_ts_offset, tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw), tw->tw_tclass); @@ -893,7 +896,8 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, struct request_sock *req) { - tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent, + tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, + req->rcv_wnd, tcp_time_stamp, req->ts_recent, tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 0); } -- cgit v1.2.3