diff options
Diffstat (limited to 'net/ipv4/tcp_ipv4.c')
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 90 |
1 files changed, 54 insertions, 36 deletions
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 2259114c7242..575e19dcc017 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -84,7 +84,6 @@ #include <crypto/hash.h> #include <linux/scatterlist.h> -int sysctl_tcp_tw_reuse __read_mostly; int sysctl_tcp_low_latency __read_mostly; #ifdef CONFIG_TCP_MD5SIG @@ -95,12 +94,12 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, struct inet_hashinfo tcp_hashinfo; EXPORT_SYMBOL(tcp_hashinfo); -static __u32 tcp_v4_init_sequence(const struct sk_buff *skb) +static u32 tcp_v4_init_sequence(const struct sk_buff *skb, u32 *tsoff) { return secure_tcp_sequence_number(ip_hdr(skb)->daddr, ip_hdr(skb)->saddr, tcp_hdr(skb)->dest, - tcp_hdr(skb)->source); + tcp_hdr(skb)->source, tsoff); } int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) @@ -120,7 +119,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) and use initial timestamp retrieved from peer table. */ if (tcptw->tw_ts_recent_stamp && - (!twp || (sysctl_tcp_tw_reuse && + (!twp || (sock_net(sk)->ipv4.sysctl_tcp_tw_reuse && get_seconds() - tcptw->tw_ts_recent_stamp > 1))) { tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; if (tp->write_seq == 0) @@ -146,7 +145,9 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) struct flowi4 *fl4; struct rtable *rt; int err; + u32 seq; struct ip_options_rcu *inet_opt; + struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; if (addr_len < sizeof(struct sockaddr_in)) return -EINVAL; @@ -197,7 +198,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) tp->write_seq = 0; } - if (tcp_death_row.sysctl_tw_recycle && + if (tcp_death_row->sysctl_tw_recycle && !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) tcp_fetch_timewait_stamp(sk, &rt->dst); @@ -216,7 +217,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) * complete initialization after this. */ tcp_set_state(sk, TCP_SYN_SENT); - err = inet_hash_connect(&tcp_death_row, sk); + err = inet_hash_connect(tcp_death_row, sk); if (err) goto failure; @@ -232,18 +233,27 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) /* OK, now commit destination to socket. */ sk->sk_gso_type = SKB_GSO_TCPV4; sk_setup_caps(sk, &rt->dst); + rt = NULL; - if (!tp->write_seq && likely(!tp->repair)) - tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr, - inet->inet_daddr, - inet->inet_sport, - usin->sin_port); + if (likely(!tp->repair)) { + seq = secure_tcp_sequence_number(inet->inet_saddr, + inet->inet_daddr, + inet->inet_sport, + usin->sin_port, + &tp->tsoffset); + if (!tp->write_seq) + tp->write_seq = seq; + } inet->inet_id = tp->write_seq ^ jiffies; + if (tcp_fastopen_defer_connect(sk, &err)) + return err; + if (err) + goto failure; + err = tcp_connect(sk); - rt = NULL; if (err) goto failure; @@ -269,10 +279,13 @@ EXPORT_SYMBOL(tcp_v4_connect); */ void tcp_v4_mtu_reduced(struct sock *sk) { - struct dst_entry *dst; struct inet_sock *inet = inet_sk(sk); - u32 mtu = tcp_sk(sk)->mtu_info; + struct dst_entry *dst; + u32 mtu; + if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) + return; + mtu = tcp_sk(sk)->mtu_info; dst = inet_csk_update_pmtu(sk, mtu); if (!dst) return; @@ -418,7 +431,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) switch (type) { case ICMP_REDIRECT: - do_redirect(icmp_skb, sk); + if (!sock_owned_by_user(sk)) + do_redirect(icmp_skb, sk); goto out; case ICMP_SOURCE_QUENCH: /* Just silently ignore these. */ @@ -442,7 +456,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) if (!sock_owned_by_user(sk)) { tcp_v4_mtu_reduced(sk); } else { - if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags)) + if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &sk->sk_tsq_flags)) sock_hold(sk); } goto out; @@ -691,6 +705,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) offsetof(struct inet_timewait_sock, tw_bound_dev_if)); arg.tos = ip_hdr(skb)->tos; + arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL); local_bh_disable(); ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), skb, &TCP_SKB_CB(skb)->header.h4.opt, @@ -711,7 +726,7 @@ out: outside socket context is ugly, certainly. What can I do? */ -static void tcp_v4_send_ack(struct net *net, +static void tcp_v4_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, struct tcp_md5sig_key *key, @@ -726,6 +741,7 @@ static void tcp_v4_send_ack(struct net *net, #endif ]; } rep; + struct net *net = sock_net(sk); struct ip_reply_arg arg; memset(&rep.th, 0, sizeof(struct tcphdr)); @@ -775,6 +791,7 @@ static void tcp_v4_send_ack(struct net *net, if (oif) arg.bound_dev_if = oif; arg.tos = tos; + arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL); local_bh_disable(); ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), skb, &TCP_SKB_CB(skb)->header.h4.opt, @@ -790,7 +807,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) struct inet_timewait_sock *tw = inet_twsk(sk); struct tcp_timewait_sock *tcptw = tcp_twsk(sk); - tcp_v4_send_ack(sock_net(sk), skb, + tcp_v4_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcp_time_stamp + tcptw->tw_ts_offset, @@ -818,10 +835,10 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, * exception of <SYN> segments, MUST be right-shifted by * Rcv.Wind.Shift bits: */ - tcp_v4_send_ack(sock_net(sk), skb, seq, + tcp_v4_send_ack(sk, skb, seq, tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, - tcp_time_stamp, + tcp_time_stamp + tcp_rsk(req)->ts_off, req->ts_recent, 0, tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr, @@ -1315,10 +1332,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, tcp_ca_openreq_child(newsk, dst); tcp_sync_mss(newsk, dst_mtu(dst)); - newtp->advmss = dst_metric_advmss(dst); - if (tcp_sk(sk)->rx_opt.user_mss && - tcp_sk(sk)->rx_opt.user_mss < newtp->advmss) - newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; + newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst)); tcp_initialize_rcv_mss(newsk); @@ -1552,8 +1566,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) * It has been noticed pure SACK packets were sometimes dropped * (if cooked by drivers without copybreak feature). */ - if (!skb->data_len) - skb->truesize = SKB_TRUESIZE(skb_end_offset(skb)); + skb_condense(skb); if (unlikely(sk_add_backlog(sk, skb, limit))) { bh_unlock_sock(sk); @@ -1813,7 +1826,6 @@ const struct inet_connection_sock_af_ops ipv4_specific = { .getsockopt = ip_getsockopt, .addr2sockaddr = inet_csk_addr2sockaddr, .sockaddr_len = sizeof(struct sockaddr_in), - .bind_conflict = inet_csk_bind_conflict, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_ip_setsockopt, .compat_getsockopt = compat_ip_getsockopt, @@ -1884,9 +1896,7 @@ void tcp_v4_destroy_sock(struct sock *sk) tcp_free_fastopen_req(tp); tcp_saved_syn_free(tp); - local_bh_disable(); sk_sockets_allocated_dec(sk); - local_bh_enable(); } EXPORT_SYMBOL(tcp_v4_destroy_sock); @@ -1908,7 +1918,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur) if (!sk) { get_head: ilb = &tcp_hashinfo.listening_hash[st->bucket]; - spin_lock_bh(&ilb->lock); + spin_lock(&ilb->lock); sk = sk_head(&ilb->head); st->offset = 0; goto get_sk; @@ -1925,7 +1935,7 @@ get_sk: if (sk->sk_family == st->family) return sk; } - spin_unlock_bh(&ilb->lock); + spin_unlock(&ilb->lock); st->offset = 0; if (++st->bucket < INET_LHTABLE_SIZE) goto get_head; @@ -2133,7 +2143,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v) switch (st->state) { case TCP_SEQ_STATE_LISTENING: if (v != SEQ_START_TOKEN) - spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock); + spin_unlock(&tcp_hashinfo.listening_hash[st->bucket].lock); break; case TCP_SEQ_STATE_ESTABLISHED: if (v) @@ -2225,7 +2235,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i) int state; if (icsk->icsk_pending == ICSK_TIME_RETRANS || - icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || + icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { timer_active = 1; timer_expires = icsk->icsk_timeout; @@ -2372,6 +2382,7 @@ struct proto tcp_prot = { .shutdown = tcp_shutdown, .setsockopt = tcp_setsockopt, .getsockopt = tcp_getsockopt, + .keepalive = tcp_set_keepalive, .recvmsg = tcp_recvmsg, .sendmsg = tcp_sendmsg, .sendpage = tcp_sendpage, @@ -2415,7 +2426,7 @@ static void __net_exit tcp_sk_exit(struct net *net) static int __net_init tcp_sk_init(struct net *net) { - int res, cpu; + int res, cpu, cnt; net->ipv4.tcp_sk = alloc_percpu(struct sock *); if (!net->ipv4.tcp_sk) @@ -2452,6 +2463,14 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_orphan_retries = 0; net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT; net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX; + net->ipv4.sysctl_tcp_tw_reuse = 0; + + cnt = tcp_hashinfo.ehash_mask + 1; + net->ipv4.tcp_death_row.sysctl_tw_recycle = 0; + net->ipv4.tcp_death_row.sysctl_max_tw_buckets = (cnt + 1) / 2; + net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo; + + net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 256); return 0; fail: @@ -2462,7 +2481,7 @@ fail: static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) { - inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET); + inet_twsk_purge(&tcp_hashinfo, AF_INET); } static struct pernet_operations __net_initdata tcp_sk_ops = { @@ -2473,7 +2492,6 @@ static struct pernet_operations __net_initdata tcp_sk_ops = { void __init tcp_v4_init(void) { - inet_hashinfo_init(&tcp_hashinfo); if (register_pernet_subsys(&tcp_sk_ops)) panic("Failed to create the TCP control socket.\n"); } |