summaryrefslogtreecommitdiff
path: root/net/ipv4/tcp_ipv4.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/tcp_ipv4.c')
-rw-r--r--net/ipv4/tcp_ipv4.c90
1 files changed, 54 insertions, 36 deletions
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 2259114c7242..575e19dcc017 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -84,7 +84,6 @@
#include <crypto/hash.h>
#include <linux/scatterlist.h>
-int sysctl_tcp_tw_reuse __read_mostly;
int sysctl_tcp_low_latency __read_mostly;
#ifdef CONFIG_TCP_MD5SIG
@@ -95,12 +94,12 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
struct inet_hashinfo tcp_hashinfo;
EXPORT_SYMBOL(tcp_hashinfo);
-static __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
+static u32 tcp_v4_init_sequence(const struct sk_buff *skb, u32 *tsoff)
{
return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
ip_hdr(skb)->saddr,
tcp_hdr(skb)->dest,
- tcp_hdr(skb)->source);
+ tcp_hdr(skb)->source, tsoff);
}
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
@@ -120,7 +119,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
and use initial timestamp retrieved from peer table.
*/
if (tcptw->tw_ts_recent_stamp &&
- (!twp || (sysctl_tcp_tw_reuse &&
+ (!twp || (sock_net(sk)->ipv4.sysctl_tcp_tw_reuse &&
get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
if (tp->write_seq == 0)
@@ -146,7 +145,9 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
struct flowi4 *fl4;
struct rtable *rt;
int err;
+ u32 seq;
struct ip_options_rcu *inet_opt;
+ struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
if (addr_len < sizeof(struct sockaddr_in))
return -EINVAL;
@@ -197,7 +198,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
tp->write_seq = 0;
}
- if (tcp_death_row.sysctl_tw_recycle &&
+ if (tcp_death_row->sysctl_tw_recycle &&
!tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
tcp_fetch_timewait_stamp(sk, &rt->dst);
@@ -216,7 +217,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
* complete initialization after this.
*/
tcp_set_state(sk, TCP_SYN_SENT);
- err = inet_hash_connect(&tcp_death_row, sk);
+ err = inet_hash_connect(tcp_death_row, sk);
if (err)
goto failure;
@@ -232,18 +233,27 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
/* OK, now commit destination to socket. */
sk->sk_gso_type = SKB_GSO_TCPV4;
sk_setup_caps(sk, &rt->dst);
+ rt = NULL;
- if (!tp->write_seq && likely(!tp->repair))
- tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
- inet->inet_daddr,
- inet->inet_sport,
- usin->sin_port);
+ if (likely(!tp->repair)) {
+ seq = secure_tcp_sequence_number(inet->inet_saddr,
+ inet->inet_daddr,
+ inet->inet_sport,
+ usin->sin_port,
+ &tp->tsoffset);
+ if (!tp->write_seq)
+ tp->write_seq = seq;
+ }
inet->inet_id = tp->write_seq ^ jiffies;
+ if (tcp_fastopen_defer_connect(sk, &err))
+ return err;
+ if (err)
+ goto failure;
+
err = tcp_connect(sk);
- rt = NULL;
if (err)
goto failure;
@@ -269,10 +279,13 @@ EXPORT_SYMBOL(tcp_v4_connect);
*/
void tcp_v4_mtu_reduced(struct sock *sk)
{
- struct dst_entry *dst;
struct inet_sock *inet = inet_sk(sk);
- u32 mtu = tcp_sk(sk)->mtu_info;
+ struct dst_entry *dst;
+ u32 mtu;
+ if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
+ return;
+ mtu = tcp_sk(sk)->mtu_info;
dst = inet_csk_update_pmtu(sk, mtu);
if (!dst)
return;
@@ -418,7 +431,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
switch (type) {
case ICMP_REDIRECT:
- do_redirect(icmp_skb, sk);
+ if (!sock_owned_by_user(sk))
+ do_redirect(icmp_skb, sk);
goto out;
case ICMP_SOURCE_QUENCH:
/* Just silently ignore these. */
@@ -442,7 +456,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
if (!sock_owned_by_user(sk)) {
tcp_v4_mtu_reduced(sk);
} else {
- if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags))
+ if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &sk->sk_tsq_flags))
sock_hold(sk);
}
goto out;
@@ -691,6 +705,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
offsetof(struct inet_timewait_sock, tw_bound_dev_if));
arg.tos = ip_hdr(skb)->tos;
+ arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
local_bh_disable();
ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
skb, &TCP_SKB_CB(skb)->header.h4.opt,
@@ -711,7 +726,7 @@ out:
outside socket context is ugly, certainly. What can I do?
*/
-static void tcp_v4_send_ack(struct net *net,
+static void tcp_v4_send_ack(const struct sock *sk,
struct sk_buff *skb, u32 seq, u32 ack,
u32 win, u32 tsval, u32 tsecr, int oif,
struct tcp_md5sig_key *key,
@@ -726,6 +741,7 @@ static void tcp_v4_send_ack(struct net *net,
#endif
];
} rep;
+ struct net *net = sock_net(sk);
struct ip_reply_arg arg;
memset(&rep.th, 0, sizeof(struct tcphdr));
@@ -775,6 +791,7 @@ static void tcp_v4_send_ack(struct net *net,
if (oif)
arg.bound_dev_if = oif;
arg.tos = tos;
+ arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL);
local_bh_disable();
ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
skb, &TCP_SKB_CB(skb)->header.h4.opt,
@@ -790,7 +807,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
struct inet_timewait_sock *tw = inet_twsk(sk);
struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
- tcp_v4_send_ack(sock_net(sk), skb,
+ tcp_v4_send_ack(sk, skb,
tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
tcp_time_stamp + tcptw->tw_ts_offset,
@@ -818,10 +835,10 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
* exception of <SYN> segments, MUST be right-shifted by
* Rcv.Wind.Shift bits:
*/
- tcp_v4_send_ack(sock_net(sk), skb, seq,
+ tcp_v4_send_ack(sk, skb, seq,
tcp_rsk(req)->rcv_nxt,
req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
- tcp_time_stamp,
+ tcp_time_stamp + tcp_rsk(req)->ts_off,
req->ts_recent,
0,
tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
@@ -1315,10 +1332,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
tcp_ca_openreq_child(newsk, dst);
tcp_sync_mss(newsk, dst_mtu(dst));
- newtp->advmss = dst_metric_advmss(dst);
- if (tcp_sk(sk)->rx_opt.user_mss &&
- tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
- newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
+ newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
tcp_initialize_rcv_mss(newsk);
@@ -1552,8 +1566,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
* It has been noticed pure SACK packets were sometimes dropped
* (if cooked by drivers without copybreak feature).
*/
- if (!skb->data_len)
- skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
+ skb_condense(skb);
if (unlikely(sk_add_backlog(sk, skb, limit))) {
bh_unlock_sock(sk);
@@ -1813,7 +1826,6 @@ const struct inet_connection_sock_af_ops ipv4_specific = {
.getsockopt = ip_getsockopt,
.addr2sockaddr = inet_csk_addr2sockaddr,
.sockaddr_len = sizeof(struct sockaddr_in),
- .bind_conflict = inet_csk_bind_conflict,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_ip_setsockopt,
.compat_getsockopt = compat_ip_getsockopt,
@@ -1884,9 +1896,7 @@ void tcp_v4_destroy_sock(struct sock *sk)
tcp_free_fastopen_req(tp);
tcp_saved_syn_free(tp);
- local_bh_disable();
sk_sockets_allocated_dec(sk);
- local_bh_enable();
}
EXPORT_SYMBOL(tcp_v4_destroy_sock);
@@ -1908,7 +1918,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
if (!sk) {
get_head:
ilb = &tcp_hashinfo.listening_hash[st->bucket];
- spin_lock_bh(&ilb->lock);
+ spin_lock(&ilb->lock);
sk = sk_head(&ilb->head);
st->offset = 0;
goto get_sk;
@@ -1925,7 +1935,7 @@ get_sk:
if (sk->sk_family == st->family)
return sk;
}
- spin_unlock_bh(&ilb->lock);
+ spin_unlock(&ilb->lock);
st->offset = 0;
if (++st->bucket < INET_LHTABLE_SIZE)
goto get_head;
@@ -2133,7 +2143,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
switch (st->state) {
case TCP_SEQ_STATE_LISTENING:
if (v != SEQ_START_TOKEN)
- spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
+ spin_unlock(&tcp_hashinfo.listening_hash[st->bucket].lock);
break;
case TCP_SEQ_STATE_ESTABLISHED:
if (v)
@@ -2225,7 +2235,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
int state;
if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
- icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
+ icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
timer_active = 1;
timer_expires = icsk->icsk_timeout;
@@ -2372,6 +2382,7 @@ struct proto tcp_prot = {
.shutdown = tcp_shutdown,
.setsockopt = tcp_setsockopt,
.getsockopt = tcp_getsockopt,
+ .keepalive = tcp_set_keepalive,
.recvmsg = tcp_recvmsg,
.sendmsg = tcp_sendmsg,
.sendpage = tcp_sendpage,
@@ -2415,7 +2426,7 @@ static void __net_exit tcp_sk_exit(struct net *net)
static int __net_init tcp_sk_init(struct net *net)
{
- int res, cpu;
+ int res, cpu, cnt;
net->ipv4.tcp_sk = alloc_percpu(struct sock *);
if (!net->ipv4.tcp_sk)
@@ -2452,6 +2463,14 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_orphan_retries = 0;
net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;
+ net->ipv4.sysctl_tcp_tw_reuse = 0;
+
+ cnt = tcp_hashinfo.ehash_mask + 1;
+ net->ipv4.tcp_death_row.sysctl_tw_recycle = 0;
+ net->ipv4.tcp_death_row.sysctl_max_tw_buckets = (cnt + 1) / 2;
+ net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo;
+
+ net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 256);
return 0;
fail:
@@ -2462,7 +2481,7 @@ fail:
static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
{
- inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
+ inet_twsk_purge(&tcp_hashinfo, AF_INET);
}
static struct pernet_operations __net_initdata tcp_sk_ops = {
@@ -2473,7 +2492,6 @@ static struct pernet_operations __net_initdata tcp_sk_ops = {
void __init tcp_v4_init(void)
{
- inet_hashinfo_init(&tcp_hashinfo);
if (register_pernet_subsys(&tcp_sk_ops))
panic("Failed to create the TCP control socket.\n");
}