summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Quetchenbach <virtualphtn@gmail.com>2008-09-21 00:21:51 -0700
committerGreg Kroah-Hartman <gregkh@suse.de>2009-07-02 16:31:54 -0700
commitfe05dfbd8f652ac0ef0d30d9498066bb9b8da0e0 (patch)
tree5b3b188783a611961719d8c757801ea07b942a71
parentd571e2bcf7b4f0cf8da41fed61d95c4c1b247d40 (diff)
tcp: advertise MSS requested by user
commit f5fff5dc8a7a3f395b0525c02ba92c95d42b7390 upstream. I'm trying to use the TCP_MAXSEG option to setsockopt() to set the MSS for both sides of a bidirectional connection. man tcp says: "If this option is set before connection establishment, it also changes the MSS value announced to the other end in the initial packet." However, the kernel only uses the MTU/route cache to set the advertised MSS. That means if I set the MSS to, say, 500 before calling connect(), I will send at most 500-byte packets, but I will still receive 1500-byte packets in reply. This is a bug, either in the kernel or the documentation. This patch (applies to latest net-2.6) reduces the advertised value to that requested by the user as long as setsockopt() is called before connect() or accept(). This seems like the behavior that one would expect as well as that which is documented. I've tried to make sure that things that depend on the advertised MSS are set correctly. Signed-off-by: Tom Quetchenbach <virtualphtn@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net> Cc: Willy Tarreau <w@1wt.eu> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
-rw-r--r--net/ipv4/tcp_ipv4.c4
-rw-r--r--net/ipv4/tcp_output.c13
2 files changed, 14 insertions, 3 deletions
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 011478e46c40..0b3c4049b184 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1364,6 +1364,10 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
tcp_mtup_init(newsk);
tcp_sync_mss(newsk, dst_mtu(dst));
newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
+ if (tcp_sk(sk)->rx_opt.user_mss &&
+ tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
+ newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
+
tcp_initialize_rcv_mss(newsk);
#ifdef CONFIG_TCP_MD5SIG
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index d083f9a9c12d..c89cd75deb7a 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2252,6 +2252,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
struct sk_buff *skb;
struct tcp_md5sig_key *md5;
__u8 *md5_hash_location;
+ int mss;
skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC);
if (skb == NULL)
@@ -2262,13 +2263,17 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
skb->dst = dst_clone(dst);
+ mss = dst_metric(dst, RTAX_ADVMSS);
+ if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
+ mss = tp->rx_opt.user_mss;
+
if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
__u8 rcv_wscale;
/* Set this up on the first call only */
req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
/* tcp_full_space because it is guaranteed to be the first packet */
tcp_select_initial_window(tcp_full_space(sk),
- dst_metric(dst, RTAX_ADVMSS) - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
+ mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
&req->rcv_wnd,
&req->window_clamp,
ireq->wscale_ok,
@@ -2283,8 +2288,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
else
#endif
TCP_SKB_CB(skb)->when = tcp_time_stamp;
- tcp_header_size = tcp_synack_options(sk, req,
- dst_metric(dst, RTAX_ADVMSS),
+ tcp_header_size = tcp_synack_options(sk, req, mss,
skb, &opts, &md5) +
sizeof(struct tcphdr);
@@ -2353,6 +2357,9 @@ static void tcp_connect_init(struct sock *sk)
if (!tp->window_clamp)
tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
tp->advmss = dst_metric(dst, RTAX_ADVMSS);
+ if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->advmss)
+ tp->advmss = tp->rx_opt.user_mss;
+
tcp_initialize_rcv_mss(sk);
tcp_select_initial_window(tcp_full_space(sk),