summaryrefslogtreecommitdiff
path: root/net/ipv4/tcp_input.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--net/ipv4/tcp_input.c93
1 files changed, 65 insertions, 28 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 13abf4eaa051..f893e90061eb 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -102,11 +102,14 @@ int sysctl_tcp_abc __read_mostly;
#define FLAG_DATA_LOST 0x80 /* SACK detected data lossage. */
#define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/
#define FLAG_ONLY_ORIG_SACKED 0x200 /* SACKs only non-rexmit sent before RTO */
+#define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
+#define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained DSACK info */
#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED)
#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
#define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE)
#define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED)
+#define FLAG_ANY_PROGRESS (FLAG_FORWARD_PROGRESS|FLAG_SND_UNA_ADVANCED)
#define IsReno(tp) ((tp)->rx_opt.sack_ok == 0)
#define IsFack(tp) ((tp)->rx_opt.sack_ok & 2)
@@ -552,6 +555,16 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
tcp_grow_window(sk, skb);
}
+static u32 tcp_rto_min(struct sock *sk)
+{
+ struct dst_entry *dst = __sk_dst_get(sk);
+ u32 rto_min = TCP_RTO_MIN;
+
+ if (dst && dst_metric_locked(dst, RTAX_RTO_MIN))
+ rto_min = dst->metrics[RTAX_RTO_MIN-1];
+ return rto_min;
+}
+
/* Called to compute a smoothed rtt estimate. The data fed to this
* routine either comes from timestamps, or from segments that were
* known _not_ to have been retransmitted [see Karn/Partridge
@@ -613,13 +626,13 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
if (tp->mdev_max < tp->rttvar)
tp->rttvar -= (tp->rttvar-tp->mdev_max)>>2;
tp->rtt_seq = tp->snd_nxt;
- tp->mdev_max = TCP_RTO_MIN;
+ tp->mdev_max = tcp_rto_min(sk);
}
} else {
/* no previous measure. */
tp->srtt = m<<3; /* take the measured time to be rtt */
tp->mdev = m<<1; /* make sure rto = 3*rtt */
- tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN);
+ tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
tp->rtt_seq = tp->snd_nxt;
}
}
@@ -752,7 +765,15 @@ void tcp_update_metrics(struct sock *sk)
}
}
-/* Numbers are taken from RFC2414. */
+/* Numbers are taken from RFC3390.
+ *
+ * John Heffner states:
+ *
+ * The RFC specifies a window of no more than 4380 bytes
+ * unless 2*MSS > 4380. Reading the pseudocode in the RFC
+ * is a bit misleading because they use a clamp at 4380 bytes
+ * rather than use a multiplier in the relevant range.
+ */
__u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
{
__u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
@@ -964,12 +985,14 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
/* Check for D-SACK. */
if (before(ntohl(sp[0].start_seq), TCP_SKB_CB(ack_skb)->ack_seq)) {
+ flag |= FLAG_DSACKING_ACK;
found_dup_sack = 1;
tp->rx_opt.sack_ok |= 4;
NET_INC_STATS_BH(LINUX_MIB_TCPDSACKRECV);
} else if (num_sacks > 1 &&
!after(ntohl(sp[0].end_seq), ntohl(sp[1].end_seq)) &&
!before(ntohl(sp[0].start_seq), ntohl(sp[1].start_seq))) {
+ flag |= FLAG_DSACKING_ACK;
found_dup_sack = 1;
tp->rx_opt.sack_ok |= 4;
NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOFORECV);
@@ -1856,7 +1879,7 @@ static void tcp_cwnd_down(struct sock *sk, int flag)
struct tcp_sock *tp = tcp_sk(sk);
int decr = tp->snd_cwnd_cnt + 1;
- if ((flag&FLAG_FORWARD_PROGRESS) ||
+ if ((flag&(FLAG_ANY_PROGRESS|FLAG_DSACKING_ACK)) ||
(IsReno(tp) && !(flag&FLAG_NOT_DUP))) {
tp->snd_cwnd_cnt = decr&1;
decr >>= 1;
@@ -2107,15 +2130,13 @@ static void tcp_mtup_probe_success(struct sock *sk, struct sk_buff *skb)
* tcp_xmit_retransmit_queue().
*/
static void
-tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
- int prior_packets, int flag)
+tcp_fastretrans_alert(struct sock *sk, int prior_packets, int flag)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
- int is_dupack = (tp->snd_una == prior_snd_una &&
- (!(flag&FLAG_NOT_DUP) ||
- ((flag&FLAG_DATA_SACKED) &&
- (tp->fackets_out > tp->reordering))));
+ int is_dupack = !(flag&(FLAG_SND_UNA_ADVANCED|FLAG_NOT_DUP));
+ int do_lost = is_dupack || ((flag&FLAG_DATA_SACKED) &&
+ (tp->fackets_out > tp->reordering));
/* Some technical things:
* 1. Reno does not count dupacks (sacked_out) automatically. */
@@ -2192,14 +2213,14 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
/* F. Process state. */
switch (icsk->icsk_ca_state) {
case TCP_CA_Recovery:
- if (prior_snd_una == tp->snd_una) {
+ if (!(flag & FLAG_SND_UNA_ADVANCED)) {
if (IsReno(tp) && is_dupack)
tcp_add_reno_sack(sk);
} else {
int acked = prior_packets - tp->packets_out;
if (IsReno(tp))
tcp_remove_reno_sacks(sk, acked);
- is_dupack = tcp_try_undo_partial(sk, acked);
+ do_lost = tcp_try_undo_partial(sk, acked);
}
break;
case TCP_CA_Loss:
@@ -2215,7 +2236,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
/* Loss is undone; fall through to processing in Open state. */
default:
if (IsReno(tp)) {
- if (tp->snd_una != prior_snd_una)
+ if (flag & FLAG_SND_UNA_ADVANCED)
tcp_reset_reno_sack(tp);
if (is_dupack)
tcp_add_reno_sack(sk);
@@ -2264,7 +2285,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
tcp_set_ca_state(sk, TCP_CA_Recovery);
}
- if (is_dupack || tcp_head_timedout(sk))
+ if (do_lost || tcp_head_timedout(sk))
tcp_update_scoreboard(sk);
tcp_cwnd_down(sk, flag);
tcp_xmit_retransmit_queue(sk);
@@ -2329,11 +2350,11 @@ static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
tcp_ack_no_tstamp(sk, seq_rtt, flag);
}
-static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
+static void tcp_cong_avoid(struct sock *sk, u32 ack,
u32 in_flight, int good)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
- icsk->icsk_ca_ops->cong_avoid(sk, ack, rtt, in_flight, good);
+ icsk->icsk_ca_ops->cong_avoid(sk, ack, in_flight, good);
tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp;
}
@@ -2399,6 +2420,9 @@ static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb,
__u32 dval = min(tp->fackets_out, packets_acked);
tp->fackets_out -= dval;
}
+ /* hint's skb might be NULL but we don't need to care */
+ tp->fastpath_cnt_hint -= min_t(u32, packets_acked,
+ tp->fastpath_cnt_hint);
tp->packets_out -= packets_acked;
BUG_ON(tcp_skb_pcount(skb) == 0);
@@ -2496,12 +2520,23 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
tcp_ack_update_rtt(sk, acked, seq_rtt);
tcp_ack_packets_out(sk);
- /* Is the ACK triggering packet unambiguous? */
- if (acked & FLAG_RETRANS_DATA_ACKED)
- last_ackt = net_invalid_timestamp();
+ if (ca_ops->pkts_acked) {
+ s32 rtt_us = -1;
+
+ /* Is the ACK triggering packet unambiguous? */
+ if (!(acked & FLAG_RETRANS_DATA_ACKED)) {
+ /* High resolution needed and available? */
+ if (ca_ops->flags & TCP_CONG_RTT_STAMP &&
+ !ktime_equal(last_ackt,
+ net_invalid_timestamp()))
+ rtt_us = ktime_us_delta(ktime_get_real(),
+ last_ackt);
+ else if (seq_rtt > 0)
+ rtt_us = jiffies_to_usecs(seq_rtt);
+ }
- if (ca_ops->pkts_acked)
- ca_ops->pkts_acked(sk, pkts_acked, last_ackt);
+ ca_ops->pkts_acked(sk, pkts_acked, rtt_us);
+ }
}
#if FASTRETRANS_DEBUG > 0
@@ -2673,7 +2708,7 @@ static void tcp_undo_spur_to_response(struct sock *sk, int flag)
* to prove that the RTO is indeed spurious. It transfers the control
* from F-RTO to the conventional RTO recovery
*/
-static int tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
+static int tcp_process_frto(struct sock *sk, int flag)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -2693,8 +2728,7 @@ static int tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
* ACK isn't duplicate nor advances window, e.g., opposite dir
* data, winupdate
*/
- if ((tp->snd_una == prior_snd_una) && (flag&FLAG_NOT_DUP) &&
- !(flag&FLAG_FORWARD_PROGRESS))
+ if (!(flag&FLAG_ANY_PROGRESS) && (flag&FLAG_NOT_DUP))
return 1;
if (!(flag&FLAG_DATA_ACKED)) {
@@ -2774,6 +2808,9 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
if (before(ack, prior_snd_una))
goto old_ack;
+ if (after(ack, prior_snd_una))
+ flag |= FLAG_SND_UNA_ADVANCED;
+
if (sysctl_tcp_abc) {
if (icsk->icsk_ca_state < TCP_CA_CWR)
tp->bytes_acked += ack - prior_snd_una;
@@ -2826,17 +2863,17 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
flag |= tcp_clean_rtx_queue(sk, &seq_rtt);
if (tp->frto_counter)
- frto_cwnd = tcp_process_frto(sk, prior_snd_una, flag);
+ frto_cwnd = tcp_process_frto(sk, flag);
if (tcp_ack_is_dubious(sk, flag)) {
/* Advance CWND, if state allows this. */
if ((flag & FLAG_DATA_ACKED) && !frto_cwnd &&
tcp_may_raise_cwnd(sk, flag))
- tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 0);
- tcp_fastretrans_alert(sk, prior_snd_una, prior_packets, flag);
+ tcp_cong_avoid(sk, ack, prior_in_flight, 0);
+ tcp_fastretrans_alert(sk, prior_packets, flag);
} else {
if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
- tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 1);
+ tcp_cong_avoid(sk, ack, prior_in_flight, 1);
}
if ((flag & FLAG_FORWARD_PROGRESS) || !(flag&FLAG_NOT_DUP))