From 45f6fad84cc305103b28d73482b344d7f5b76f39 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 29 Nov 2015 19:37:57 -0800
Subject: ipv6: add complete rcu protection around np->opt

This patch addresses multiple problems :

UDP/RAW sendmsg() need to get a stable struct ipv6_txoptions
while socket is not locked : Other threads can change np->opt
concurrently. Dmitry posted a syzkaller
(http://github.com/google/syzkaller) program desmonstrating
use-after-free.

Starting with TCP/DCCP lockless listeners, tcp_v6_syn_recv_sock()
and dccp_v6_request_recv_sock() also need to use RCU protection
to dereference np->opt once (before calling ipv6_dup_options())

This patch adds full RCU protection to np->opt

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/tcp_ipv6.c | 28 +++++++++++++++++-----------
 1 file changed, 17 insertions(+), 11 deletions(-)

(limited to 'net/ipv6/tcp_ipv6.c')

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index c5429a636f1a..6a50bb4a0dae 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -120,6 +120,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct in6_addr *saddr = NULL, *final_p, final;
+	struct ipv6_txoptions *opt;
 	struct flowi6 fl6;
 	struct dst_entry *dst;
 	int addr_type;
@@ -235,7 +236,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	fl6.fl6_dport = usin->sin6_port;
 	fl6.fl6_sport = inet->inet_sport;
 
-	final_p = fl6_update_dst(&fl6, np->opt, &final);
+	opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
+	final_p = fl6_update_dst(&fl6, opt, &final);
 
 	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
 
@@ -263,9 +265,9 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 		tcp_fetch_timewait_stamp(sk, dst);
 
 	icsk->icsk_ext_hdr_len = 0;
-	if (np->opt)
-		icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
-					  np->opt->opt_nflen);
+	if (opt)
+		icsk->icsk_ext_hdr_len = opt->opt_flen +
+					 opt->opt_nflen;
 
 	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
 
@@ -461,7 +463,8 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
 		if (np->repflow && ireq->pktopts)
 			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
 
-		err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass);
+		err = ip6_xmit(sk, skb, fl6, rcu_dereference(np->opt),
+			       np->tclass);
 		err = net_xmit_eval(err);
 	}
 
@@ -972,6 +975,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
 	struct inet_request_sock *ireq;
 	struct ipv6_pinfo *newnp;
 	const struct ipv6_pinfo *np = inet6_sk(sk);
+	struct ipv6_txoptions *opt;
 	struct tcp6_sock *newtcp6sk;
 	struct inet_sock *newinet;
 	struct tcp_sock *newtp;
@@ -1098,13 +1102,15 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
 	   but we make one more one thing there: reattach optmem
 	   to newsk.
 	 */
-	if (np->opt)
-		newnp->opt = ipv6_dup_options(newsk, np->opt);
-
+	opt = rcu_dereference(np->opt);
+	if (opt) {
+		opt = ipv6_dup_options(newsk, opt);
+		RCU_INIT_POINTER(newnp->opt, opt);
+	}
 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
-	if (newnp->opt)
-		inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
-						     newnp->opt->opt_flen);
+	if (opt)
+		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
+						    opt->opt_flen;
 
 	tcp_ca_openreq_child(newsk, dst);
 
-- 
cgit v1.2.3


From 6bd4f355df2eae80b8a5c7b097371cd1e05f20d5 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 2 Dec 2015 21:53:57 -0800
Subject: ipv6: kill sk_dst_lock

While testing the np->opt RCU conversion, I found that UDP/IPv6 was
using a mixture of xchg() and sk_dst_lock to protect concurrent changes
to sk->sk_dst_cache, leading to possible corruptions and crashes.

ip6_sk_dst_lookup_flow() uses sk_dst_check() anyway, so the simplest
way to fix the mess is to remove sk_dst_lock completely, as we did for
IPv4.

__ip6_dst_store() and ip6_dst_store() share same implementation.

sk_setup_caps() being called with socket lock being held or not,
we have to use sk_dst_set() instead of __sk_dst_set()

Note that I had to move the "np->dst_cookie = rt6_get_cookie(rt);"
in ip6_dst_store() before the sk_setup_caps(sk, dst) call.

This is because ip6_dst_store() can be called from process context,
without any lock held.

As soon as the dst is installed in sk->sk_dst_cache, dst can be freed
from another cpu doing a concurrent ip6_dst_store()

Doing the dst dereference before doing the install is needed to make
sure no use after free would trigger.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/tcp_ipv6.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net/ipv6/tcp_ipv6.c')

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 6a50bb4a0dae..e7aab561b7b4 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -257,7 +257,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	inet->inet_rcv_saddr = LOOPBACK4_IPV6;
 
 	sk->sk_gso_type = SKB_GSO_TCPV6;
-	__ip6_dst_store(sk, dst, NULL, NULL);
+	ip6_dst_store(sk, dst, NULL, NULL);
 
 	if (tcp_death_row.sysctl_tw_recycle &&
 	    !tp->rx_opt.ts_recent_stamp &&
@@ -1060,7 +1060,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
 	 */
 
 	newsk->sk_gso_type = SKB_GSO_TCPV6;
-	__ip6_dst_store(newsk, dst, NULL, NULL);
+	ip6_dst_store(newsk, dst, NULL, NULL);
 	inet6_sk_rx_dst_set(newsk, skb);
 
 	newtcp6sk = (struct tcp6_sock *)newsk;
-- 
cgit v1.2.3


From 5037e9ef9454917b047f9f3a19b4dd179fbf7cd4 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 14 Dec 2015 14:08:53 -0800
Subject: net: fix IP early demux races

David Wilder reported crashes caused by dst reuse.

<quote David>
  I am seeing a crash on a distro V4.2.3 kernel caused by a double
  release of a dst_entry.  In ipv4_dst_destroy() the call to
  list_empty() finds a poisoned next pointer, indicating the dst_entry
  has already been removed from the list and freed. The crash occurs
  18 to 24 hours into a run of a network stress exerciser.
</quote>

Thanks to his detailed report and analysis, we were able to understand
the core issue.

IP early demux can associate a dst to skb, after a lookup in TCP/UDP
sockets.

When socket cache is not properly set, we want to store into
sk->sk_dst_cache the dst for future IP early demux lookups,
by acquiring a stable refcount on the dst.

Problem is this acquisition is simply using an atomic_inc(),
which works well, unless the dst was queued for destruction from
dst_release() noticing dst refcount went to zero, if DST_NOCACHE
was set on dst.

We need to make sure current refcount is not zero before incrementing
it, or risk double free as David reported.

This patch, being a stable candidate, adds two new helpers, and use
them only from IP early demux problematic paths.

It might be possible to merge in net-next skb_dst_force() and
skb_dst_force_safe(), but I prefer having the smallest patch for stable
kernels : Maybe some skb_dst_force() callers do not expect skb->dst
can suddenly be cleared.

Can probably be backported back to linux-3.6 kernels

Reported-by: David J. Wilder <dwilder@us.ibm.com>
Tested-by: David J. Wilder <dwilder@us.ibm.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/tcp_ipv6.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net/ipv6/tcp_ipv6.c')

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index e7aab561b7b4..6b8a8a9091fa 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -93,10 +93,9 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb_dst(skb);
 
-	if (dst) {
+	if (dst && dst_hold_safe(dst)) {
 		const struct rt6_info *rt = (const struct rt6_info *)dst;
 
-		dst_hold(dst);
 		sk->sk_rx_dst = dst;
 		inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
 		inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
-- 
cgit v1.2.3