From 6ece90f9a13e2592cbd6634f74bcb306169b5ab6 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 29 Sep 2015 21:10:05 +0200
Subject: netfilter: fix Kconfig dependencies for nf_dup_ipv{4,6}

net/built-in.o: In function `nf_dup_ipv4': (.text+0xed24d): undefined reference to `nf_conntrack_untracked'
net/built-in.o: In function `nf_dup_ipv4': (.text+0xed267): undefined reference to `nf_conntrack_untracked'
net/built-in.o: In function `nf_dup_ipv6': (.text+0x158aef): undefined reference to `nf_conntrack_untracked'
net/built-in.o: In function `nf_dup_ipv6': (.text+0x158b09): undefined reference to `nf_conntrack_untracked'

Reported-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net/ipv4')

diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 690d27d3f2f9..a35584176535 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -75,6 +75,7 @@ endif # NF_TABLES
 
 config NF_DUP_IPV4
 	tristate "Netfilter IPv4 packet duplication to alternate destination"
+	depends on !NF_CONNTRACK || NF_CONNTRACK
 	help
 	  This option enables the nf_dup_ipv4 core, which duplicates an IPv4
 	  packet to be rerouted to another destination.
-- 
cgit v1.2.3


From cc4998febd567d1c671684abce5595344bd4e8b2 Mon Sep 17 00:00:00 2001
From: lucien <lucien.xin@gmail.com>
Date: Tue, 6 Oct 2015 21:03:07 +0800
Subject: netfilter: ipt_rpfilter: remove the nh_scope test in
 rpfilter_lookup_reverse

--accept-local  option works for res.type == RTN_LOCAL, which should be
from the local table, but there, the fib_info's nh->nh_scope =
RT_SCOPE_NOWHERE ( > RT_SCOPE_HOST). in fib_create_info().

	if (cfg->fc_scope == RT_SCOPE_HOST) {
		struct fib_nh *nh = fi->fib_nh;

		/* Local address is added. */
		if (nhs != 1 || nh->nh_gw)
			goto err_inval;
		nh->nh_scope = RT_SCOPE_NOWHERE;   <===
		nh->nh_dev = dev_get_by_index(net, fi->fib_nh->nh_oif);
		err = -ENODEV;
		if (!nh->nh_dev)
			goto failure;

but in our rpfilter_lookup_reverse():

	if (dev_match || flags & XT_RPFILTER_LOOSE)
		return FIB_RES_NH(res).nh_scope <= RT_SCOPE_HOST;

if nh->nh_scope > RT_SCOPE_HOST, it will fail. --accept-local option
will never be passed.

it seems the test is bogus and can be removed to fix this issue.

	if (dev_match || flags & XT_RPFILTER_LOOSE)
		return FIB_RES_NH(res).nh_scope <= RT_SCOPE_HOST;

ipv6 does not have this issue.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/ipt_rpfilter.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
index 8618fd150c96..c4ffc9de1654 100644
--- a/net/ipv4/netfilter/ipt_rpfilter.c
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -61,9 +61,7 @@ static bool rpfilter_lookup_reverse(struct flowi4 *fl4,
 	if (FIB_RES_DEV(res) == dev)
 		dev_match = true;
 #endif
-	if (dev_match || flags & XT_RPFILTER_LOOSE)
-		return FIB_RES_NH(res).nh_scope <= RT_SCOPE_HOST;
-	return dev_match;
+	return dev_match || flags & XT_RPFILTER_LOOSE;
 }
 
 static bool rpfilter_is_local(const struct sk_buff *skb)
-- 
cgit v1.2.3


From ca064bd89363a6e7e71b1c5226ff1b718957a9d4 Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Mon, 19 Oct 2015 10:30:05 +0200
Subject: xfrm: Fix pmtu discovery for local generated packets.

Commit 044a832a777 ("xfrm: Fix local error reporting crash
with interfamily tunnels") moved the setting of skb->protocol
behind the last access of the inner mode family to fix an
interfamily crash. Unfortunately now skb->protocol might not
be set at all, so we fail dispatch to the inner address family.
As a reault, the local error handler is not called and the
mtu value is not reported back to userspace.

We fix this by setting skb->protocol on message size errors
before we call xfrm_local_error.

Fixes: 044a832a7779c ("xfrm: Fix local error reporting crash with interfamily tunnels")
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/ipv4/xfrm4_output.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net/ipv4')

diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 2878dbfffeb7..41a261355662 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -30,6 +30,8 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb)
 
 	mtu = dst_mtu(skb_dst(skb));
 	if (skb->len > mtu) {
+		skb->protocol = htons(ETH_P_IP);
+
 		if (skb->sk)
 			xfrm_local_error(skb, mtu);
 		else
-- 
cgit v1.2.3


From e2e8009ff72ad2a795b67785f3238af152146368 Mon Sep 17 00:00:00 2001
From: Renato Westphal <renatowestphal@gmail.com>
Date: Mon, 19 Oct 2015 18:51:34 -0200
Subject: tcp: remove improper preemption check in tcp_xmit_probe_skb()

Commit e520af48c7e5a introduced the following bug when setting the
TCP_REPAIR sockoption:

[ 2860.657036] BUG: using __this_cpu_add() in preemptible [00000000] code: daemon/12164
[ 2860.657045] caller is __this_cpu_preempt_check+0x13/0x20
[ 2860.657049] CPU: 1 PID: 12164 Comm: daemon Not tainted 4.2.3 #1
[ 2860.657051] Hardware name: Dell Inc. PowerEdge R210 II/0JP7TR, BIOS 2.0.5 03/13/2012
[ 2860.657054]  ffffffff81c7f071 ffff880231e9fdf8 ffffffff8185d765 0000000000000002
[ 2860.657058]  0000000000000001 ffff880231e9fe28 ffffffff8146ed91 ffff880231e9fe18
[ 2860.657062]  ffffffff81cd1a5d ffff88023534f200 ffff8800b9811000 ffff880231e9fe38
[ 2860.657065] Call Trace:
[ 2860.657072]  [<ffffffff8185d765>] dump_stack+0x4f/0x7b
[ 2860.657075]  [<ffffffff8146ed91>] check_preemption_disabled+0xe1/0xf0
[ 2860.657078]  [<ffffffff8146edd3>] __this_cpu_preempt_check+0x13/0x20
[ 2860.657082]  [<ffffffff817e0bc7>] tcp_xmit_probe_skb+0xc7/0x100
[ 2860.657085]  [<ffffffff817e1e2d>] tcp_send_window_probe+0x2d/0x30
[ 2860.657089]  [<ffffffff817d1d8c>] do_tcp_setsockopt.isra.29+0x74c/0x830
[ 2860.657093]  [<ffffffff817d1e9c>] tcp_setsockopt+0x2c/0x30
[ 2860.657097]  [<ffffffff81767b74>] sock_common_setsockopt+0x14/0x20
[ 2860.657100]  [<ffffffff817669e1>] SyS_setsockopt+0x71/0xc0
[ 2860.657104]  [<ffffffff81865172>] entry_SYSCALL_64_fastpath+0x16/0x75

Since tcp_xmit_probe_skb() can be called from process context, use
NET_INC_STATS() instead of NET_INC_STATS_BH().

Fixes: e520af48c7e5 ("tcp: add TCPWinProbe and TCPKeepAlive SNMP counters")
Signed-off-by: Renato Westphal <renatow@taghos.com.br>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 1100ffe4a722..3dbee0d83b15 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3405,7 +3405,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib)
 	 */
 	tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);
 	skb_mstamp_get(&skb->skb_mstamp);
-	NET_INC_STATS_BH(sock_net(sk), mib);
+	NET_INC_STATS(sock_net(sk), mib);
 	return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
 }
 
-- 
cgit v1.2.3


From fc4099f17240767554ff3a73977acb78ef615404 Mon Sep 17 00:00:00 2001
From: Pravin B Shelar <pshelar@nicira.com>
Date: Thu, 22 Oct 2015 18:17:16 -0700
Subject: openvswitch: Fix egress tunnel info.

While transitioning to netdev based vport we broke OVS
feature which allows user to retrieve tunnel packet egress
information for lwtunnel devices.  Following patch fixes it
by introducing ndo operation to get the tunnel egress info.
Same ndo operation can be used for lwtunnel devices and compat
ovs-tnl-vport devices. So after adding such device operation
we can remove similar operation from ovs-vport.

Fixes: 614732eaa12d ("openvswitch: Use regular VXLAN net_device device").
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_gre.c | 46 +++++++++++++++++++++++++++++++++++++---------
 1 file changed, 37 insertions(+), 9 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index bd0679d90519..614521437e30 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -498,10 +498,26 @@ static struct sk_buff *gre_handle_offloads(struct sk_buff *skb,
 					csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
 }
 
+static struct rtable *gre_get_rt(struct sk_buff *skb,
+				 struct net_device *dev,
+				 struct flowi4 *fl,
+				 const struct ip_tunnel_key *key)
+{
+	struct net *net = dev_net(dev);
+
+	memset(fl, 0, sizeof(*fl));
+	fl->daddr = key->u.ipv4.dst;
+	fl->saddr = key->u.ipv4.src;
+	fl->flowi4_tos = RT_TOS(key->tos);
+	fl->flowi4_mark = skb->mark;
+	fl->flowi4_proto = IPPROTO_GRE;
+
+	return ip_route_output_key(net, fl);
+}
+
 static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ip_tunnel_info *tun_info;
-	struct net *net = dev_net(dev);
 	const struct ip_tunnel_key *key;
 	struct flowi4 fl;
 	struct rtable *rt;
@@ -516,14 +532,7 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
 		goto err_free_skb;
 
 	key = &tun_info->key;
-	memset(&fl, 0, sizeof(fl));
-	fl.daddr = key->u.ipv4.dst;
-	fl.saddr = key->u.ipv4.src;
-	fl.flowi4_tos = RT_TOS(key->tos);
-	fl.flowi4_mark = skb->mark;
-	fl.flowi4_proto = IPPROTO_GRE;
-
-	rt = ip_route_output_key(net, &fl);
+	rt = gre_get_rt(skb, dev, &fl, key);
 	if (IS_ERR(rt))
 		goto err_free_skb;
 
@@ -566,6 +575,24 @@ err_free_skb:
 	dev->stats.tx_dropped++;
 }
 
+static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
+{
+	struct ip_tunnel_info *info = skb_tunnel_info(skb);
+	struct rtable *rt;
+	struct flowi4 fl4;
+
+	if (ip_tunnel_info_af(info) != AF_INET)
+		return -EINVAL;
+
+	rt = gre_get_rt(skb, dev, &fl4, &info->key);
+	if (IS_ERR(rt))
+		return PTR_ERR(rt);
+
+	ip_rt_put(rt);
+	info->key.u.ipv4.src = fl4.saddr;
+	return 0;
+}
+
 static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
 			      struct net_device *dev)
 {
@@ -1023,6 +1050,7 @@ static const struct net_device_ops gre_tap_netdev_ops = {
 	.ndo_change_mtu		= ip_tunnel_change_mtu,
 	.ndo_get_stats64	= ip_tunnel_get_stats64,
 	.ndo_get_iflink		= ip_tunnel_get_iflink,
+	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
 };
 
 static void ipgre_tap_setup(struct net_device *dev)
-- 
cgit v1.2.3


From c80dbe04612986fd6104b4a1be21681b113b5ac9 Mon Sep 17 00:00:00 2001
From: Andrew Shewmaker <agshew@gmail.com>
Date: Sun, 18 Oct 2015 21:59:08 -0700
Subject: tcp: allow dctcp alpha to drop to zero

If alpha is strictly reduced by alpha >> dctcp_shift_g and if alpha is less
than 1 << dctcp_shift_g, then alpha may never reach zero. For example,
given shift_g=4 and alpha=15, alpha >> dctcp_shift_g yields 0 and alpha
remains 15. The effect isn't noticeable in this case below cwnd=137, but
could gradually drive uncongested flows with leftover alpha down to
cwnd=137. A larger dctcp_shift_g would have a greater effect.

This change causes alpha=15 to drop to 0 instead of being decrementing by 1
as it would when alpha=16. However, it requires one less conditional to
implement since it doesn't have to guard against subtracting 1 from 0U. A
decay of 15 is not unreasonable since an equal or greater amount occurs at
alpha >= 240.

Signed-off-by: Andrew G. Shewmaker <agshew@gmail.com>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_dctcp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index 7092a61c4dc8..7e538f71f5fb 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -209,7 +209,7 @@ static void dctcp_update_alpha(struct sock *sk, u32 flags)
 
 		/* alpha = (1 - g) * alpha + g * F */
 
-		alpha -= alpha >> dctcp_shift_g;
+		alpha -= min_not_zero(alpha, alpha >> dctcp_shift_g);
 		if (bytes_ecn) {
 			/* If dctcp_shift_g == 1, a 32bit value would overflow
 			 * after 8 Mbytes.
-- 
cgit v1.2.3