Diffstat (limited to 'net/ipv4')
-rw-r--r--  net/ipv4/Kconfig                                |   7
-rw-r--r--  net/ipv4/arp.c                                  |  11
-rw-r--r--  net/ipv4/cipso_ipv4.c                           |  64
-rw-r--r--  net/ipv4/datagram.c                             |   6
-rw-r--r--  net/ipv4/devinet.c                              | 415
-rw-r--r--  net/ipv4/fib_frontend.c                         |  13
-rw-r--r--  net/ipv4/fib_hash.c                             |   6
-rw-r--r--  net/ipv4/fib_lookup.h                           |   3
-rw-r--r--  net/ipv4/fib_rules.c                            |   2
-rw-r--r--  net/ipv4/fib_semantics.c                        |   5
-rw-r--r--  net/ipv4/fib_trie.c                             |   6
-rw-r--r--  net/ipv4/icmp.c                                 |  12
-rw-r--r--  net/ipv4/igmp.c                                 |  18
-rw-r--r--  net/ipv4/inet_connection_sock.c                 |   4
-rw-r--r--  net/ipv4/ip_output.c                            |   4
-rw-r--r--  net/ipv4/ipmr.c                                 |  23
-rw-r--r--  net/ipv4/ipvs/Kconfig                           |  30
-rw-r--r--  net/ipv4/netfilter/ip_tables.c                  |  81
-rw-r--r--  net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c  |  15
-rw-r--r--  net/ipv4/netfilter/nf_nat_ftp.c                 |  20
-rw-r--r--  net/ipv4/netfilter/nf_nat_h323.c                |   6
-rw-r--r--  net/ipv4/proc.c                                 |  23
-rw-r--r--  net/ipv4/route.c                                |  89
-rw-r--r--  net/ipv4/sysctl_net_ipv4.c                      |   6
-rw-r--r--  net/ipv4/tcp.c                                  |   8
-rw-r--r--  net/ipv4/tcp_bic.c                              |   2
-rw-r--r--  net/ipv4/tcp_cong.c                             |  40
-rw-r--r--  net/ipv4/tcp_cubic.c                            |   2
-rw-r--r--  net/ipv4/tcp_input.c                            |  10
-rw-r--r--  net/ipv4/tcp_ipv4.c                             |  10
-rw-r--r--  net/ipv4/tcp_probe.c                            |   5
-rw-r--r--  net/ipv4/tcp_timer.c                            |   8
-rw-r--r--  net/ipv4/udp.c                                  | 246
-rw-r--r--  net/ipv4/udp_impl.h                             |   6
-rw-r--r--  net/ipv4/udplite.c                              |   7
-rw-r--r--  net/ipv4/xfrm4_input.c                          |   6
-rw-r--r--  net/ipv4/xfrm4_mode_tunnel.c                    |   2
37 files changed, 587 insertions(+), 634 deletions(-)
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index c68196cc56ab..010fbb2d45e9 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -43,11 +43,11 @@ config IP_ADVANCED_ROUTER
asymmetric routing (packets from you to a host take a different path
than packets from that host to you) or if you operate a non-routing
host which has several IP addresses on different interfaces. To turn
- rp_filter off use:
+ rp_filter on use:
- echo 0 > /proc/sys/net/ipv4/conf/<device>/rp_filter
+ echo 1 > /proc/sys/net/ipv4/conf/<device>/rp_filter
or
- echo 0 > /proc/sys/net/ipv4/conf/all/rp_filter
+ echo 1 > /proc/sys/net/ipv4/conf/all/rp_filter
If unsure, say N here.
@@ -577,6 +577,7 @@ config TCP_CONG_VENO
config TCP_CONG_YEAH
tristate "YeAH TCP"
depends on EXPERIMENTAL
+ select TCP_CONG_VEGAS
default n
---help---
YeAH-TCP is a sender-side high-speed enabled TCP congestion control
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 7110779a0244..e00767e8ebd9 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -877,7 +877,7 @@ static int arp_process(struct sk_buff *skb)
n = __neigh_lookup(&arp_tbl, &sip, dev, 0);
- if (ipv4_devconf.arp_accept) {
+ if (IPV4_DEVCONF_ALL(ARP_ACCEPT)) {
/* Unsolicited ARP is not accepted by default.
It is possible, that this option should be enabled for some
devices (strip is candidate)
@@ -987,11 +987,11 @@ static int arp_req_set(struct arpreq *r, struct net_device * dev)
return 0;
}
if (dev == NULL) {
- ipv4_devconf.proxy_arp = 1;
+ IPV4_DEVCONF_ALL(PROXY_ARP) = 1;
return 0;
}
if (__in_dev_get_rtnl(dev)) {
- __in_dev_get_rtnl(dev)->cnf.proxy_arp = 1;
+ IN_DEV_CONF_SET(__in_dev_get_rtnl(dev), PROXY_ARP, 1);
return 0;
}
return -ENXIO;
@@ -1093,11 +1093,12 @@ static int arp_req_delete(struct arpreq *r, struct net_device * dev)
return pneigh_delete(&arp_tbl, &ip, dev);
if (mask == 0) {
if (dev == NULL) {
- ipv4_devconf.proxy_arp = 0;
+ IPV4_DEVCONF_ALL(PROXY_ARP) = 0;
return 0;
}
if (__in_dev_get_rtnl(dev)) {
- __in_dev_get_rtnl(dev)->cnf.proxy_arp = 0;
+ IN_DEV_CONF_SET(__in_dev_get_rtnl(dev),
+ PROXY_ARP, 0);
return 0;
}
return -ENXIO;
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 86a2b52aad38..ab56a052ce31 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -45,6 +45,7 @@
#include <net/cipso_ipv4.h>
#include <asm/atomic.h>
#include <asm/bug.h>
+#include <asm/unaligned.h>
struct cipso_v4_domhsh_entry {
char *domain;
@@ -1000,7 +1001,7 @@ static int cipso_v4_map_cat_enum_valid(const struct cipso_v4_doi *doi_def,
return -EFAULT;
for (iter = 0; iter < enumcat_len; iter += 2) {
- cat = ntohs(*((__be16 *)&enumcat[iter]));
+ cat = ntohs(get_unaligned((__be16 *)&enumcat[iter]));
if (cat <= cat_prev)
return -EFAULT;
cat_prev = cat;
@@ -1068,8 +1069,8 @@ static int cipso_v4_map_cat_enum_ntoh(const struct cipso_v4_doi *doi_def,
for (iter = 0; iter < net_cat_len; iter += 2) {
ret_val = netlbl_secattr_catmap_setbit(secattr->mls_cat,
- ntohs(*((__be16 *)&net_cat[iter])),
- GFP_ATOMIC);
+ ntohs(get_unaligned((__be16 *)&net_cat[iter])),
+ GFP_ATOMIC);
if (ret_val != 0)
return ret_val;
}
@@ -1102,9 +1103,10 @@ static int cipso_v4_map_cat_rng_valid(const struct cipso_v4_doi *doi_def,
return -EFAULT;
for (iter = 0; iter < rngcat_len; iter += 4) {
- cat_high = ntohs(*((__be16 *)&rngcat[iter]));
+ cat_high = ntohs(get_unaligned((__be16 *)&rngcat[iter]));
if ((iter + 4) <= rngcat_len)
- cat_low = ntohs(*((__be16 *)&rngcat[iter + 2]));
+ cat_low = ntohs(
+ get_unaligned((__be16 *)&rngcat[iter + 2]));
else
cat_low = 0;
@@ -1201,9 +1203,10 @@ static int cipso_v4_map_cat_rng_ntoh(const struct cipso_v4_doi *doi_def,
u16 cat_high;
for (net_iter = 0; net_iter < net_cat_len; net_iter += 4) {
- cat_high = ntohs(*((__be16 *)&net_cat[net_iter]));
+ cat_high = ntohs(get_unaligned((__be16 *)&net_cat[net_iter]));
if ((net_iter + 4) <= net_cat_len)
- cat_low = ntohs(*((__be16 *)&net_cat[net_iter + 2]));
+ cat_low = ntohs(
+ get_unaligned((__be16 *)&net_cat[net_iter + 2]));
else
cat_low = 0;
@@ -1565,7 +1568,7 @@ int cipso_v4_validate(unsigned char **option)
}
rcu_read_lock();
- doi_def = cipso_v4_doi_search(ntohl(*((__be32 *)&opt[2])));
+ doi_def = cipso_v4_doi_search(ntohl(get_unaligned((__be32 *)&opt[2])));
if (doi_def == NULL) {
err_offset = 2;
goto validate_return_locked;
@@ -1709,22 +1712,22 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway)
}
/**
- * cipso_v4_socket_setattr - Add a CIPSO option to a socket
- * @sock: the socket
+ * cipso_v4_sock_setattr - Add a CIPSO option to a socket
+ * @sk: the socket
* @doi_def: the CIPSO DOI to use
* @secattr: the specific security attributes of the socket
*
* Description:
* Set the CIPSO option on the given socket using the DOI definition and
* security attributes passed to the function. This function requires
- * exclusive access to @sock->sk, which means it either needs to be in the
- * process of being created or locked via lock_sock(sock->sk). Returns zero on
- * success and negative values on failure.
+ * exclusive access to @sk, which means it either needs to be in the
+ * process of being created or locked. Returns zero on success and negative
+ * values on failure.
*
*/
-int cipso_v4_socket_setattr(const struct socket *sock,
- const struct cipso_v4_doi *doi_def,
- const struct netlbl_lsm_secattr *secattr)
+int cipso_v4_sock_setattr(struct sock *sk,
+ const struct cipso_v4_doi *doi_def,
+ const struct netlbl_lsm_secattr *secattr)
{
int ret_val = -EPERM;
u32 iter;
@@ -1732,7 +1735,6 @@ int cipso_v4_socket_setattr(const struct socket *sock,
u32 buf_len = 0;
u32 opt_len;
struct ip_options *opt = NULL;
- struct sock *sk;
struct inet_sock *sk_inet;
struct inet_connection_sock *sk_conn;
@@ -1740,7 +1742,6 @@ int cipso_v4_socket_setattr(const struct socket *sock,
* defined yet but it is not a problem as the only users of these
* "lite" PF_INET sockets are functions which do an accept() call
* afterwards so we will label the socket as part of the accept(). */
- sk = sock->sk;
if (sk == NULL)
return 0;
@@ -1858,7 +1859,7 @@ int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr)
if (ret_val == 0)
return ret_val;
- doi = ntohl(*(__be32 *)&cipso_ptr[2]);
+ doi = ntohl(get_unaligned((__be32 *)&cipso_ptr[2]));
rcu_read_lock();
doi_def = cipso_v4_doi_search(doi);
if (doi_def == NULL) {
@@ -1892,29 +1893,6 @@ int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr)
}
/**
- * cipso_v4_socket_getattr - Get the security attributes from a socket
- * @sock: the socket
- * @secattr: the security attributes
- *
- * Description:
- * Query @sock to see if there is a CIPSO option attached to the socket and if
- * there is return the CIPSO security attributes in @secattr. Returns zero on
- * success and negative values on failure.
- *
- */
-int cipso_v4_socket_getattr(const struct socket *sock,
- struct netlbl_lsm_secattr *secattr)
-{
- int ret_val;
-
- lock_sock(sock->sk);
- ret_val = cipso_v4_sock_getattr(sock->sk, secattr);
- release_sock(sock->sk);
-
- return ret_val;
-}
-
-/**
* cipso_v4_skbuff_getattr - Get the security attributes from the CIPSO option
* @skb: the packet
* @secattr: the security attributes
@@ -1936,7 +1914,7 @@ int cipso_v4_skbuff_getattr(const struct sk_buff *skb,
if (cipso_v4_cache_check(cipso_ptr, cipso_ptr[1], secattr) == 0)
return 0;
- doi = ntohl(*(__be32 *)&cipso_ptr[2]);
+ doi = ntohl(get_unaligned((__be32 *)&cipso_ptr[2]));
rcu_read_lock();
doi_def = cipso_v4_doi_search(doi);
if (doi_def == NULL)
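
[Editor's note, not part of the patch] The kernel-doc above for cipso_v4_sock_setattr() requires exclusive access to @sk: the socket must either still be in the process of being created or be locked by the caller. A minimal caller sketch under that assumption, using the usual lock_sock()/release_sock() pairing; every name here except cipso_v4_sock_setattr() and the lock helpers is hypothetical:

	#include <net/sock.h>
	#include <net/netlabel.h>
	#include <net/cipso_ipv4.h>

	/* Hypothetical wrapper: label an already-created socket while holding
	 * the socket lock, which satisfies the exclusive-access requirement. */
	static int example_cipso_label_sock(struct sock *sk,
					    const struct cipso_v4_doi *doi_def,
					    const struct netlbl_lsm_secattr *secattr)
	{
		int ret;

		lock_sock(sk);
		ret = cipso_v4_sock_setattr(sk, doi_def, secattr);
		release_sock(sk);

		return ret;
	}
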
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index dd02a45d0f67..0301dd468cf4 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -50,8 +50,12 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
RT_CONN_FLAGS(sk), oif,
sk->sk_protocol,
inet->sport, usin->sin_port, sk, 1);
- if (err)
+ if (err) {
+ if (err == -ENETUNREACH)
+ IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
return err;
+ }
+
if ((rt->rt_flags & RTCF_BROADCAST) && !sock_flag(sk, SOCK_BROADCAST)) {
ip_rt_put(rt);
return -EACCES;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 7f95e6e9beeb..abf6352f990f 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -64,21 +64,27 @@
#include <net/rtnetlink.h>
struct ipv4_devconf ipv4_devconf = {
- .accept_redirects = 1,
- .send_redirects = 1,
- .secure_redirects = 1,
- .shared_media = 1,
+ .data = {
+ [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
+ [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
+ [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
+ [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
+ },
};
static struct ipv4_devconf ipv4_devconf_dflt = {
- .accept_redirects = 1,
- .send_redirects = 1,
- .secure_redirects = 1,
- .shared_media = 1,
- .accept_source_route = 1,
+ .data = {
+ [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
+ [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
+ [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
+ [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
+ [NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
+ },
};
-static struct nla_policy ifa_ipv4_policy[IFA_MAX+1] __read_mostly = {
+#define IPV4_DEVCONF_DFLT(attr) IPV4_DEVCONF(ipv4_devconf_dflt, attr)
+
+static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
[IFA_LOCAL] = { .type = NLA_U32 },
[IFA_ADDRESS] = { .type = NLA_U32 },
[IFA_BROADCAST] = { .type = NLA_U32 },
@@ -141,7 +147,7 @@ void in_dev_finish_destroy(struct in_device *idev)
}
}
-struct in_device *inetdev_init(struct net_device *dev)
+static struct in_device *inetdev_init(struct net_device *dev)
{
struct in_device *in_dev;
@@ -321,12 +327,8 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
}
}
- if (destroy) {
+ if (destroy)
inet_free_ifa(ifa1);
-
- if (!in_dev->ifa_list)
- inetdev_destroy(in_dev);
- }
}
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
@@ -399,12 +401,10 @@ static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
ASSERT_RTNL();
if (!in_dev) {
- in_dev = inetdev_init(dev);
- if (!in_dev) {
- inet_free_ifa(ifa);
- return -ENOBUFS;
- }
+ inet_free_ifa(ifa);
+ return -ENOBUFS;
}
+ ipv4_devconf_setall(in_dev);
if (ifa->ifa_dev != in_dev) {
BUG_TRAP(!ifa->ifa_dev);
in_dev_hold(in_dev);
@@ -514,13 +514,12 @@ static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh)
in_dev = __in_dev_get_rtnl(dev);
if (in_dev == NULL) {
- in_dev = inetdev_init(dev);
- if (in_dev == NULL) {
- err = -ENOBUFS;
- goto errout;
- }
+ err = -ENOBUFS;
+ goto errout;
}
+ ipv4_devconf_setall(in_dev);
+
ifa = inet_alloc_ifa();
if (ifa == NULL) {
/*
@@ -1057,11 +1056,12 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
if (!in_dev) {
if (event == NETDEV_REGISTER) {
in_dev = inetdev_init(dev);
- if (!in_dev)
- panic("devinet: Failed to create loopback\n");
if (dev == &loopback_dev) {
- in_dev->cnf.no_xfrm = 1;
- in_dev->cnf.no_policy = 1;
+ if (!in_dev)
+ panic("devinet: "
+ "Failed to create loopback\n");
+ IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
+ IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
}
}
goto out;
@@ -1237,13 +1237,98 @@ errout:
#ifdef CONFIG_SYSCTL
+static void devinet_copy_dflt_conf(int i)
+{
+ struct net_device *dev;
+
+ read_lock(&dev_base_lock);
+ for_each_netdev(dev) {
+ struct in_device *in_dev;
+ rcu_read_lock();
+ in_dev = __in_dev_get_rcu(dev);
+ if (in_dev && !test_bit(i, in_dev->cnf.state))
+ in_dev->cnf.data[i] = ipv4_devconf_dflt.data[i];
+ rcu_read_unlock();
+ }
+ read_unlock(&dev_base_lock);
+}
+
+static int devinet_conf_proc(ctl_table *ctl, int write,
+ struct file* filp, void __user *buffer,
+ size_t *lenp, loff_t *ppos)
+{
+ int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+
+ if (write) {
+ struct ipv4_devconf *cnf = ctl->extra1;
+ int i = (int *)ctl->data - cnf->data;
+
+ set_bit(i, cnf->state);
+
+ if (cnf == &ipv4_devconf_dflt)
+ devinet_copy_dflt_conf(i);
+ }
+
+ return ret;
+}
+
+static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen,
+ void __user *oldval, size_t __user *oldlenp,
+ void __user *newval, size_t newlen)
+{
+ struct ipv4_devconf *cnf;
+ int *valp = table->data;
+ int new;
+ int i;
+
+ if (!newval || !newlen)
+ return 0;
+
+ if (newlen != sizeof(int))
+ return -EINVAL;
+
+ if (get_user(new, (int __user *)newval))
+ return -EFAULT;
+
+ if (new == *valp)
+ return 0;
+
+ if (oldval && oldlenp) {
+ size_t len;
+
+ if (get_user(len, oldlenp))
+ return -EFAULT;
+
+ if (len) {
+ if (len > table->maxlen)
+ len = table->maxlen;
+ if (copy_to_user(oldval, valp, len))
+ return -EFAULT;
+ if (put_user(len, oldlenp))
+ return -EFAULT;
+ }
+ }
+
+ *valp = new;
+
+ cnf = table->extra1;
+ i = (int *)table->data - cnf->data;
+
+ set_bit(i, cnf->state);
+
+ if (cnf == &ipv4_devconf_dflt)
+ devinet_copy_dflt_conf(i);
+
+ return 1;
+}
+
void inet_forward_change(void)
{
struct net_device *dev;
- int on = ipv4_devconf.forwarding;
+ int on = IPV4_DEVCONF_ALL(FORWARDING);
- ipv4_devconf.accept_redirects = !on;
- ipv4_devconf_dflt.forwarding = on;
+ IPV4_DEVCONF_ALL(ACCEPT_REDIRECTS) = !on;
+ IPV4_DEVCONF_DFLT(FORWARDING) = on;
read_lock(&dev_base_lock);
for_each_netdev(dev) {
@@ -1251,7 +1336,7 @@ void inet_forward_change(void)
rcu_read_lock();
in_dev = __in_dev_get_rcu(dev);
if (in_dev)
- in_dev->cnf.forwarding = on;
+ IN_DEV_CONF_SET(in_dev, FORWARDING, on);
rcu_read_unlock();
}
read_unlock(&dev_base_lock);
@@ -1268,9 +1353,9 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write,
int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
if (write && *valp != val) {
- if (valp == &ipv4_devconf.forwarding)
+ if (valp == &IPV4_DEVCONF_ALL(FORWARDING))
inet_forward_change();
- else if (valp != &ipv4_devconf_dflt.forwarding)
+ else if (valp != &IPV4_DEVCONF_DFLT(FORWARDING))
rt_cache_flush(0);
}
@@ -1295,42 +1380,43 @@ int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
void __user *oldval, size_t __user *oldlenp,
void __user *newval, size_t newlen)
{
- int *valp = table->data;
- int new;
+ int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp,
+ newval, newlen);
- if (!newval || !newlen)
- return 0;
+ if (ret == 1)
+ rt_cache_flush(0);
- if (newlen != sizeof(int))
- return -EINVAL;
+ return ret;
+}
- if (get_user(new, (int __user *)newval))
- return -EFAULT;
- if (new == *valp)
- return 0;
+#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc, sysctl) \
+ { \
+ .ctl_name = NET_IPV4_CONF_ ## attr, \
+ .procname = name, \
+ .data = ipv4_devconf.data + \
+ NET_IPV4_CONF_ ## attr - 1, \
+ .maxlen = sizeof(int), \
+ .mode = mval, \
+ .proc_handler = proc, \
+ .strategy = sysctl, \
+ .extra1 = &ipv4_devconf, \
+ }
- if (oldval && oldlenp) {
- size_t len;
+#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
+ DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc, \
+ devinet_conf_sysctl)
- if (get_user(len, oldlenp))
- return -EFAULT;
+#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
+ DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc, \
+ devinet_conf_sysctl)
- if (len) {
- if (len > table->maxlen)
- len = table->maxlen;
- if (copy_to_user(oldval, valp, len))
- return -EFAULT;
- if (put_user(len, oldlenp))
- return -EFAULT;
- }
- }
-
- *valp = new;
- rt_cache_flush(0);
- return 1;
-}
+#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc, sysctl) \
+ DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc, sysctl)
+#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
+ DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush, \
+ ipv4_doint_and_flush_strategy)
static struct devinet_sysctl_table {
struct ctl_table_header *sysctl_header;
@@ -1341,178 +1427,34 @@ static struct devinet_sysctl_table {
ctl_table devinet_root_dir[2];
} devinet_sysctl = {
.devinet_vars = {
- {
- .ctl_name = NET_IPV4_CONF_FORWARDING,
- .procname = "forwarding",
- .data = &ipv4_devconf.forwarding,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &devinet_sysctl_forward,
- },
- {
- .ctl_name = NET_IPV4_CONF_MC_FORWARDING,
- .procname = "mc_forwarding",
- .data = &ipv4_devconf.mc_forwarding,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = NET_IPV4_CONF_ACCEPT_REDIRECTS,
- .procname = "accept_redirects",
- .data = &ipv4_devconf.accept_redirects,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = NET_IPV4_CONF_SECURE_REDIRECTS,
- .procname = "secure_redirects",
- .data = &ipv4_devconf.secure_redirects,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = NET_IPV4_CONF_SHARED_MEDIA,
- .procname = "shared_media",
- .data = &ipv4_devconf.shared_media,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = NET_IPV4_CONF_RP_FILTER,
- .procname = "rp_filter",
- .data = &ipv4_devconf.rp_filter,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = NET_IPV4_CONF_SEND_REDIRECTS,
- .procname = "send_redirects",
- .data = &ipv4_devconf.send_redirects,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE,
- .procname = "accept_source_route",
- .data = &ipv4_devconf.accept_source_route,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = NET_IPV4_CONF_PROXY_ARP,
- .procname = "proxy_arp",
- .data = &ipv4_devconf.proxy_arp,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = NET_IPV4_CONF_MEDIUM_ID,
- .procname = "medium_id",
- .data = &ipv4_devconf.medium_id,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = NET_IPV4_CONF_BOOTP_RELAY,
- .procname = "bootp_relay",
- .data = &ipv4_devconf.bootp_relay,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = NET_IPV4_CONF_LOG_MARTIANS,
- .procname = "log_martians",
- .data = &ipv4_devconf.log_martians,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = NET_IPV4_CONF_TAG,
- .procname = "tag",
- .data = &ipv4_devconf.tag,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = NET_IPV4_CONF_ARPFILTER,
- .procname = "arp_filter",
- .data = &ipv4_devconf.arp_filter,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = NET_IPV4_CONF_ARP_ANNOUNCE,
- .procname = "arp_announce",
- .data = &ipv4_devconf.arp_announce,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = NET_IPV4_CONF_ARP_IGNORE,
- .procname = "arp_ignore",
- .data = &ipv4_devconf.arp_ignore,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = NET_IPV4_CONF_ARP_ACCEPT,
- .procname = "arp_accept",
- .data = &ipv4_devconf.arp_accept,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = NET_IPV4_CONF_NOXFRM,
- .procname = "disable_xfrm",
- .data = &ipv4_devconf.no_xfrm,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &ipv4_doint_and_flush,
- .strategy = &ipv4_doint_and_flush_strategy,
- },
- {
- .ctl_name = NET_IPV4_CONF_NOPOLICY,
- .procname = "disable_policy",
- .data = &ipv4_devconf.no_policy,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &ipv4_doint_and_flush,
- .strategy = &ipv4_doint_and_flush_strategy,
- },
- {
- .ctl_name = NET_IPV4_CONF_FORCE_IGMP_VERSION,
- .procname = "force_igmp_version",
- .data = &ipv4_devconf.force_igmp_version,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &ipv4_doint_and_flush,
- .strategy = &ipv4_doint_and_flush_strategy,
- },
- {
- .ctl_name = NET_IPV4_CONF_PROMOTE_SECONDARIES,
- .procname = "promote_secondaries",
- .data = &ipv4_devconf.promote_secondaries,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &ipv4_doint_and_flush,
- .strategy = &ipv4_doint_and_flush_strategy,
- },
+ DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
+ devinet_sysctl_forward,
+ devinet_conf_sysctl),
+ DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
+
+ DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
+ DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
+ DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
+ DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
+ DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
+ DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
+ "accept_source_route"),
+ DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
+ DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
+ DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
+ DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
+ DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
+ DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
+ DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
+ DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
+ DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
+
+ DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
+ DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
+ DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
+ "force_igmp_version"),
+ DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
+ "promote_secondaries"),
},
.devinet_dev = {
{
@@ -1561,6 +1503,7 @@ static void devinet_sysctl_register(struct in_device *in_dev,
return;
for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
+ t->devinet_vars[i].extra1 = p;
}
if (dev) {
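
[Editor's note, not part of the patch] The devinet.c half of this change (together with inetdevice.h changes not shown here) moves the per-device options from named struct fields into an int array indexed by the NET_IPV4_CONF_* number minus one, plus a bitmap recording which entries were explicitly set; devinet_copy_dflt_conf() above only propagates a new "default" value to devices whose bit is still clear. A minimal user-space sketch of that pattern, with made-up ex_*/EX_* names rather than the kernel macros:

	#include <stdio.h>
	#include <string.h>

	enum { EX_CONF_FORWARDING = 1, EX_CONF_ACCEPT_REDIRECTS, EX_CONF_MAX };

	struct ex_devconf {
		int data[EX_CONF_MAX - 1];
		unsigned long state;	/* one bit per entry: explicitly set? */
	};

	#define EX_DEVCONF(cnf, attr)	((cnf).data[EX_CONF_##attr - 1])
	#define EX_DEVCONF_SET(cnf, attr, val)				\
		do {							\
			EX_DEVCONF(cnf, attr) = (val);			\
			(cnf).state |= 1UL << (EX_CONF_##attr - 1);	\
		} while (0)

	int main(void)
	{
		struct ex_devconf conf;

		memset(&conf, 0, sizeof(conf));
		EX_DEVCONF_SET(conf, ACCEPT_REDIRECTS, 1);

		/* forwarding was never set, so a "default" copy would still apply */
		printf("accept_redirects=%d forwarding=%d\n",
		       EX_DEVCONF(conf, ACCEPT_REDIRECTS),
		       EX_DEVCONF(conf, FORWARDING));
		return 0;
	}
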
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 837f2957fa83..311d633f7f39 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -250,8 +250,6 @@ e_inval:
return -EINVAL;
}
-#ifndef CONFIG_IP_NOSIOCRT
-
static inline __be32 sk_extract_addr(struct sockaddr *addr)
{
return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
@@ -443,16 +441,7 @@ int ip_rt_ioctl(unsigned int cmd, void __user *arg)
return -EINVAL;
}
-#else
-
-int ip_rt_ioctl(unsigned int cmd, void *arg)
-{
- return -EINVAL;
-}
-
-#endif
-
-struct nla_policy rtm_ipv4_policy[RTA_MAX+1] __read_mostly = {
+const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
[RTA_DST] = { .type = NLA_U32 },
[RTA_SRC] = { .type = NLA_U32 },
[RTA_IIF] = { .type = NLA_U32 },
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 9cfecf1215c9..07e843a47dde 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -456,6 +456,8 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg)
fib_release_info(fi_drop);
if (state & FA_S_ACCESSED)
rt_cache_flush(-1);
+ rtmsg_fib(RTM_NEWROUTE, key, fa, cfg->fc_dst_len, tb->tb_id,
+ &cfg->fc_nlinfo, NLM_F_REPLACE);
return 0;
}
@@ -523,7 +525,7 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg)
rt_cache_flush(-1);
rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len, tb->tb_id,
- &cfg->fc_nlinfo);
+ &cfg->fc_nlinfo, 0);
return 0;
out_free_new_fa:
@@ -589,7 +591,7 @@ static int fn_hash_delete(struct fib_table *tb, struct fib_config *cfg)
fa = fa_to_delete;
rtmsg_fib(RTM_DELROUTE, key, fa, cfg->fc_dst_len,
- tb->tb_id, &cfg->fc_nlinfo);
+ tb->tb_id, &cfg->fc_nlinfo, 0);
kill_fn = 0;
write_lock_bh(&fib_hash_lock);
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index 0e8b70bad4e1..eef9eec17e0c 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -30,7 +30,8 @@ extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
int dst_len, u8 tos, struct fib_info *fi,
unsigned int);
extern void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
- int dst_len, u32 tb_id, struct nl_info *info);
+ int dst_len, u32 tb_id, struct nl_info *info,
+ unsigned int nlm_flags);
extern struct fib_alias *fib_find_alias(struct list_head *fah,
u8 tos, u32 prio);
extern int fib_detect_death(struct fib_info *fi, int order,
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 33083ad52e9f..2a947840210e 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -169,7 +169,7 @@ static struct fib_table *fib_empty_table(void)
return NULL;
}
-static struct nla_policy fib4_rule_policy[FRA_MAX+1] __read_mostly = {
+static const struct nla_policy fib4_rule_policy[FRA_MAX+1] = {
FRA_GENERIC_POLICY,
[FRA_FLOW] = { .type = NLA_U32 },
};
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 406ea7050aed..bb94550d95c3 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -301,7 +301,8 @@ static inline size_t fib_nlmsg_size(struct fib_info *fi)
}
void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
- int dst_len, u32 tb_id, struct nl_info *info)
+ int dst_len, u32 tb_id, struct nl_info *info,
+ unsigned int nlm_flags)
{
struct sk_buff *skb;
u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
@@ -313,7 +314,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
err = fib_dump_info(skb, info->pid, seq, event, tb_id,
fa->fa_type, fa->fa_scope, key, dst_len,
- fa->fa_tos, fa->fa_info, 0);
+ fa->fa_tos, fa->fa_info, nlm_flags);
if (err < 0) {
/* -EMSGSIZE implies BUG in fib_nlmsg_size() */
WARN_ON(err == -EMSGSIZE);
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 9be7da7c3a8f..30e332ade61b 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1226,6 +1226,8 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
fib_release_info(fi_drop);
if (state & FA_S_ACCESSED)
rt_cache_flush(-1);
+ rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
+ tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE);
goto succeeded;
}
@@ -1278,7 +1280,7 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
rt_cache_flush(-1);
rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id,
- &cfg->fc_nlinfo);
+ &cfg->fc_nlinfo, 0);
succeeded:
return 0;
@@ -1624,7 +1626,7 @@ static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg)
fa = fa_to_delete;
rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id,
- &cfg->fc_nlinfo);
+ &cfg->fc_nlinfo, 0);
l = fib_find_node(t, key);
li = find_leaf_info(l, plen);
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index d38cbba92a4d..02a899bec196 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -514,9 +514,15 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
saddr = iph->daddr;
if (!(rt->rt_flags & RTCF_LOCAL)) {
- if (sysctl_icmp_errors_use_inbound_ifaddr)
- saddr = inet_select_addr(skb_in->dev, 0, RT_SCOPE_LINK);
- else
+ struct net_device *dev = NULL;
+
+ if (rt->fl.iif && sysctl_icmp_errors_use_inbound_ifaddr)
+ dev = dev_get_by_index(rt->fl.iif);
+
+ if (dev) {
+ saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK);
+ dev_put(dev);
+ } else
saddr = 0;
}
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index f4dd47453108..a646409c2d06 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -128,14 +128,16 @@
* contradict to specs provided this delay is small enough.
*/
-#define IGMP_V1_SEEN(in_dev) (ipv4_devconf.force_igmp_version == 1 || \
- (in_dev)->cnf.force_igmp_version == 1 || \
- ((in_dev)->mr_v1_seen && \
- time_before(jiffies, (in_dev)->mr_v1_seen)))
-#define IGMP_V2_SEEN(in_dev) (ipv4_devconf.force_igmp_version == 2 || \
- (in_dev)->cnf.force_igmp_version == 2 || \
- ((in_dev)->mr_v2_seen && \
- time_before(jiffies, (in_dev)->mr_v2_seen)))
+#define IGMP_V1_SEEN(in_dev) \
+ (IPV4_DEVCONF_ALL(FORCE_IGMP_VERSION) == 1 || \
+ IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 1 || \
+ ((in_dev)->mr_v1_seen && \
+ time_before(jiffies, (in_dev)->mr_v1_seen)))
+#define IGMP_V2_SEEN(in_dev) \
+ (IPV4_DEVCONF_ALL(FORCE_IGMP_VERSION) == 2 || \
+ IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 2 || \
+ ((in_dev)->mr_v2_seen && \
+ time_before(jiffies, (in_dev)->mr_v2_seen)))
static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im);
static void igmpv3_del_delrec(struct in_device *in_dev, __be32 multiaddr);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 43fb1600f1f0..fbe7714f21d0 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -31,10 +31,8 @@ EXPORT_SYMBOL(inet_csk_timer_bug_msg);
/*
* This array holds the first and last local port number.
- * For high-usage systems, use sysctl to change this to
- * 32768-61000
*/
-int sysctl_local_port_range[2] = { 1024, 4999 };
+int sysctl_local_port_range[2] = { 32768, 61000 };
int inet_csk_bind_conflict(const struct sock *sk,
const struct inet_bind_bucket *tb)
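
[Editor's note, not part of the patch] The compiled-in default local port range becomes 32768-61000 here; previously it was 1024-4999 and was only widened or narrowed at boot by tcp_init() depending on memory size, an adjustment the tcp.c hunk further down removes. The range remains tunable at runtime through procfs, in the same style as the rp_filter example in the Kconfig help above, e.g.:

	cat /proc/sys/net/ipv4/ip_local_port_range
	echo "32768 61000" > /proc/sys/net/ipv4/ip_local_port_range
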
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index d6427d918512..34ea4547ebbe 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1352,7 +1352,8 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
}
{
- struct flowi fl = { .nl_u = { .ip4_u =
+ struct flowi fl = { .oif = arg->bound_dev_if,
+ .nl_u = { .ip4_u =
{ .daddr = daddr,
.saddr = rt->rt_spec_dst,
.tos = RT_TOS(ip_hdr(skb)->tos) } },
@@ -1376,6 +1377,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
inet->tos = ip_hdr(skb)->tos;
sk->sk_priority = skb->priority;
sk->sk_protocol = ip_hdr(skb)->protocol;
+ sk->sk_bound_dev_if = arg->bound_dev_if;
ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0,
&ipc, rt, MSG_DONTWAIT);
if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 0ebae413ae87..d96582acdf69 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -152,9 +152,11 @@ struct net_device *ipmr_new_tunnel(struct vifctl *v)
dev->flags |= IFF_MULTICAST;
in_dev = __in_dev_get_rtnl(dev);
- if (in_dev == NULL && (in_dev = inetdev_init(dev)) == NULL)
+ if (in_dev == NULL)
goto failure;
- in_dev->cnf.rp_filter = 0;
+
+ ipv4_devconf_setall(in_dev);
+ IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
if (dev_open(dev))
goto failure;
@@ -218,10 +220,15 @@ static struct net_device *ipmr_reg_vif(void)
}
dev->iflink = 0;
- if ((in_dev = inetdev_init(dev)) == NULL)
+ rcu_read_lock();
+ if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
+ rcu_read_unlock();
goto failure;
+ }
- in_dev->cnf.rp_filter = 0;
+ ipv4_devconf_setall(in_dev);
+ IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
+ rcu_read_unlock();
if (dev_open(dev))
goto failure;
@@ -281,7 +288,7 @@ static int vif_delete(int vifi)
dev_set_allmulti(dev, -1);
if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
- in_dev->cnf.mc_forwarding--;
+ IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
ip_rt_multicast_event(in_dev);
}
@@ -426,7 +433,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
return -EADDRNOTAVAIL;
- in_dev->cnf.mc_forwarding++;
+ IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
dev_set_allmulti(dev, +1);
ip_rt_multicast_event(in_dev);
@@ -841,7 +848,7 @@ static void mrtsock_destruct(struct sock *sk)
{
rtnl_lock();
if (sk == mroute_socket) {
- ipv4_devconf.mc_forwarding--;
+ IPV4_DEVCONF_ALL(MC_FORWARDING)--;
write_lock_bh(&mrt_lock);
mroute_socket=NULL;
@@ -890,7 +897,7 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int opt
mroute_socket=sk;
write_unlock_bh(&mrt_lock);
- ipv4_devconf.mc_forwarding++;
+ IPV4_DEVCONF_ALL(MC_FORWARDING)++;
}
rtnl_unlock();
return ret;
diff --git a/net/ipv4/ipvs/Kconfig b/net/ipv4/ipvs/Kconfig
index 891b9355cf96..09d0c3f35669 100644
--- a/net/ipv4/ipvs/Kconfig
+++ b/net/ipv4/ipvs/Kconfig
@@ -1,10 +1,7 @@
#
# IP Virtual Server configuration
#
-menu "IP: Virtual Server Configuration"
- depends on NETFILTER
-
-config IP_VS
+menuconfig IP_VS
tristate "IP virtual server support (EXPERIMENTAL)"
depends on NETFILTER
---help---
@@ -25,9 +22,10 @@ config IP_VS
If you want to compile it in kernel, say Y. To compile it as a
module, choose M here. If unsure, say N.
+if IP_VS
+
config IP_VS_DEBUG
bool "IP virtual server debugging"
- depends on IP_VS
---help---
Say Y here if you want to get additional messages useful in
debugging the IP virtual server code. You can change the debug
@@ -35,7 +33,6 @@ config IP_VS_DEBUG
config IP_VS_TAB_BITS
int "IPVS connection table size (the Nth power of 2)"
- depends on IP_VS
default "12"
---help---
The IPVS connection hash table uses the chaining scheme to handle
@@ -61,42 +58,35 @@ config IP_VS_TAB_BITS
needed for your box.
comment "IPVS transport protocol load balancing support"
- depends on IP_VS
config IP_VS_PROTO_TCP
bool "TCP load balancing support"
- depends on IP_VS
---help---
This option enables support for load balancing TCP transport
protocol. Say Y if unsure.
config IP_VS_PROTO_UDP
bool "UDP load balancing support"
- depends on IP_VS
---help---
This option enables support for load balancing UDP transport
protocol. Say Y if unsure.
config IP_VS_PROTO_ESP
bool "ESP load balancing support"
- depends on IP_VS
---help---
This option enables support for load balancing ESP (Encapsulation
Security Payload) transport protocol. Say Y if unsure.
config IP_VS_PROTO_AH
bool "AH load balancing support"
- depends on IP_VS
---help---
This option enables support for load balancing AH (Authentication
Header) transport protocol. Say Y if unsure.
comment "IPVS scheduler"
- depends on IP_VS
config IP_VS_RR
tristate "round-robin scheduling"
- depends on IP_VS
---help---
The robin-robin scheduling algorithm simply directs network
connections to different real servers in a round-robin manner.
@@ -106,7 +96,6 @@ config IP_VS_RR
config IP_VS_WRR
tristate "weighted round-robin scheduling"
- depends on IP_VS
---help---
The weighted robin-robin scheduling algorithm directs network
connections to different real servers based on server weights
@@ -120,7 +109,6 @@ config IP_VS_WRR
config IP_VS_LC
tristate "least-connection scheduling"
- depends on IP_VS
---help---
The least-connection scheduling algorithm directs network
connections to the server with the least number of active
@@ -131,7 +119,6 @@ config IP_VS_LC
config IP_VS_WLC
tristate "weighted least-connection scheduling"
- depends on IP_VS
---help---
The weighted least-connection scheduling algorithm directs network
connections to the server with the least active connections
@@ -142,7 +129,6 @@ config IP_VS_WLC
config IP_VS_LBLC
tristate "locality-based least-connection scheduling"
- depends on IP_VS
---help---
The locality-based least-connection scheduling algorithm is for
destination IP load balancing. It is usually used in cache cluster.
@@ -157,7 +143,6 @@ config IP_VS_LBLC
config IP_VS_LBLCR
tristate "locality-based least-connection with replication scheduling"
- depends on IP_VS
---help---
The locality-based least-connection with replication scheduling
algorithm is also for destination IP load balancing. It is
@@ -176,7 +161,6 @@ config IP_VS_LBLCR
config IP_VS_DH
tristate "destination hashing scheduling"
- depends on IP_VS
---help---
The destination hashing scheduling algorithm assigns network
connections to the servers through looking up a statically assigned
@@ -187,7 +171,6 @@ config IP_VS_DH
config IP_VS_SH
tristate "source hashing scheduling"
- depends on IP_VS
---help---
The source hashing scheduling algorithm assigns network
connections to the servers through looking up a statically assigned
@@ -198,7 +181,6 @@ config IP_VS_SH
config IP_VS_SED
tristate "shortest expected delay scheduling"
- depends on IP_VS
---help---
The shortest expected delay scheduling algorithm assigns network
connections to the server with the shortest expected delay. The
@@ -212,7 +194,6 @@ config IP_VS_SED
config IP_VS_NQ
tristate "never queue scheduling"
- depends on IP_VS
---help---
The never queue scheduling algorithm adopts a two-speed model.
When there is an idle server available, the job will be sent to
@@ -225,11 +206,10 @@ config IP_VS_NQ
module, choose M here. If unsure, say N.
comment 'IPVS application helper'
- depends on IP_VS
config IP_VS_FTP
tristate "FTP protocol helper"
- depends on IP_VS && IP_VS_PROTO_TCP
+ depends on IP_VS_PROTO_TCP
---help---
FTP is a protocol that transfers IP address and/or port number in
the payload. In the virtual server via Network Address Translation,
@@ -241,4 +221,4 @@ config IP_VS_FTP
If you want to compile it in kernel, say Y. To compile it as a
module, choose M here. If unsure, say N.
-endmenu
+endif # IP_VS
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index e3f83bf160d9..9bacf1a03630 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -499,7 +499,8 @@ check_entry(struct ipt_entry *e, const char *name)
}
static inline int check_match(struct ipt_entry_match *m, const char *name,
- const struct ipt_ip *ip, unsigned int hookmask)
+ const struct ipt_ip *ip, unsigned int hookmask,
+ unsigned int *i)
{
struct xt_match *match;
int ret;
@@ -515,6 +516,8 @@ static inline int check_match(struct ipt_entry_match *m, const char *name,
m->u.kernel.match->name);
ret = -EINVAL;
}
+ if (!ret)
+ (*i)++;
return ret;
}
@@ -537,11 +540,10 @@ find_check_match(struct ipt_entry_match *m,
}
m->u.kernel.match = match;
- ret = check_match(m, name, ip, hookmask);
+ ret = check_match(m, name, ip, hookmask, i);
if (ret)
goto err;
- (*i)++;
return 0;
err:
module_put(m->u.kernel.match->me);
@@ -1425,7 +1427,7 @@ out:
}
static inline int
-compat_check_calc_match(struct ipt_entry_match *m,
+compat_find_calc_match(struct ipt_entry_match *m,
const char *name,
const struct ipt_ip *ip,
unsigned int hookmask,
@@ -1449,6 +1451,31 @@ compat_check_calc_match(struct ipt_entry_match *m,
}
static inline int
+compat_release_match(struct ipt_entry_match *m, unsigned int *i)
+{
+ if (i && (*i)-- == 0)
+ return 1;
+
+ module_put(m->u.kernel.match->me);
+ return 0;
+}
+
+static inline int
+compat_release_entry(struct ipt_entry *e, unsigned int *i)
+{
+ struct ipt_entry_target *t;
+
+ if (i && (*i)-- == 0)
+ return 1;
+
+ /* Cleanup all matches */
+ IPT_MATCH_ITERATE(e, compat_release_match, NULL);
+ t = ipt_get_target(e);
+ module_put(t->u.kernel.target->me);
+ return 0;
+}
+
+static inline int
check_compat_entry_size_and_hooks(struct ipt_entry *e,
struct xt_table_info *newinfo,
unsigned int *size,
@@ -1485,10 +1512,10 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e,
off = 0;
entry_offset = (void *)e - (void *)base;
j = 0;
- ret = IPT_MATCH_ITERATE(e, compat_check_calc_match, name, &e->ip,
+ ret = IPT_MATCH_ITERATE(e, compat_find_calc_match, name, &e->ip,
e->comefrom, &off, &j);
if (ret != 0)
- goto cleanup_matches;
+ goto release_matches;
t = ipt_get_target(e);
target = try_then_request_module(xt_find_target(AF_INET,
@@ -1499,7 +1526,7 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e,
duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
t->u.user.name);
ret = target ? PTR_ERR(target) : -ENOENT;
- goto cleanup_matches;
+ goto release_matches;
}
t->u.kernel.target = target;
@@ -1526,8 +1553,8 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e,
out:
module_put(t->u.kernel.target->me);
-cleanup_matches:
- IPT_MATCH_ITERATE(e, cleanup_match, &j);
+release_matches:
+ IPT_MATCH_ITERATE(e, compat_release_match, &j);
return ret;
}
@@ -1574,15 +1601,26 @@ static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr,
return ret;
}
-static inline int compat_check_entry(struct ipt_entry *e, const char *name)
+static inline int compat_check_entry(struct ipt_entry *e, const char *name,
+ unsigned int *i)
{
- int ret;
+ int j, ret;
- ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom);
+ j = 0;
+ ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j);
if (ret)
- return ret;
+ goto cleanup_matches;
+
+ ret = check_target(e, name);
+ if (ret)
+ goto cleanup_matches;
- return check_target(e, name);
+ (*i)++;
+ return 0;
+
+ cleanup_matches:
+ IPT_MATCH_ITERATE(e, cleanup_match, &j);
+ return ret;
}
static int
@@ -1673,10 +1711,17 @@ translate_compat_table(const char *name,
if (!mark_source_chains(newinfo, valid_hooks, entry1))
goto free_newinfo;
+ i = 0;
ret = IPT_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry,
- name);
- if (ret)
- goto free_newinfo;
+ name, &i);
+ if (ret) {
+ j -= i;
+ IPT_ENTRY_ITERATE_CONTINUE(entry1, newinfo->size, i,
+ compat_release_entry, &j);
+ IPT_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &i);
+ xt_free_table_info(newinfo);
+ return ret;
+ }
/* And one copy for every other CPU */
for_each_possible_cpu(i)
@@ -1691,7 +1736,7 @@ translate_compat_table(const char *name,
free_newinfo:
xt_free_table_info(newinfo);
out:
- IPT_ENTRY_ITERATE(entry0, total_size, cleanup_entry, &j);
+ IPT_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j);
return ret;
out_unlock:
compat_flush_offsets();
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 0654eaae70c9..6dc72a815f77 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -133,6 +133,7 @@ static unsigned int ipv4_conntrack_help(unsigned int hooknum,
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
struct nf_conn_help *help;
+ struct nf_conntrack_helper *helper;
/* This is where we call the helper: as the packet goes out. */
ct = nf_ct_get(*pskb, &ctinfo);
@@ -140,12 +141,14 @@ static unsigned int ipv4_conntrack_help(unsigned int hooknum,
return NF_ACCEPT;
help = nfct_help(ct);
- if (!help || !help->helper)
+ if (!help)
return NF_ACCEPT;
-
- return help->helper->help(pskb,
- skb_network_offset(*pskb) + ip_hdrlen(*pskb),
- ct, ctinfo);
+ /* rcu_read_lock()ed by nf_hook_slow */
+ helper = rcu_dereference(help->helper);
+ if (!helper)
+ return NF_ACCEPT;
+ return helper->help(pskb, skb_network_offset(*pskb) + ip_hdrlen(*pskb),
+ ct, ctinfo);
}
static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
@@ -154,12 +157,10 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
-#if !defined(CONFIG_IP_NF_NAT) && !defined(CONFIG_IP_NF_NAT_MODULE)
/* Previously seen (loopback)? Ignore. Do this before
fragment check. */
if ((*pskb)->nfct)
return NF_ACCEPT;
-#endif
/* Gather fragments. */
if (ip_hdr(*pskb)->frag_off & htons(IP_MF | IP_OFFSET)) {
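
[Editor's note, not part of the patch] The helper hook above no longer dereferences help->helper directly; it takes an rcu_dereference() snapshot, relying on nf_hook_slow() already holding the RCU read lock (per the comment). A minimal sketch of that read-side pattern with hypothetical ex_* names; unlike the hook, it takes rcu_read_lock() itself because it is not assumed to run under the netfilter hook:

	#include <linux/rcupdate.h>

	struct ex_helper {
		int (*help)(void *pkt);
	};

	struct ex_conn {
		struct ex_helper *helper;	/* published with rcu_assign_pointer() */
	};

	static int ex_run_helper(struct ex_conn *conn, void *pkt)
	{
		struct ex_helper *h;
		int ret = 0;

		rcu_read_lock();
		h = rcu_dereference(conn->helper);	/* stable for this section */
		if (h)
			ret = h->help(pkt);
		rcu_read_unlock();

		return ret;
	}
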
diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/ipv4/netfilter/nf_nat_ftp.c
index 751b59801755..e6bc8e5a72f1 100644
--- a/net/ipv4/netfilter/nf_nat_ftp.c
+++ b/net/ipv4/netfilter/nf_nat_ftp.c
@@ -40,8 +40,7 @@ mangle_rfc959_packet(struct sk_buff **pskb,
unsigned int matchoff,
unsigned int matchlen,
struct nf_conn *ct,
- enum ip_conntrack_info ctinfo,
- u32 *seq)
+ enum ip_conntrack_info ctinfo)
{
char buffer[sizeof("nnn,nnn,nnn,nnn,nnn,nnn")];
@@ -50,7 +49,6 @@ mangle_rfc959_packet(struct sk_buff **pskb,
DEBUGP("calling nf_nat_mangle_tcp_packet\n");
- *seq += strlen(buffer) - matchlen;
return nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff,
matchlen, buffer, strlen(buffer));
}
@@ -63,8 +61,7 @@ mangle_eprt_packet(struct sk_buff **pskb,
unsigned int matchoff,
unsigned int matchlen,
struct nf_conn *ct,
- enum ip_conntrack_info ctinfo,
- u32 *seq)
+ enum ip_conntrack_info ctinfo)
{
char buffer[sizeof("|1|255.255.255.255|65535|")];
@@ -72,7 +69,6 @@ mangle_eprt_packet(struct sk_buff **pskb,
DEBUGP("calling nf_nat_mangle_tcp_packet\n");
- *seq += strlen(buffer) - matchlen;
return nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff,
matchlen, buffer, strlen(buffer));
}
@@ -85,8 +81,7 @@ mangle_epsv_packet(struct sk_buff **pskb,
unsigned int matchoff,
unsigned int matchlen,
struct nf_conn *ct,
- enum ip_conntrack_info ctinfo,
- u32 *seq)
+ enum ip_conntrack_info ctinfo)
{
char buffer[sizeof("|||65535|")];
@@ -94,14 +89,13 @@ mangle_epsv_packet(struct sk_buff **pskb,
DEBUGP("calling nf_nat_mangle_tcp_packet\n");
- *seq += strlen(buffer) - matchlen;
return nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff,
matchlen, buffer, strlen(buffer));
}
static int (*mangle[])(struct sk_buff **, __be32, u_int16_t,
unsigned int, unsigned int, struct nf_conn *,
- enum ip_conntrack_info, u32 *seq)
+ enum ip_conntrack_info)
= {
[NF_CT_FTP_PORT] = mangle_rfc959_packet,
[NF_CT_FTP_PASV] = mangle_rfc959_packet,
@@ -116,8 +110,7 @@ static unsigned int nf_nat_ftp(struct sk_buff **pskb,
enum nf_ct_ftp_type type,
unsigned int matchoff,
unsigned int matchlen,
- struct nf_conntrack_expect *exp,
- u32 *seq)
+ struct nf_conntrack_expect *exp)
{
__be32 newip;
u_int16_t port;
@@ -145,8 +138,7 @@ static unsigned int nf_nat_ftp(struct sk_buff **pskb,
if (port == 0)
return NF_DROP;
- if (!mangle[type](pskb, newip, port, matchoff, matchlen, ct, ctinfo,
- seq)) {
+ if (!mangle[type](pskb, newip, port, matchoff, matchlen, ct, ctinfo)) {
nf_conntrack_unexpect_related(exp);
return NF_DROP;
}
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index fcebc968d37f..c5d2a2d690b8 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -455,9 +455,9 @@ static int nat_q931(struct sk_buff **pskb, struct nf_conn *ct,
if (idx > 0 &&
get_h225_addr(ct, *data, &taddr[0], &addr, &port) &&
(ntohl(addr.ip) & 0xff000000) == 0x7f000000) {
- set_h225_addr_hook(pskb, data, 0, &taddr[0],
- &ct->tuplehash[!dir].tuple.dst.u3,
- info->sig_port[!dir]);
+ set_h225_addr(pskb, data, 0, &taddr[0],
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ info->sig_port[!dir]);
}
} else {
nf_conntrack_unexpect_related(exp);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 37ab5802ca08..3b690cf2a4ee 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -109,6 +109,17 @@ static const struct snmp_mib snmp4_ipstats_list[] = {
SNMP_MIB_SENTINEL
};
+/* Following RFC4293 items are displayed in /proc/net/netstat */
+static const struct snmp_mib snmp4_ipextstats_list[] = {
+ SNMP_MIB_ITEM("InNoRoutes", IPSTATS_MIB_INNOROUTES),
+ SNMP_MIB_ITEM("InTruncatedPkts", IPSTATS_MIB_INTRUNCATEDPKTS),
+ SNMP_MIB_ITEM("InMcastPkts", IPSTATS_MIB_INMCASTPKTS),
+ SNMP_MIB_ITEM("OutMcastPkts", IPSTATS_MIB_OUTMCASTPKTS),
+ SNMP_MIB_ITEM("InBcastPkts", IPSTATS_MIB_INBCASTPKTS),
+ SNMP_MIB_ITEM("OutBcastPkts", IPSTATS_MIB_OUTBCASTPKTS),
+ SNMP_MIB_SENTINEL
+};
+
static const struct snmp_mib snmp4_icmp_list[] = {
SNMP_MIB_ITEM("InMsgs", ICMP_MIB_INMSGS),
SNMP_MIB_ITEM("InErrors", ICMP_MIB_INERRORS),
@@ -249,7 +260,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, " %s", snmp4_ipstats_list[i].name);
seq_printf(seq, "\nIp: %d %d",
- ipv4_devconf.forwarding ? 1 : 2, sysctl_ip_default_ttl);
+ IPV4_DEVCONF_ALL(FORWARDING) ? 1 : 2, sysctl_ip_default_ttl);
for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
seq_printf(seq, " %lu",
@@ -338,6 +349,16 @@ static int netstat_seq_show(struct seq_file *seq, void *v)
snmp_fold_field((void **)net_statistics,
snmp4_net_list[i].entry));
+ seq_puts(seq, "\nIpExt:");
+ for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++)
+ seq_printf(seq, " %s", snmp4_ipextstats_list[i].name);
+
+ seq_puts(seq, "\nIpExt:");
+ for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++)
+ seq_printf(seq, " %lu",
+ snmp_fold_field((void **)ip_statistics,
+ snmp4_ipextstats_list[i].entry));
+
seq_putc(seq, '\n');
return 0;
}
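
[Editor's note, not part of the patch] With the proc.c change above, /proc/net/netstat gains an "IpExt:" name row followed by an "IpExt:" value row alongside the existing TcpExt rows. Illustrative output, with counters shown as zero as they would be before any matching traffic has been counted:

	IpExt: InNoRoutes InTruncatedPkts InMcastPkts OutMcastPkts InBcastPkts OutBcastPkts
	IpExt: 0 0 0 0 0 0
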
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index cb76e3c725a0..29ca63e81ced 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1636,7 +1636,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
atomic_set(&rth->u.dst.__refcnt, 1);
rth->u.dst.flags= DST_HOST;
- if (in_dev->cnf.no_policy)
+ if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
rth->u.dst.flags |= DST_NOPOLICY;
rth->fl.fl4_dst = daddr;
rth->rt_dst = daddr;
@@ -1778,9 +1778,9 @@ static inline int __mkroute_input(struct sk_buff *skb,
if (res->fi->fib_nhs > 1)
rth->u.dst.flags |= DST_BALANCED;
#endif
- if (in_dev->cnf.no_policy)
+ if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
rth->u.dst.flags |= DST_NOPOLICY;
- if (out_dev->cnf.no_xfrm)
+ if (IN_DEV_CONF_GET(out_dev, NOXFRM))
rth->u.dst.flags |= DST_NOXFRM;
rth->fl.fl4_dst = daddr;
rth->rt_dst = daddr;
@@ -2021,7 +2021,7 @@ local_input:
atomic_set(&rth->u.dst.__refcnt, 1);
rth->u.dst.flags= DST_HOST;
- if (in_dev->cnf.no_policy)
+ if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
rth->u.dst.flags |= DST_NOPOLICY;
rth->fl.fl4_dst = daddr;
rth->rt_dst = daddr;
@@ -2218,9 +2218,9 @@ static inline int __mkroute_output(struct rtable **result,
rth->u.dst.flags |= DST_BALANCED;
}
#endif
- if (in_dev->cnf.no_xfrm)
+ if (IN_DEV_CONF_GET(in_dev, NOXFRM))
rth->u.dst.flags |= DST_NOXFRM;
- if (in_dev->cnf.no_policy)
+ if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
rth->u.dst.flags |= DST_NOPOLICY;
rth->fl.fl4_dst = oldflp->fl4_dst;
@@ -2396,7 +2396,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
dev_out = ip_dev_find(oldflp->fl4_src);
- if ((dev_out == NULL) && !(sysctl_ip_nonlocal_bind))
+ if (dev_out == NULL)
goto out;
/* I removed check for oif == dev_out->oif here.
@@ -2407,7 +2407,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
of another iface. --ANK
*/
- if (dev_out && oldflp->oif == 0
+ if (oldflp->oif == 0
&& (MULTICAST(oldflp->fl4_dst) || oldflp->fl4_dst == htonl(0xFFFFFFFF))) {
/* Special hack: user can direct multicasts
and limited broadcast via necessary interface
@@ -2598,6 +2598,69 @@ int __ip_route_output_key(struct rtable **rp, const struct flowi *flp)
EXPORT_SYMBOL_GPL(__ip_route_output_key);
+static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
+{
+}
+
+static struct dst_ops ipv4_dst_blackhole_ops = {
+ .family = AF_INET,
+ .protocol = __constant_htons(ETH_P_IP),
+ .destroy = ipv4_dst_destroy,
+ .check = ipv4_dst_check,
+ .update_pmtu = ipv4_rt_blackhole_update_pmtu,
+ .entry_size = sizeof(struct rtable),
+};
+
+
+static int ipv4_blackhole_output(struct sk_buff *skb)
+{
+ kfree_skb(skb);
+ return 0;
+}
+
+static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp, struct sock *sk)
+{
+ struct rtable *ort = *rp;
+ struct rtable *rt = (struct rtable *)
+ dst_alloc(&ipv4_dst_blackhole_ops);
+
+ if (rt) {
+ struct dst_entry *new = &rt->u.dst;
+
+ atomic_set(&new->__refcnt, 1);
+ new->__use = 1;
+ new->input = ipv4_blackhole_output;
+ new->output = ipv4_blackhole_output;
+ memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
+
+ new->dev = ort->u.dst.dev;
+ if (new->dev)
+ dev_hold(new->dev);
+
+ rt->fl = ort->fl;
+
+ rt->idev = ort->idev;
+ if (rt->idev)
+ in_dev_hold(rt->idev);
+ rt->rt_flags = ort->rt_flags;
+ rt->rt_type = ort->rt_type;
+ rt->rt_dst = ort->rt_dst;
+ rt->rt_src = ort->rt_src;
+ rt->rt_iif = ort->rt_iif;
+ rt->rt_gateway = ort->rt_gateway;
+ rt->rt_spec_dst = ort->rt_spec_dst;
+ rt->peer = ort->peer;
+ if (rt->peer)
+ atomic_inc(&rt->peer->refcnt);
+
+ dst_free(new);
+ }
+
+ dst_release(&(*rp)->u.dst);
+ *rp = rt;
+ return (rt ? 0 : -ENOMEM);
+}
+
int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk, int flags)
{
int err;
@@ -2610,7 +2673,11 @@ int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk,
flp->fl4_src = (*rp)->rt_src;
if (!flp->fl4_dst)
flp->fl4_dst = (*rp)->rt_dst;
- return xfrm_lookup((struct dst_entry **)rp, flp, sk, flags);
+ err = __xfrm_lookup((struct dst_entry **)rp, flp, sk, flags);
+ if (err == -EREMOTE)
+ err = ipv4_dst_blackhole(rp, flp, sk);
+
+ return err;
}
return 0;
@@ -2692,7 +2759,7 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
__be32 dst = rt->rt_dst;
if (MULTICAST(dst) && !LOCAL_MCAST(dst) &&
- ipv4_devconf.mc_forwarding) {
+ IPV4_DEVCONF_ALL(MC_FORWARDING)) {
int err = ipmr_get_route(skb, r, nowait);
if (err <= 0) {
if (!nowait) {
@@ -3139,6 +3206,8 @@ int __init ip_rt_init(void)
kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+ ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;
+
rt_hash_table = (struct rt_hash_bucket *)
alloc_large_system_hash("IP route cache",
sizeof(struct rt_hash_bucket),
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 6817d6485df5..53ef0f4bbdaa 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -37,12 +37,12 @@ static
int ipv4_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
- int val = ipv4_devconf.forwarding;
+ int val = IPV4_DEVCONF_ALL(FORWARDING);
int ret;
ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
- if (write && ipv4_devconf.forwarding != val)
+ if (write && IPV4_DEVCONF_ALL(FORWARDING) != val)
inet_forward_change();
return ret;
@@ -222,7 +222,7 @@ ctl_table ipv4_table[] = {
{
.ctl_name = NET_IPV4_FORWARD,
.procname = "ip_forward",
- .data = &ipv4_devconf.forwarding,
+ .data = &IPV4_DEVCONF_ALL(FORWARDING),
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &ipv4_sysctl_forward,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index bd4c295f5d79..cd3c7e95de9e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1674,9 +1674,8 @@ adjudge_to_death:
}
if (sk->sk_state != TCP_CLOSE) {
sk_stream_mem_reclaim(sk);
- if (atomic_read(sk->sk_prot->orphan_count) > sysctl_tcp_max_orphans ||
- (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
- atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) {
+ if (tcp_too_many_orphans(sk,
+ atomic_read(sk->sk_prot->orphan_count))) {
if (net_ratelimit())
printk(KERN_INFO "TCP: too many of orphaned "
"sockets\n");
@@ -2465,13 +2464,10 @@ void __init tcp_init(void)
order++)
;
if (order >= 4) {
- sysctl_local_port_range[0] = 32768;
- sysctl_local_port_range[1] = 61000;
tcp_death_row.sysctl_max_tw_buckets = 180000;
sysctl_tcp_max_orphans = 4096 << (order - 4);
sysctl_max_syn_backlog = 1024;
} else if (order < 3) {
- sysctl_local_port_range[0] = 1024 * (3 - order);
tcp_death_row.sysctl_max_tw_buckets >>= (3 - order);
sysctl_tcp_max_orphans >>= (3 - order);
sysctl_max_syn_backlog = 128;
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index 281c9f913257..dd9ef65ad3ff 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -29,7 +29,7 @@ static int fast_convergence = 1;
static int max_increment = 16;
static int low_window = 14;
static int beta = 819; /* = 819/1024 (BICTCP_BETA_SCALE) */
-static int initial_ssthresh = 100;
+static int initial_ssthresh;
static int smooth_part = 20;
module_param(fast_convergence, int, 0644);
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 86b26539e54b..1260e52ad772 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -276,30 +276,34 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
/*
- * Slow start (exponential increase) with
- * RFC3742 Limited Slow Start (fast linear increase) support.
+ * Slow start is used when congestion window is less than slow start
+ * threshold. This version implements the basic RFC2581 version
+ * and optionally supports:
+ * RFC3742 Limited Slow Start - growth limited to max_ssthresh
+ * RFC3465 Appropriate Byte Counting - growth limited by bytes acknowledged
*/
void tcp_slow_start(struct tcp_sock *tp)
{
- int cnt = 0;
-
- if (sysctl_tcp_abc) {
- /* RFC3465: Slow Start
- * TCP sender SHOULD increase cwnd by the number of
- * previously unacknowledged bytes ACKed by each incoming
- * acknowledgment, provided the increase is not more than L
- */
- if (tp->bytes_acked < tp->mss_cache)
- return;
- }
+ int cnt; /* increase in packets */
+
+ /* RFC3465: ABC Slow start
+ * Increase only after a full MSS of bytes is acked
+ *
+ * TCP sender SHOULD increase cwnd by the number of
+ * previously unacknowledged bytes ACKed by each incoming
+ * acknowledgment, provided the increase is not more than L
+ */
+ if (sysctl_tcp_abc && tp->bytes_acked < tp->mss_cache)
+ return;
- if (sysctl_tcp_max_ssthresh > 0 &&
- tp->snd_cwnd > sysctl_tcp_max_ssthresh)
- cnt += sysctl_tcp_max_ssthresh>>1;
+ if (sysctl_tcp_max_ssthresh > 0 && tp->snd_cwnd > sysctl_tcp_max_ssthresh)
+ cnt = sysctl_tcp_max_ssthresh >> 1; /* limited slow start */
else
- cnt += tp->snd_cwnd;
+ cnt = tp->snd_cwnd; /* exponential increase */
- /* RFC3465: We MAY increase by 2 if discovered delayed ack */
+ /* RFC3465: ABC
+ * We MAY increase by 2 if discovered delayed ack
+ */
if (sysctl_tcp_abc > 1 && tp->bytes_acked >= 2*tp->mss_cache)
cnt <<= 1;
tp->bytes_acked = 0;
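
The visible part of the hunk ends just after cnt is computed; the tail of tcp_slow_start(), outside the shown context, turns that per-ACK credit into window growth. In kernels of this vintage it reads roughly as follows (a sketch of the surrounding code, which may differ in detail):

	tp->snd_cwnd_cnt += cnt;
	while (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
		tp->snd_cwnd_cnt -= tp->snd_cwnd;
		if (tp->snd_cwnd < tp->snd_cwnd_clamp)
			tp->snd_cwnd++;
	}

With cnt = tp->snd_cwnd the window gains about one segment per ACK, i.e. it doubles per RTT (classic slow start); under the RFC3742 cap, cnt = max_ssthresh/2 limits growth to roughly max_ssthresh/2 segments per RTT once the window is large.
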
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 14224487b16b..ebfaac2f9f46 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -29,7 +29,7 @@
static int fast_convergence __read_mostly = 1;
static int max_increment __read_mostly = 16;
static int beta __read_mostly = 819; /* = 819/1024 (BICTCP_BETA_SCALE) */
-static int initial_ssthresh __read_mostly = 100;
+static int initial_ssthresh __read_mostly;
static int bic_scale __read_mostly = 41;
static int tcp_friendliness __read_mostly = 1;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 7641b2761a14..d6d0f9b6cdc6 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1501,6 +1501,8 @@ void tcp_enter_loss(struct sock *sk, int how)
tcp_set_ca_state(sk, TCP_CA_Loss);
tp->high_seq = tp->snd_nxt;
TCP_ECN_queue_cwr(tp);
+ /* Abort FRTO algorithm if one is in progress */
+ tp->frto_counter = 0;
clear_all_retrans_hints(tp);
}
@@ -2035,7 +2037,7 @@ static void tcp_try_to_open(struct sock *sk, int flag)
{
struct tcp_sock *tp = tcp_sk(sk);
- tp->left_out = tp->sacked_out;
+ tcp_sync_left_out(tp);
if (tp->retrans_out == 0)
tp->retrans_stamp = 0;
@@ -2405,8 +2407,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
struct sk_buff *skb;
__u32 now = tcp_time_stamp;
int acked = 0;
+ int prior_packets = tp->packets_out;
__s32 seq_rtt = -1;
- u32 pkts_acked = 0;
ktime_t last_ackt = ktime_set(0,0);
while ((skb = tcp_write_queue_head(sk)) &&
@@ -2435,7 +2437,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
*/
if (!(scb->flags & TCPCB_FLAG_SYN)) {
acked |= FLAG_DATA_ACKED;
- ++pkts_acked;
} else {
acked |= FLAG_SYN_ACKED;
tp->retrans_stamp = 0;
@@ -2479,6 +2480,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
}
if (acked&FLAG_ACKED) {
+ u32 pkts_acked = prior_packets - tp->packets_out;
const struct tcp_congestion_ops *ca_ops
= inet_csk(sk)->icsk_ca_ops;
@@ -2608,6 +2610,7 @@ static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
{
tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
tp->snd_cwnd_cnt = 0;
+ TCP_ECN_queue_cwr(tp);
tcp_moderate_cwnd(tp);
}
@@ -2929,6 +2932,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
opt_rx->sack_ok) {
TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
}
+ break;
#ifdef CONFIG_TCP_MD5SIG
case TCPOPT_MD5SIG:
/*
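
The pkts_acked accounting in tcp_clean_rtx_queue() changes from incrementing a counter per fully-acked data skb to taking the difference in packets_out across the loop: if packets_out was, say, 12 on entry and 9 after the acked skbs were unlinked, the congestion module is told 3 packets were newly acked via the tcp_congestion_ops pointer fetched just after the new line. Counting this way presumably also credits a TSO skb by its segment count rather than as a single packet, which the per-skb ++pkts_acked could not do.
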
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 5a3e7f839fc5..354721d67f69 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -192,8 +192,11 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
IPPROTO_TCP,
inet->sport, usin->sin_port, sk, 1);
- if (tmp < 0)
+ if (tmp < 0) {
+ if (tmp == -ENETUNREACH)
+ IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
return tmp;
+ }
if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
ip_rt_put(rt);
@@ -702,6 +705,8 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
ip_hdr(skb)->saddr, /* XXX */
arg.iov[0].iov_len, IPPROTO_TCP, 0);
arg.csumoffset = offsetof(struct tcphdr, check) / 2;
+ if (twsk)
+ arg.bound_dev_if = twsk->tw_sk.tw_bound_dev_if;
ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);
@@ -873,6 +878,7 @@ int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
kfree(newkey);
return -ENOMEM;
}
+ sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
}
if (tcp_alloc_md5sig_pool() == NULL) {
kfree(newkey);
@@ -1002,7 +1008,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
return -EINVAL;
tp->md5sig_info = p;
-
+ sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
}
newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
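
Two behavioural notes on this file: the -ENETUNREACH case in tcp_v4_connect() now bumps the OutNoRoutes counter (mirroring the same change made in udp_sendmsg() further down), and sk_route_caps drops all GSO bits as soon as an MD5 key is attached to the socket — presumably because every on-the-wire segment must carry its own TCP-MD5 signature, which later segmentation offload would not recompute.
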
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index 3938d5dbdf20..d9323dfff826 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -63,6 +63,9 @@ struct {
* FIXME: causes an extra copy
*/
static void printl(const char *fmt, ...)
+ __attribute__ ((format (printf, 1, 2)));
+
+static void printl(const char *fmt, ...)
{
va_list args;
int len;
@@ -95,7 +98,7 @@ static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
/* Only update if port matches */
if ((port == 0 || ntohs(inet->dport) == port || ntohs(inet->sport) == port)
&& (full || tp->snd_cwnd != tcpw.lastcwnd)) {
- printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d %#x %#x %u %u %u\n",
+ printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d %#x %#x %u %u %u %u\n",
NIPQUAD(inet->saddr), ntohs(inet->sport),
NIPQUAD(inet->daddr), ntohs(inet->dport),
skb->len, tp->snd_nxt, tp->snd_una,
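
The forward declaration added above only attaches gcc's printf-style format checking to printl(); the definition itself is unchanged. Outside the kernel the same pattern looks like this — a self-contained illustration, not code from the patch:

	#include <stdarg.h>
	#include <stdio.h>

	/* fmt is argument 1, the variadic arguments start at 2 */
	static void printl(const char *fmt, ...)
		__attribute__ ((format (printf, 1, 2)));

	static void printl(const char *fmt, ...)
	{
		va_list args;

		va_start(args, fmt);
		vprintf(fmt, args);
		va_end(args);
	}

	int main(void)
	{
		printl("cwnd=%u ssthresh=%u\n", 10u, 20u);	/* types match the format: fine */
		/* printl("cwnd=%u\n", "ten"); */		/* would now draw a -Wformat warning */
		return 0;
	}

With the attribute in place, gcc can also verify call sites such as the one below, where the format string gains an extra %u.
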
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 2ca97b20929d..e9b151b3a598 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -78,9 +78,7 @@ static int tcp_out_of_resources(struct sock *sk, int do_reset)
if (sk->sk_err_soft)
orphans <<= 1;
- if (orphans >= sysctl_tcp_max_orphans ||
- (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
- atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) {
+ if (tcp_too_many_orphans(sk, orphans)) {
if (net_ratelimit())
printk(KERN_INFO "Out of socket memory\n");
@@ -294,9 +292,9 @@ static void tcp_retransmit_timer(struct sock *sk)
* we cannot allow such beasts to hang infinitely.
*/
#ifdef TCP_DEBUG
- if (net_ratelimit()) {
+ if (1) {
struct inet_sock *inet = inet_sk(sk);
- printk(KERN_DEBUG "TCP: Treason uncloaked! Peer %u.%u.%u.%u:%u/%u shrinks window %u:%u. Repaired.\n",
+ LIMIT_NETDEBUG(KERN_DEBUG "TCP: Treason uncloaked! Peer %u.%u.%u.%u:%u/%u shrinks window %u:%u. Repaired.\n",
NIPQUAD(inet->daddr), ntohs(inet->dport),
inet->num, tp->snd_una, tp->snd_nxt);
}
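
The outer net_ratelimit() test can become if (1) because LIMIT_NETDEBUG rate-limits internally; the block is kept only to preserve the scope of the local inet variable. From the macro's use elsewhere it presumably expands to something like the following (the real definition lives in include/net/sock.h and may differ in detail):

	#define LIMIT_NETDEBUG(fmt, args...) \
		do { if (net_ratelimit()) printk(fmt, ##args); } while (0)
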
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 4c7e95fa090d..facb7e29304e 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -114,36 +114,14 @@ DEFINE_RWLOCK(udp_hash_lock);
static int udp_port_rover;
-/*
- * Note about this hash function :
- * Typical use is probably daddr = 0, only dport is going to vary hash
- */
-static inline unsigned int udp_hash_port(__u16 port)
-{
- return port;
-}
-
-static inline int __udp_lib_port_inuse(unsigned int hash, int port,
- const struct sock *this_sk,
- struct hlist_head udptable[],
- const struct udp_get_port_ops *ops)
+static inline int __udp_lib_lport_inuse(__u16 num, struct hlist_head udptable[])
{
struct sock *sk;
struct hlist_node *node;
- struct inet_sock *inet;
- sk_for_each(sk, node, &udptable[hash & (UDP_HTABLE_SIZE - 1)]) {
- if (sk->sk_hash != hash)
- continue;
- inet = inet_sk(sk);
- if (inet->num != port)
- continue;
- if (this_sk) {
- if (ops->saddr_cmp(sk, this_sk))
- return 1;
- } else if (ops->saddr_any(sk))
+ sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)])
+ if (sk->sk_hash == num)
return 1;
- }
return 0;
}
@@ -154,16 +132,16 @@ static inline int __udp_lib_port_inuse(unsigned int hash, int port,
* @snum: port number to look up
* @udptable: hash list table, must be of UDP_HTABLE_SIZE
* @port_rover: pointer to record of last unallocated port
- * @ops: AF-dependent address operations
+ * @saddr_comp: AF-dependent comparison of bound local IP addresses
*/
int __udp_lib_get_port(struct sock *sk, unsigned short snum,
struct hlist_head udptable[], int *port_rover,
- const struct udp_get_port_ops *ops)
+ int (*saddr_comp)(const struct sock *sk1,
+ const struct sock *sk2 ) )
{
struct hlist_node *node;
struct hlist_head *head;
struct sock *sk2;
- unsigned int hash;
int error = 1;
write_lock_bh(&udp_hash_lock);
@@ -178,8 +156,7 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum,
for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
int size;
- hash = ops->hash_port_and_rcv_saddr(result, sk);
- head = &udptable[hash & (UDP_HTABLE_SIZE - 1)];
+ head = &udptable[result & (UDP_HTABLE_SIZE - 1)];
if (hlist_empty(head)) {
if (result > sysctl_local_port_range[1])
result = sysctl_local_port_range[0] +
@@ -204,16 +181,7 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum,
result = sysctl_local_port_range[0]
+ ((result - sysctl_local_port_range[0]) &
(UDP_HTABLE_SIZE - 1));
- hash = udp_hash_port(result);
- if (__udp_lib_port_inuse(hash, result,
- NULL, udptable, ops))
- continue;
- if (ops->saddr_any(sk))
- break;
-
- hash = ops->hash_port_and_rcv_saddr(result, sk);
- if (! __udp_lib_port_inuse(hash, result,
- sk, udptable, ops))
+ if (! __udp_lib_lport_inuse(result, udptable))
break;
}
if (i >= (1 << 16) / UDP_HTABLE_SIZE)
@@ -221,40 +189,21 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum,
gotit:
*port_rover = snum = result;
} else {
- hash = udp_hash_port(snum);
- head = &udptable[hash & (UDP_HTABLE_SIZE - 1)];
+ head = &udptable[snum & (UDP_HTABLE_SIZE - 1)];
sk_for_each(sk2, node, head)
- if (sk2->sk_hash == hash &&
- sk2 != sk &&
- inet_sk(sk2)->num == snum &&
- (!sk2->sk_reuse || !sk->sk_reuse) &&
- (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if ||
- sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
- ops->saddr_cmp(sk, sk2))
+ if (sk2->sk_hash == snum &&
+ sk2 != sk &&
+ (!sk2->sk_reuse || !sk->sk_reuse) &&
+ (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
+ || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
+ (*saddr_comp)(sk, sk2) )
goto fail;
-
- if (!ops->saddr_any(sk)) {
- hash = ops->hash_port_and_rcv_saddr(snum, sk);
- head = &udptable[hash & (UDP_HTABLE_SIZE - 1)];
-
- sk_for_each(sk2, node, head)
- if (sk2->sk_hash == hash &&
- sk2 != sk &&
- inet_sk(sk2)->num == snum &&
- (!sk2->sk_reuse || !sk->sk_reuse) &&
- (!sk2->sk_bound_dev_if ||
- !sk->sk_bound_dev_if ||
- sk2->sk_bound_dev_if ==
- sk->sk_bound_dev_if) &&
- ops->saddr_cmp(sk, sk2))
- goto fail;
- }
}
inet_sk(sk)->num = snum;
- sk->sk_hash = hash;
+ sk->sk_hash = snum;
if (sk_unhashed(sk)) {
- head = &udptable[hash & (UDP_HTABLE_SIZE - 1)];
+ head = &udptable[snum & (UDP_HTABLE_SIZE - 1)];
sk_add_node(sk, head);
sock_prot_inc_use(sk->sk_prot);
}
@@ -265,12 +214,12 @@ fail:
}
int udp_get_port(struct sock *sk, unsigned short snum,
- const struct udp_get_port_ops *ops)
+ int (*scmp)(const struct sock *, const struct sock *))
{
- return __udp_lib_get_port(sk, snum, udp_hash, &udp_port_rover, ops);
+ return __udp_lib_get_port(sk, snum, udp_hash, &udp_port_rover, scmp);
}
-static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
+int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
{
struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
@@ -279,33 +228,9 @@ static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
inet1->rcv_saddr == inet2->rcv_saddr ));
}
-static int ipv4_rcv_saddr_any(const struct sock *sk)
-{
- return !inet_sk(sk)->rcv_saddr;
-}
-
-static inline unsigned int ipv4_hash_port_and_addr(__u16 port, __be32 addr)
-{
- addr ^= addr >> 16;
- addr ^= addr >> 8;
- return port ^ addr;
-}
-
-static unsigned int ipv4_hash_port_and_rcv_saddr(__u16 port,
- const struct sock *sk)
-{
- return ipv4_hash_port_and_addr(port, inet_sk(sk)->rcv_saddr);
-}
-
-const struct udp_get_port_ops udp_ipv4_ops = {
- .saddr_cmp = ipv4_rcv_saddr_equal,
- .saddr_any = ipv4_rcv_saddr_any,
- .hash_port_and_rcv_saddr = ipv4_hash_port_and_rcv_saddr,
-};
-
static inline int udp_v4_get_port(struct sock *sk, unsigned short snum)
{
- return udp_get_port(sk, snum, &udp_ipv4_ops);
+ return udp_get_port(sk, snum, ipv4_rcv_saddr_equal);
}
/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
@@ -317,77 +242,63 @@ static struct sock *__udp4_lib_lookup(__be32 saddr, __be16 sport,
{
struct sock *sk, *result = NULL;
struct hlist_node *node;
- unsigned int hash, hashwild;
- int score, best = -1, hport = ntohs(dport);
-
- hash = ipv4_hash_port_and_addr(hport, daddr);
- hashwild = udp_hash_port(hport);
+ unsigned short hnum = ntohs(dport);
+ int badness = -1;
read_lock(&udp_hash_lock);
-
-lookup:
-
- sk_for_each(sk, node, &udptable[hash & (UDP_HTABLE_SIZE - 1)]) {
+ sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) {
struct inet_sock *inet = inet_sk(sk);
- if (sk->sk_hash != hash || ipv6_only_sock(sk) ||
- inet->num != hport)
- continue;
-
- score = (sk->sk_family == PF_INET ? 1 : 0);
- if (inet->rcv_saddr) {
- if (inet->rcv_saddr != daddr)
- continue;
- score+=2;
- }
- if (inet->daddr) {
- if (inet->daddr != saddr)
- continue;
- score+=2;
- }
- if (inet->dport) {
- if (inet->dport != sport)
- continue;
- score+=2;
- }
- if (sk->sk_bound_dev_if) {
- if (sk->sk_bound_dev_if != dif)
- continue;
- score+=2;
- }
- if (score == 9) {
- result = sk;
- goto found;
- } else if (score > best) {
- result = sk;
- best = score;
+ if (sk->sk_hash == hnum && !ipv6_only_sock(sk)) {
+ int score = (sk->sk_family == PF_INET ? 1 : 0);
+ if (inet->rcv_saddr) {
+ if (inet->rcv_saddr != daddr)
+ continue;
+ score+=2;
+ }
+ if (inet->daddr) {
+ if (inet->daddr != saddr)
+ continue;
+ score+=2;
+ }
+ if (inet->dport) {
+ if (inet->dport != sport)
+ continue;
+ score+=2;
+ }
+ if (sk->sk_bound_dev_if) {
+ if (sk->sk_bound_dev_if != dif)
+ continue;
+ score+=2;
+ }
+ if (score == 9) {
+ result = sk;
+ break;
+ } else if (score > badness) {
+ result = sk;
+ badness = score;
+ }
}
}
-
- if (hash != hashwild) {
- hash = hashwild;
- goto lookup;
- }
-found:
if (result)
sock_hold(result);
read_unlock(&udp_hash_lock);
return result;
}
-static inline struct sock *udp_v4_mcast_next(struct sock *sk, unsigned int hnum,
- int hport, __be32 loc_addr,
+static inline struct sock *udp_v4_mcast_next(struct sock *sk,
+ __be16 loc_port, __be32 loc_addr,
__be16 rmt_port, __be32 rmt_addr,
int dif)
{
struct hlist_node *node;
struct sock *s = sk;
+ unsigned short hnum = ntohs(loc_port);
sk_for_each_from(s, node) {
struct inet_sock *inet = inet_sk(s);
if (s->sk_hash != hnum ||
- inet->num != hport ||
(inet->daddr && inet->daddr != rmt_addr) ||
(inet->dport != rmt_port && inet->dport) ||
(inet->rcv_saddr && inet->rcv_saddr != loc_addr) ||
@@ -722,8 +633,11 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
.dport = dport } } };
security_sk_classify_flow(sk, &fl);
err = ip_route_output_flow(&rt, &fl, sk, 1);
- if (err)
+ if (err) {
+ if (err == -ENETUNREACH)
+ IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
goto out;
+ }
err = -EACCES;
if ((rt->rt_flags & RTCF_BROADCAST) &&
@@ -1218,45 +1132,29 @@ static int __udp4_lib_mcast_deliver(struct sk_buff *skb,
__be32 saddr, __be32 daddr,
struct hlist_head udptable[])
{
- struct sock *sk, *skw, *sknext;
+ struct sock *sk;
int dif;
- int hport = ntohs(uh->dest);
- unsigned int hash = ipv4_hash_port_and_addr(hport, daddr);
- unsigned int hashwild = udp_hash_port(hport);
-
- dif = skb->dev->ifindex;
read_lock(&udp_hash_lock);
-
- sk = sk_head(&udptable[hash & (UDP_HTABLE_SIZE - 1)]);
- skw = sk_head(&udptable[hashwild & (UDP_HTABLE_SIZE - 1)]);
-
- sk = udp_v4_mcast_next(sk, hash, hport, daddr, uh->source, saddr, dif);
- if (!sk) {
- hash = hashwild;
- sk = udp_v4_mcast_next(skw, hash, hport, daddr, uh->source,
- saddr, dif);
- }
+ sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]);
+ dif = skb->dev->ifindex;
+ sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
if (sk) {
+ struct sock *sknext = NULL;
+
do {
struct sk_buff *skb1 = skb;
- sknext = udp_v4_mcast_next(sk_next(sk), hash, hport,
- daddr, uh->source, saddr, dif);
- if (!sknext && hash != hashwild) {
- hash = hashwild;
- sknext = udp_v4_mcast_next(skw, hash, hport,
- daddr, uh->source, saddr, dif);
- }
+
+ sknext = udp_v4_mcast_next(sk_next(sk), uh->dest, daddr,
+ uh->source, saddr, dif);
if (sknext)
skb1 = skb_clone(skb, GFP_ATOMIC);
if (skb1) {
int ret = udp_queue_rcv_skb(sk, skb1);
if (ret > 0)
- /*
- * we should probably re-process
- * instead of dropping packets here.
- */
+ /* we should probably re-process instead
+ * of dropping packets here. */
kfree_skb(skb1);
}
sk = sknext;
@@ -1343,7 +1241,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable);
sk = __udp4_lib_lookup(saddr, uh->source, daddr, uh->dest,
- skb->dev->ifindex, udptable);
+ skb->dev->ifindex, udptable );
if (sk != NULL) {
int ret = udp_queue_rcv_skb(sk, skb);
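
With the secondary address hash gone, __udp4_lib_lookup() is again a single pass over one chain, scored rather than exact-matched: an AF_INET socket starts at 1, and each of rcv_saddr, daddr, dport and bound_dev_if that is set and matches adds 2. A fully connected, device-bound match therefore scores 1 + 4*2 = 9 and terminates the scan immediately, while a plain *:port listener scores 1 and is returned only if nothing more specific is on the chain; any field that is set but does not match disqualifies the socket outright.
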
diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h
index 06d94195e644..820a477cfaa6 100644
--- a/net/ipv4/udp_impl.h
+++ b/net/ipv4/udp_impl.h
@@ -5,14 +5,14 @@
#include <net/protocol.h>
#include <net/inet_common.h>
-extern const struct udp_get_port_ops udp_ipv4_ops;
-
extern int __udp4_lib_rcv(struct sk_buff *, struct hlist_head [], int );
extern void __udp4_lib_err(struct sk_buff *, u32, struct hlist_head []);
extern int __udp_lib_get_port(struct sock *sk, unsigned short snum,
struct hlist_head udptable[], int *port_rover,
- const struct udp_get_port_ops *ops);
+ int (*)(const struct sock*,const struct sock*));
+extern int ipv4_rcv_saddr_equal(const struct sock *, const struct sock *);
+
extern int udp_setsockopt(struct sock *sk, int level, int optname,
char __user *optval, int optlen);
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index 3653b32dce2d..f34fd686a8f1 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -19,15 +19,14 @@ struct hlist_head udplite_hash[UDP_HTABLE_SIZE];
static int udplite_port_rover;
int udplite_get_port(struct sock *sk, unsigned short p,
- const struct udp_get_port_ops *ops)
+ int (*c)(const struct sock *, const struct sock *))
{
- return __udp_lib_get_port(sk, p, udplite_hash,
- &udplite_port_rover, ops);
+ return __udp_lib_get_port(sk, p, udplite_hash, &udplite_port_rover, c);
}
static int udplite_v4_get_port(struct sock *sk, unsigned short snum)
{
- return udplite_get_port(sk, snum, &udp_ipv4_ops);
+ return udplite_get_port(sk, snum, ipv4_rcv_saddr_equal);
}
static int udplite_rcv(struct sk_buff *skb)
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 5ceca951d73f..fa1902dc81b8 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -139,10 +139,8 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
nf_reset(skb);
if (decaps) {
- if (!(skb->dev->flags&IFF_LOOPBACK)) {
- dst_release(skb->dst);
- skb->dst = NULL;
- }
+ dst_release(skb->dst);
+ skb->dst = NULL;
netif_rx(skb);
return 0;
} else {
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index a2f2e6a5ec5d..9963700e74c1 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -85,6 +85,8 @@ static int xfrm4_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
top_iph->saddr = x->props.saddr.a4;
top_iph->daddr = x->id.daddr.a4;
+ skb->protocol = htons(ETH_P_IP);
+
memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
return 0;
}