diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-02-20 18:58:50 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-02-20 18:58:50 -0800 |
commit | a0b1c42951dd06ec83cc1bc2c9788131d9fefcd8 (patch) | |
tree | a572f1523cf904c93020c9cdb32f3bc84ec3ac16 /net/ipv4/devinet.c | |
parent | 8ec4942212a6d337982967778a3dc3b60aea782e (diff) | |
parent | ecd9883724b78cc72ed92c98bcb1a46c764fff21 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking update from David Miller:
1) Checkpoint/restarted TCP sockets now can properly propagate the TCP
timestamp offset. From Andrey Vagin.
2) VMWARE VM VSOCK layer, from Andy King.
3) Much improved support for virtual functions and SR-IOV in bnx2x,
from Ariel ELior.
4) All protocols on ipv4 and ipv6 are now network namespace aware, and
all the compatability checks for initial-namespace-only protocols is
removed. Thanks to Tom Parkin for helping deal with the last major
holdout, L2TP.
5) IPV6 support in netpoll and network namespace support in pktgen,
from Cong Wang.
6) Multiple Registration Protocol (MRP) and Multiple VLAN Registration
Protocol (MVRP) support, from David Ward.
7) Compute packet lengths more accurately in the packet scheduler, from
Eric Dumazet.
8) Use per-task page fragment allocator in skb_append_datato_frags(),
also from Eric Dumazet.
9) Add support for connection tracking labels in netfilter, from
Florian Westphal.
10) Fix default multicast group joining on ipv6, and add anti-spoofing
checks to 6to4 and 6rd. From Hannes Frederic Sowa.
11) Make ipv4/ipv6 fragmentation memory limits more reasonable in modern
times, rearrange inet frag datastructures for better cacheline
locality, and move more operations outside of locking. From Jesper
Dangaard Brouer.
12) Instead of strict master <--> slave relationships, allow arbitrary
scenerios with "upper device lists". From Jiri Pirko.
13) Improve rate limiting accuracy in TBF and act_police, also from Jiri
Pirko.
14) Add a BPF filter netfilter match target, from Willem de Bruijn.
15) Orphan and delete a bunch of pre-historic networking drivers from
Paul Gortmaker.
16) Add TSO support for GRE tunnels, from Pravin B SHelar. Although
this still needs some minor bug fixing before it's %100 correct in
all cases.
17) Handle unresolved IPSEC states like ARP, with a resolution packet
queue. From Steffen Klassert.
18) Remove TCP Appropriate Byte Count support (ABC), from Stephen
Hemminger. This was long overdue.
19) Support SO_REUSEPORT, from Tom Herbert.
20) Allow locking a socket BPF filter, so that it cannot change after a
process drops capabilities.
21) Add VLAN filtering to bridge, from Vlad Yasevich.
22) Bring ipv6 on-par with ipv4 and do not cache neighbour entries in
the ipv6 routes, from YOSHIFUJI Hideaki.
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1538 commits)
ipv6: fix race condition regarding dst->expires and dst->from.
net: fix a wrong assignment in skb_split()
ip_gre: remove an extra dst_release()
ppp: set qdisc_tx_busylock to avoid LOCKDEP splat
atl1c: restore buffer state
net: fix a build failure when !CONFIG_PROC_FS
net: ipv4: fix waring -Wunused-variable
net: proc: fix build failed when procfs is not configured
Revert "xen: netback: remove redundant xenvif_put"
net: move procfs code to net/core/net-procfs.c
qmi_wwan, cdc-ether: add ADU960S
bonding: set sysfs device_type to 'bond'
bonding: fix bond_release_all inconsistencies
b44: use netdev_alloc_skb_ip_align()
xen: netback: remove redundant xenvif_put
net: fec: Do a sanity check on the gpio number
ip_gre: propogate target device GSO capability to the tunnel device
ip_gre: allow CSUM capable devices to handle packets
bonding: Fix initialize after use for 3ad machine state spinlock
bonding: Fix race condition between bond_enslave() and bond_3ad_update_lacp_rate()
...
Diffstat (limited to 'net/ipv4/devinet.c')
-rw-r--r-- | net/ipv4/devinet.c | 215 |
1 files changed, 210 insertions, 5 deletions
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index a8e4f2665d5e..5281314886c1 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -63,6 +63,7 @@ #include <net/ip_fib.h> #include <net/rtnetlink.h> #include <net/net_namespace.h> +#include <net/addrconf.h> #include "fib_lookup.h" @@ -93,6 +94,7 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = { [IFA_ADDRESS] = { .type = NLA_U32 }, [IFA_BROADCAST] = { .type = NLA_U32 }, [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, + [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) }, }; #define IN4_ADDR_HSIZE_SHIFT 8 @@ -417,6 +419,10 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, __inet_del_ifa(in_dev, ifap, destroy, NULL, 0); } +static void check_lifetime(struct work_struct *work); + +static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime); + static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, u32 portid) { @@ -462,6 +468,9 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, inet_hash_insert(dev_net(in_dev->dev), ifa); + cancel_delayed_work(&check_lifetime_work); + schedule_delayed_work(&check_lifetime_work, 0); + /* Send message first, then call notifier. Notifier will trigger FIB update, so that listeners of netlink will know about new ifaddr */ @@ -573,7 +582,107 @@ errout: return err; } -static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh) +#define INFINITY_LIFE_TIME 0xFFFFFFFF + +static void check_lifetime(struct work_struct *work) +{ + unsigned long now, next, next_sec, next_sched; + struct in_ifaddr *ifa; + struct hlist_node *node; + int i; + + now = jiffies; + next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY); + + rcu_read_lock(); + for (i = 0; i < IN4_ADDR_HSIZE; i++) { + hlist_for_each_entry_rcu(ifa, node, + &inet_addr_lst[i], hash) { + unsigned long age; + + if (ifa->ifa_flags & IFA_F_PERMANENT) + continue; + + /* We try to batch several events at once. */ + age = (now - ifa->ifa_tstamp + + ADDRCONF_TIMER_FUZZ_MINUS) / HZ; + + if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME && + age >= ifa->ifa_valid_lft) { + struct in_ifaddr **ifap ; + + rtnl_lock(); + for (ifap = &ifa->ifa_dev->ifa_list; + *ifap != NULL; ifap = &ifa->ifa_next) { + if (*ifap == ifa) + inet_del_ifa(ifa->ifa_dev, + ifap, 1); + } + rtnl_unlock(); + } else if (ifa->ifa_preferred_lft == + INFINITY_LIFE_TIME) { + continue; + } else if (age >= ifa->ifa_preferred_lft) { + if (time_before(ifa->ifa_tstamp + + ifa->ifa_valid_lft * HZ, next)) + next = ifa->ifa_tstamp + + ifa->ifa_valid_lft * HZ; + + if (!(ifa->ifa_flags & IFA_F_DEPRECATED)) { + ifa->ifa_flags |= IFA_F_DEPRECATED; + rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0); + } + } else if (time_before(ifa->ifa_tstamp + + ifa->ifa_preferred_lft * HZ, + next)) { + next = ifa->ifa_tstamp + + ifa->ifa_preferred_lft * HZ; + } + } + } + rcu_read_unlock(); + + next_sec = round_jiffies_up(next); + next_sched = next; + + /* If rounded timeout is accurate enough, accept it. */ + if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ)) + next_sched = next_sec; + + now = jiffies; + /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */ + if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX)) + next_sched = now + ADDRCONF_TIMER_FUZZ_MAX; + + schedule_delayed_work(&check_lifetime_work, next_sched - now); +} + +static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft, + __u32 prefered_lft) +{ + unsigned long timeout; + + ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED); + + timeout = addrconf_timeout_fixup(valid_lft, HZ); + if (addrconf_finite_timeout(timeout)) + ifa->ifa_valid_lft = timeout; + else + ifa->ifa_flags |= IFA_F_PERMANENT; + + timeout = addrconf_timeout_fixup(prefered_lft, HZ); + if (addrconf_finite_timeout(timeout)) { + if (timeout == 0) + ifa->ifa_flags |= IFA_F_DEPRECATED; + ifa->ifa_preferred_lft = timeout; + } + ifa->ifa_tstamp = jiffies; + if (!ifa->ifa_cstamp) + ifa->ifa_cstamp = ifa->ifa_tstamp; +} + +static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, + __u32 *pvalid_lft, __u32 *pprefered_lft) { struct nlattr *tb[IFA_MAX+1]; struct in_ifaddr *ifa; @@ -633,24 +742,73 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh) else memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); + if (tb[IFA_CACHEINFO]) { + struct ifa_cacheinfo *ci; + + ci = nla_data(tb[IFA_CACHEINFO]); + if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) { + err = -EINVAL; + goto errout; + } + *pvalid_lft = ci->ifa_valid; + *pprefered_lft = ci->ifa_prefered; + } + return ifa; errout: return ERR_PTR(err); } +static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa) +{ + struct in_device *in_dev = ifa->ifa_dev; + struct in_ifaddr *ifa1, **ifap; + + if (!ifa->ifa_local) + return NULL; + + for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL; + ifap = &ifa1->ifa_next) { + if (ifa1->ifa_mask == ifa->ifa_mask && + inet_ifa_match(ifa1->ifa_address, ifa) && + ifa1->ifa_local == ifa->ifa_local) + return ifa1; + } + return NULL; +} + static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct net *net = sock_net(skb->sk); struct in_ifaddr *ifa; + struct in_ifaddr *ifa_existing; + __u32 valid_lft = INFINITY_LIFE_TIME; + __u32 prefered_lft = INFINITY_LIFE_TIME; ASSERT_RTNL(); - ifa = rtm_to_ifaddr(net, nlh); + ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft); if (IS_ERR(ifa)) return PTR_ERR(ifa); - return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid); + ifa_existing = find_matching_ifa(ifa); + if (!ifa_existing) { + /* It would be best to check for !NLM_F_CREATE here but + * userspace alreay relies on not having to provide this. + */ + set_ifa_lifetime(ifa, valid_lft, prefered_lft); + return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid); + } else { + inet_free_ifa(ifa); + + if (nlh->nlmsg_flags & NLM_F_EXCL || + !(nlh->nlmsg_flags & NLM_F_REPLACE)) + return -EEXIST; + + set_ifa_lifetime(ifa_existing, valid_lft, prefered_lft); + } + return 0; } /* @@ -852,6 +1010,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) ifa->ifa_prefixlen = 32; ifa->ifa_mask = inet_make_mask(32); } + set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); ret = inet_set_ifa(dev, ifa); break; @@ -1190,6 +1349,8 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, ifa->ifa_dev = in_dev; ifa->ifa_scope = RT_SCOPE_HOST; memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); + set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, + INFINITY_LIFE_TIME); inet_insert_ifa(ifa); } } @@ -1246,11 +1407,30 @@ static size_t inet_nlmsg_size(void) + nla_total_size(IFNAMSIZ); /* IFA_LABEL */ } +static inline u32 cstamp_delta(unsigned long cstamp) +{ + return (cstamp - INITIAL_JIFFIES) * 100UL / HZ; +} + +static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp, + unsigned long tstamp, u32 preferred, u32 valid) +{ + struct ifa_cacheinfo ci; + + ci.cstamp = cstamp_delta(cstamp); + ci.tstamp = cstamp_delta(tstamp); + ci.ifa_prefered = preferred; + ci.ifa_valid = valid; + + return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci); +} + static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, u32 portid, u32 seq, int event, unsigned int flags) { struct ifaddrmsg *ifm; struct nlmsghdr *nlh; + u32 preferred, valid; nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags); if (nlh == NULL) @@ -1259,10 +1439,31 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, ifm = nlmsg_data(nlh); ifm->ifa_family = AF_INET; ifm->ifa_prefixlen = ifa->ifa_prefixlen; - ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT; + ifm->ifa_flags = ifa->ifa_flags; ifm->ifa_scope = ifa->ifa_scope; ifm->ifa_index = ifa->ifa_dev->dev->ifindex; + if (!(ifm->ifa_flags & IFA_F_PERMANENT)) { + preferred = ifa->ifa_preferred_lft; + valid = ifa->ifa_valid_lft; + if (preferred != INFINITY_LIFE_TIME) { + long tval = (jiffies - ifa->ifa_tstamp) / HZ; + + if (preferred > tval) + preferred -= tval; + else + preferred = 0; + if (valid != INFINITY_LIFE_TIME) { + if (valid > tval) + valid -= tval; + else + valid = 0; + } + } + } else { + preferred = INFINITY_LIFE_TIME; + valid = INFINITY_LIFE_TIME; + } if ((ifa->ifa_address && nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) || (ifa->ifa_local && @@ -1270,7 +1471,9 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, (ifa->ifa_broadcast && nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) || (ifa->ifa_label[0] && - nla_put_string(skb, IFA_LABEL, ifa->ifa_label))) + nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) || + put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp, + preferred, valid)) goto nla_put_failure; return nlmsg_end(skb, nlh); @@ -1988,6 +2191,8 @@ void __init devinet_init(void) register_gifconf(PF_INET, inet_gifconf); register_netdevice_notifier(&ip_netdev_notifier); + schedule_delayed_work(&check_lifetime_work, 0); + rtnl_af_register(&inet_af_ops); rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL); |