From 6cb6a27c45cec9184302c2e350b3593c64bc7f6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Sat, 2 Apr 2011 22:48:47 -0700 Subject: net: Call netdev_features_change() from netdev_update_features() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Issue FEAT_CHANGE notification when features are changed by netdev_update_features(). This will allow changes made by extra constraints on e.g. MTU change to be properly propagated like changes via ethtool. Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- net/core/dev.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 3da9fb06d47a..02f56376fe99 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5236,7 +5236,7 @@ u32 netdev_fix_features(struct net_device *dev, u32 features) } EXPORT_SYMBOL(netdev_fix_features); -void netdev_update_features(struct net_device *dev) +int __netdev_update_features(struct net_device *dev) { u32 features; int err = 0; @@ -5250,7 +5250,7 @@ void netdev_update_features(struct net_device *dev) features = netdev_fix_features(dev, features); if (dev->features == features) - return; + return 0; netdev_info(dev, "Features changed: 0x%08x -> 0x%08x\n", dev->features, features); @@ -5258,12 +5258,23 @@ void netdev_update_features(struct net_device *dev) if (dev->netdev_ops->ndo_set_features) err = dev->netdev_ops->ndo_set_features(dev, features); - if (!err) - dev->features = features; - else if (err < 0) + if (unlikely(err < 0)) { netdev_err(dev, "set_features() failed (%d); wanted 0x%08x, left 0x%08x\n", err, features, dev->features); + return -1; + } + + if (!err) + dev->features = features; + + return 1; +} + +void netdev_update_features(struct net_device *dev) +{ + if (__netdev_update_features(dev)) + netdev_features_change(dev); } EXPORT_SYMBOL(netdev_update_features); @@ -5430,7 +5441,7 @@ int 
register_netdevice(struct net_device *dev) goto err_uninit; dev->reg_state = NETREG_REGISTERED; - netdev_update_features(dev); + __netdev_update_features(dev); /* * Default initial state at registry is that the -- cgit v1.2.3 From c6e1a0d12ca7b4f22c58e55a16beacfb7d3d8462 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 4 Apr 2011 22:30:30 -0700 Subject: net: Allow no-cache copy from user on transmit This patch uses __copy_from_user_nocache on transmit to bypass data cache for a performance improvement. skb_add_data_nocache and skb_copy_to_page_nocache can be called by sendmsg functions to use this feature, initial support is in tcp_sendmsg. This functionality is configurable per device using ethtool. Presumably, this feature would only be useful when the driver does not touch the data. The feature is turned on by default if a device indicates that it does some form of checksum offload; it is off by default for devices that do no checksum offload or indicate no checksum is necessary. For the former case copy-checksum is probably done anyway, in the latter case the device is likely loopback in which case the no cache copy is probably not beneficial. This patch was tested using 200 instances of netperf TCP_RR with 1400 byte request and one byte reply. Platform is 16 core AMD x86. No-cache copy disabled: 672703 tps, 97.13% utilization 50/90/99% latency:244.31 484.205 1028.41 No-cache copy enabled: 702113 tps, 96.16% utilization, 50/90/99% latency 238.56 467.56 956.955 Using 14000 byte request and response sizes demonstrate the effects more dramatically: No-cache copy disabled: 79571 tps, 34.34% utilization 50/90/95% latency 1584.46 2319.59 5001.76 No-cache copy enabled: 83856 tps, 34.81% utilization 50/90/95% latency 2508.42 2622.62 2735.88 Note especially the effect on latency tail (95th percentile). This seems to provide a nice performance improvement and is consistent in the tests I ran. 
Presumably, this would provide the greatest benefits in the presence of an application workload stressing the cache and a lot of transmit data happening. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/core/dev.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 02f56376fe99..5d0b4f6f1a72 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5425,6 +5425,14 @@ int register_netdevice(struct net_device *dev) dev->features &= ~NETIF_F_GSO; } + /* Turn on no cache copy if HW is doing checksum */ + dev->hw_features |= NETIF_F_NOCACHE_COPY; + if ((dev->features & NETIF_F_ALL_CSUM) && + !(dev->features & NETIF_F_NO_CSUM)) { + dev->wanted_features |= NETIF_F_NOCACHE_COPY; + dev->features |= NETIF_F_NOCACHE_COPY; + } + /* Enable GRO and NETIF_F_HIGHDMA for vlans by default, * vlan_dev_init() will do the dev->features check, so these features * are enabled only if supported by underlying device. @@ -6182,6 +6190,10 @@ u32 netdev_increment_features(u32 all, u32 one, u32 mask) } } + /* If device can't no cache copy, don't do for all */ + if (!(one & NETIF_F_NOCACHE_COPY)) + all &= ~NETIF_F_NOCACHE_COPY; + one |= NETIF_F_ALL_CSUM; one |= all & NETIF_F_ONE_FOR_ALL; -- cgit v1.2.3 From bcc6d47903612c3861201cc3a866fb604f26b8b2 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 7 Apr 2011 19:48:33 +0000 Subject: net: vlan: make non-hw-accel rx path similar to hw-accel Now there are 2 paths for rx vlan frames. When rx-vlan-hw-accel is enabled, skb is untagged by NIC, vlan_tci is set and the skb gets into vlan code in __netif_receive_skb - vlan_hwaccel_do_receive. For non-rx-vlan-hw-accel however, tagged skb goes through whole __netif_receive_skb, it's untagged in ptype_base handler and reinjected. This inconsistency is fixed by this patch. Vlan untagging happens early in __netif_receive_skb so the rest of code (ptype_all handlers, rx_handlers) see the skb like it was untagged by hw. 
Signed-off-by: Jiri Pirko v1->v2: remove "inline" from vlan_core.c functions Signed-off-by: David S. Miller --- net/core/dev.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 95897ff3a76f..d1aebf7c6494 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3130,6 +3130,12 @@ another_round: __this_cpu_inc(softnet_data.processed); + if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) { + skb = vlan_untag(skb); + if (unlikely(!skb)) + goto out; + } + #ifdef CONFIG_NET_CLS_ACT if (skb->tc_verd & TC_NCLS) { skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); @@ -3177,7 +3183,7 @@ ncls: ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = NULL; } - if (vlan_hwaccel_do_receive(&skb)) { + if (vlan_do_receive(&skb)) { ret = __netif_receive_skb(skb); goto out; } else if (unlikely(!skb)) -- cgit v1.2.3 From 872674858fe236b746317741013c830bb70775c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Tue, 12 Apr 2011 09:56:38 +0000 Subject: net: add RTNL_ASSERT in __netdev_update_features() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- net/core/dev.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index d1aebf7c6494..f523eee3141c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5247,6 +5247,8 @@ int __netdev_update_features(struct net_device *dev) u32 features; int err = 0; + ASSERT_RTNL(); + features = netdev_get_wanted_features(dev); if (dev->netdev_ops->ndo_fix_features) -- cgit v1.2.3 From ea2d36883ca8e6caab23b6d15bfa80b1d1d81d2f Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 12 Apr 2011 14:38:37 +0000 Subject: net: Disable all TSO features when SG is disabled The feature flags NETIF_F_TSO and NETIF_F_TSO6 independently enable TSO for IPv4 and IPv6 respectively. 
However, the test in netdev_fix_features() and its predecessor functions was never updated to check for NETIF_F_TSO6, possibly because it was originally proposed that TSO for IPv6 would be dependent on both feature flags. Now that these feature flags can be changed independently from user-space and we depend on netdev_fix_features() to fix invalid feature combinations, it's important to disable them both if scatter-gather is disabled. Also disable NETIF_F_TSO_ECN so user-space sees all TSO features as disabled. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- net/core/dev.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 956d3b006e8b..6401fb588145 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5203,9 +5203,9 @@ u32 netdev_fix_features(struct net_device *dev, u32 features) } /* TSO requires that SG is present as well. */ - if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) { - netdev_info(dev, "Dropping NETIF_F_TSO since no SG feature.\n"); - features &= ~NETIF_F_TSO; + if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) { + netdev_info(dev, "Dropping TSO features since no SG feature.\n"); + features &= ~NETIF_F_ALL_TSO; } /* Software GSO depends on SG. */ -- cgit v1.2.3 From 31d8b9e099e59f880aa65095951559896d4e20fa Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 12 Apr 2011 14:47:15 +0000 Subject: net: Disable NETIF_F_TSO_ECN when TSO is disabled NETIF_F_TSO_ECN has no effect when TSO is disabled; this just means that feature state will be accurately reported to user-space. Signed-off-by: Ben Hutchings Signed-off-by: David S. 
Miller --- net/core/dev.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 6401fb588145..c2ac599fa0f6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5208,6 +5208,10 @@ u32 netdev_fix_features(struct net_device *dev, u32 features) features &= ~NETIF_F_ALL_TSO; } + /* TSO ECN requires that TSO is present as well. */ + if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN) + features &= ~NETIF_F_TSO_ECN; + /* Software GSO depends on SG. */ if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) { netdev_info(dev, "Dropping NETIF_F_GSO since no SG feature.\n"); -- cgit v1.2.3 From b71d1d426d263b0b6cb5760322efebbfc89d4463 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 22 Apr 2011 04:53:02 +0000 Subject: inet: constify ip headers and in6_addr Add const qualifiers to structs iphdr, ipv6hdr and in6_addr pointers where possible, to make code intention more obvious. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/core/dev.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 3871bf69a386..379c993ff421 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2502,8 +2502,8 @@ static inline void ____napi_schedule(struct softnet_data *sd, __u32 __skb_get_rxhash(struct sk_buff *skb) { int nhoff, hash = 0, poff; - struct ipv6hdr *ip6; - struct iphdr *ip; + const struct ipv6hdr *ip6; + const struct iphdr *ip; u8 ip_proto; u32 addr1, addr2, ihl; union { @@ -2518,7 +2518,7 @@ __u32 __skb_get_rxhash(struct sk_buff *skb) if (!pskb_may_pull(skb, sizeof(*ip) + nhoff)) goto done; - ip = (struct iphdr *) (skb->data + nhoff); + ip = (const struct iphdr *) (skb->data + nhoff); if (ip->frag_off & htons(IP_MF | IP_OFFSET)) ip_proto = 0; else @@ -2531,7 +2531,7 @@ __u32 __skb_get_rxhash(struct sk_buff *skb) if (!pskb_may_pull(skb, sizeof(*ip6) + nhoff)) goto done; - ip6 = (struct ipv6hdr *) (skb->data + nhoff); + ip6 = (const struct ipv6hdr *) (skb->data + nhoff); ip_proto = ip6->nexthdr; addr1 = (__force u32) ip6->saddr.s6_addr32[3]; addr2 = (__force u32) ip6->daddr.s6_addr32[3]; -- cgit v1.2.3 From 22d5969fb450afd3a4aff606360f7d52c5a3a628 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Thu, 21 Apr 2011 12:42:15 +0000 Subject: net: make WARN_ON in dev_disable_lro() useful MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michał Mirosław Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/core/dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 379c993ff421..541f22a035a2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1315,7 +1315,8 @@ void dev_disable_lro(struct net_device *dev) return; __ethtool_set_flags(dev, flags & ~ETH_FLAG_LRO); - WARN_ON(dev->features & NETIF_F_LRO); + if (unlikely(dev->features & NETIF_F_LRO)) + netdev_WARN(dev, "failed to disable LRO!\n"); } EXPORT_SYMBOL(dev_disable_lro); -- cgit v1.2.3 From 3aba891dde3842d89ad022237b99c1ed308040b0 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 19 Apr 2011 03:48:16 +0000 Subject: bonding: move processing of recv handlers into handle_frame() Since now when bonding uses rx_handler, all traffic going into bond device goes thru bond_handle_frame. So there's no need to go back into bonding code later via ptype handlers. This patch converts original ptype handlers into "bonding receive probes". These functions are called from bond_handle_frame and they are registered per-mode. Note that vlan packets are also handled because they are always untagged thanks to vlan_untag() Note that this also allows arpmon for eth-bond-bridge-vlan topology. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/dev.c | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 541f22a035a2..3bbb4c2ce92e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3077,25 +3077,6 @@ void netdev_rx_handler_unregister(struct net_device *dev) } EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); -static void vlan_on_bond_hook(struct sk_buff *skb) -{ - /* - * Make sure ARP frames received on VLAN interfaces stacked on - * bonding interfaces still make their way to any base bonding - * device that may have registered for a specific ptype. 
- */ - if (skb->dev->priv_flags & IFF_802_1Q_VLAN && - vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING && - skb->protocol == htons(ETH_P_ARP)) { - struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); - - if (!skb2) - return; - skb2->dev = vlan_dev_real_dev(skb->dev); - netif_rx(skb2); - } -} - static int __netif_receive_skb(struct sk_buff *skb) { struct packet_type *ptype, *pt_prev; @@ -3191,8 +3172,6 @@ ncls: goto out; } - vlan_on_bond_hook(skb); - /* deliver only exact match when indicated */ null_or_dev = deliver_exact ? skb->dev : NULL; -- cgit v1.2.3 From 1742f183fc218798dab6fcf0ded25b6608fc0a48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Fri, 22 Apr 2011 06:31:16 +0000 Subject: net: fix netdev_increment_features() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Simplify and fix netdev_increment_features() to conform to what is stated in netdevice.h comments about NETIF_F_ONE_FOR_ALL. Include FCoE segmentation and VLAN-challedged flags in computation. Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- net/core/dev.c | 35 +++++++++++------------------------ 1 file changed, 11 insertions(+), 24 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 3bbb4c2ce92e..7db99b52679f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6164,33 +6164,20 @@ static int dev_cpu_callback(struct notifier_block *nfb, */ u32 netdev_increment_features(u32 all, u32 one, u32 mask) { - /* If device needs checksumming, downgrade to it. */ - if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM)) - all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM); - else if (mask & NETIF_F_ALL_CSUM) { - /* If one device supports v4/v6 checksumming, set for all. 
*/ - if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) && - !(all & NETIF_F_GEN_CSUM)) { - all &= ~NETIF_F_ALL_CSUM; - all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM); - } - - /* If one device supports hw checksumming, set for all. */ - if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) { - all &= ~NETIF_F_ALL_CSUM; - all |= NETIF_F_HW_CSUM; - } - } + if (mask & NETIF_F_GEN_CSUM) + mask |= NETIF_F_ALL_CSUM; + mask |= NETIF_F_VLAN_CHALLENGED; - /* If device can't no cache copy, don't do for all */ - if (!(one & NETIF_F_NOCACHE_COPY)) - all &= ~NETIF_F_NOCACHE_COPY; + all |= one & (NETIF_F_ONE_FOR_ALL|NETIF_F_ALL_CSUM) & mask; + all &= one | ~NETIF_F_ALL_FOR_ALL; - one |= NETIF_F_ALL_CSUM; + /* If device needs checksumming, downgrade to it. */ + if (all & (NETIF_F_ALL_CSUM & ~NETIF_F_NO_CSUM)) + all &= ~NETIF_F_NO_CSUM; - one |= all & NETIF_F_ONE_FOR_ALL; - all &= one | NETIF_F_LLTX | NETIF_F_GSO | NETIF_F_UFO; - all |= one & mask & NETIF_F_ONE_FOR_ALL; + /* If one device supports hw checksumming, set for all. */ + if (all & NETIF_F_GEN_CSUM) + all &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM); return all; } -- cgit v1.2.3 From 8ae6daca85c8bbd6a32c382db5e2a2a989f8bed2 Mon Sep 17 00:00:00 2001 From: David Decotigny Date: Wed, 27 Apr 2011 18:32:38 +0000 Subject: ethtool: Call ethtool's get/set_settings callbacks with cleaned data This makes sure that when a driver calls the ethtool's get/set_settings() callback of another driver, the data passed to it is clean. This guarantees that speed_hi will be zeroed correctly if the called callback doesn't explicitely set it: we are sure we don't get a corrupted speed from the underlying driver. We also take care of setting the cmd field appropriately (ETHTOOL_GSET/SSET). This applies to dev_ethtool_get_settings(), which now makes sure it sets up that ethtool command parameter correctly before passing it to drivers. 
This also means that whoever calls dev_ethtool_get_settings() does not have to clean the ethtool command parameter. This function also becomes an exported symbol instead of an inline. All drivers visible to make allyesconfig under x86_64 have been updated. Signed-off-by: David Decotigny Signed-off-by: David S. Miller --- net/core/dev.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 7db99b52679f..e95dc30110eb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4495,6 +4495,30 @@ void dev_set_rx_mode(struct net_device *dev) netif_addr_unlock_bh(dev); } +/** + * dev_ethtool_get_settings - call device's ethtool_ops::get_settings() + * @dev: device + * @cmd: memory area for ethtool_ops::get_settings() result + * + * The cmd arg is initialized properly (cleared and + * ethtool_cmd::cmd field set to ETHTOOL_GSET). + * + * Return device's ethtool_ops::get_settings() result value or + * -EOPNOTSUPP when device doesn't expose + * ethtool_ops::get_settings() operation. + */ +int dev_ethtool_get_settings(struct net_device *dev, + struct ethtool_cmd *cmd) +{ + if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings) + return -EOPNOTSUPP; + + memset(cmd, 0, sizeof(struct ethtool_cmd)); + cmd->cmd = ETHTOOL_GSET; + return dev->ethtool_ops->get_settings(dev, cmd); +} +EXPORT_SYMBOL(dev_ethtool_get_settings); + /** * dev_get_flags - get flags reported to userspace * @dev: device -- cgit v1.2.3 From 41c31f318a5209922d051e293c61e4724daad11c Mon Sep 17 00:00:00 2001 From: Lifeng Sun Date: Wed, 27 Apr 2011 22:04:51 +0000 Subject: networking: inappropriate ioctl operation should return ENOTTY ioctl() calls against a socket with an inappropriate ioctl operation are incorrectly returning EINVAL rather than ENOTTY: [ENOTTY] Inappropriate I/O control operation. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=33992 Signed-off-by: Lifeng Sun Signed-off-by: David S. 
Miller --- net/core/dev.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index c2ac599fa0f6..856b6ee9a1d5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4773,7 +4773,7 @@ static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cm * is never reached */ WARN_ON(1); - err = -EINVAL; + err = -ENOTTY; break; } @@ -5041,7 +5041,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) /* Set the per device memory buffer space. * Not applicable in our case */ case SIOCSIFLINK: - return -EINVAL; + return -ENOTTY; /* * Unknown or private ioctl. @@ -5062,7 +5062,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) /* Take care of Wireless Extensions */ if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) return wext_handle_ioctl(net, &ifr, cmd, arg); - return -EINVAL; + return -ENOTTY; } } -- cgit v1.2.3 From 1c5cae815d19ffe02bdfda1260949ef2b1806171 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sat, 30 Apr 2011 01:21:32 +0000 Subject: net: call dev_alloc_name from register_netdevice Force dev_alloc_name() to be called from register_netdevice() by dev_get_valid_name(). That allows to remove multiple explicit dev_alloc_name() calls. The possibility to call dev_alloc_name in advance remains. This also fixes veth creation regression caused by 84c49d8c3e4abefb0a41a77b25aa37ebe8d6b743 Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- net/core/dev.c | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index e95dc30110eb..3b79bad3d02d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -948,7 +948,7 @@ int dev_alloc_name(struct net_device *dev, const char *name) } EXPORT_SYMBOL(dev_alloc_name); -static int dev_get_valid_name(struct net_device *dev, const char *name, bool fmt) +static int dev_get_valid_name(struct net_device *dev, const char *name) { struct net *net; @@ -958,7 +958,7 @@ static int dev_get_valid_name(struct net_device *dev, const char *name, bool fmt if (!dev_valid_name(name)) return -EINVAL; - if (fmt && strchr(name, '%')) + if (strchr(name, '%')) return dev_alloc_name(dev, name); else if (__dev_get_by_name(net, name)) return -EEXIST; @@ -995,7 +995,7 @@ int dev_change_name(struct net_device *dev, const char *newname) memcpy(oldname, dev->name, IFNAMSIZ); - err = dev_get_valid_name(dev, newname, 1); + err = dev_get_valid_name(dev, newname); if (err < 0) return err; @@ -5420,8 +5420,8 @@ int register_netdevice(struct net_device *dev) } } - ret = dev_get_valid_name(dev, dev->name, 0); - if (ret) + ret = dev_get_valid_name(dev, dev->name); + if (ret < 0) goto err_uninit; dev->ifindex = dev_new_index(net); @@ -5562,19 +5562,7 @@ int register_netdev(struct net_device *dev) int err; rtnl_lock(); - - /* - * If the name is a format string the caller wants us to do a - * name allocation. 
- */ - if (strchr(dev->name, '%')) { - err = dev_alloc_name(dev, dev->name); - if (err < 0) - goto out; - } - err = register_netdevice(dev); -out: rtnl_unlock(); return err; } @@ -6056,7 +6044,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char /* We get here if we can't use the current device name */ if (!pat) goto out; - if (dev_get_valid_name(dev, pat, 1)) + if (dev_get_valid_name(dev, pat) < 0) goto out; } -- cgit v1.2.3 From e14a599335427f81bbb0008963e59aa9c6449dce Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 10 May 2011 12:26:06 -0700 Subject: net: dev_close() should check IFF_UP Commit 443457242beb (factorize sync-rcu call in unregister_netdevice_many) mistakenly removed one test from dev_close() Following actions trigger a BUG : modprobe bonding modprobe dummy ifconfig bond0 up ifenslave bond0 dummy0 rmmod dummy dev_close() must not close a non IFF_UP device. With help from Frank Blaschka and Einar EL Lueck Reported-by: Frank Blaschka Reported-by: Einar EL Lueck Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/core/dev.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 856b6ee9a1d5..92009440d28b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1284,11 +1284,13 @@ static int dev_close_many(struct list_head *head) */ int dev_close(struct net_device *dev) { - LIST_HEAD(single); + if (dev->flags & IFF_UP) { + LIST_HEAD(single); - list_add(&dev->unreg_list, &single); - dev_close_many(&single); - list_del(&single); + list_add(&dev->unreg_list, &single); + dev_close_many(&single); + list_del(&single); + } return 0; } EXPORT_SYMBOL(dev_close); -- cgit v1.2.3 From afe12cc86b0ba545a01ad8716539ab07ab6e9e89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Sat, 7 May 2011 03:22:17 +0000 Subject: net: introduce netdev_change_features() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It will be needed by bonding and other drivers changing vlan_features after ndo_init callback. As a bonus, this includes kernel-doc for netdev_update_features(). Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- net/core/dev.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 75898a32c038..ea23353e6251 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5289,6 +5289,14 @@ int __netdev_update_features(struct net_device *dev) return 1; } +/** + * netdev_update_features - recalculate device features + * @dev: the device to check + * + * Recalculate dev->features set and send notifications if it + * has changed. Should be called after driver or hardware dependent + * conditions might have changed that influence the features. 
+ */ void netdev_update_features(struct net_device *dev) { if (__netdev_update_features(dev)) @@ -5296,6 +5304,23 @@ void netdev_update_features(struct net_device *dev) } EXPORT_SYMBOL(netdev_update_features); +/** + * netdev_change_features - recalculate device features + * @dev: the device to check + * + * Recalculate dev->features set and send notifications even + * if they have not changed. Should be called instead of + * netdev_update_features() if also dev->vlan_features might + * have changed to allow the changes to be propagated to stacked + * VLAN devices. + */ +void netdev_change_features(struct net_device *dev) +{ + __netdev_update_features(dev); + netdev_features_change(dev); +} +EXPORT_SYMBOL(netdev_change_features); + /** * netif_stacked_transfer_operstate - transfer operstate * @rootdev: the root or lower level device to transfer state from -- cgit v1.2.3 From 0696c3a8acd3b7c3186dd231d65d97e05a75189f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20Pan=28=E6=BD=98=E5=8D=AB=E5=B9=B3=29?= Date: Thu, 12 May 2011 15:46:56 +0000 Subject: net:set valid name before calling ndo_init() In commit 1c5cae815d19 (net: call dev_alloc_name from register_netdevice), a bug of bonding was involved, see example 1 and 2. In register_netdevice(), the name of net_device is not valid until dev_get_valid_name() is called. But dev->netdev_ops->ndo_init(that is bond_init) is called before dev_get_valid_name(), and it uses the invalid name of net_device. I think register_netdevice() should make sure that the name of net_device is valid before calling ndo_init(). example 1: modprobe bonding ls /proc/net/bonding/bond%d ps -eLf root 3398 2 3398 0 1 21:34 ? 00:00:00 [bond%d] example 2: modprobe bonding max_bonds=3 [ 170.100292] bonding: Ethernet Channel Bonding Driver: v3.7.1 (April 27, 2011) [ 170.101090] bonding: Warning: either miimon or arp_interval and arp_ip_target module parameters must be specified, otherwise bonding will not detect link failures! 
see bonding.txt for details. [ 170.102469] ------------[ cut here ]------------ [ 170.103150] WARNING: at /home/pwp/net-next-2.6/fs/proc/generic.c:586 proc_register+0x126/0x157() [ 170.104075] Hardware name: VirtualBox [ 170.105065] proc_dir_entry 'bonding/bond%d' already registered [ 170.105613] Modules linked in: bonding(+) sunrpc ipv6 uinput microcode ppdev parport_pc parport joydev e1000 pcspkr i2c_piix4 i2c_core [last unloaded: bonding] [ 170.108397] Pid: 3457, comm: modprobe Not tainted 2.6.39-rc2+ #14 [ 170.108935] Call Trace: [ 170.109382] [] warn_slowpath_common+0x6a/0x7f [ 170.109911] [] ? proc_register+0x126/0x157 [ 170.110329] [] warn_slowpath_fmt+0x2b/0x2f [ 170.110846] [] proc_register+0x126/0x157 [ 170.111870] [] proc_create_data+0x82/0x98 [ 170.112335] [] bond_create_proc_entry+0x3f/0x73 [bonding] [ 170.112905] [] bond_init+0x77/0xa5 [bonding] [ 170.113319] [] register_netdevice+0x8c/0x1d3 [ 170.113848] [] bond_create+0x6c/0x90 [bonding] [ 170.114322] [] bonding_init+0x763/0x7b1 [bonding] [ 170.114879] [] do_one_initcall+0x76/0x122 [ 170.115317] [] ? 0xf94f3fff [ 170.115799] [] sys_init_module+0x1286/0x140d [ 170.116879] [] sysenter_do_call+0x12/0x28 [ 170.117404] ---[ end trace 64e4fac3ae5fff1a ]--- [ 170.117924] bond%d: Warning: failed to register to debugfs [ 170.128728] ------------[ cut here ]------------ [ 170.129360] WARNING: at /home/pwp/net-next-2.6/fs/proc/generic.c:586 proc_register+0x126/0x157() [ 170.130323] Hardware name: VirtualBox [ 170.130797] proc_dir_entry 'bonding/bond%d' already registered [ 170.131315] Modules linked in: bonding(+) sunrpc ipv6 uinput microcode ppdev parport_pc parport joydev e1000 pcspkr i2c_piix4 i2c_core [last unloaded: bonding] [ 170.133731] Pid: 3457, comm: modprobe Tainted: G W 2.6.39-rc2+ #14 [ 170.134308] Call Trace: [ 170.134743] [] warn_slowpath_common+0x6a/0x7f [ 170.135305] [] ? 
proc_register+0x126/0x157 [ 170.135820] [] warn_slowpath_fmt+0x2b/0x2f [ 170.137168] [] proc_register+0x126/0x157 [ 170.137700] [] proc_create_data+0x82/0x98 [ 170.138174] [] bond_create_proc_entry+0x3f/0x73 [bonding] [ 170.138745] [] bond_init+0x77/0xa5 [bonding] [ 170.139278] [] register_netdevice+0x8c/0x1d3 [ 170.139828] [] bond_create+0x6c/0x90 [bonding] [ 170.140361] [] bonding_init+0x763/0x7b1 [bonding] [ 170.140927] [] do_one_initcall+0x76/0x122 [ 170.141494] [] ? 0xf94f3fff [ 170.141975] [] sys_init_module+0x1286/0x140d [ 170.142463] [] sysenter_do_call+0x12/0x28 [ 170.142974] ---[ end trace 64e4fac3ae5fff1b ]--- [ 170.144949] bond%d: Warning: failed to register to debugfs Signed-off-by: Weiping Pan Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/dev.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index ea23353e6251..3ed09f8ecbf8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5437,6 +5437,10 @@ int register_netdevice(struct net_device *dev) dev->iflink = -1; + ret = dev_get_valid_name(dev, dev->name); + if (ret < 0) + goto out; + /* Init, if this function is available */ if (dev->netdev_ops->ndo_init) { ret = dev->netdev_ops->ndo_init(dev); @@ -5447,10 +5451,6 @@ int register_netdevice(struct net_device *dev) } } - ret = dev_get_valid_name(dev, dev->name); - if (ret < 0) - goto err_uninit; - dev->ifindex = dev_new_index(net); if (dev->iflink == -1) dev->iflink = dev->ifindex; -- cgit v1.2.3 From 6f404e441d169afc90929ef5e451ec9779c1f11a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Mon, 16 May 2011 15:14:21 -0400 Subject: net: Change netdev_fix_features messages loglevel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Those reduced to DEBUG can possibly be triggered by unprivileged processes and are nothing exceptional. 
Illegal checksum combinations can only be caused by driver bug, so promote those messages to WARN. Since GSO without SG will now only cause DEBUG message from netdev_fix_features(), remove the workaround from register_netdevice(). Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- net/core/dev.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 92009440d28b..b624fe4d9bd7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5186,27 +5186,27 @@ u32 netdev_fix_features(struct net_device *dev, u32 features) /* Fix illegal checksum combinations */ if ((features & NETIF_F_HW_CSUM) && (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { - netdev_info(dev, "mixed HW and IP checksum settings.\n"); + netdev_warn(dev, "mixed HW and IP checksum settings.\n"); features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); } if ((features & NETIF_F_NO_CSUM) && (features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { - netdev_info(dev, "mixed no checksumming and other settings.\n"); + netdev_warn(dev, "mixed no checksumming and other settings.\n"); features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM); } /* Fix illegal SG+CSUM combinations. */ if ((features & NETIF_F_SG) && !(features & NETIF_F_ALL_CSUM)) { - netdev_info(dev, - "Dropping NETIF_F_SG since no checksum feature.\n"); + netdev_dbg(dev, + "Dropping NETIF_F_SG since no checksum feature.\n"); features &= ~NETIF_F_SG; } /* TSO requires that SG is present as well. */ if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) { - netdev_info(dev, "Dropping TSO features since no SG feature.\n"); + netdev_dbg(dev, "Dropping TSO features since no SG feature.\n"); features &= ~NETIF_F_ALL_TSO; } @@ -5216,7 +5216,7 @@ u32 netdev_fix_features(struct net_device *dev, u32 features) /* Software GSO depends on SG. 
*/ if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) { - netdev_info(dev, "Dropping NETIF_F_GSO since no SG feature.\n"); + netdev_dbg(dev, "Dropping NETIF_F_GSO since no SG feature.\n"); features &= ~NETIF_F_GSO; } @@ -5226,13 +5226,13 @@ u32 netdev_fix_features(struct net_device *dev, u32 features) if (!((features & NETIF_F_GEN_CSUM) || (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)) == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { - netdev_info(dev, + netdev_dbg(dev, "Dropping NETIF_F_UFO since no checksum offload features.\n"); features &= ~NETIF_F_UFO; } if (!(features & NETIF_F_SG)) { - netdev_info(dev, + netdev_dbg(dev, "Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n"); features &= ~NETIF_F_UFO; } @@ -5414,12 +5414,6 @@ int register_netdevice(struct net_device *dev) dev->features |= NETIF_F_SOFT_FEATURES; dev->wanted_features = dev->features & dev->hw_features; - /* Avoid warning from netdev_fix_features() for GSO without SG */ - if (!(dev->wanted_features & NETIF_F_SG)) { - dev->wanted_features &= ~NETIF_F_GSO; - dev->features &= ~NETIF_F_GSO; - } - /* Enable GRO and NETIF_F_HIGHDMA for vlans by default, * vlan_dev_init() will do the dev->features check, so these features * are enabled only if supported by underlying device. -- cgit v1.2.3 From 372b2312010bece1e36f577d6c99a6193ec54cbd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 17 May 2011 13:56:59 -0400 Subject: net: use hlist_del_rcu() in dev_change_name() Using plain hlist_del() in dev_change_name() is wrong since a concurrent reader can crash trying to dereference LIST_POISON1. Bug introduced in commit 72c9528bab94 (net: Introduce dev_get_by_name_rcu()) Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index b624fe4d9bd7..30a4078b3fa2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1007,7 +1007,7 @@ rollback: } write_lock_bh(&dev_base_lock); - hlist_del(&dev->name_hlist); + hlist_del_rcu(&dev->name_hlist); write_unlock_bh(&dev_base_lock); synchronize_rcu(); -- cgit v1.2.3 From 604ae14ffb6d75d6eef4757859226b758d6bf9e3 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 16 May 2011 10:37:39 +0000 Subject: net: Change netdev_fix_features messages loglevel Cool, how about we make 'Features changed' debug as well? This way userspace can't fill up the log just by tweaking tun features with an ioctl. Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 30a4078b3fa2..acd742379344 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5258,7 +5258,7 @@ void netdev_update_features(struct net_device *dev) if (dev->features == features) return; - netdev_info(dev, "Features changed: 0x%08x -> 0x%08x\n", + netdev_dbg(dev, "Features changed: 0x%08x -> 0x%08x\n", dev->features, features); if (dev->netdev_ops->ndo_set_features) -- cgit v1.2.3 From 449f4544267e73d5db372971da63634707c32299 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 19 May 2011 12:24:16 +0000 Subject: macvlan: remove one synchronize_rcu() call When one macvlan device is dismantled, we can avoid one synchronize_rcu() call done after deletion from hash list, since caller will perform a synchronize_net() call after its ndo_stop() call. Add a new netdev->dismantle field to signal this dismantle intent. Reduces RTNL hold time. Signed-off-by: Eric Dumazet CC: Patrick McHardy CC: Ben Greear Signed-off-by: David S. 
Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 155de2094e71..d94537914a71 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5126,7 +5126,7 @@ static void rollback_registered_many(struct list_head *head) list_del(&dev->unreg_list); continue; } - + dev->dismantle = true; BUG_ON(dev->reg_state != NETREG_REGISTERED); } -- cgit v1.2.3 From 6df427fe8c481d3be437cbe8bd366bdac82b73c4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 19 May 2011 19:37:40 +0000 Subject: net: remove synchronize_net() from netdev_set_master() In the old days, we used to access dev->master in __netif_receive_skb() in a rcu_read_lock section. So one synchronize_net() call was needed in netdev_set_master() to make sure another cpu could not use old master while/after we release it. We now use netdev_rx_handler infrastructure and added one synchronize_net() call in bond_release()/bond_release_all() Remove the obsolete synchronize_net() from netdev_set_master() and add one in bridge del_nbp() after its netdev_rx_handler_unregister() call. This makes enslave -d a bit faster. Signed-off-by: Eric Dumazet CC: Jiri Pirko CC: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/core/dev.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index d94537914a71..bcb05cb799c1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4294,10 +4294,8 @@ int netdev_set_master(struct net_device *slave, struct net_device *master) slave->master = master; - if (old) { - synchronize_net(); + if (old) dev_put(old); - } return 0; } EXPORT_SYMBOL(netdev_set_master); -- cgit v1.2.3 From be3fc413da9eb17cce0991f214ab019d16c88c41 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 23 May 2011 23:07:32 +0000 Subject: net: use synchronize_rcu_expedited() synchronize_rcu() is very slow in various situations (HZ=100, CONFIG_NO_HZ=y, CONFIG_PREEMPT=n) Extract from my (mostly idle) 8 core machine : synchronize_rcu() in 99985 us synchronize_rcu() in 79982 us synchronize_rcu() in 87612 us synchronize_rcu() in 79827 us synchronize_rcu() in 109860 us synchronize_rcu() in 98039 us synchronize_rcu() in 89841 us synchronize_rcu() in 79842 us synchronize_rcu() in 80151 us synchronize_rcu() in 119833 us synchronize_rcu() in 99858 us synchronize_rcu() in 73999 us synchronize_rcu() in 79855 us synchronize_rcu() in 79853 us When we hold RTNL mutex, we would like to spend some cpu cycles but not block too long other processes waiting for this mutex. We also want to setup/dismantle network features as fast as possible at boot/shutdown time. This patch makes synchronize_net() call the expedited version if RTNL is locked. synchronize_rcu_expedited() typical delay is about 20 us on my machine. synchronize_rcu_expedited() in 18 us synchronize_rcu_expedited() in 18 us synchronize_rcu_expedited() in 18 us synchronize_rcu_expedited() in 18 us synchronize_rcu_expedited() in 20 us synchronize_rcu_expedited() in 16 us synchronize_rcu_expedited() in 20 us synchronize_rcu_expedited() in 18 us synchronize_rcu_expedited() in 18 us Signed-off-by: Eric Dumazet CC: Paul E. 
McKenney CC: Ben Greear Reviewed-by: Paul E. McKenney Signed-off-by: David S. Miller --- net/core/dev.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index bcb05cb799c1..ec11d757c1fc 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5954,7 +5954,10 @@ EXPORT_SYMBOL(free_netdev); void synchronize_net(void) { might_sleep(); - synchronize_rcu(); + if (rtnl_is_locked()) + synchronize_rcu_expedited(); + else + synchronize_rcu(); } EXPORT_SYMBOL(synchronize_net); -- cgit v1.2.3 From f11970e383acd6f505f492f1bc07fb1a4d884829 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Tue, 24 May 2011 08:31:09 +0000 Subject: net: make dev_disable_lro use physical device if passed a vlan dev (v2) If the device passed into dev_disable_lro is a vlan, then repoint the dev pointer so that we actually modify the underlying physical device. Signed-of-by: Neil Horman CC: davem@davemloft.net CC: bhutchings@solarflare.com Signed-off-by: David S. Miller --- net/core/dev.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index ec11d757c1fc..c7e305d13b71 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1308,6 +1308,13 @@ void dev_disable_lro(struct net_device *dev) { u32 flags; + /* + * If we're trying to disable lro on a vlan device + * use the underlying physical device instead + */ + if (is_vlan_dev(dev)) + dev = vlan_dev_real_dev(dev); + if (dev->ethtool_ops && dev->ethtool_ops->get_flags) flags = dev->ethtool_ops->get_flags(dev); else -- cgit v1.2.3 From ec764bf083a6ff396234351b51fd236f53c903bf Mon Sep 17 00:00:00 2001 From: Koki Sanagi Date: Mon, 30 May 2011 21:48:34 +0000 Subject: net: tracepoint of net_dev_xmit sees freed skb and causes panic Because there is a possibility that skb is kfree_skb()ed and zero cleared after ndo_start_xmit, we should not see the contents of skb like skb->len and skb->dev->name after ndo_start_xmit.
But trace_net_dev_xmit does that and causes a panic by NULL pointer dereference. This patch fixes trace_net_dev_xmit so that it does not look at the contents of skb directly. If you want to reproduce this panic: 1. Turn the net_dev_xmit tracepoint on 2. Create 2 guests on KVM 3. Make both guests use virtio_net 4. Execute netperf from one guest to the other for a long time as a network burden 5. The host will panic (it takes about 30 minutes) Signed-off-by: Koki Sanagi Signed-off-by: David S. Miller --- net/core/dev.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index c7e305d13b71..939307891e71 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2096,6 +2096,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, { const struct net_device_ops *ops = dev->netdev_ops; int rc = NETDEV_TX_OK; + unsigned int skb_len; if (likely(!skb->next)) { u32 features; @@ -2146,8 +2147,9 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, } } + skb_len = skb->len; rc = ops->ndo_start_xmit(skb, dev); - trace_net_dev_xmit(skb, rc); + trace_net_dev_xmit(skb, rc, dev, skb_len); if (rc == NETDEV_TX_OK) txq_trans_update(txq); return rc; @@ -2167,8 +2169,9 @@ gso: if (dev->priv_flags & IFF_XMIT_DST_RELEASE) skb_dst_drop(nskb); + skb_len = nskb->len; rc = ops->ndo_start_xmit(nskb, dev); - trace_net_dev_xmit(nskb, rc); + trace_net_dev_xmit(nskb, rc, dev, skb_len); if (unlikely(rc != NETDEV_TX_OK)) { if (rc & ~NETDEV_TX_MASK) goto out_kfree_gso_skb; -- cgit v1.2.3 From 3019de124b9f5b1526cb3668b74af14371e21795 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 6 Jun 2011 16:41:33 -0700 Subject: net: Rework netdev_drivername() to avoid warning. This interface uses a temporary buffer, but for no real reason.
And now can generate warnings like: net/sched/sch_generic.c: In function dev_watchdog net/sched/sch_generic.c:254:10: warning: unused variable drivername Just return driver->name directly or "". Reported-by: Connor Hansen Signed-off-by: David S. Miller --- net/core/dev.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 939307891e71..1af6cb27f67a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6264,29 +6264,23 @@ err_name: /** * netdev_drivername - network driver for the device * @dev: network device - * @buffer: buffer for resulting name - * @len: size of buffer * * Determine network driver for device. */ -char *netdev_drivername(const struct net_device *dev, char *buffer, int len) +const char *netdev_drivername(const struct net_device *dev) { const struct device_driver *driver; const struct device *parent; - - if (len <= 0 || !buffer) - return buffer; - buffer[0] = 0; + const char *empty = ""; parent = dev->dev.parent; - if (!parent) - return buffer; + return empty; driver = parent->driver; if (driver && driver->name) - strlcpy(buffer, driver->name, len); - return buffer; + return driver->name; + return empty; } static int __netdev_printk(const char *level, const struct net_device *dev, -- cgit v1.2.3 From 264524d5e5195f6e0f099bee20253a22b651e272 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 6 Jun 2011 20:50:03 +0000 Subject: net: cpu offline cause napi stall Frank Blaschka reported : During heavy network load we turn off/on cpus. Sometimes this causes a stall on the network device. Digging into the dump I found out following: napi is scheduled but does not run. From the I/O buffers and the napi state I see napi/rx_softirq processing has stopped because the budget was reached. napi stays in the softnet_data poll_list and the rx_softirq was raised again. 
I assume at this time the cpu offline comes in, the rx softirq is raised/moved to another cpu but napi stays in the poll_list of the softnet_data of the now offline cpu. Reviewing dev_cpu_callback (net/core/dev.c) I did not find that the poll_list is transferred to the new cpu. This patch is a straightforward implementation of Frank's suggestion: Transfer poll_list and trigger NET_RX_SOFTIRQ on new cpu. Reported-by: Frank Blaschka Signed-off-by: Heiko Carstens Signed-off-by: Eric Dumazet Tested-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 1af6cb27f67a..a54c9f87ddbb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6178,6 +6178,11 @@ static int dev_cpu_callback(struct notifier_block *nfb, oldsd->output_queue = NULL; oldsd->output_queue_tailp = &oldsd->output_queue; } + /* Append NAPI poll list from offline CPU. */ + if (!list_empty(&oldsd->poll_list)) { + list_splice_init(&oldsd->poll_list, &sd->poll_list); + raise_softirq_irqoff(NET_RX_SOFTIRQ); + } raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_enable(); -- cgit v1.2.3 From 0b5c9db1b11d3175bb42b80663a9f072f801edf5 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 10 Jun 2011 06:56:58 +0000 Subject: vlan: Fix the ingress VLAN_FLAG_REORDER_HDR check Testing of VLAN_FLAG_REORDER_HDR does not belong in vlan_untag but rather in vlan_do_receive. Otherwise the vlan header will not be properly put on the packet in the case of vlan header acceleration. As we remove the check from vlan_check_reorder_header rename it vlan_reorder_header to keep the naming clean. Fix up the skb->pkt_type early so we don't look at the packet after adding the vlan tag, which guarantees we don't goof and look at the wrong field. Use a simple if statement instead of a complicated switch statement to decide whether we need to increment rx_stats for a multicast packet.
Hopefully at some point we will just declare the case where VLAN_FLAG_REORDER_HDR is cleared as unsupported and remove the code. Until then this keeps it working correctly. Signed-off-by: Eric W. Biederman Signed-off-by: Jiri Pirko Acked-by: Changli Gao Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index a54c9f87ddbb..9c58c1ec41a9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3114,7 +3114,7 @@ static int __netif_receive_skb(struct sk_buff *skb) skb_reset_network_header(skb); skb_reset_transport_header(skb); - skb->mac_len = skb->network_header - skb->mac_header; + skb_reset_mac_len(skb); pt_prev = NULL; -- cgit v1.2.3