From efd7ef1c1929d7a0329d4349252863c04d6f1729 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 11 Mar 2015 23:04:08 -0500 Subject: net: Kill hold_net release_net hold_net and release_net were an idea that turned out to be useless. The code has been disabled since 2008. Kill the code it is long past due. Signed-off-by: "Eric W. Biederman" Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 962ee9d71964..39fe369b46ad 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6841,8 +6841,6 @@ void free_netdev(struct net_device *dev) { struct napi_struct *p, *n; - release_net(dev_net(dev)); - netif_free_tx_queues(dev); #ifdef CONFIG_SYSFS kvfree(dev->_rx); -- cgit v1.2.3 From db24a9044ee191c397dcd1c6574f56d67d7c8df5 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 17 Mar 2015 20:23:15 -0600 Subject: net: add support for phys_port_name Similar to port id allow netdevices to specify port names and export the name via sysfs. Drivers can implement the netdevice operation to assist udev in having sane default names for the devices using the rule: $ cat /etc/udev/rules.d/80-net-setup-link.rules SUBSYSTEM=="net", ACTION=="add", ATTR{phys_port_name}!="", NAME="$attr{phys_port_name}" Use of phys_name versus phys_id was suggested-by Jiri Pirko. Signed-off-by: David Ahern Acked-by: Jiri Pirko Acked-by: Scott Feldman Signed-off-by: David S. Miller --- net/core/dev.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 39fe369b46ad..a1f24151db5b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5911,6 +5911,24 @@ int dev_get_phys_port_id(struct net_device *dev, } EXPORT_SYMBOL(dev_get_phys_port_id); +/** + * dev_get_phys_port_name - Get device physical port name + * @dev: device + * @name: port name + * + * Get device physical port name + */ +int dev_get_phys_port_name(struct net_device *dev, + char *name, size_t len) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + if (!ops->ndo_get_phys_port_name) + return -EOPNOTSUPP; + return ops->ndo_get_phys_port_name(dev, name, len); +} +EXPORT_SYMBOL(dev_get_phys_port_name); + /** * dev_new_index - allocate an ifindex * @net: the applicable net namespace -- cgit v1.2.3 From 99c4a26a159b28fa46a3e746a9b41b297e73d261 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 18 Mar 2015 22:52:33 -0400 Subject: net: Fix high overhead of vlan sub-device teardown. When a networking device is taken down that has a non-trivial number of VLAN devices configured under it, we eat a full synchronize_net() for every such VLAN device. This is because of the call chain: NETDEV_DOWN notifier --> vlan_device_event() --> dev_change_flags() --> __dev_change_flags() --> __dev_close() --> __dev_close_many() --> dev_deactivate_many() --> synchronize_net() This is kind of rediculous because we already have infrastructure for batching doing operation X to a list of net devices so that we only incur one sync. So make use of that by exporting dev_close_many() and adjusting it's interfaace so that the caller can fully manage the batch list. Use this in vlan_device_event() and all the overhead goes away. Reported-by: Salam Noureddine Signed-off-by: David S. Miller --- net/core/dev.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index a1f24151db5b..5d43e010ef87 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1385,7 +1385,7 @@ static int __dev_close(struct net_device *dev) return retval; } -static int dev_close_many(struct list_head *head) +int dev_close_many(struct list_head *head, bool unlink) { struct net_device *dev, *tmp; @@ -1399,11 +1399,13 @@ static int dev_close_many(struct list_head *head) list_for_each_entry_safe(dev, tmp, head, close_list) { rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL); call_netdevice_notifiers(NETDEV_DOWN, dev); - list_del_init(&dev->close_list); + if (unlink) + list_del_init(&dev->close_list); } return 0; } +EXPORT_SYMBOL(dev_close_many); /** * dev_close - shutdown an interface. @@ -1420,7 +1422,7 @@ int dev_close(struct net_device *dev) LIST_HEAD(single); list_add(&dev->close_list, &single); - dev_close_many(&single); + dev_close_many(&single, true); list_del(&single); } return 0; @@ -5986,7 +5988,7 @@ static void rollback_registered_many(struct list_head *head) /* If device is running, close it first. */ list_for_each_entry(dev, head, unreg_list) list_add_tail(&dev->close_list, &close_head); - dev_close_many(&close_head); + dev_close_many(&close_head, true); list_for_each_entry(dev, head, unreg_list) { /* And unlink it from device chain. */ -- cgit v1.2.3 From 08b4b8ea799d27c5dd28e8cb9188d2e88e58d294 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Fri, 20 Mar 2015 14:29:09 -0700 Subject: net: clear skb->priority when forwarding to another netns skb->priority can be set for two purposes: 1) With respect to IP TOS field, which is computed by a mask. Ususally used for priority qdisc's (pfifo, prio etc.), on TX side (we only have ingress qdisc on RX side). 2) Used as a classid or flowid, works in the same way with tc classid. What's more, this can even override the classid of tc filters. For case 1), it has been respected within its netns, I don't see any point of keeping it for another netns, especially when packets will be forwarded to Rx path (no matter from TX path or RX path). For case 2) we care, our applications run inside a netns, and we classify the packets by our own filters outside, If some application sets this priority, it could bypass our filters, therefore clear it when moving out of a netns, it makes no sense to bypass tc filters out of its netns. Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/core/dev.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 5d43e010ef87..a0408d497dae 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1696,6 +1696,7 @@ int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb) } skb_scrub_packet(skb, true); + skb->priority = 0; skb->protocol = eth_type_trans(skb, dev); skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); -- cgit v1.2.3 From f5a7fb88e1f82542ca14ba93a1d4fa35471c60ca Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Fri, 27 Mar 2015 14:31:11 +0900 Subject: vlan: Introduce helper functions to check if skb is tagged Separate the two checks for single vlan and multiple vlans in netif_skb_features(). This allows us to move the check for multiple vlans to another function later. Signed-off-by: Toshiaki Makita Signed-off-by: David S. Miller --- net/core/dev.c | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index a0408d497dae..04bffcd4a48d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2567,7 +2567,6 @@ netdev_features_t netif_skb_features(struct sk_buff *skb) struct net_device *dev = skb->dev; netdev_features_t features = dev->features; u16 gso_segs = skb_shinfo(skb)->gso_segs; - __be16 protocol = skb->protocol; if (gso_segs > dev->gso_max_segs || gso_segs < dev->gso_min_segs) features &= ~NETIF_F_GSO_MASK; @@ -2579,22 +2578,15 @@ netdev_features_t netif_skb_features(struct sk_buff *skb) if (skb->encapsulation) features &= dev->hw_enc_features; - if (!skb_vlan_tag_present(skb)) { - if (unlikely(protocol == htons(ETH_P_8021Q) || - protocol == htons(ETH_P_8021AD))) { - struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; - protocol = veh->h_vlan_encapsulated_proto; - } else { - goto finalize; - } - } - - features = netdev_intersect_features(features, - dev->vlan_features | - NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_STAG_TX); + if (skb_vlan_tagged(skb)) + features = netdev_intersect_features(features, + dev->vlan_features | + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX); + else + goto finalize; - if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) + if (skb_vlan_tagged_multi(skb)) features = netdev_intersect_features(features, NETIF_F_SG | NETIF_F_HIGHDMA | -- cgit v1.2.3 From 8cb65d00086bfba22bac87ff18b751432fc74003 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Fri, 27 Mar 2015 14:31:12 +0900 Subject: net: Move check for multiple vlans to drivers To allow drivers to handle the features check for multiple tags, move the check to ndo_features_check(). As no drivers currently handle multiple tagged TSO, introduce dflt_features_check() and call it if the driver does not have ndo_features_check(). Signed-off-by: Toshiaki Makita Signed-off-by: David S. Miller --- net/core/dev.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 04bffcd4a48d..cb46badbef5a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2562,6 +2562,13 @@ static netdev_features_t harmonize_features(struct sk_buff *skb, return features; } +static netdev_features_t dflt_features_check(const struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) +{ + return vlan_features_check(skb, features); +} + netdev_features_t netif_skb_features(struct sk_buff *skb) { struct net_device *dev = skb->dev; @@ -2583,22 +2590,12 @@ netdev_features_t netif_skb_features(struct sk_buff *skb) dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX); - else - goto finalize; - if (skb_vlan_tagged_multi(skb)) - features = netdev_intersect_features(features, - NETIF_F_SG | - NETIF_F_HIGHDMA | - NETIF_F_FRAGLIST | - NETIF_F_GEN_CSUM | - NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_STAG_TX); - -finalize: if (dev->netdev_ops->ndo_features_check) features &= dev->netdev_ops->ndo_features_check(skb, dev, features); + else + features &= dflt_features_check(skb, dev, features); return harmonize_features(skb, features); } -- cgit v1.2.3 From e38f30256b36700aa63aa709dc091bf6eb69c257 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Fri, 27 Mar 2015 14:31:13 +0900 Subject: net: Introduce passthru_features_check As there are a number of (especially virtual) devices that don't need the multiple vlan check, introduce passthru_features_check() for convenience. Signed-off-by: Toshiaki Makita Signed-off-by: David S. Miller --- net/core/dev.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index cb46badbef5a..3a06003ecafd 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2562,6 +2562,14 @@ static netdev_features_t harmonize_features(struct sk_buff *skb, return features; } +netdev_features_t passthru_features_check(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) +{ + return features; +} +EXPORT_SYMBOL(passthru_features_check); + static netdev_features_t dflt_features_check(const struct sk_buff *skb, struct net_device *dev, netdev_features_t features) -- cgit v1.2.3 From fbcb21705930f2930f506149d0b8d36dfbe45107 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 30 Mar 2015 16:56:01 +0200 Subject: net: rename dev to orig_dev in deliver_ptype_list_skb Unlike other places, this function uses name "dev" for what should be "orig_dev", which might be a bit confusing. So fix this. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/dev.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 3a06003ecafd..65492b0354c0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1740,7 +1740,8 @@ static inline int deliver_skb(struct sk_buff *skb, static inline void deliver_ptype_list_skb(struct sk_buff *skb, struct packet_type **pt, - struct net_device *dev, __be16 type, + struct net_device *orig_dev, + __be16 type, struct list_head *ptype_list) { struct packet_type *ptype, *pt_prev = *pt; @@ -1749,7 +1750,7 @@ static inline void deliver_ptype_list_skb(struct sk_buff *skb, if (ptype->type != type) continue; if (pt_prev) - deliver_skb(skb, pt_prev, dev); + deliver_skb(skb, pt_prev, orig_dev); pt_prev = ptype; } *pt = pt_prev; -- cgit v1.2.3 From a54acb3a6f853e8394c4cb7b6a4d93c88f13eefd Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 2 Apr 2015 17:07:00 +0200 Subject: dev: introduce dev_get_iflink() The goal of this patch is to prepare the removal of the iflink field. It introduces a new ndo function, which will be implemented by virtual interfaces. There is no functional change into this patch. All readers of iflink field now call dev_get_iflink(). Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/core/dev.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 65492b0354c0..77172d085760 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -659,6 +659,23 @@ __setup("netdev=", netdev_boot_setup); *******************************************************************************/ +/** + * dev_get_iflink - get 'iflink' value of a interface + * @dev: targeted interface + * + * Indicates the ifindex the interface is linked to. + * Physical interfaces have the same 'ifindex' and 'iflink' values. + */ + +int dev_get_iflink(const struct net_device *dev) +{ + if (dev->netdev_ops && dev->netdev_ops->ndo_get_iflink) + return dev->netdev_ops->ndo_get_iflink(dev); + + return dev->iflink; +} +EXPORT_SYMBOL(dev_get_iflink); + /** * __dev_get_by_name - find a device by its name * @net: the applicable net namespace @@ -6345,7 +6362,7 @@ int register_netdevice(struct net_device *dev) else if (__dev_get_by_index(net, dev->ifindex)) goto err_uninit; - if (dev->iflink == -1) + if (dev_get_iflink(dev) == -1) dev->iflink = dev->ifindex; /* Transfer changeable features to wanted_features and enable @@ -7061,7 +7078,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char /* If there is an ifindex conflict assign a new one */ if (__dev_get_by_index(net, dev->ifindex)) { - int iflink = (dev->iflink == dev->ifindex); + int iflink = (dev_get_iflink(dev) == dev->ifindex); dev->ifindex = dev_new_index(net); if (iflink) dev->iflink = dev->ifindex; -- cgit v1.2.3 From 7a66bbc96ce9ad8261fa5f7f6ae65370eb6866ee Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 2 Apr 2015 17:07:09 +0200 Subject: net: remove iflink field from struct net_device Now that all users of iflink have the ndo_get_iflink handler available, it's possible to remove this field. By default, dev_get_iflink() returns the ifindex of the interface. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/core/dev.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 77172d085760..3be107e0bc93 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -672,7 +672,7 @@ int dev_get_iflink(const struct net_device *dev) if (dev->netdev_ops && dev->netdev_ops->ndo_get_iflink) return dev->netdev_ops->ndo_get_iflink(dev); - return dev->iflink; + return dev->ifindex; } EXPORT_SYMBOL(dev_get_iflink); @@ -6331,8 +6331,6 @@ int register_netdevice(struct net_device *dev) spin_lock_init(&dev->addr_list_lock); netdev_set_addr_lockdep_class(dev); - dev->iflink = -1; - ret = dev_get_valid_name(net, dev, dev->name); if (ret < 0) goto out; @@ -6362,9 +6360,6 @@ int register_netdevice(struct net_device *dev) else if (__dev_get_by_index(net, dev->ifindex)) goto err_uninit; - if (dev_get_iflink(dev) == -1) - dev->iflink = dev->ifindex; - /* Transfer changeable features to wanted_features and enable * software offloads (GSO and GRO). */ @@ -7077,12 +7072,8 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char dev_net_set(dev, net); /* If there is an ifindex conflict assign a new one */ - if (__dev_get_by_index(net, dev->ifindex)) { - int iflink = (dev_get_iflink(dev) == dev->ifindex); + if (__dev_get_by_index(net, dev->ifindex)) dev->ifindex = dev_new_index(net); - if (iflink) - dev->iflink = dev->ifindex; - } /* Send a netdev-add uevent to the new namespace */ kobject_uevent(&dev->dev.kobj, KOBJ_ADD); -- cgit v1.2.3 From e1622baf54df8cc958bf29d71de5ad545ea7d93c Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 2 Apr 2015 17:07:10 +0200 Subject: dev: set iflink to 0 for virtual interfaces Virtual interfaces are supposed to set an iflink value != of their ifindex. It was not the case for some of them, like vxlan, bond or bridge. Let's set iflink to 0 when dev->rtnl_link_ops is set. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/core/dev.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 3be107e0bc93..26622d614f81 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -672,6 +672,10 @@ int dev_get_iflink(const struct net_device *dev) if (dev->netdev_ops && dev->netdev_ops->ndo_get_iflink) return dev->netdev_ops->ndo_get_iflink(dev); + /* If dev->rtnl_link_ops is set, it's a virtual interface. */ + if (dev->rtnl_link_ops) + return 0; + return dev->ifindex; } EXPORT_SYMBOL(dev_get_iflink); -- cgit v1.2.3 From 7026b1ddb6b8d4e6ee33dc2bd06c0ca8746fa7ab Mon Sep 17 00:00:00 2001 From: David Miller Date: Sun, 5 Apr 2015 22:19:04 -0400 Subject: netfilter: Pass socket pointer down through okfn(). On the output paths in particular, we have to sometimes deal with two socket contexts. First, and usually skb->sk, is the local socket that generated the frame. And second, is potentially the socket used to control a tunneling socket, such as one the encapsulates using UDP. We do not want to disassociate skb->sk when encapsulating in order to fix this, because that would break socket memory accounting. The most extreme case where this can cause huge problems is an AF_PACKET socket transmitting over a vxlan device. We hit code paths doing checks that assume they are dealing with an ipv4 socket, but are actually operating upon the AF_PACKET one. Signed-off-by: David S. Miller --- net/core/dev.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 3b3965288f52..b2775f06c710 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2879,7 +2879,7 @@ EXPORT_SYMBOL(xmit_recursion); * dev_loopback_xmit - loop back @skb * @skb: buffer to transmit */ -int dev_loopback_xmit(struct sk_buff *skb) +int dev_loopback_xmit(struct sock *sk, struct sk_buff *skb) { skb_reset_mac_header(skb); __skb_pull(skb, skb_network_offset(skb)); @@ -3017,11 +3017,11 @@ out: return rc; } -int dev_queue_xmit(struct sk_buff *skb) +int dev_queue_xmit_sk(struct sock *sk, struct sk_buff *skb) { return __dev_queue_xmit(skb, NULL); } -EXPORT_SYMBOL(dev_queue_xmit); +EXPORT_SYMBOL(dev_queue_xmit_sk); int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv) { @@ -3853,13 +3853,13 @@ static int netif_receive_skb_internal(struct sk_buff *skb) * NET_RX_SUCCESS: no congestion * NET_RX_DROP: packet was dropped */ -int netif_receive_skb(struct sk_buff *skb) +int netif_receive_skb_sk(struct sock *sk, struct sk_buff *skb) { trace_netif_receive_skb_entry(skb); return netif_receive_skb_internal(skb); } -EXPORT_SYMBOL(netif_receive_skb); +EXPORT_SYMBOL(netif_receive_skb_sk); /* Network device is going away, flush any packets still pending * Called with irqs disabled. -- cgit v1.2.3 From 4577139b2dabf58973d59d157aae4ddd3bde863a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 10 Apr 2015 23:07:54 +0200 Subject: net: use jump label patching for ingress qdisc in __netif_receive_skb_core Even if we make use of classifier and actions from the egress path, we're going into handle_ing() executing additional code on a per-packet cost for ingress qdisc, just to realize that nothing is attached on ingress. Instead, this can just be blinded out as a no-op entirely with the use of a static key. On input fast-path, we already make use of static keys in various places, e.g. skb time stamping, in RPS, etc. It makes sense to not waste time when we're assured that no ingress qdisc is attached anywhere. Enabling/disabling of that code path is being done via two helpers, namely net_{inc,dec}_ingress_queue(), that are being invoked under RTNL mutex when a ingress qdisc is being either initialized or destructed. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/core/dev.c | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index b2775f06c710..af4a1b0adc10 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1630,6 +1630,22 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev) } EXPORT_SYMBOL(call_netdevice_notifiers); +#ifdef CONFIG_NET_CLS_ACT +static struct static_key ingress_needed __read_mostly; + +void net_inc_ingress_queue(void) +{ + static_key_slow_inc(&ingress_needed); +} +EXPORT_SYMBOL_GPL(net_inc_ingress_queue); + +void net_dec_ingress_queue(void) +{ + static_key_slow_dec(&ingress_needed); +} +EXPORT_SYMBOL_GPL(net_dec_ingress_queue); +#endif + static struct static_key netstamp_needed __read_mostly; #ifdef HAVE_JUMP_LABEL /* We are not allowed to call static_key_slow_dec() from irq context @@ -3547,7 +3563,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue); if (!rxq || rcu_access_pointer(rxq->qdisc) == &noop_qdisc) - goto out; + return skb; if (*pt_prev) { *ret = deliver_skb(skb, *pt_prev, orig_dev); @@ -3561,8 +3577,6 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, return NULL; } -out: - skb->tc_verd = 0; return skb; } #endif @@ -3698,12 +3712,15 @@ another_round: skip_taps: #ifdef CONFIG_NET_CLS_ACT - skb = handle_ing(skb, &pt_prev, &ret, orig_dev); - if (!skb) - goto unlock; + if (static_key_false(&ingress_needed)) { + skb = handle_ing(skb, &pt_prev, &ret, orig_dev); + if (!skb) + goto unlock; + } + + skb->tc_verd = 0; ncls: #endif - if (pfmemalloc && !skb_pfmemalloc_protocol(skb)) goto drop; -- cgit v1.2.3 From 8b86a61da37cbbcf4bd6e87fda494a59b1cf16c4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 17 Apr 2015 15:45:04 +0200 Subject: net: remove unused 'dev' argument from netif_needs_gso() In commit 04ffcb255f22 ("net: Add ndo_gso_check") Tom originally added the 'dev' argument to be able to call ndo_gso_check(). Then later, when generalizing this in commit 5f35227ea34b ("net: Generalize ndo_gso_check to ndo_features_check") Jesse removed the call to ndo_gso_check() in netif_needs_gso() by calling the new ndo_features_check() in a different place. This made the 'dev' argument unused. Remove the unused argument and go back to the code as before. Cc: Tom Herbert Cc: Jesse Gross Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index af4a1b0adc10..1796cef55ab5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2713,7 +2713,7 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device if (unlikely(!skb)) goto out_null; - if (netif_needs_gso(dev, skb, features)) { + if (netif_needs_gso(skb, features)) { struct sk_buff *segs; segs = skb_gso_segment(skb, features); -- cgit v1.2.3 From a31196b07f8034eba6a3487a1ad1bb5ec5cd58a5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 25 Apr 2015 09:35:24 -0700 Subject: net: rfs: fix crash in get_rps_cpus() Commit 567e4b79731c ("net: rfs: add hash collision detection") had one mistake : RPS_NO_CPU is no longer the marker for invalid cpu in set_rps_cpu() and get_rps_cpu(), as @next_cpu was the result of an AND with rps_cpu_mask This bug showed up on a host with 72 cpus : next_cpu was 0x7f, and the code was trying to access percpu data of an non existent cpu. In a follow up patch, we might get rid of compares against nr_cpu_ids, if we init the tables with 0. This is silly to test for a very unlikely condition that exists only shortly after table initialization, as we got rid of rps_reset_sock_flow() and similar functions that were writing this RPS_NO_CPU magic value at flow dismantle : When table is old enough, it never contains this value anymore. Fixes: 567e4b79731c ("net: rfs: add hash collision detection") Signed-off-by: Eric Dumazet Cc: Tom Herbert Cc: Ben Hutchings Signed-off-by: David S. Miller --- net/core/dev.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 1796cef55ab5..c7ba0388f1be 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3079,7 +3079,7 @@ static struct rps_dev_flow * set_rps_cpu(struct net_device *dev, struct sk_buff *skb, struct rps_dev_flow *rflow, u16 next_cpu) { - if (next_cpu != RPS_NO_CPU) { + if (next_cpu < nr_cpu_ids) { #ifdef CONFIG_RFS_ACCEL struct netdev_rx_queue *rxqueue; struct rps_dev_flow_table *flow_table; @@ -3184,7 +3184,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, * If the desired CPU (where last recvmsg was done) is * different from current CPU (one in the rx-queue flow * table entry), switch if one of the following holds: - * - Current CPU is unset (equal to RPS_NO_CPU). + * - Current CPU is unset (>= nr_cpu_ids). * - Current CPU is offline. * - The current CPU's queue tail has advanced beyond the * last packet that was enqueued using this table entry. @@ -3192,14 +3192,14 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, * have been dequeued, thus preserving in order delivery. */ if (unlikely(tcpu != next_cpu) && - (tcpu == RPS_NO_CPU || !cpu_online(tcpu) || + (tcpu >= nr_cpu_ids || !cpu_online(tcpu) || ((int)(per_cpu(softnet_data, tcpu).input_queue_head - rflow->last_qtail)) >= 0)) { tcpu = next_cpu; rflow = set_rps_cpu(dev, skb, rflow, next_cpu); } - if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) { + if (tcpu < nr_cpu_ids && cpu_online(tcpu)) { *rflowp = rflow; cpu = tcpu; goto done; @@ -3240,14 +3240,14 @@ bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, struct rps_dev_flow_table *flow_table; struct rps_dev_flow *rflow; bool expire = true; - int cpu; + unsigned int cpu; rcu_read_lock(); flow_table = rcu_dereference(rxqueue->rps_flow_table); if (flow_table && flow_id <= flow_table->mask) { rflow = &flow_table->flows[flow_id]; cpu = ACCESS_ONCE(rflow->cpu); - if (rflow->filter == filter_id && cpu != RPS_NO_CPU && + if (rflow->filter == filter_id && cpu < nr_cpu_ids && ((int)(per_cpu(softnet_data, cpu).input_queue_head - rflow->last_qtail) < (int)(10 * flow_table->mask))) -- cgit v1.2.3 From d66bf7dd27573ee5ea90484899ee952c19ccb194 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Sat, 2 May 2015 21:33:44 -0400 Subject: net: core: Correct an over-stringent device loop detection. The code in __netdev_upper_dev_link() has an over-stringent loop detection logic that actually prevents valid configurations from working correctly. In particular, the logic returns an error if an upper device is already in the list of all upper devices for a given dev. This particular check seems to be a overzealous as it disallows perfectly valid configurations. For example: # ip l a link eth0 name eth0.10 type vlan id 10 # ip l a dev br0 typ bridge # ip l s eth0.10 master br0 # ip l s eth0 master br0 <--- Will fail If you switch the last two commands (add eth0 first), then both will succeed. If after that, you remove eth0 and try to re-add it, it will fail! It appears to be enough to simply check adj_list to keeps things safe. I've tried stacking multiple devices multiple times in all different combinations, and either rx_handler registration prevented the stacking of the device linking cought the error. Signed-off-by: Vladislav Yasevich Acked-by: Jiri Pirko Acked-by: Veaceslav Falico Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index c7ba0388f1be..2c1c67fad64d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5209,7 +5209,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, if (__netdev_find_adj(upper_dev, dev, &upper_dev->all_adj_list.upper)) return -EBUSY; - if (__netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper)) + if (__netdev_find_adj(dev, upper_dev, &dev->adj_list.upper)) return -EEXIST; if (master && netdev_master_upper_dev_get(dev)) -- cgit v1.2.3