From 95603e2293de556de7e82221649bfd7fd98b64a3 Mon Sep 17 00:00:00 2001 From: Michel Machado Date: Tue, 12 Jun 2012 10:16:35 +0000 Subject: net-next: add dev_loopback_xmit() to avoid duplicate code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add dev_loopback_xmit() in order to deduplicate functions ip_dev_loopback_xmit() (in net/ipv4/ip_output.c) and ip6_dev_loopback_xmit() (in net/ipv6/ip6_output.c). I was about to reinvent the wheel when I noticed that ip_dev_loopback_xmit() and ip6_dev_loopback_xmit() do exactly what I need and are not IP-only functions, but they were not available to reuse elsewhere. ip6_dev_loopback_xmit() does not have line "skb_dst_force(skb);", but I understand that this is harmless, and should be in dev_loopback_xmit(). Signed-off-by: Michel Machado CC: "David S. Miller" CC: Alexey Kuznetsov CC: James Morris CC: Hideaki YOSHIFUJI CC: Patrick McHardy CC: Eric Dumazet CC: Jiri Pirko CC: "Michał Mirosław" CC: Ben Hutchings Signed-off-by: David S. Miller --- net/core/dev.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index cd0981977f5c..c6e29ea65bd9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2475,6 +2475,23 @@ static void skb_update_prio(struct sk_buff *skb) static DEFINE_PER_CPU(int, xmit_recursion); #define RECURSION_LIMIT 10 +/** + * dev_loopback_xmit - loop back @skb + * @skb: buffer to transmit + */ +int dev_loopback_xmit(struct sk_buff *skb) +{ + skb_reset_mac_header(skb); + __skb_pull(skb, skb_network_offset(skb)); + skb->pkt_type = PACKET_LOOPBACK; + skb->ip_summed = CHECKSUM_UNNECESSARY; + WARN_ON(!skb_dst(skb)); + skb_dst_force(skb); + netif_rx_ni(skb); + return 0; +} +EXPORT_SYMBOL(dev_loopback_xmit); + /** * dev_queue_xmit - transmit a buffer * @skb: buffer to transmit -- cgit v1.2.3 From 62b1a8ab9b3660bb820d8dfe23148ed6cda38574 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 14 Jun 2012 06:42:44 +0000 Subject: net: remove skb_orphan_try() Orphaning skb in dev_hard_start_xmit() makes bonding behavior unfriendly for applications sending big UDP bursts : Once packets pass the bonding device and come to real device, they might hit a full qdisc and be dropped. Without orphaning, the sender is automatically throttled because sk->sk_wmemalloc reaches sk->sk_sndbuf (assuming sk_sndbuf is not too big) We could try to defer the orphaning adding another test in dev_hard_start_xmit(), but all this seems of little gain, now that BQL tends to make packets more likely to be parked in Qdisc queues instead of NIC TX ring, in cases where performance matters. Reverts commits : fc6055a5ba31 net: Introduce skb_orphan_try() 87fd308cfc6b net: skb_tx_hash() fix relative to skb_orphan_try() and removes SKBTX_DRV_NEEDS_SK_REF flag Reported-and-bisected-by: Jean-Michel Hautbois Signed-off-by: Eric Dumazet Tested-by: Oliver Hartkopp Acked-by: Oliver Hartkopp Signed-off-by: David S. Miller --- net/core/dev.c | 23 +---------------------- 1 file changed, 1 insertion(+), 22 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index cd0981977f5c..6df214041a5e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2089,25 +2089,6 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features) return 0; } -/* - * Try to orphan skb early, right before transmission by the device. - * We cannot orphan skb if tx timestamp is requested or the sk-reference - * is needed on driver level for other reasons, e.g. see net/can/raw.c - */ -static inline void skb_orphan_try(struct sk_buff *skb) -{ - struct sock *sk = skb->sk; - - if (sk && !skb_shinfo(skb)->tx_flags) { - /* skb_tx_hash() wont be able to get sk. - * We copy sk_hash into skb->rxhash - */ - if (!skb->rxhash) - skb->rxhash = sk->sk_hash; - skb_orphan(skb); - } -} - static bool can_checksum_protocol(netdev_features_t features, __be16 protocol) { return ((features & NETIF_F_GEN_CSUM) || @@ -2193,8 +2174,6 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, if (!list_empty(&ptype_all)) dev_queue_xmit_nit(skb, dev); - skb_orphan_try(skb); - features = netif_skb_features(skb); if (vlan_tx_tag_present(skb) && @@ -2304,7 +2283,7 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb, if (skb->sk && skb->sk->sk_hash) hash = skb->sk->sk_hash; else - hash = (__force u16) skb->protocol ^ skb->rxhash; + hash = (__force u16) skb->protocol; hash = jhash_1word(hash, hashrnd); return (u16) (((u64) hash * qcount) >> 32) + qoffset; -- cgit v1.2.3 From 7cecb523adedcaf8acba5e14d47559d8bc3f40d7 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Wed, 27 Jun 2012 14:32:07 +0000 Subject: net: Downgrade CAP_SYS_MODULE deprecated message from error to warning. Make logging level consistent with other deprecation messages in net subsystem. Signed-off-by: Vinson Lee Cc: David Mackey Signed-off-by: David S. Miller --- net/core/dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 6df214041a5e..84f01ba81a34 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1136,8 +1136,8 @@ void dev_load(struct net *net, const char *name) no_module = request_module("netdev-%s", name); if (no_module && capable(CAP_SYS_MODULE)) { if (!request_module("%s", name)) - pr_err("Loading kernel module for a network device with CAP_SYS_MODULE (deprecated). Use CAP_NET_ADMIN and alias netdev-%s instead.\n", - name); + pr_warn("Loading kernel module for a network device with CAP_SYS_MODULE (deprecated). Use CAP_NET_ADMIN and alias netdev-%s instead.\n", + name); } } EXPORT_SYMBOL(dev_load); -- cgit v1.2.3 From 16917b87a23b429226527f393270047069d665e9 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Sun, 1 Jul 2012 03:18:50 +0000 Subject: net-next: Add netif_get_num_default_rss_queues Most multi-queue networking driver consider the number of online cpus when configuring RSS queues. This patch adds a wrapper to the number of cpus, setting an upper limit on the number of cpus a driver should consider (by default) when allocating resources for his queues. Signed-off-by: Yuval Mintz Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- net/core/dev.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index ed674e212b7a..69f7a1a393d8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1793,6 +1793,17 @@ int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq) EXPORT_SYMBOL(netif_set_real_num_rx_queues); #endif +/* netif_get_num_default_rss_queues - default number of RSS queues + * + * This routine should set an upper limit on the number of RSS queues + * used by default by multiqueue devices. + */ +int netif_get_num_default_rss_queues() +{ + return min_t(int, DEFAULT_MAX_NUM_RSS_QUEUES, num_online_cpus()); +} +EXPORT_SYMBOL(netif_get_num_default_rss_queues); + static inline void __netif_reschedule(struct Qdisc *q) { struct softnet_data *sd; -- cgit v1.2.3 From 91c68ce2b26319248a32d7baa1226f819d283758 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 8 Jul 2012 21:45:10 +0000 Subject: net: cgroup: fix out of bounds accesses dev->priomap is allocated by extend_netdev_table() called from update_netdev_tables(). And this is only called if write_priomap() is called. But if write_priomap() is not called, it seems we can have out of bounds accesses in cgrp_destroy(), read_priomap() & skb_update_prio() With help from Gao Feng Signed-off-by: Eric Dumazet Cc: Neil Horman Cc: Gao feng Acked-by: Gao feng Signed-off-by: David S. Miller --- net/core/dev.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 84f01ba81a34..0f28a9e0b8ad 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2444,8 +2444,12 @@ static void skb_update_prio(struct sk_buff *skb) { struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap); - if ((!skb->priority) && (skb->sk) && map) - skb->priority = map->priomap[skb->sk->sk_cgrp_prioidx]; + if (!skb->priority && skb->sk && map) { + unsigned int prioidx = skb->sk->sk_cgrp_prioidx; + + if (prioidx < map->priomap_len) + skb->priority = map->priomap[prioidx]; + } } #else #define skb_update_prio(skb) -- cgit v1.2.3 From a55b138b1da3d25c04f66f8df03d659dfd46c950 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 10 Jul 2012 10:54:38 +0000 Subject: net: Properly define functions with no parameters Defining a function with no parameters as 'T foo()' is the deprecated K&R style, and is not strictly equivalent to defining it as 'T foo(void)'. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 69f7a1a393d8..9c21548e5b31 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1798,7 +1798,7 @@ EXPORT_SYMBOL(netif_set_real_num_rx_queues); * This routine should set an upper limit on the number of RSS queues * used by default by multiqueue devices. */ -int netif_get_num_default_rss_queues() +int netif_get_num_default_rss_queues(void) { return min_t(int, DEFAULT_MAX_NUM_RSS_QUEUES, num_online_cpus()); } -- cgit v1.2.3 From 2c53040f018b6c36a46eec75b9b937aaa5f78e6d Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 10 Jul 2012 10:55:09 +0000 Subject: net: Fix (nearly-)kernel-doc comments for various functions Fix incorrect start markers, wrapped summary lines, missing section breaks, incorrect separators, and some name mismatches. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- net/core/dev.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 9c21548e5b31..5ab6f4b37c0c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1691,7 +1691,8 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) rcu_read_unlock(); } -/* netif_setup_tc - Handle tc mappings on real_num_tx_queues change +/** + * netif_setup_tc - Handle tc mappings on real_num_tx_queues change * @dev: Network device * @txq: number of queues available * @@ -1793,7 +1794,8 @@ int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq) EXPORT_SYMBOL(netif_set_real_num_rx_queues); #endif -/* netif_get_num_default_rss_queues - default number of RSS queues +/** + * netif_get_num_default_rss_queues - default number of RSS queues * * This routine should set an upper limit on the number of RSS queues * used by default by multiqueue devices. @@ -5670,7 +5672,7 @@ int netdev_refcnt_read(const struct net_device *dev) } EXPORT_SYMBOL(netdev_refcnt_read); -/* +/** * netdev_wait_allrefs - wait until all references are gone. * * This is called when unregistering network devices. -- cgit v1.2.3 From 7bf2357524408b97fec58344caf7397f8140c3fd Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 4 Jul 2012 21:23:25 -0400 Subject: net: feed /dev/random with the MAC address when registering a device Cc: David Miller Cc: Linus Torvalds Signed-off-by: "Theodore Ts'o" Cc: stable@vger.kernel.org --- net/core/dev.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 6df214041a5e..bdd1e88f60d3 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1172,6 +1172,7 @@ static int __dev_open(struct net_device *dev) net_dmaengine_get(); dev_set_rx_mode(dev); dev_activate(dev); + add_device_randomness(dev->dev_addr, dev->addr_len); } return ret; @@ -4763,6 +4764,7 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) err = ops->ndo_set_mac_address(dev, sa); if (!err) call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); + add_device_randomness(dev->dev_addr, dev->addr_len); return err; } EXPORT_SYMBOL(dev_set_mac_address); @@ -5541,6 +5543,7 @@ int register_netdevice(struct net_device *dev) dev_init_scheduler(dev); dev_hold(dev); list_netdevice(dev); + add_device_randomness(dev->dev_addr, dev->addr_len); /* Notify protocols, that a new device appeared. */ ret = call_netdevice_notifiers(NETDEV_REGISTER, dev); -- cgit v1.2.3 From 734b65417b24d6eea3e3d7457e1f11493890ee1d Mon Sep 17 00:00:00 2001 From: "Rustad, Mark D" Date: Wed, 18 Jul 2012 09:06:07 +0000 Subject: net: Statically initialize init_net.dev_base_head This change eliminates an initialization-order hazard most recently seen when netprio_cgroup is built into the kernel. With thanks to Eric Dumazet for catching a bug. Signed-off-by: Mark Rustad Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 0f28a9e0b8ad..1cb0d8a6aa6c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6283,7 +6283,8 @@ static struct hlist_head *netdev_create_hash(void) /* Initialize per network namespace state */ static int __net_init netdev_init(struct net *net) { - INIT_LIST_HEAD(&net->dev_base_head); + if (net != &init_net) + INIT_LIST_HEAD(&net->dev_base_head); net->dev_name_head = netdev_create_hash(); if (net->dev_name_head == NULL) -- cgit v1.2.3 From 1080e512d44d4f67b8beb8edf25a1bbcb1066dc7 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 20 Jul 2012 09:23:17 +0000 Subject: net: orphan frags on receive zero copy packets are normally sent to the outside network, but bridging, tun etc might loop them back to host networking stack. If this happens destructors will never be called, so orphan the frags immediately on receive. Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- net/core/dev.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index d70e4a3a49f2..cca02ae7a844 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1632,6 +1632,8 @@ static inline int deliver_skb(struct sk_buff *skb, struct packet_type *pt_prev, struct net_device *orig_dev) { + if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC))) + return -ENOMEM; atomic_inc(&skb->users); return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } @@ -3262,7 +3264,10 @@ ncls: } if (pt_prev) { - ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); + if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC))) + ret = -ENOMEM; + else + ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } else { atomic_long_inc(&skb->dev->rx_dropped); kfree_skb(skb); -- cgit v1.2.3 From b68581778cd0051a3fb9a2b614dee7eccb5127ff Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 23 Jul 2012 16:27:54 -0700 Subject: net: Make skb->skb_iif always track skb->dev Make it follow device decapsulation, from things such as VLAN and bonding. The stuff that actually cares about pre-demuxed device pointers, is handled by the "orig_dev" variable in __netif_receive_skb(). And the only consumer of that is the po->origdev feature of AF_PACKET sockets. Signed-off-by: David S. Miller --- net/core/dev.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index cca02ae7a844..0ebaea16632f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3173,8 +3173,6 @@ static int __netif_receive_skb(struct sk_buff *skb) if (netpoll_receive_skb(skb)) return NET_RX_DROP; - if (!skb->skb_iif) - skb->skb_iif = skb->dev->ifindex; orig_dev = skb->dev; skb_reset_network_header(skb); @@ -3186,6 +3184,7 @@ static int __netif_receive_skb(struct sk_buff *skb) rcu_read_lock(); another_round: + skb->skb_iif = skb->dev->ifindex; __this_cpu_inc(softnet_data.processed); -- cgit v1.2.3 From b4b9e3558508980fc0cd161a545ffb55a1f13ee9 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 31 Jul 2012 16:44:26 -0700 Subject: netvm: set PF_MEMALLOC as appropriate during SKB processing In order to make sure pfmemalloc packets receive all memory needed to proceed, ensure processing of pfmemalloc SKBs happens under PF_MEMALLOC. This is limited to a subset of protocols that are expected to be used for writing to swap. Taps are not allowed to use PF_MEMALLOC as these are expected to communicate with userspace processes which could be paged out. [a.p.zijlstra@chello.nl: Ideas taken from various patches] [jslaby@suse.cz: Lock imbalance fix] Signed-off-by: Mel Gorman Acked-by: David S. Miller Cc: Neil Brown Cc: Peter Zijlstra Cc: Mike Christie Cc: Eric B Munson Cc: Eric Dumazet Cc: Sebastian Andrzej Siewior Cc: Mel Gorman Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/core/dev.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 47 insertions(+), 6 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 0ebaea16632f..ce132443d5d1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3155,6 +3155,23 @@ void netdev_rx_handler_unregister(struct net_device *dev) } EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); +/* + * Limit the use of PFMEMALLOC reserves to those protocols that implement + * the special handling of PFMEMALLOC skbs. + */ +static bool skb_pfmemalloc_protocol(struct sk_buff *skb) +{ + switch (skb->protocol) { + case __constant_htons(ETH_P_ARP): + case __constant_htons(ETH_P_IP): + case __constant_htons(ETH_P_IPV6): + case __constant_htons(ETH_P_8021Q): + return true; + default: + return false; + } +} + static int __netif_receive_skb(struct sk_buff *skb) { struct packet_type *ptype, *pt_prev; @@ -3164,14 +3181,27 @@ static int __netif_receive_skb(struct sk_buff *skb) bool deliver_exact = false; int ret = NET_RX_DROP; __be16 type; + unsigned long pflags = current->flags; net_timestamp_check(!netdev_tstamp_prequeue, skb); trace_netif_receive_skb(skb); + /* + * PFMEMALLOC skbs are special, they should + * - be delivered to SOCK_MEMALLOC sockets only + * - stay away from userspace + * - have bounded memory usage + * + * Use PF_MEMALLOC as this saves us from propagating the allocation + * context down to all allocation sites. + */ + if (sk_memalloc_socks() && skb_pfmemalloc(skb)) + current->flags |= PF_MEMALLOC; + /* if we've gotten here through NAPI, check netpoll */ if (netpoll_receive_skb(skb)) - return NET_RX_DROP; + goto out; orig_dev = skb->dev; @@ -3191,7 +3221,7 @@ another_round: if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) { skb = vlan_untag(skb); if (unlikely(!skb)) - goto out; + goto unlock; } #ifdef CONFIG_NET_CLS_ACT @@ -3201,6 +3231,9 @@ another_round: } #endif + if (sk_memalloc_socks() && skb_pfmemalloc(skb)) + goto skip_taps; + list_for_each_entry_rcu(ptype, &ptype_all, list) { if (!ptype->dev || ptype->dev == skb->dev) { if (pt_prev) @@ -3209,13 +3242,18 @@ another_round: } } +skip_taps: #ifdef CONFIG_NET_CLS_ACT skb = handle_ing(skb, &pt_prev, &ret, orig_dev); if (!skb) - goto out; + goto unlock; ncls: #endif + if (sk_memalloc_socks() && skb_pfmemalloc(skb) + && !skb_pfmemalloc_protocol(skb)) + goto drop; + rx_handler = rcu_dereference(skb->dev->rx_handler); if (vlan_tx_tag_present(skb)) { if (pt_prev) { @@ -3225,7 +3263,7 @@ ncls: if (vlan_do_receive(&skb, !rx_handler)) goto another_round; else if (unlikely(!skb)) - goto out; + goto unlock; } if (rx_handler) { @@ -3235,7 +3273,7 @@ ncls: } switch (rx_handler(&skb)) { case RX_HANDLER_CONSUMED: - goto out; + goto unlock; case RX_HANDLER_ANOTHER: goto another_round; case RX_HANDLER_EXACT: @@ -3268,6 +3306,7 @@ ncls: else ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } else { +drop: atomic_long_inc(&skb->dev->rx_dropped); kfree_skb(skb); /* Jamal, now you will not able to escape explaining @@ -3276,8 +3315,10 @@ ncls: ret = NET_RX_DROP; } -out: +unlock: rcu_read_unlock(); +out: + tsk_restore_flags(current, pflags, PF_MEMALLOC); return ret; } -- cgit v1.2.3 From 30b678d844af3305cda5953467005cebb5d7b687 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 30 Jul 2012 15:57:00 +0000 Subject: net: Allow driver to limit number of GSO segments per skb A peer (or local user) may cause TCP to use a nominal MSS of as little as 88 (actual MSS of 76 with timestamps). Given that we have a sufficiently prodigious local sender and the peer ACKs quickly enough, it is nevertheless possible to grow the window for such a connection to the point that we will try to send just under 64K at once. This results in a single skb that expands to 861 segments. In some drivers with TSO support, such an skb will require hundreds of DMA descriptors; a substantial fraction of a TX ring or even more than a full ring. The TX queue selected for the skb may stall and trigger the TX watchdog repeatedly (since the problem skb will be retried after the TX reset). This particularly affects sfc, for which the issue is designated as CVE-2012-3412. Therefore: 1. Add the field net_device::gso_max_segs holding the device-specific limit. 2. In netif_skb_features(), if the number of segments is too high then mask out GSO features to force fall back to software GSO. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- net/core/dev.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 0cb3fe8d8e72..f91abf800161 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2134,6 +2134,9 @@ netdev_features_t netif_skb_features(struct sk_buff *skb) __be16 protocol = skb->protocol; netdev_features_t features = skb->dev->features; + if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs) + features &= ~NETIF_F_GSO_MASK; + if (protocol == htons(ETH_P_8021Q)) { struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; protocol = veh->h_vlan_encapsulated_proto; @@ -5986,6 +5989,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev_net_set(dev, &init_net); dev->gso_max_size = GSO_MAX_SIZE; + dev->gso_max_segs = GSO_MAX_SEGS; INIT_LIST_HEAD(&dev->napi_list); INIT_LIST_HEAD(&dev->unreg_list); -- cgit v1.2.3 From 7364e445f62825758fa61195d237a5b8ecdd06ec Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Wed, 8 Aug 2012 00:33:25 +0000 Subject: net/core: Fix potential memory leak in dev_set_alias() Do not leak memory by updating pointer with potentially NULL realloc return value. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Signed-off-by: David S. Miller --- net/core/dev.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index f91abf800161..a39354ee1432 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1055,6 +1055,8 @@ rollback: */ int dev_set_alias(struct net_device *dev, const char *alias, size_t len) { + char *new_ifalias; + ASSERT_RTNL(); if (len >= IFALIASZ) @@ -1068,9 +1070,10 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len) return 0; } - dev->ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL); - if (!dev->ifalias) + new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL); + if (!new_ifalias) return -ENOMEM; + dev->ifalias = new_ifalias; strlcpy(dev->ifalias, alias, len+1); return len; -- cgit v1.2.3