From 90e51adf16db137d9b61eea952af32ee2f454c41 Mon Sep 17 00:00:00 2001 From: Li Zhong Date: Fri, 22 Nov 2013 15:04:46 +0800 Subject: Fix comment typo for alloc_netdev_mqs() it seems subquue should be subqueue here Signed-off-by: Li Zhong Signed-off-by: Jiri Kosina --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 5c713f2239cc..e3457181e2e2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6010,7 +6010,7 @@ EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops); * @rxqs: the number of RX subqueues to allocate * * Allocates a struct net_device with private data area for driver use - * and performs basic initialization. Also allocates subquue structs + * and performs basic initialization. Also allocates subqueue structs * for each queue on the device. */ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, -- cgit v1.2.3 From 84b9cd633bc35a028b313178829ee313525f6892 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 5 Dec 2013 21:44:27 -0800 Subject: gro: small napi_get_frags() optim Remove one useless conditional branch : napi->skb is NULL, so nothing bad can happen. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index ba3b7ea5ebb3..c98052487e98 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3981,8 +3981,7 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi) if (!skb) { skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD); - if (skb) - napi->skb = skb; + napi->skb = skb; } return skb; } -- cgit v1.2.3 From e6247027e5173c00efb2084d688d06ff835bc3b0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 5 Dec 2013 04:45:08 -0800 Subject: net: introduce dev_consume_skb_any() Some network drivers use dev_kfree_skb_any() and dev_kfree_skb_irq() helpers to free skbs, both for dropped packets and TX completed ones. We need to separate the two causes to get better diagnostics given by dropwatch or "perf record -e skb:kfree_skb" This patch provides two new helpers, dev_consume_skb_any() and dev_consume_skb_irq() to be used for consumed skbs. __dev_kfree_skb_irq() is slightly optimized to remove one atomic_dec_and_test() in fast path, and use this_cpu_{r|w} accessors. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/core/dev.c | 45 ++++++++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 15 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index c98052487e98..6cc98dd49c7a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2145,30 +2145,42 @@ void __netif_schedule(struct Qdisc *q) } EXPORT_SYMBOL(__netif_schedule); -void dev_kfree_skb_irq(struct sk_buff *skb) +struct dev_kfree_skb_cb { + enum skb_free_reason reason; +}; + +static struct dev_kfree_skb_cb *get_kfree_skb_cb(const struct sk_buff *skb) +{ + return (struct dev_kfree_skb_cb *)skb->cb; +} + +void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason) { - if (atomic_dec_and_test(&skb->users)) { - struct softnet_data *sd; - unsigned long flags; + unsigned long flags; - local_irq_save(flags); - sd = &__get_cpu_var(softnet_data); - skb->next = sd->completion_queue; - sd->completion_queue = skb; - raise_softirq_irqoff(NET_TX_SOFTIRQ); - local_irq_restore(flags); + if (likely(atomic_read(&skb->users) == 1)) { + smp_rmb(); + atomic_set(&skb->users, 0); + } else if (likely(!atomic_dec_and_test(&skb->users))) { + return; } + get_kfree_skb_cb(skb)->reason = reason; + local_irq_save(flags); + skb->next = __this_cpu_read(softnet_data.completion_queue); + __this_cpu_write(softnet_data.completion_queue, skb); + raise_softirq_irqoff(NET_TX_SOFTIRQ); + local_irq_restore(flags); } -EXPORT_SYMBOL(dev_kfree_skb_irq); +EXPORT_SYMBOL(__dev_kfree_skb_irq); -void dev_kfree_skb_any(struct sk_buff *skb) +void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason) { if (in_irq() || irqs_disabled()) - dev_kfree_skb_irq(skb); + __dev_kfree_skb_irq(skb, reason); else dev_kfree_skb(skb); } -EXPORT_SYMBOL(dev_kfree_skb_any); +EXPORT_SYMBOL(__dev_kfree_skb_any); /** @@ -3306,7 +3318,10 @@ static void net_tx_action(struct softirq_action *h) clist = clist->next; WARN_ON(atomic_read(&skb->users)); - trace_kfree_skb(skb, net_tx_action); + if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED)) + trace_consume_skb(skb); + else + trace_kfree_skb(skb, net_tx_action); __kfree_skb(skb); } } -- cgit v1.2.3 From 4262e5ccbbb5171abd2921eed16ed339633d6478 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 6 Dec 2013 11:36:16 +0100 Subject: net: dev: move inline skb_needs_linearize helper to header As we need it elsewhere, move the inline helper function of skb_needs_linearize() over to skbuff.h include file. While at it, also convert the return to 'bool' instead of 'int' and add a proper kernel doc. Signed-off-by: Daniel Borkmann Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- net/core/dev.c | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 6cc98dd49c7a..355df36360b4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2535,21 +2535,6 @@ netdev_features_t netif_skb_features(struct sk_buff *skb) } EXPORT_SYMBOL(netif_skb_features); -/* - * Returns true if either: - * 1. skb has frag_list and the device doesn't support FRAGLIST, or - * 2. skb is fragmented and the device does not support SG. 
- */ -static inline int skb_needs_linearize(struct sk_buff *skb, - netdev_features_t features) -{ - return skb_is_nonlinear(skb) && - ((skb_has_frag_list(skb) && - !(features & NETIF_F_FRAGLIST)) || - (skb_shinfo(skb)->nr_frags && - !(features & NETIF_F_SG))); -} - int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, void *accel_priv) { -- cgit v1.2.3 From 299603e8370a93dd5d8e8d800f0dff1ce2c53d36 Mon Sep 17 00:00:00 2001 From: Jerry Chu Date: Wed, 11 Dec 2013 20:53:45 -0800 Subject: net-gro: Prepare GRO stack for the upcoming tunneling support This patch modifies the GRO stack to avoid the use of "network_header" and associated macros like ip_hdr() and ipv6_hdr() in order to allow an arbitary number of IP hdrs (v4 or v6) to be used in the encapsulation chain. This lays the foundation for various IP tunneling support (IP-in-IP, GRE, VXLAN, SIT,...) to be added later. With this patch, the GRO stack traversing now is mostly based on skb_gro_offset rather than special hdr offsets saved in skb (e.g., skb->network_header). As a result all but the top layer (i.e., the the transport layer) must have hdrs of the same length in order for a pkt to be considered for aggregation. Therefore when adding a new encap layer (e.g., for tunneling), one must check and skip flows (e.g., by setting NAPI_GRO_CB(p)->same_flow to 0) that have a different hdr length. Note that unlike the network header, the transport header can and will continue to be set by the GRO code since there will be at most one "transport layer" in the encap chain. Signed-off-by: H.K. Jerry Chu Suggested-by: Eric Dumazet Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 76 +++++++++++++++++++--------------------------------------- 1 file changed, 25 insertions(+), 51 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 355df36360b4..c95d664b2b42 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3752,7 +3752,7 @@ static int napi_gro_complete(struct sk_buff *skb) if (ptype->type != type || !ptype->callbacks.gro_complete) continue; - err = ptype->callbacks.gro_complete(skb); + err = ptype->callbacks.gro_complete(skb, 0); break; } rcu_read_unlock(); @@ -3818,6 +3818,23 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb) } } +static void skb_gro_reset_offset(struct sk_buff *skb) +{ + const struct skb_shared_info *pinfo = skb_shinfo(skb); + const skb_frag_t *frag0 = &pinfo->frags[0]; + + NAPI_GRO_CB(skb)->data_offset = 0; + NAPI_GRO_CB(skb)->frag0 = NULL; + NAPI_GRO_CB(skb)->frag0_len = 0; + + if (skb_mac_header(skb) == skb_tail_pointer(skb) && + pinfo->nr_frags && + !PageHighMem(skb_frag_page(frag0))) { + NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); + NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0); + } +} + static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { struct sk_buff **pp = NULL; @@ -3833,6 +3850,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff if (skb_is_gso(skb) || skb_has_frag_list(skb)) goto normal; + skb_gro_reset_offset(skb); gro_list_prepare(napi, skb); rcu_read_lock(); @@ -3938,27 +3956,8 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) return ret; } -static void skb_gro_reset_offset(struct sk_buff *skb) -{ - const struct skb_shared_info *pinfo = skb_shinfo(skb); - const skb_frag_t *frag0 = &pinfo->frags[0]; - - NAPI_GRO_CB(skb)->data_offset = 0; - NAPI_GRO_CB(skb)->frag0 = NULL; - 
NAPI_GRO_CB(skb)->frag0_len = 0; - - if (skb_mac_header(skb) == skb_tail_pointer(skb) && - pinfo->nr_frags && - !PageHighMem(skb_frag_page(frag0))) { - NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); - NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0); - } -} - gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { - skb_gro_reset_offset(skb); - return napi_skb_finish(dev_gro_receive(napi, skb), skb); } EXPORT_SYMBOL(napi_gro_receive); @@ -3992,12 +3991,7 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff * { switch (ret) { case GRO_NORMAL: - case GRO_HELD: - skb->protocol = eth_type_trans(skb, skb->dev); - - if (ret == GRO_HELD) - skb_gro_pull(skb, -ETH_HLEN); - else if (netif_receive_skb(skb)) + if (netif_receive_skb(skb)) ret = GRO_DROP; break; @@ -4006,6 +4000,7 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff * napi_reuse_skb(napi, skb); break; + case GRO_HELD: case GRO_MERGED: break; } @@ -4016,36 +4011,15 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff * static struct sk_buff *napi_frags_skb(struct napi_struct *napi) { struct sk_buff *skb = napi->skb; - struct ethhdr *eth; - unsigned int hlen; - unsigned int off; napi->skb = NULL; - skb_reset_mac_header(skb); - skb_gro_reset_offset(skb); - - off = skb_gro_offset(skb); - hlen = off + sizeof(*eth); - eth = skb_gro_header_fast(skb, off); - if (skb_gro_header_hard(skb, hlen)) { - eth = skb_gro_header_slow(skb, hlen, off); - if (unlikely(!eth)) { - napi_reuse_skb(napi, skb); - skb = NULL; - goto out; - } + if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr)))) { + napi_reuse_skb(napi, skb); + return NULL; } + skb->protocol = eth_type_trans(skb, skb->dev); - skb_gro_pull(skb, sizeof(*eth)); - - /* - * This works because the only protocols we care about don't require - * special handling. We'll fix it up properly at the end. - */ - skb->protocol = eth->h_proto; - -out: return skb; } -- cgit v1.2.3 From e001bfad913bf119fb67c1e8dd2d4ec1f5d392fa Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Fri, 13 Dec 2013 10:19:55 +0800 Subject: bonding: create bond_first_slave_rcu() The bond_first_slave_rcu() will be used to instead of bond_first_slave() in rcu_read_lock(). According to the Jay Vosburgh's suggestion, the struct netdev_adjacent should hide from users who wanted to use it directly. so I package a new function to get the first slave of the bond. Suggested-by: Nikolay Aleksandrov Suggested-by: Jay Vosburgh Suggested-by: Veaceslav Falico Signed-off-by: Ding Tianhong Signed-off-by: David S. Miller --- net/core/dev.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index c95d664b2b42..9d4369ece679 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4543,6 +4543,27 @@ void *netdev_lower_get_next_private_rcu(struct net_device *dev, } EXPORT_SYMBOL(netdev_lower_get_next_private_rcu); +/** + * netdev_lower_get_first_private_rcu - Get the first ->private from the + * lower neighbour list, RCU + * variant + * @dev: device + * + * Gets the first netdev_adjacent->private from the dev's lower neighbour + * list. The caller must hold RCU read lock. 
+ */ +void *netdev_lower_get_first_private_rcu(struct net_device *dev) +{ + struct netdev_adjacent *lower; + + lower = list_first_or_null_rcu(&dev->adj_list.lower, + struct netdev_adjacent, list); + if (lower) + return lower->private; + return NULL; +} +EXPORT_SYMBOL(netdev_lower_get_first_private_rcu); + /** * netdev_master_upper_dev_get_rcu - Get master upper device * @dev: device -- cgit v1.2.3 From 3958afa1b272eb07109fd31549e69193b4d7c364 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 15 Dec 2013 22:12:06 -0800 Subject: net: Change skb_get_rxhash to skb_get_hash Changing name of function as part of making the hash in skbuff to be generic property, not just for receive path. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/core/dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 9d4369ece679..c482fe8abf87 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3006,7 +3006,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, } skb_reset_network_header(skb); - if (!skb_get_rxhash(skb)) + if (!skb_get_hash(skb)) goto done; flow_table = rcu_dereference(rxqueue->rps_flow_table); @@ -3151,7 +3151,7 @@ static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen) rcu_read_lock(); fl = rcu_dereference(sd->flow_limit); if (fl) { - new_flow = skb_get_rxhash(skb) & (fl->num_buckets - 1); + new_flow = skb_get_hash(skb) & (fl->num_buckets - 1); old_flow = fl->history[fl->history_head]; fl->history[fl->history_head] = new_flow; -- cgit v1.2.3 From 289dccbe141e01efc5968fe39a0993c9f611375e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 20 Dec 2013 14:29:08 -0800 Subject: net: use kfree_skb_list() helper We can use kfree_skb_list() instead of open coding it. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index c482fe8abf87..973c23656673 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2454,13 +2454,8 @@ static void dev_gso_skb_destructor(struct sk_buff *skb) { struct dev_gso_cb *cb; - do { - struct sk_buff *nskb = skb->next; - - skb->next = nskb->next; - nskb->next = NULL; - kfree_skb(nskb); - } while (skb->next); + kfree_skb_list(skb->next); + skb->next = NULL; cb = DEV_GSO_CB(skb); if (cb->destructor) @@ -4240,17 +4235,10 @@ EXPORT_SYMBOL(netif_napi_add); void netif_napi_del(struct napi_struct *napi) { - struct sk_buff *skb, *next; - list_del_init(&napi->dev_list); napi_free_frags(napi); - for (skb = napi->gro_list; skb; skb = next) { - next = skb->next; - skb->next = NULL; - kfree_skb(skb); - } - + kfree_skb_list(napi->gro_list); napi->gro_list = NULL; napi->gro_count = 0; } -- cgit v1.2.3 From 855abcf0664512df945fba911fa0b9d88e2ea0bf Mon Sep 17 00:00:00 2001 From: Zhi Yong Wu Date: Wed, 1 Jan 2014 04:34:50 +0800 Subject: net, rps: fix the comment of net_rps_action_and_irq_enable() Signed-off-by: Zhi Yong Wu Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 973c23656673..cc9ab80581d7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4030,7 +4030,7 @@ gro_result_t napi_gro_frags(struct napi_struct *napi) EXPORT_SYMBOL(napi_gro_frags); /* - * net_rps_action sends any pending IPI's for rps. 
+ * net_rps_action_and_irq_enable sends any pending IPI's for rps. * Note: called with local irq disabled, but exits with local irq enabled. */ static void net_rps_action_and_irq_enable(struct softnet_data *sd) -- cgit v1.2.3 From 1d143d9f0c833fcf38cc737eb0a8698fa2dd144c Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Sun, 29 Dec 2013 14:01:29 -0800 Subject: net: core functions cleanup The following functions are not used outside of net/core/dev.c and should be declared static. call_netdevice_notifiers_info __dev_remove_offload netdev_has_any_upper_dev __netdev_adjacent_dev_remove __netdev_adjacent_dev_link_lists __netdev_adjacent_dev_unlink_lists __netdev_adjacent_dev_unlink __netdev_adjacent_dev_link_neighbour __netdev_adjacent_dev_unlink_neighbour And the following are never used and should be deleted netdev_lower_dev_get_private_rcu __netdev_find_adj_rcu Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/core/dev.c | 82 +++++++++++++++++++--------------------------------------- 1 file changed, 26 insertions(+), 56 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index cc9ab80581d7..77f43aa373fe 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -480,7 +480,7 @@ EXPORT_SYMBOL(dev_add_offload); * and must not be freed until after all the CPU's have gone * through a quiescent state. */ -void __dev_remove_offload(struct packet_offload *po) +static void __dev_remove_offload(struct packet_offload *po) { struct list_head *head = &offload_base; struct packet_offload *po1; @@ -498,7 +498,6 @@ void __dev_remove_offload(struct packet_offload *po) out: spin_unlock(&offload_lock); } -EXPORT_SYMBOL(__dev_remove_offload); /** * dev_remove_offload - remove packet offload handler @@ -1566,14 +1565,14 @@ EXPORT_SYMBOL(unregister_netdevice_notifier); * are as for raw_notifier_call_chain(). */ -int call_netdevice_notifiers_info(unsigned long val, struct net_device *dev, - struct netdev_notifier_info *info) +static int call_netdevice_notifiers_info(unsigned long val, + struct net_device *dev, + struct netdev_notifier_info *info) { ASSERT_RTNL(); netdev_notifier_info_init(info, dev); return raw_notifier_call_chain(&netdev_chain, val, info); } -EXPORT_SYMBOL(call_netdevice_notifiers_info); /** * call_netdevice_notifiers - call all network notifier blocks @@ -4355,19 +4354,6 @@ struct netdev_adjacent { struct rcu_head rcu; }; -static struct netdev_adjacent *__netdev_find_adj_rcu(struct net_device *dev, - struct net_device *adj_dev, - struct list_head *adj_list) -{ - struct netdev_adjacent *adj; - - list_for_each_entry_rcu(adj, adj_list, list) { - if (adj->dev == adj_dev) - return adj; - } - return NULL; -} - static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev, struct net_device *adj_dev, struct list_head *adj_list) @@ -4406,13 +4392,12 @@ EXPORT_SYMBOL(netdev_has_upper_dev); * Find out if a device is linked to an upper device and return true in case * it is. The caller must hold the RTNL lock. 
*/ -bool netdev_has_any_upper_dev(struct net_device *dev) +static bool netdev_has_any_upper_dev(struct net_device *dev) { ASSERT_RTNL(); return !list_empty(&dev->all_adj_list.upper); } -EXPORT_SYMBOL(netdev_has_any_upper_dev); /** * netdev_master_upper_dev_get - Get master upper device @@ -4644,9 +4629,9 @@ free_adj: return ret; } -void __netdev_adjacent_dev_remove(struct net_device *dev, - struct net_device *adj_dev, - struct list_head *dev_list) +static void __netdev_adjacent_dev_remove(struct net_device *dev, + struct net_device *adj_dev, + struct list_head *dev_list) { struct netdev_adjacent *adj; char linkname[IFNAMSIZ+7]; @@ -4684,11 +4669,11 @@ void __netdev_adjacent_dev_remove(struct net_device *dev, kfree_rcu(adj, rcu); } -int __netdev_adjacent_dev_link_lists(struct net_device *dev, - struct net_device *upper_dev, - struct list_head *up_list, - struct list_head *down_list, - void *private, bool master) +static int __netdev_adjacent_dev_link_lists(struct net_device *dev, + struct net_device *upper_dev, + struct list_head *up_list, + struct list_head *down_list, + void *private, bool master) { int ret; @@ -4707,8 +4692,8 @@ int __netdev_adjacent_dev_link_lists(struct net_device *dev, return 0; } -int __netdev_adjacent_dev_link(struct net_device *dev, - struct net_device *upper_dev) +static int __netdev_adjacent_dev_link(struct net_device *dev, + struct net_device *upper_dev) { return __netdev_adjacent_dev_link_lists(dev, upper_dev, &dev->all_adj_list.upper, @@ -4716,26 +4701,26 @@ int __netdev_adjacent_dev_link(struct net_device *dev, NULL, false); } -void __netdev_adjacent_dev_unlink_lists(struct net_device *dev, - struct net_device *upper_dev, - struct list_head *up_list, - struct list_head *down_list) +static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev, + struct net_device *upper_dev, + struct list_head *up_list, + struct list_head *down_list) { __netdev_adjacent_dev_remove(dev, upper_dev, up_list); __netdev_adjacent_dev_remove(upper_dev, dev, down_list); } -void __netdev_adjacent_dev_unlink(struct net_device *dev, - struct net_device *upper_dev) +static void __netdev_adjacent_dev_unlink(struct net_device *dev, + struct net_device *upper_dev) { __netdev_adjacent_dev_unlink_lists(dev, upper_dev, &dev->all_adj_list.upper, &upper_dev->all_adj_list.lower); } -int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, - struct net_device *upper_dev, - void *private, bool master) +static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, + struct net_device *upper_dev, + void *private, bool master) { int ret = __netdev_adjacent_dev_link(dev, upper_dev); @@ -4754,8 +4739,8 @@ int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, return 0; } -void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev, - struct net_device *upper_dev) +static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev, + struct net_device *upper_dev) { __netdev_adjacent_dev_unlink(dev, upper_dev); __netdev_adjacent_dev_unlink_lists(dev, upper_dev, @@ -4944,21 +4929,6 @@ void netdev_upper_dev_unlink(struct net_device *dev, } EXPORT_SYMBOL(netdev_upper_dev_unlink); -void *netdev_lower_dev_get_private_rcu(struct net_device *dev, - struct net_device *lower_dev) -{ - struct netdev_adjacent *lower; - - if (!lower_dev) - return NULL; - lower = __netdev_find_adj_rcu(dev, lower_dev, &dev->adj_list.lower); - if (!lower) - return NULL; - - return lower->private; -} -EXPORT_SYMBOL(netdev_lower_dev_get_private_rcu); - void 
*netdev_lower_dev_get_private(struct net_device *dev, struct net_device *lower_dev) { -- cgit v1.2.3 From 86f8515f9721fa171483f0fe0391968fbb949cc9 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 29 Dec 2013 17:27:11 +0100 Subject: net: netprio: rename config to be more consistent with cgroup configs While we're at it and introduced CGROUP_NET_CLASSID, lets also make NETPRIO_CGROUP more consistent with the rest of cgroups and rename it into CONFIG_CGROUP_NET_PRIO so that for networking, we now have CONFIG_CGROUP_NET_{PRIO,CLASSID}. This not only makes the CONFIG option consistent among networking cgroups, but also among cgroups CONFIG conventions in general as the vast majority has a prefix of CONFIG_CGROUP_. Signed-off-by: Daniel Borkmann Cc: Zefan Li Cc: cgroups@vger.kernel.org Acked-by: Li Zefan Signed-off-by: Pablo Neira Ayuso --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index c95d664b2b42..888a79b2b8b9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2747,7 +2747,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, return rc; } -#if IS_ENABLED(CONFIG_NETPRIO_CGROUP) +#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) static void skb_update_prio(struct sk_buff *skb) { struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap); -- cgit v1.2.3 From cdb3f4a31b64c3a1c6eef40bc01ebc9594c58a8c Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Tue, 7 Jan 2014 10:11:10 -0500 Subject: net: Do not enable tx-nocache-copy by default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are many cases where this feature does not improve performance or even reduces it. For example, here are the results from tests that I've run using 3.12.6 on one Intel Xeon W3565 and one i7 920 connected by ixgbe adapters. The results are from the Xeon, but they're similar on the i7. All numbers report the mean±stddev over 10 runs of 10s. 1) latency tests similar to what is described in "c6e1a0d net: Allow no-cache copy from user on transmit" There is no statistically significant difference between tx-nocache-copy on/off. nic irqs spread out (one queue per cpu) 200x netperf -r 1400,1 tx-nocache-copy off 692000±1000 tps 50/90/95/99% latency (us): 275±2/643.8±0.4/799±1/2474.4±0.3 tx-nocache-copy on 693000±1000 tps 50/90/95/99% latency (us): 274±1/644.1±0.7/800±2/2474.5±0.7 200x netperf -r 14000,14000 tx-nocache-copy off 86450±80 tps 50/90/95/99% latency (us): 334.37±0.02/838±1/2100±20/3990±40 tx-nocache-copy on 86110±60 tps 50/90/95/99% latency (us): 334.28±0.01/837±2/2110±20/3990±20 2) single stream throughput tests tx-nocache-copy leads to higher service demand throughput cpu0 cpu1 demand (Gb/s) (Gcycle) (Gcycle) (cycle/B) nic irqs and netperf on cpu0 (1x netperf -T0,0 -t omni -- -d send) tx-nocache-copy off 9402±5 9.4±0.2 0.80±0.01 tx-nocache-copy on 9403±3 9.85±0.04 0.838±0.004 nic irqs on cpu0, netperf on cpu1 (1x netperf -T1,1 -t omni -- -d send) tx-nocache-copy off 9401±5 5.83±0.03 5.0±0.1 0.923±0.007 tx-nocache-copy on 9404±2 5.74±0.03 5.523±0.009 0.958±0.002 As a second example, here are some results from Eric Dumazet with latest net-next. 
tx-nocache-copy also leads to higher service demand (cpu is Intel(R) Xeon(R) CPU X5660 @ 2.80GHz) lpq83:~# ./ethtool -K eth0 tx-nocache-copy on lpq83:~# perf stat ./netperf -H lpq84 -c MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to lpq84.prod.google.com () port 0 AF_INET Recv Send Send Utilization Service Demand Socket Socket Message Elapsed Send Recv Send Recv Size Size Size Time Throughput local remote local remote bytes bytes bytes secs. 10^6bits/s % S % U us/KB us/KB 87380 16384 16384 10.00 9407.44 2.50 -1.00 0.522 -1.000 Performance counter stats for './netperf -H lpq84 -c': 4282.648396 task-clock # 0.423 CPUs utilized 9,348 context-switches # 0.002 M/sec 88 CPU-migrations # 0.021 K/sec 355 page-faults # 0.083 K/sec 11,812,797,651 cycles # 2.758 GHz [82.79%] 9,020,522,817 stalled-cycles-frontend # 76.36% frontend cycles idle [82.54%] 4,579,889,681 stalled-cycles-backend # 38.77% backend cycles idle [67.33%] 6,053,172,792 instructions # 0.51 insns per cycle # 1.49 stalled cycles per insn [83.64%] 597,275,583 branches # 139.464 M/sec [83.70%] 8,960,541 branch-misses # 1.50% of all branches [83.65%] 10.128990264 seconds time elapsed lpq83:~# ./ethtool -K eth0 tx-nocache-copy off lpq83:~# perf stat ./netperf -H lpq84 -c MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to lpq84.prod.google.com () port 0 AF_INET Recv Send Send Utilization Service Demand Socket Socket Message Elapsed Send Recv Send Recv Size Size Size Time Throughput local remote local remote bytes bytes bytes secs. 10^6bits/s % S % U us/KB us/KB 87380 16384 16384 10.00 9412.45 2.15 -1.00 0.449 -1.000 Performance counter stats for './netperf -H lpq84 -c': 2847.375441 task-clock # 0.281 CPUs utilized 11,632 context-switches # 0.004 M/sec 49 CPU-migrations # 0.017 K/sec 354 page-faults # 0.124 K/sec 7,646,889,749 cycles # 2.686 GHz [83.34%] 6,115,050,032 stalled-cycles-frontend # 79.97% frontend cycles idle [83.31%] 1,726,460,071 stalled-cycles-backend # 22.58% backend cycles idle [66.55%] 2,079,702,453 instructions # 0.27 insns per cycle # 2.94 stalled cycles per insn [83.22%] 363,773,213 branches # 127.757 M/sec [83.29%] 4,242,732 branch-misses # 1.17% of all branches [83.51%] 10.128449949 seconds time elapsed CC: Tom Herbert Signed-off-by: Benjamin Poirier Signed-off-by: David S. Miller --- net/core/dev.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index e5e23d785454..b3c574a88026 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5783,13 +5783,8 @@ int register_netdevice(struct net_device *dev) dev->features |= NETIF_F_SOFT_FEATURES; dev->wanted_features = dev->features & dev->hw_features; - /* Turn on no cache copy if HW is doing checksum */ if (!(dev->flags & IFF_LOOPBACK)) { dev->hw_features |= NETIF_F_NOCACHE_COPY; - if (dev->features & NETIF_F_ALL_CSUM) { - dev->wanted_features |= NETIF_F_NOCACHE_COPY; - dev->features |= NETIF_F_NOCACHE_COPY; - } } /* Make NETIF_F_HIGHDMA inheritable to VLAN devices. -- cgit v1.2.3 From bf5a755f5e9186406bbf50f4087100af5bd68e40 Mon Sep 17 00:00:00 2001 From: Jerry Chu Date: Tue, 7 Jan 2014 10:23:19 -0800 Subject: net-gre-gro: Add GRE support to the GRO stack This patch built on top of Commit 299603e8370a93dd5d8e8d800f0dff1ce2c53d36 ("net-gro: Prepare GRO stack for the upcoming tunneling support") to add the support of the standard GRE (RFC1701/RFC2784/RFC2890) to the GRO stack. 
It also serves as an example for supporting other encapsulation protocols in the GRO stack in the future. The patch supports version 0 and all the flags (key, csum, seq#) but will flush any pkt with the S (seq#) flag. This is because the S flag is not supported by GSO, and a GRO pkt may end up in the forwarding path, thus requiring GSO support to break it up correctly. Currently the "packet_offload" structure only contains L3 (ETH_P_IP/ ETH_P_IPV6) GRO offload support so the encapped pkts are limited to IP pkts (i.e., w/o L2 hdr). But support for other protocol types can be easily added, as can support for GRE variations like NVGRE. The patch also supports csum offload. Specifically, if the csum flag is on and the h/w is capable of checksumming the payload (CHECKSUM_COMPLETE), the code will take advantage of the csum computed by the h/w when validating the GRE csum. Note that commit 60769a5dcd8755715c7143b4571d5c44f01796f1 "ipv4: gre: add GRO capability" already introduces GRO capability to IPv4 GRE tunnels, using the gro_cells infrastructure. But GRO is done after the GRE hdr has been removed (i.e., decapped). The following patch applies GRO when pkts first come in (before hitting the GRE tunnel code). There is some performance advantage for applying GRO as early as possible. Also this approach is transparent to other subsystems like Open vSwitch, where GRE decap is handled outside of the IP stack, hence making it harder for the gro_cells stuff to apply. On the other hand, some NICs are still not capable of hashing on the inner hdr of a GRE pkt (RSS). In that case the GRO processing of pkts from the same remote host will all happen on the same CPU and the performance may be suboptimal. I'm including some rough preliminary performance numbers below. Note that the performance will be highly dependent on traffic load and mix, as usual. Moreover it also depends on NIC offload features, hence the following is by no means a comprehensive study. Local testing and tuning will be needed to decide the best setting. All tests spawned 50 copies of netperf TCP_STREAM and ran for 30 secs. (super_netperf 50 -H 192.168.1.18 -l 30) An IP GRE tunnel with only the key flag on (e.g., ip tunnel add gre1 mode gre local 10.246.17.18 remote 10.246.17.17 ttl 255 key 123) is configured. The GRO support for pkts AFTER decap is controlled through the device feature of the GRE device (e.g., ethtool -K gre1 gro on/off). 1.1 ethtool -K gre1 gro off; ethtool -K eth0 gro off thruput: 9.16Gbps CPU utilization: 19% 1.2 ethtool -K gre1 gro on; ethtool -K eth0 gro off thruput: 5.9Gbps CPU utilization: 15% 1.3 ethtool -K gre1 gro off; ethtool -K eth0 gro on thruput: 9.26Gbps CPU utilization: 12-13% 1.4 ethtool -K gre1 gro on; ethtool -K eth0 gro on thruput: 9.26Gbps CPU utilization: 10% The following tests were performed on a different NIC that is capable of csum offload. I.e., the h/w is capable of computing IP payload csum (CHECKSUM_COMPLETE).
2.1 ethtool -K gre1 gro on (hence will use gro_cells) 2.1.1 ethtool -K eth0 gro off; csum offload disabled thruput: 8.53Gbps CPU utilization: 9% 2.1.2 ethtool -K eth0 gro off; csum offload enabled thruput: 8.97Gbps CPU utilization: 7-8% 2.1.3 ethtool -K eth0 gro on; csum offload disabled thruput: 8.83Gbps CPU utilization: 5-6% 2.1.4 ethtool -K eth0 gro on; csum offload enabled thruput: 8.98Gbps CPU utilization: 5% 2.2 ethtool -K gre1 gro off 2.2.1 ethtool -K eth0 gro off; csum offload disabled thruput: 5.93Gbps CPU utilization: 9% 2.2.2 ethtool -K eth0 gro off; csum offload enabled thruput: 5.62Gbps CPU utilization: 8% 2.2.3 ethtool -K eth0 gro on; csum offload disabled thruput: 7.69Gbps CPU utilization: 8% 2.2.4 ethtool -K eth0 gro on; csum offload enabled thruput: 8.96Gbps CPU utilization: 5-6% Signed-off-by: H.K. Jerry Chu Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index b3c574a88026..ce01847793c0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3846,6 +3846,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff skb_gro_reset_offset(skb); gro_list_prepare(napi, skb); + NAPI_GRO_CB(skb)->csum = skb->csum; /* Needed for CHECKSUM_COMPLETE */ rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { @@ -3922,6 +3923,31 @@ normal: goto pull; } +struct packet_offload *gro_find_receive_by_type(__be16 type) +{ + struct list_head *offload_head = &offload_base; + struct packet_offload *ptype; + + list_for_each_entry_rcu(ptype, offload_head, list) { + if (ptype->type != type || !ptype->callbacks.gro_receive) + continue; + return ptype; + } + return NULL; +} + +struct packet_offload *gro_find_complete_by_type(__be16 type) +{ + struct list_head *offload_head = &offload_base; + struct packet_offload *ptype; + + list_for_each_entry_rcu(ptype, offload_head, list) { + if (ptype->type != type || !ptype->callbacks.gro_complete) + continue; + return ptype; + } + return NULL; +} static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) { -- cgit v1.2.3 From 600adc18eba823f9fd8ed5fec8b04f11dddf3884 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Jan 2014 14:12:19 -0800 Subject: net: gro: change GRO overflow strategy GRO layer has a limit of 8 flows being held in GRO list, for performance reason. When a packet comes for a flow not yet in the list, and list is full, we immediately give it to upper stacks, lowering aggregation performance. With TSO auto sizing and FQ packet scheduler, this situation happens more often. This patch changes strategy to simply evict the oldest flow of the list. This works better because of the nature of packet trains for which GRO is efficient. This also has the effect of lowering the GRO latency if many flows are competing. Tested : Used a 40Gbps NIC, with 4 RX queues, and 200 concurrent TCP_STREAM netperf. Before patch, aggregate rate is 11Gbps (while a single flow can reach 30Gbps) After patch, line rate is reached. Signed-off-by: Eric Dumazet Cc: Jerry Chu Cc: Neal Cardwell Acked-by: Neal Cardwell Signed-off-by: David S. 
Miller --- net/core/dev.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index ce01847793c0..a8280154c42a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3882,10 +3882,23 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff if (same_flow) goto ok; - if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS) + if (NAPI_GRO_CB(skb)->flush) goto normal; - napi->gro_count++; + if (unlikely(napi->gro_count >= MAX_GRO_SKBS)) { + struct sk_buff *nskb = napi->gro_list; + + /* locate the end of the list to select the 'oldest' flow */ + while (nskb->next) { + pp = &nskb->next; + nskb = *pp; + } + *pp = NULL; + nskb->next = NULL; + napi_gro_complete(nskb); + } else { + napi->gro_count++; + } NAPI_GRO_CB(skb)->count = 1; NAPI_GRO_CB(skb)->age = jiffies; skb_shinfo(skb)->gso_size = skb_gro_len(skb); -- cgit v1.2.3 From 2315dc91a5059d7da9a8b9b9daf78d695c11383e Mon Sep 17 00:00:00 2001 From: Veaceslav Falico Date: Fri, 10 Jan 2014 16:56:25 +0100 Subject: net: make dev_set_mtu() honor notification return code Currently, after changing the MTU for a device, dev_set_mtu() calls NETDEV_CHANGEMTU notification, however doesn't verify it's return code - which can be NOTIFY_BAD - i.e. some of the net notifier blocks refused this change, and continues nevertheless. To fix this, verify the return code, and if it's an error - then revert the MTU to the original one, notify again and pass the error code. CC: Jiri Pirko CC: "David S. Miller" CC: Eric Dumazet CC: Alexander Duyck CC: Nicolas Dichtel Signed-off-by: Veaceslav Falico Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/dev.c | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index a8280154c42a..87312dcf0aa8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5300,6 +5300,17 @@ int dev_change_flags(struct net_device *dev, unsigned int flags) } EXPORT_SYMBOL(dev_change_flags); +static int __dev_set_mtu(struct net_device *dev, int new_mtu) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + if (ops->ndo_change_mtu) + return ops->ndo_change_mtu(dev, new_mtu); + + dev->mtu = new_mtu; + return 0; +} + /** * dev_set_mtu - Change maximum transfer unit * @dev: device @@ -5309,8 +5320,7 @@ EXPORT_SYMBOL(dev_change_flags); */ int dev_set_mtu(struct net_device *dev, int new_mtu) { - const struct net_device_ops *ops = dev->netdev_ops; - int err; + int err, orig_mtu; if (new_mtu == dev->mtu) return 0; @@ -5322,14 +5332,20 @@ int dev_set_mtu(struct net_device *dev, int new_mtu) if (!netif_device_present(dev)) return -ENODEV; - err = 0; - if (ops->ndo_change_mtu) - err = ops->ndo_change_mtu(dev, new_mtu); - else - dev->mtu = new_mtu; + orig_mtu = dev->mtu; + err = __dev_set_mtu(dev, new_mtu); - if (!err) - call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); + if (!err) { + err = call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); + err = notifier_to_errno(err); + if (err) { + /* setting mtu back and notifying everyone again, + * so that they have a chance to revert changes. 
+ */ + __dev_set_mtu(dev, orig_mtu); + call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); + } + } return err; } EXPORT_SYMBOL(dev_set_mtu); -- cgit v1.2.3 From 20567661a1eb3eab30c94cab958426bdaa83d271 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Fri, 10 Jan 2014 22:16:30 +0000 Subject: net: Fix indentation in dev_hard_start_xmit() Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- net/core/dev.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 2bee80591f9a..0703ee04c670 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2596,8 +2596,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, dev_queue_xmit_nit(skb, dev); skb_len = skb->len; - rc = ops->ndo_start_xmit(skb, dev); - + rc = ops->ndo_start_xmit(skb, dev); trace_net_dev_xmit(skb, rc, dev, skb_len); if (rc == NETDEV_TX_OK) txq_trans_update(txq); -- cgit v1.2.3 From d87d04a7851f067a06bb10acca8aca62a8cad6f0 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Fri, 10 Jan 2014 22:17:03 +0000 Subject: net: Add net_dev_start_xmit trace event, exposing more skb fields The existing net/net_dev_xmit trace event provides little information about the skb that has been passed to the driver, and it is not simple to add more since the skb may already have been freed at the point the event is emitted. Add a separate trace event before the skb is passed to the driver, including most fields that are likely to be interesting for debugging driver datapath behaviour. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- net/core/dev.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 0703ee04c670..9e93a1464216 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2596,6 +2596,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, dev_queue_xmit_nit(skb, dev); skb_len = skb->len; + trace_net_dev_start_xmit(skb, dev); rc = ops->ndo_start_xmit(skb, dev); trace_net_dev_xmit(skb, rc, dev, skb_len); if (rc == NETDEV_TX_OK) @@ -2614,6 +2615,7 @@ gso: dev_queue_xmit_nit(nskb, dev); skb_len = nskb->len; + trace_net_dev_start_xmit(nskb, dev); rc = ops->ndo_start_xmit(nskb, dev); trace_net_dev_xmit(nskb, rc, dev, skb_len); if (unlikely(rc != NETDEV_TX_OK)) { -- cgit v1.2.3 From ae78dbfa40c629f79c72ab93525508ef49e798b6 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Fri, 10 Jan 2014 22:17:24 +0000 Subject: net: Add trace events for all receive entry points, exposing more skb fields The existing net/netif_rx and net/netif_receive_skb trace events provide little information about the skb, nor do they indicate how it entered the stack. Add trace events at entry of each of the exported functions, including most fields that are likely to be interesting for debugging driver datapath behaviour. Split netif_rx() and netif_receive_skb() so that internal calls are not traced. Signed-off-by: Ben Hutchings Signed-off-by: David S. 
Miller --- net/core/dev.c | 100 +++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 61 insertions(+), 39 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 9e93a1464216..20c834e3c7ca 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -147,6 +147,8 @@ struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; struct list_head ptype_all __read_mostly; /* Taps */ static struct list_head offload_base __read_mostly; +static int netif_rx_internal(struct sk_buff *skb); + /* * The @dev_base_head list is protected by @dev_base_lock and the rtnl * semaphore. @@ -1698,7 +1700,7 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) skb_scrub_packet(skb, true); skb->protocol = eth_type_trans(skb, dev); - return netif_rx(skb); + return netif_rx_internal(skb); } EXPORT_SYMBOL_GPL(dev_forward_skb); @@ -3219,22 +3221,7 @@ enqueue: return NET_RX_DROP; } -/** - * netif_rx - post buffer to the network code - * @skb: buffer to post - * - * This function receives a packet from a device driver and queues it for - * the upper (protocol) levels to process. It always succeeds. The buffer - * may be dropped during processing for congestion control or by the - * protocol layers. - * - * return values: - * NET_RX_SUCCESS (no congestion) - * NET_RX_DROP (packet was dropped) - * - */ - -int netif_rx(struct sk_buff *skb) +static int netif_rx_internal(struct sk_buff *skb) { int ret; @@ -3270,14 +3257,38 @@ int netif_rx(struct sk_buff *skb) } return ret; } + +/** + * netif_rx - post buffer to the network code + * @skb: buffer to post + * + * This function receives a packet from a device driver and queues it for + * the upper (protocol) levels to process. It always succeeds. The buffer + * may be dropped during processing for congestion control or by the + * protocol layers. + * + * return values: + * NET_RX_SUCCESS (no congestion) + * NET_RX_DROP (packet was dropped) + * + */ + +int netif_rx(struct sk_buff *skb) +{ + trace_netif_rx_entry(skb); + + return netif_rx_internal(skb); +} EXPORT_SYMBOL(netif_rx); int netif_rx_ni(struct sk_buff *skb) { int err; + trace_netif_rx_ni_entry(skb); + preempt_disable(); - err = netif_rx(skb); + err = netif_rx_internal(skb); if (local_softirq_pending()) do_softirq(); preempt_enable(); @@ -3662,22 +3673,7 @@ static int __netif_receive_skb(struct sk_buff *skb) return ret; } -/** - * netif_receive_skb - process receive buffer from network - * @skb: buffer to process - * - * netif_receive_skb() is the main receive data processing function. - * It always succeeds. The buffer may be dropped during processing - * for congestion control or by the protocol layers. - * - * This function may only be called from softirq context and interrupts - * should be enabled. - * - * Return values (usually ignored): - * NET_RX_SUCCESS: no congestion - * NET_RX_DROP: packet was dropped - */ -int netif_receive_skb(struct sk_buff *skb) +static int netif_receive_skb_internal(struct sk_buff *skb) { net_timestamp_check(netdev_tstamp_prequeue, skb); @@ -3703,6 +3699,28 @@ int netif_receive_skb(struct sk_buff *skb) #endif return __netif_receive_skb(skb); } + +/** + * netif_receive_skb - process receive buffer from network + * @skb: buffer to process + * + * netif_receive_skb() is the main receive data processing function. + * It always succeeds. The buffer may be dropped during processing + * for congestion control or by the protocol layers. + * + * This function may only be called from softirq context and interrupts + * should be enabled. 
+ * + * Return values (usually ignored): + * NET_RX_SUCCESS: no congestion + * NET_RX_DROP: packet was dropped + */ +int netif_receive_skb(struct sk_buff *skb) +{ + trace_netif_receive_skb_entry(skb); + + return netif_receive_skb_internal(skb); +} EXPORT_SYMBOL(netif_receive_skb); /* Network device is going away, flush any packets still pending @@ -3764,7 +3782,7 @@ static int napi_gro_complete(struct sk_buff *skb) } out: - return netif_receive_skb(skb); + return netif_receive_skb_internal(skb); } /* napi->gro_list contains packets ordered by age. @@ -3972,7 +3990,7 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) { switch (ret) { case GRO_NORMAL: - if (netif_receive_skb(skb)) + if (netif_receive_skb_internal(skb)) ret = GRO_DROP; break; @@ -3997,6 +4015,8 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { + trace_napi_gro_receive_entry(skb); + return napi_skb_finish(dev_gro_receive(napi, skb), skb); } EXPORT_SYMBOL(napi_gro_receive); @@ -4030,7 +4050,7 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff * { switch (ret) { case GRO_NORMAL: - if (netif_receive_skb(skb)) + if (netif_receive_skb_internal(skb)) ret = GRO_DROP; break; @@ -4069,6 +4089,8 @@ gro_result_t napi_gro_frags(struct napi_struct *napi) if (!skb) return GRO_DROP; + trace_napi_gro_frags_entry(skb); + return napi_frags_finish(napi, skb, dev_gro_receive(napi, skb)); } EXPORT_SYMBOL(napi_gro_frags); @@ -6621,11 +6643,11 @@ static int dev_cpu_callback(struct notifier_block *nfb, /* Process offline CPU's input_pkt_queue */ while ((skb = __skb_dequeue(&oldsd->process_queue))) { - netif_rx(skb); + netif_rx_internal(skb); input_queue_head_incr(oldsd); } while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) { - netif_rx(skb); + netif_rx_internal(skb); input_queue_head_incr(oldsd); } -- cgit v1.2.3 From 3ee32707560955e92d30f7f6e5138cb92a3b1a7e Mon Sep 17 00:00:00 2001 From: Veaceslav Falico Date: Tue, 14 Jan 2014 21:58:50 +0100 Subject: net: add sysfs helpers for netdev_adjacent logic They clean up the code a bit and can be used further. CC: Ding Tianhong CC: "David S. Miller" CC: Eric Dumazet CC: Nicolas Dichtel CC: Cong Wang Signed-off-by: Veaceslav Falico Signed-off-by: David S. Miller --- net/core/dev.c | 57 ++++++++++++++++++++++++++++++--------------------------- 1 file changed, 30 insertions(+), 27 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 20c834e3c7ca..130d3bd0ce6f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4623,13 +4623,36 @@ struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev) } EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu); +int netdev_adjacent_sysfs_add(struct net_device *dev, + struct net_device *adj_dev, + struct list_head *dev_list) +{ + char linkname[IFNAMSIZ+7]; + sprintf(linkname, dev_list == &dev->adj_list.upper ? + "upper_%s" : "lower_%s", adj_dev->name); + return sysfs_create_link(&(dev->dev.kobj), &(adj_dev->dev.kobj), + linkname); +} +void netdev_adjacent_sysfs_del(struct net_device *dev, + char *name, + struct list_head *dev_list) +{ + char linkname[IFNAMSIZ+7]; + sprintf(linkname, dev_list == &dev->adj_list.upper ? 
+ "upper_%s" : "lower_%s", name); + sysfs_remove_link(&(dev->dev.kobj), linkname); +} + +#define netdev_adjacent_is_neigh_list(dev, dev_list) \ + (dev_list == &dev->adj_list.upper || \ + dev_list == &dev->adj_list.lower) + static int __netdev_adjacent_dev_insert(struct net_device *dev, struct net_device *adj_dev, struct list_head *dev_list, void *private, bool master) { struct netdev_adjacent *adj; - char linkname[IFNAMSIZ+7]; int ret; adj = __netdev_find_adj(dev, adj_dev, dev_list); @@ -4652,16 +4675,8 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev, pr_debug("dev_hold for %s, because of link added from %s to %s\n", adj_dev->name, dev->name, adj_dev->name); - if (dev_list == &dev->adj_list.lower) { - sprintf(linkname, "lower_%s", adj_dev->name); - ret = sysfs_create_link(&(dev->dev.kobj), - &(adj_dev->dev.kobj), linkname); - if (ret) - goto free_adj; - } else if (dev_list == &dev->adj_list.upper) { - sprintf(linkname, "upper_%s", adj_dev->name); - ret = sysfs_create_link(&(dev->dev.kobj), - &(adj_dev->dev.kobj), linkname); + if (netdev_adjacent_is_neigh_list(dev, dev_list)) { + ret = netdev_adjacent_sysfs_add(dev, adj_dev, dev_list); if (ret) goto free_adj; } @@ -4681,14 +4696,8 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev, return 0; remove_symlinks: - if (dev_list == &dev->adj_list.lower) { - sprintf(linkname, "lower_%s", adj_dev->name); - sysfs_remove_link(&(dev->dev.kobj), linkname); - } else if (dev_list == &dev->adj_list.upper) { - sprintf(linkname, "upper_%s", adj_dev->name); - sysfs_remove_link(&(dev->dev.kobj), linkname); - } - + if (netdev_adjacent_is_neigh_list(dev, dev_list)) + netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list); free_adj: kfree(adj); dev_put(adj_dev); @@ -4701,7 +4710,6 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev, struct list_head *dev_list) { struct netdev_adjacent *adj; - char linkname[IFNAMSIZ+7]; adj = __netdev_find_adj(dev, adj_dev, dev_list); @@ -4721,13 +4729,8 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev, if (adj->master) sysfs_remove_link(&(dev->dev.kobj), "master"); - if (dev_list == &dev->adj_list.lower) { - sprintf(linkname, "lower_%s", adj_dev->name); - sysfs_remove_link(&(dev->dev.kobj), linkname); - } else if (dev_list == &dev->adj_list.upper) { - sprintf(linkname, "upper_%s", adj_dev->name); - sysfs_remove_link(&(dev->dev.kobj), linkname); - } + if (netdev_adjacent_is_neigh_list(dev, dev_list)) + netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list); list_del_rcu(&adj->list); pr_debug("dev_put for %s, because link removed from %s to %s\n", -- cgit v1.2.3 From 5bb025fae53889cc99a21058c5dd369bf8cce820 Mon Sep 17 00:00:00 2001 From: Veaceslav Falico Date: Tue, 14 Jan 2014 21:58:51 +0100 Subject: net: rename sysfs symlinks on device name change Currently, we don't rename the upper/lower_ifc symlinks in /sys/class/net/*/ , which might result stale/duplicate links/names. Fix this by adding netdev_adjacent_rename_links(dev, oldname) which renames all the upper/lower interface's links to dev from the upper/lower_oldname to the new name. We don't need a rollback because only we control these symlinks and if we fail to rename them - sysfs will anyway complain. Reported-by: Ding Tianhong CC: Ding Tianhong CC: "David S. Miller" CC: Eric Dumazet CC: Nicolas Dichtel CC: Cong Wang Signed-off-by: Veaceslav Falico Signed-off-by: David S. 
Miller --- net/core/dev.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 130d3bd0ce6f..995755733997 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1119,6 +1119,8 @@ rollback: write_seqcount_end(&devnet_rename_seq); + netdev_adjacent_rename_links(dev, oldname); + write_lock_bh(&dev_base_lock); hlist_del_rcu(&dev->name_hlist); write_unlock_bh(&dev_base_lock); @@ -1138,6 +1140,7 @@ rollback: err = ret; write_seqcount_begin(&devnet_rename_seq); memcpy(dev->name, oldname, IFNAMSIZ); + memcpy(oldname, newname, IFNAMSIZ); goto rollback; } else { pr_err("%s: name change rollback failed: %d\n", @@ -4999,6 +5002,25 @@ void netdev_upper_dev_unlink(struct net_device *dev, } EXPORT_SYMBOL(netdev_upper_dev_unlink); +void netdev_adjacent_rename_links(struct net_device *dev, char *oldname) +{ + struct netdev_adjacent *iter; + + list_for_each_entry(iter, &dev->adj_list.upper, list) { + netdev_adjacent_sysfs_del(iter->dev, oldname, + &iter->dev->adj_list.lower); + netdev_adjacent_sysfs_add(iter->dev, dev, + &iter->dev->adj_list.lower); + } + + list_for_each_entry(iter, &dev->adj_list.lower, list) { + netdev_adjacent_sysfs_del(iter->dev, oldname, + &iter->dev->adj_list.upper); + netdev_adjacent_sysfs_add(iter->dev, dev, + &iter->dev->adj_list.upper); + } +} + void *netdev_lower_dev_get_private(struct net_device *dev, struct net_device *lower_dev) { -- cgit v1.2.3 From 0b4cec8c2e872a6bc9146a001d7532f31023aed5 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 15 Jan 2014 08:58:06 -0800 Subject: net: Check skb->rxhash in gro_receive When initializing a gro_list for a packet, first check the rxhash of the incoming skb against that of the skbs in the list. This should be a very strong indicator of whether the flow is going to be matched, and potentially allows a lot of other checks to be short-circuited. Use skb_hash_raw so that we don't force the hash to be calculated. Tested by running netperf 200 TCP_STREAMs between two machines with GRO, HW rxhash, and 1G. Saw no performance degradation, slight reduction of time in dev_gro_receive. Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- net/core/dev.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 995755733997..b2c1869b04e3 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3821,10 +3821,18 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb) { struct sk_buff *p; unsigned int maclen = skb->dev->hard_header_len; + u32 hash = skb_get_hash_raw(skb); for (p = napi->gro_list; p; p = p->next) { unsigned long diffs; + NAPI_GRO_CB(p)->flush = 0; + + if (hash != skb_get_hash_raw(p)) { + NAPI_GRO_CB(p)->same_flow = 0; + continue; + } + diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; diffs |= p->vlan_tci ^ skb->vlan_tci; if (maclen == ETH_HLEN) @@ -3835,7 +3843,6 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb) skb_gro_mac_header(skb), maclen); NAPI_GRO_CB(p)->same_flow = !diffs; - NAPI_GRO_CB(p)->flush = 0; } } -- cgit v1.2.3 From 1d486bfb66971ebacc2a46a23431ace9af70dc66 Mon Sep 17 00:00:00 2001 From: Veaceslav Falico Date: Thu, 16 Jan 2014 00:02:18 +0100 Subject: net: add NETDEV_PRECHANGEMTU to notify before mtu change happens Currently, if a device changes its mtu, first the change happens (involving all the side effects), and after that the NETDEV_CHANGEMTU is sent so that other devices can catch up with the new mtu. However, if they return NOTIFY_BAD, then the change is reverted and an error is returned. This is a really long and costly operation (sometimes). To fix this, add a NETDEV_PRECHANGEMTU notification which is called prior to any change actually happening, and if any callee returns NOTIFY_BAD - the change is aborted. This way we're skipping all the playing with apply/revert the mtu. CC: "David S. Miller" CC: Jiri Pirko CC: Eric Dumazet CC: Nicolas Dichtel CC: Cong Wang Signed-off-by: Veaceslav Falico Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/dev.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index b2c1869b04e3..f87bedd51eed 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5392,6 +5392,11 @@ int dev_set_mtu(struct net_device *dev, int new_mtu) if (!netif_device_present(dev)) return -ENODEV; + err = call_netdevice_notifiers(NETDEV_PRECHANGEMTU, dev); + err = notifier_to_errno(err); + if (err) + return err; + orig_mtu = dev->mtu; err = __dev_set_mtu(dev, new_mtu); -- cgit v1.2.3 From a953be53ce40440acb4740edb48577b9468d4c3d Mon Sep 17 00:00:00 2001 From: Michael Dalton Date: Thu, 16 Jan 2014 22:23:28 -0800 Subject: net-sysfs: add support for device-specific rx queue sysfs attributes Extend existing support for netdevice receive queue sysfs attributes to permit a device-specific attribute group. Initial use case for this support will be to allow the virtio-net device to export per-receive queue mergeable receive buffer size. Signed-off-by: Michael Dalton Signed-off-by: David S. 
Miller --- net/core/dev.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index f87bedd51eed..288df6232006 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2083,7 +2083,7 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) } EXPORT_SYMBOL(netif_set_real_num_tx_queues); -#ifdef CONFIG_RPS +#ifdef CONFIG_SYSFS /** * netif_set_real_num_rx_queues - set actual number of RX queues used * @dev: Network device @@ -5764,7 +5764,7 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev, } EXPORT_SYMBOL(netif_stacked_transfer_operstate); -#ifdef CONFIG_RPS +#ifdef CONFIG_SYSFS static int netif_alloc_rx_queues(struct net_device *dev) { unsigned int i, count = dev->num_rx_queues; @@ -6309,7 +6309,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, return NULL; } -#ifdef CONFIG_RPS +#ifdef CONFIG_SYSFS if (rxqs < 1) { pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n"); return NULL; @@ -6365,7 +6365,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, if (netif_alloc_netdev_queues(dev)) goto free_all; -#ifdef CONFIG_RPS +#ifdef CONFIG_SYSFS dev->num_rx_queues = rxqs; dev->real_num_rx_queues = rxqs; if (netif_alloc_rx_queues(dev)) @@ -6385,7 +6385,7 @@ free_all: free_pcpu: free_percpu(dev->pcpu_refcnt); netif_free_tx_queues(dev); -#ifdef CONFIG_RPS +#ifdef CONFIG_SYSFS kfree(dev->_rx); #endif @@ -6410,7 +6410,7 @@ void free_netdev(struct net_device *dev) release_net(dev_net(dev)); netif_free_tx_queues(dev); -#ifdef CONFIG_RPS +#ifdef CONFIG_SYSFS kfree(dev->_rx); #endif -- cgit v1.2.3 From 9d08dd3d320fab4e8b491eb34ebbb5476d2266cf Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Mon, 20 Jan 2014 11:25:13 +0800 Subject: net: document accel_priv parameter for __dev_queue_xmit() To silent "make htmldocs" warning. Reported-by: kbuild test robot Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- net/core/dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 288df6232006..fb99f6477050 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2778,8 +2778,9 @@ int dev_loopback_xmit(struct sk_buff *skb) EXPORT_SYMBOL(dev_loopback_xmit); /** - * dev_queue_xmit - transmit a buffer + * __dev_queue_xmit - transmit a buffer * @skb: buffer to transmit + * @accel_priv: private data used for L2 forwarding offload * * Queue a buffer for transmission to a network device. The caller must * have set the device and priority and built the buffer before calling -- cgit v1.2.3 From f14fe8a848cd67dfdb9a959ff3e1fa259878bec3 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Sat, 18 Jan 2014 19:19:27 +0100 Subject: net: remove unnecessary initializations in net_dev_init softnet_data is already set to 0, no need to use memset or initialize specific fields to 0 or NULL afterwards. Signed-off-by: Sabrina Dubroca Signed-off-by: David S. 
Miller --- net/core/dev.c | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index fb99f6477050..a578af589198 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6998,28 +6998,18 @@ static int __init net_dev_init(void) for_each_possible_cpu(i) { struct softnet_data *sd = &per_cpu(softnet_data, i); - memset(sd, 0, sizeof(*sd)); skb_queue_head_init(&sd->input_pkt_queue); skb_queue_head_init(&sd->process_queue); - sd->completion_queue = NULL; INIT_LIST_HEAD(&sd->poll_list); - sd->output_queue = NULL; sd->output_queue_tailp = &sd->output_queue; #ifdef CONFIG_RPS sd->csd.func = rps_trigger_softirq; sd->csd.info = sd; - sd->csd.flags = 0; sd->cpu = i; #endif sd->backlog.poll = process_backlog; sd->backlog.weight = weight_p; - sd->backlog.gro_list = NULL; - sd->backlog.gro_count = 0; - -#ifdef CONFIG_NET_FLOW_LIMIT - sd->flow_limit = NULL; -#endif } dev_boot_phase = 0; -- cgit v1.2.3 From b582ef0990d457f7ce8ccf827af51a575ca0b4a6 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Mon, 20 Jan 2014 13:59:19 +0200 Subject: net: Add GRO support for UDP encapsulating protocols Add GRO handlers for protocols that do UDP encapsulation, with the intent of being able to coalesce packets which encapsulate packets belonging to the same TCP session. For GRO purposes, the destination UDP port takes the role of the ether type field in the ethernet header or the next protocol in the IP header. The UDP GRO handler will only attempt to coalesce packets whose destination port is registered to have a gro handler. Use a mark on the skb GRO CB data to disallow (flush) running the udp gro receive code twice on a packet. This solves the problem of udp encapsulated packets whose inner VM packet is udp and happens to carry a port which has registered offloads. Signed-off-by: Shlomo Pongratz Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- net/core/dev.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index a578af589198..da92305c344f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3893,6 +3893,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff NAPI_GRO_CB(skb)->same_flow = 0; NAPI_GRO_CB(skb)->flush = 0; NAPI_GRO_CB(skb)->free = 0; + NAPI_GRO_CB(skb)->udp_mark = 0; pp = ptype->callbacks.gro_receive(&napi->gro_list, skb); break; -- cgit v1.2.3 From e27a2f839598e48bb345f9fc86d4d54c3944db50 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Mon, 20 Jan 2014 13:59:20 +0200 Subject: net: Export gro_find_by_type helpers Export the gro_find_receive/complete_by_type helpers so they can be invoked by the gro callbacks of encapsulation protocols such as vxlan. Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- net/core/dev.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index da92305c344f..d89931bae25b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3984,6 +3984,7 @@ struct packet_offload *gro_find_receive_by_type(__be16 type) { } return NULL; } +EXPORT_SYMBOL(gro_find_receive_by_type); struct packet_offload *gro_find_complete_by_type(__be16 type) { @@ -3997,6 +3998,7 @@ struct packet_offload *gro_find_complete_by_type(__be16 type) } return NULL; } +EXPORT_SYMBOL(gro_find_complete_by_type); static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) { -- cgit v1.2.3
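To show how the helpers exported above are meant to be consumed, here is a minimal sketch of a gro_receive callback for a hypothetical encapsulation offload, modeled on the GRE GRO support described earlier in this log. The "myencap" names and the header layout are illustrative assumptions; only gro_find_receive_by_type(), the packet_offload callbacks, NAPI_GRO_CB() and the skb_gro_* accessors come from the kernel code shown in the patches above.

/* Sketch of a gro_receive callback for a hypothetical "myencap" protocol.
 * It parses a fixed-size encap header, looks up the inner protocol's GRO
 * handler with gro_find_receive_by_type(), and chains into it. A real
 * implementation must also clear NAPI_GRO_CB(p)->same_flow for held
 * packets whose encap headers differ (see the GRO-prep commit above);
 * that step is omitted here for brevity.
 */
struct myencap_hdr {			/* hypothetical 4-byte header */
	__be16 flags;
	__be16 protocol;		/* inner ethertype, e.g. htons(ETH_P_IP) */
};

static struct sk_buff **myencap_gro_receive(struct sk_buff **head,
					    struct sk_buff *skb)
{
	struct sk_buff **pp = NULL;
	struct packet_offload *ptype;
	struct myencap_hdr *eh;
	unsigned int hlen, off;
	int flush = 1;

	off = skb_gro_offset(skb);
	hlen = off + sizeof(*eh);
	eh = skb_gro_header_fast(skb, off);
	if (skb_gro_header_hard(skb, hlen)) {
		eh = skb_gro_header_slow(skb, hlen, off);
		if (unlikely(!eh))
			goto out;
	}

	rcu_read_lock();
	ptype = gro_find_receive_by_type(eh->protocol);
	if (!ptype)
		goto out_unlock;	/* no inner handler: flush to the stack */

	flush = 0;
	skb_gro_pull(skb, sizeof(*eh));	/* step over the encap header */
	pp = ptype->callbacks.gro_receive(head, skb);

out_unlock:
	rcu_read_unlock();
out:
	NAPI_GRO_CB(skb)->flush |= flush;
	return pp;
}

The matching gro_complete callback would use gro_find_complete_by_type() in the same way to hand the merged skb back to the inner protocol once aggregation is finished.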