From 31278e71471399beaff9280737e52b47db4dc345 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 17 Jun 2009 01:12:19 +0000 Subject: net: group address list and its count This patch is inspired by patch recently posted by Johannes Berg. Basically what my patch does is to group list and a count of addresses into newly introduced structure netdev_hw_addr_list. This brings us two benefits: 1) struct net_device becames a bit nicer. 2) in the future there will be a possibility to operate with lists independently on netdevices (with exporting right functions). I wanted to introduce this patch before I'll post a multicast lists conversion. Signed-off-by: Jiri Pirko drivers/net/bnx2.c | 4 +- drivers/net/e1000/e1000_main.c | 4 +- drivers/net/ixgbe/ixgbe_main.c | 6 +- drivers/net/mv643xx_eth.c | 2 +- drivers/net/niu.c | 4 +- drivers/net/virtio_net.c | 10 ++-- drivers/s390/net/qeth_l2_main.c | 2 +- include/linux/netdevice.h | 17 +++-- net/core/dev.c | 130 ++++++++++++++++++-------------------- 9 files changed, 89 insertions(+), 90 deletions(-) Signed-off-by: David S. Miller --- net/core/dev.c | 130 +++++++++++++++++++++++++++------------------------------ 1 file changed, 62 insertions(+), 68 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 576a61574a93..baf2dc13a34a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3461,10 +3461,10 @@ void __dev_set_rx_mode(struct net_device *dev) /* Unicast addresses changes may only happen under the rtnl, * therefore calling __dev_set_promiscuity here is safe. */ - if (dev->uc_count > 0 && !dev->uc_promisc) { + if (dev->uc.count > 0 && !dev->uc_promisc) { __dev_set_promiscuity(dev, 1); dev->uc_promisc = 1; - } else if (dev->uc_count == 0 && dev->uc_promisc) { + } else if (dev->uc.count == 0 && dev->uc_promisc) { __dev_set_promiscuity(dev, -1); dev->uc_promisc = 0; } @@ -3483,9 +3483,8 @@ void dev_set_rx_mode(struct net_device *dev) /* hw addresses list handling functions */ -static int __hw_addr_add(struct list_head *list, int *delta, - unsigned char *addr, int addr_len, - unsigned char addr_type) +static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr, + int addr_len, unsigned char addr_type) { struct netdev_hw_addr *ha; int alloc_size; @@ -3493,7 +3492,7 @@ static int __hw_addr_add(struct list_head *list, int *delta, if (addr_len > MAX_ADDR_LEN) return -EINVAL; - list_for_each_entry(ha, list, list) { + list_for_each_entry(ha, &list->list, list) { if (!memcmp(ha->addr, addr, addr_len) && ha->type == addr_type) { ha->refcount++; @@ -3512,9 +3511,8 @@ static int __hw_addr_add(struct list_head *list, int *delta, ha->type = addr_type; ha->refcount = 1; ha->synced = false; - list_add_tail_rcu(&ha->list, list); - if (delta) - (*delta)++; + list_add_tail_rcu(&ha->list, &list->list); + list->count++; return 0; } @@ -3526,120 +3524,121 @@ static void ha_rcu_free(struct rcu_head *head) kfree(ha); } -static int __hw_addr_del(struct list_head *list, int *delta, - unsigned char *addr, int addr_len, - unsigned char addr_type) +static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr, + int addr_len, unsigned char addr_type) { struct netdev_hw_addr *ha; - list_for_each_entry(ha, list, list) { + list_for_each_entry(ha, &list->list, list) { if (!memcmp(ha->addr, addr, addr_len) && (ha->type == addr_type || !addr_type)) { if (--ha->refcount) return 0; list_del_rcu(&ha->list); call_rcu(&ha->rcu_head, ha_rcu_free); - if (delta) - (*delta)--; + list->count--; return 0; } } return -ENOENT; } -static int __hw_addr_add_multiple(struct list_head *to_list, int *to_delta, - struct list_head *from_list, int addr_len, +static int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list, + struct netdev_hw_addr_list *from_list, + int addr_len, unsigned char addr_type) { int err; struct netdev_hw_addr *ha, *ha2; unsigned char type; - list_for_each_entry(ha, from_list, list) { + list_for_each_entry(ha, &from_list->list, list) { type = addr_type ? addr_type : ha->type; - err = __hw_addr_add(to_list, to_delta, ha->addr, - addr_len, type); + err = __hw_addr_add(to_list, ha->addr, addr_len, type); if (err) goto unroll; } return 0; unroll: - list_for_each_entry(ha2, from_list, list) { + list_for_each_entry(ha2, &from_list->list, list) { if (ha2 == ha) break; type = addr_type ? addr_type : ha2->type; - __hw_addr_del(to_list, to_delta, ha2->addr, - addr_len, type); + __hw_addr_del(to_list, ha2->addr, addr_len, type); } return err; } -static void __hw_addr_del_multiple(struct list_head *to_list, int *to_delta, - struct list_head *from_list, int addr_len, +static void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list, + struct netdev_hw_addr_list *from_list, + int addr_len, unsigned char addr_type) { struct netdev_hw_addr *ha; unsigned char type; - list_for_each_entry(ha, from_list, list) { + list_for_each_entry(ha, &from_list->list, list) { type = addr_type ? addr_type : ha->type; - __hw_addr_del(to_list, to_delta, ha->addr, - addr_len, addr_type); + __hw_addr_del(to_list, ha->addr, addr_len, addr_type); } } -static int __hw_addr_sync(struct list_head *to_list, int *to_delta, - struct list_head *from_list, int *from_delta, +static int __hw_addr_sync(struct netdev_hw_addr_list *to_list, + struct netdev_hw_addr_list *from_list, int addr_len) { int err = 0; struct netdev_hw_addr *ha, *tmp; - list_for_each_entry_safe(ha, tmp, from_list, list) { + list_for_each_entry_safe(ha, tmp, &from_list->list, list) { if (!ha->synced) { - err = __hw_addr_add(to_list, to_delta, ha->addr, + err = __hw_addr_add(to_list, ha->addr, addr_len, ha->type); if (err) break; ha->synced = true; ha->refcount++; } else if (ha->refcount == 1) { - __hw_addr_del(to_list, to_delta, ha->addr, - addr_len, ha->type); - __hw_addr_del(from_list, from_delta, ha->addr, - addr_len, ha->type); + __hw_addr_del(to_list, ha->addr, addr_len, ha->type); + __hw_addr_del(from_list, ha->addr, addr_len, ha->type); } } return err; } -static void __hw_addr_unsync(struct list_head *to_list, int *to_delta, - struct list_head *from_list, int *from_delta, +static void __hw_addr_unsync(struct netdev_hw_addr_list *to_list, + struct netdev_hw_addr_list *from_list, int addr_len) { struct netdev_hw_addr *ha, *tmp; - list_for_each_entry_safe(ha, tmp, from_list, list) { + list_for_each_entry_safe(ha, tmp, &from_list->list, list) { if (ha->synced) { - __hw_addr_del(to_list, to_delta, ha->addr, + __hw_addr_del(to_list, ha->addr, addr_len, ha->type); ha->synced = false; - __hw_addr_del(from_list, from_delta, ha->addr, + __hw_addr_del(from_list, ha->addr, addr_len, ha->type); } } } - -static void __hw_addr_flush(struct list_head *list) +static void __hw_addr_flush(struct netdev_hw_addr_list *list) { struct netdev_hw_addr *ha, *tmp; - list_for_each_entry_safe(ha, tmp, list, list) { + list_for_each_entry_safe(ha, tmp, &list->list, list) { list_del_rcu(&ha->list); call_rcu(&ha->rcu_head, ha_rcu_free); } + list->count = 0; +} + +static void __hw_addr_init(struct netdev_hw_addr_list *list) +{ + INIT_LIST_HEAD(&list->list); + list->count = 0; } /* Device addresses handling functions */ @@ -3648,7 +3647,7 @@ static void dev_addr_flush(struct net_device *dev) { /* rtnl_mutex must be held here */ - __hw_addr_flush(&dev->dev_addr_list); + __hw_addr_flush(&dev->dev_addrs); dev->dev_addr = NULL; } @@ -3660,16 +3659,16 @@ static int dev_addr_init(struct net_device *dev) /* rtnl_mutex must be held here */ - INIT_LIST_HEAD(&dev->dev_addr_list); + __hw_addr_init(&dev->dev_addrs); memset(addr, 0, sizeof(addr)); - err = __hw_addr_add(&dev->dev_addr_list, NULL, addr, sizeof(addr), + err = __hw_addr_add(&dev->dev_addrs, addr, sizeof(addr), NETDEV_HW_ADDR_T_LAN); if (!err) { /* * Get the first (previously created) address from the list * and set dev_addr pointer to this location. */ - ha = list_first_entry(&dev->dev_addr_list, + ha = list_first_entry(&dev->dev_addrs.list, struct netdev_hw_addr, list); dev->dev_addr = ha->addr; } @@ -3694,8 +3693,7 @@ int dev_addr_add(struct net_device *dev, unsigned char *addr, ASSERT_RTNL(); - err = __hw_addr_add(&dev->dev_addr_list, NULL, addr, dev->addr_len, - addr_type); + err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type); if (!err) call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); return err; @@ -3725,11 +3723,12 @@ int dev_addr_del(struct net_device *dev, unsigned char *addr, * We can not remove the first address from the list because * dev->dev_addr points to that. */ - ha = list_first_entry(&dev->dev_addr_list, struct netdev_hw_addr, list); + ha = list_first_entry(&dev->dev_addrs.list, + struct netdev_hw_addr, list); if (ha->addr == dev->dev_addr && ha->refcount == 1) return -ENOENT; - err = __hw_addr_del(&dev->dev_addr_list, NULL, addr, dev->addr_len, + err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len, addr_type); if (!err) call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); @@ -3757,8 +3756,7 @@ int dev_addr_add_multiple(struct net_device *to_dev, if (from_dev->addr_len != to_dev->addr_len) return -EINVAL; - err = __hw_addr_add_multiple(&to_dev->dev_addr_list, NULL, - &from_dev->dev_addr_list, + err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs, to_dev->addr_len, addr_type); if (!err) call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev); @@ -3784,15 +3782,14 @@ int dev_addr_del_multiple(struct net_device *to_dev, if (from_dev->addr_len != to_dev->addr_len) return -EINVAL; - __hw_addr_del_multiple(&to_dev->dev_addr_list, NULL, - &from_dev->dev_addr_list, + __hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs, to_dev->addr_len, addr_type); call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev); return 0; } EXPORT_SYMBOL(dev_addr_del_multiple); -/* unicast and multicast addresses handling functions */ +/* multicast addresses handling functions */ int __dev_addr_delete(struct dev_addr_list **list, int *count, void *addr, int alen, int glbl) @@ -3868,8 +3865,8 @@ int dev_unicast_delete(struct net_device *dev, void *addr) ASSERT_RTNL(); - err = __hw_addr_del(&dev->uc_list, &dev->uc_count, addr, - dev->addr_len, NETDEV_HW_ADDR_T_UNICAST); + err = __hw_addr_del(&dev->uc, addr, dev->addr_len, + NETDEV_HW_ADDR_T_UNICAST); if (!err) __dev_set_rx_mode(dev); return err; @@ -3892,8 +3889,8 @@ int dev_unicast_add(struct net_device *dev, void *addr) ASSERT_RTNL(); - err = __hw_addr_add(&dev->uc_list, &dev->uc_count, addr, - dev->addr_len, NETDEV_HW_ADDR_T_UNICAST); + err = __hw_addr_add(&dev->uc, addr, dev->addr_len, + NETDEV_HW_ADDR_T_UNICAST); if (!err) __dev_set_rx_mode(dev); return err; @@ -3966,8 +3963,7 @@ int dev_unicast_sync(struct net_device *to, struct net_device *from) if (to->addr_len != from->addr_len) return -EINVAL; - err = __hw_addr_sync(&to->uc_list, &to->uc_count, - &from->uc_list, &from->uc_count, to->addr_len); + err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len); if (!err) __dev_set_rx_mode(to); return err; @@ -3990,8 +3986,7 @@ void dev_unicast_unsync(struct net_device *to, struct net_device *from) if (to->addr_len != from->addr_len) return; - __hw_addr_unsync(&to->uc_list, &to->uc_count, - &from->uc_list, &from->uc_count, to->addr_len); + __hw_addr_unsync(&to->uc, &from->uc, to->addr_len); __dev_set_rx_mode(to); } EXPORT_SYMBOL(dev_unicast_unsync); @@ -4000,15 +3995,14 @@ static void dev_unicast_flush(struct net_device *dev) { /* rtnl_mutex must be held here */ - __hw_addr_flush(&dev->uc_list); - dev->uc_count = 0; + __hw_addr_flush(&dev->uc); } static void dev_unicast_init(struct net_device *dev) { /* rtnl_mutex must be held here */ - INIT_LIST_HEAD(&dev->uc_list); + __hw_addr_init(&dev->uc); } -- cgit v1.2.3 From d55d87fdff8252d0e2f7c28c2d443aee17e9d70f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 22 Jun 2009 02:25:25 +0000 Subject: net: Move rx skb_orphan call to where needed In order to get the tun driver to account packets, we need to be able to receive packets with destructors set. To be on the safe side, I added an skb_orphan call for all protocols by default since some of them (IP in particular) cannot handle receiving packets destructors properly. Now it seems that at least one protocol (CAN) expects to be able to pass skb->sk through the rx path without getting clobbered. So this patch attempts to fix this properly by moving the skb_orphan call to where it's actually needed. In particular, I've added it to skb_set_owner_[rw] which is what most users of skb->destructor call. This is actually an improvement for tun too since it means that we only give back the amount charged to the socket when the skb is passed to another socket that will also be charged accordingly. Signed-off-by: Herbert Xu Tested-by: Oliver Hartkopp Signed-off-by: David S. Miller --- net/core/dev.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index baf2dc13a34a..60b572812278 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2310,8 +2310,6 @@ ncls: if (!skb) goto out; - skb_orphan(skb); - type = skb->protocol; list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { -- cgit v1.2.3 From ff780cd8f2fa928b193554f593b36d1243554212 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 26 Jun 2009 19:27:04 -0700 Subject: gro: Flush GRO packets in napi_disable_pending path When NAPI is disabled while we're in net_rx_action, we end up calling __napi_complete without flushing GRO packets. This is a bug as it would cause the GRO packets to linger, of course it also literally BUGs to catch error like this :) This patch changes it to napi_complete, with the obligatory IRQ reenabling. This should be safe because we've only just disabled IRQs and it does not materially affect the test conditions in between. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/dev.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 60b572812278..70c27e0c7c32 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2823,9 +2823,11 @@ static void net_rx_action(struct softirq_action *h) * move the instance around on the list at-will. */ if (unlikely(work == weight)) { - if (unlikely(napi_disable_pending(n))) - __napi_complete(n); - else + if (unlikely(napi_disable_pending(n))) { + local_irq_enable(); + napi_complete(n); + local_irq_disable(); + } else list_move_tail(&n->poll_list, list); } -- cgit v1.2.3 From a6ac65db2329e7685299666f5f7b6093c7b0f3a0 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 30 Jul 2009 01:06:12 +0000 Subject: net: restore the original spinlock to protect unicast list There is a path when an assetion in dev_unicast_sync() appears. igmp6_group_added -> dev_mc_add -> __dev_set_rx_mode -> -> vlan_dev_set_rx_mode -> dev_unicast_sync Therefore we cannot protect this list with rtnl. This patch restores the original protecting this list with spinlock. Signed-off-by: Jiri Pirko Tested-by: Meelis Roos Signed-off-by: David S. Miller --- net/core/dev.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 70c27e0c7c32..43e61ba7bd95 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3865,10 +3865,12 @@ int dev_unicast_delete(struct net_device *dev, void *addr) ASSERT_RTNL(); + netif_addr_lock_bh(dev); err = __hw_addr_del(&dev->uc, addr, dev->addr_len, NETDEV_HW_ADDR_T_UNICAST); if (!err) __dev_set_rx_mode(dev); + netif_addr_unlock_bh(dev); return err; } EXPORT_SYMBOL(dev_unicast_delete); @@ -3889,10 +3891,12 @@ int dev_unicast_add(struct net_device *dev, void *addr) ASSERT_RTNL(); + netif_addr_lock_bh(dev); err = __hw_addr_add(&dev->uc, addr, dev->addr_len, NETDEV_HW_ADDR_T_UNICAST); if (!err) __dev_set_rx_mode(dev); + netif_addr_unlock_bh(dev); return err; } EXPORT_SYMBOL(dev_unicast_add); @@ -3949,7 +3953,8 @@ void __dev_addr_unsync(struct dev_addr_list **to, int *to_count, * @from: source device * * Add newly added addresses to the destination device and release - * addresses that have no users left. + * addresses that have no users left. The source device must be + * locked by netif_tx_lock_bh. * * This function is intended to be called from the dev->set_rx_mode * function of layered software devices. @@ -3958,14 +3963,14 @@ int dev_unicast_sync(struct net_device *to, struct net_device *from) { int err = 0; - ASSERT_RTNL(); - if (to->addr_len != from->addr_len) return -EINVAL; + netif_addr_lock_bh(to); err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len); if (!err) __dev_set_rx_mode(to); + netif_addr_unlock_bh(to); return err; } EXPORT_SYMBOL(dev_unicast_sync); @@ -3981,28 +3986,30 @@ EXPORT_SYMBOL(dev_unicast_sync); */ void dev_unicast_unsync(struct net_device *to, struct net_device *from) { - ASSERT_RTNL(); - if (to->addr_len != from->addr_len) return; + netif_addr_lock_bh(from); + netif_addr_lock(to); __hw_addr_unsync(&to->uc, &from->uc, to->addr_len); __dev_set_rx_mode(to); + netif_addr_unlock(to); + netif_addr_unlock_bh(from); } EXPORT_SYMBOL(dev_unicast_unsync); static void dev_unicast_flush(struct net_device *dev) { - /* rtnl_mutex must be held here */ - + netif_addr_lock_bh(dev); __hw_addr_flush(&dev->uc); + netif_addr_unlock_bh(dev); } static void dev_unicast_init(struct net_device *dev) { - /* rtnl_mutex must be held here */ - + netif_addr_lock_bh(dev); __hw_addr_init(&dev->uc); + netif_addr_unlock_bh(dev); } -- cgit v1.2.3 From 0bf52b981770cbf006323bab5177f2858a196766 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 4 Aug 2009 21:16:58 +0000 Subject: net: Fix spinlock use in alloc_netdev_mq() -tip testing found this lockdep warning: [ 2.272010] calling net_dev_init+0x0/0x164 @ 1 [ 2.276033] device class 'net': registering [ 2.280191] INFO: trying to register non-static key. [ 2.284005] the code is fine but needs lockdep annotation. [ 2.284005] turning off the locking correctness validator. [ 2.284005] Pid: 1, comm: swapper Not tainted 2.6.31-rc5-tip #1145 [ 2.284005] Call Trace: [ 2.284005] [<7958eb4e>] ? printk+0xf/0x11 [ 2.284005] [<7904f83c>] __lock_acquire+0x11b/0x622 [ 2.284005] [<7908c9b7>] ? alloc_debug_processing+0xf9/0x144 [ 2.284005] [<7904e2be>] ? mark_held_locks+0x3a/0x52 [ 2.284005] [<7908dbc4>] ? kmem_cache_alloc+0xa8/0x13f [ 2.284005] [<7904e475>] ? trace_hardirqs_on_caller+0xa2/0xc3 [ 2.284005] [<7904fdf6>] lock_acquire+0xb3/0xd0 [ 2.284005] [<79489678>] ? alloc_netdev_mq+0xf5/0x1ad [ 2.284005] [<79591514>] _spin_lock_bh+0x2d/0x5d [ 2.284005] [<79489678>] ? alloc_netdev_mq+0xf5/0x1ad [ 2.284005] [<79489678>] alloc_netdev_mq+0xf5/0x1ad [ 2.284005] [<793a38f2>] ? loopback_setup+0x0/0x74 [ 2.284005] [<798eecd0>] loopback_net_init+0x20/0x5d [ 2.284005] [<79483efb>] register_pernet_device+0x23/0x4b [ 2.284005] [<798f5c9f>] net_dev_init+0x115/0x164 [ 2.284005] [<7900104f>] do_one_initcall+0x4a/0x11a [ 2.284005] [<798f5b8a>] ? net_dev_init+0x0/0x164 [ 2.284005] [<79066f6d>] ? register_irq_proc+0x8c/0xa8 [ 2.284005] [<798cc29a>] do_basic_setup+0x42/0x52 [ 2.284005] [<798cc30a>] kernel_init+0x60/0xa1 [ 2.284005] [<798cc2aa>] ? kernel_init+0x0/0xa1 [ 2.284005] [<79003e03>] kernel_thread_helper+0x7/0x10 [ 2.284078] device: 'lo': device_add [ 2.288248] initcall net_dev_init+0x0/0x164 returned 0 after 11718 usecs [ 2.292010] calling neigh_init+0x0/0x66 @ 1 [ 2.296010] initcall neigh_init+0x0/0x66 returned 0 after 0 usecs it's using an zero-initialized spinlock. This is a side-effect of: dev_unicast_init(dev); in alloc_netdev_mq() making use of dev->addr_list_lock. The device has just been allocated freshly, it's not accessible anywhere yet so no locking is needed at all - in fact it's wrong to lock it here (the lock isnt initialized yet). This bug was introduced via: | commit a6ac65db2329e7685299666f5f7b6093c7b0f3a0 | Date: Thu Jul 30 01:06:12 2009 +0000 | | net: restore the original spinlock to protect unicast list Signed-off-by: Ingo Molnar Acked-by: Jiri Pirko Tested-by: Mark Brown Signed-off-by: David S. Miller --- net/core/dev.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 43e61ba7bd95..6a94475aee85 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4007,9 +4007,7 @@ static void dev_unicast_flush(struct net_device *dev) static void dev_unicast_init(struct net_device *dev) { - netif_addr_lock_bh(dev); __hw_addr_init(&dev->uc); - netif_addr_unlock_bh(dev); } -- cgit v1.2.3