diff options
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/Makefile | 1 | ||||
-rw-r--r-- | net/core/datagram.c | 87 | ||||
-rw-r--r-- | net/core/dev.c | 108 | ||||
-rw-r--r-- | net/core/dst.c | 3 | ||||
-rw-r--r-- | net/core/ethtool.c | 102 | ||||
-rw-r--r-- | net/core/filter.c | 6 | ||||
-rw-r--r-- | net/core/flow.c | 2 | ||||
-rw-r--r-- | net/core/neighbour.c | 91 | ||||
-rw-r--r-- | net/core/netfilter.c | 648 | ||||
-rw-r--r-- | net/core/netpoll.c | 69 | ||||
-rw-r--r-- | net/core/pktgen.c | 531 | ||||
-rw-r--r-- | net/core/request_sock.c | 28 | ||||
-rw-r--r-- | net/core/rtnetlink.c | 9 | ||||
-rw-r--r-- | net/core/skbuff.c | 248 | ||||
-rw-r--r-- | net/core/sock.c | 172 | ||||
-rw-r--r-- | net/core/sysctl_net_core.c | 9 | ||||
-rw-r--r-- | net/core/utils.c | 2 | ||||
-rw-r--r-- | net/core/wireless.c | 71 |
18 files changed, 930 insertions, 1257 deletions
diff --git a/net/core/Makefile b/net/core/Makefile index f5f5e58943e8..630da0f0579e 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -12,7 +12,6 @@ obj-y += dev.o ethtool.o dev_mcast.o dst.o \ obj-$(CONFIG_XFRM) += flow.o obj-$(CONFIG_SYSFS) += net-sysfs.o -obj-$(CONFIG_NETFILTER) += netfilter.o obj-$(CONFIG_NET_DIVERT) += dv.o obj-$(CONFIG_NET_PKTGEN) += pktgen.o obj-$(CONFIG_NET_RADIO) += wireless.o diff --git a/net/core/datagram.c b/net/core/datagram.c index fcee054b6f75..d219435d086c 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -43,7 +43,6 @@ #include <linux/errno.h> #include <linux/sched.h> #include <linux/inet.h> -#include <linux/tcp.h> #include <linux/netdevice.h> #include <linux/rtnetlink.h> #include <linux/poll.h> @@ -51,9 +50,10 @@ #include <net/protocol.h> #include <linux/skbuff.h> -#include <net/sock.h> -#include <net/checksum.h> +#include <net/checksum.h> +#include <net/sock.h> +#include <net/tcp_states.h> /* * Is a socket 'connection oriented' ? @@ -211,74 +211,49 @@ void skb_free_datagram(struct sock *sk, struct sk_buff *skb) int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset, struct iovec *to, int len) { - int start = skb_headlen(skb); - int i, copy = start - offset; + int i, err, fraglen, end = 0; + struct sk_buff *next = skb_shinfo(skb)->frag_list; - /* Copy header. */ - if (copy > 0) { - if (copy > len) - copy = len; - if (memcpy_toiovec(to, skb->data + offset, copy)) - goto fault; - if ((len -= copy) == 0) - return 0; - offset += copy; - } + if (!len) + return 0; - /* Copy paged appendix. Hmm... why does this look so complicated? */ - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - int end; +next_skb: + fraglen = skb_headlen(skb); + i = -1; - BUG_TRAP(start <= offset + len); + while (1) { + int start = end; - end = start + skb_shinfo(skb)->frags[i].size; - if ((copy = end - offset) > 0) { - int err; - u8 *vaddr; - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - struct page *page = frag->page; + if ((end += fraglen) > offset) { + int copy = end - offset, o = offset - start; if (copy > len) copy = len; - vaddr = kmap(page); - err = memcpy_toiovec(to, vaddr + frag->page_offset + - offset - start, copy); - kunmap(page); + if (i == -1) + err = memcpy_toiovec(to, skb->data + o, copy); + else { + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + struct page *page = frag->page; + void *p = kmap(page) + frag->page_offset + o; + err = memcpy_toiovec(to, p, copy); + kunmap(page); + } if (err) goto fault; if (!(len -= copy)) return 0; offset += copy; } - start = end; + if (++i >= skb_shinfo(skb)->nr_frags) + break; + fraglen = skb_shinfo(skb)->frags[i].size; } - - if (skb_shinfo(skb)->frag_list) { - struct sk_buff *list = skb_shinfo(skb)->frag_list; - - for (; list; list = list->next) { - int end; - - BUG_TRAP(start <= offset + len); - - end = start + list->len; - if ((copy = end - offset) > 0) { - if (copy > len) - copy = len; - if (skb_copy_datagram_iovec(list, - offset - start, - to, copy)) - goto fault; - if ((len -= copy) == 0) - return 0; - offset += copy; - } - start = end; - } + if (next) { + skb = next; + BUG_ON(skb_shinfo(skb)->frag_list); + next = skb->next; + goto next_skb; } - if (!len) - return 0; - fault: return -EFAULT; } diff --git a/net/core/dev.c b/net/core/dev.c index 52a3bf7ae177..8d1541595277 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -267,10 +267,6 @@ void dev_add_pack(struct packet_type *pt) spin_unlock_bh(&ptype_lock); } -extern void linkwatch_run_queue(void); - - - /** * __dev_remove_pack - remove packet handler * @pt: packet type declaration @@ -578,6 +574,8 @@ struct net_device *dev_getbyhwaddr(unsigned short type, char *ha) return dev; } +EXPORT_SYMBOL(dev_getbyhwaddr); + struct net_device *dev_getfirstbyhwtype(unsigned short type) { struct net_device *dev; @@ -1009,13 +1007,22 @@ void net_disable_timestamp(void) atomic_dec(&netstamp_needed); } -static inline void net_timestamp(struct timeval *stamp) +void __net_timestamp(struct sk_buff *skb) +{ + struct timeval tv; + + do_gettimeofday(&tv); + skb_set_timestamp(skb, &tv); +} +EXPORT_SYMBOL(__net_timestamp); + +static inline void net_timestamp(struct sk_buff *skb) { if (atomic_read(&netstamp_needed)) - do_gettimeofday(stamp); + __net_timestamp(skb); else { - stamp->tv_sec = 0; - stamp->tv_usec = 0; + skb->tstamp.off_sec = 0; + skb->tstamp.off_usec = 0; } } @@ -1027,7 +1034,8 @@ static inline void net_timestamp(struct timeval *stamp) void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) { struct packet_type *ptype; - net_timestamp(&skb->stamp); + + net_timestamp(skb); rcu_read_lock(); list_for_each_entry_rcu(ptype, &ptype_all, list) { @@ -1058,7 +1066,7 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) skb2->h.raw = skb2->nh.raw; skb2->pkt_type = PACKET_OUTGOING; - ptype->func(skb2, skb->dev, ptype); + ptype->func(skb2, skb->dev, ptype, skb->dev); } } rcu_read_unlock(); @@ -1123,10 +1131,8 @@ static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb) #define illegal_highdma(dev, skb) (0) #endif -extern void skb_release_data(struct sk_buff *); - /* Keep head the same: replace data */ -int __skb_linearize(struct sk_buff *skb, unsigned int __nocast gfp_mask) +int __skb_linearize(struct sk_buff *skb, gfp_t gfp_mask) { unsigned int size; u8 *data; @@ -1253,6 +1259,8 @@ int dev_queue_xmit(struct sk_buff *skb) if (skb_checksum_help(skb, 0)) goto out_kfree_skb; + spin_lock_prefetch(&dev->queue_lock); + /* Disable soft irqs for various locks below. Also * stops preemption for RCU. */ @@ -1379,8 +1387,8 @@ int netif_rx(struct sk_buff *skb) if (netpoll_rx(skb)) return NET_RX_DROP; - if (!skb->stamp.tv_sec) - net_timestamp(&skb->stamp); + if (!skb->tstamp.off_sec) + net_timestamp(skb); /* * The code is rearranged so that the path is the most @@ -1425,14 +1433,14 @@ int netif_rx_ni(struct sk_buff *skb) EXPORT_SYMBOL(netif_rx_ni); -static __inline__ void skb_bond(struct sk_buff *skb) +static inline struct net_device *skb_bond(struct sk_buff *skb) { struct net_device *dev = skb->dev; - if (dev->master) { - skb->real_dev = skb->dev; + if (dev->master) skb->dev = dev->master; - } + + return dev; } static void net_tx_action(struct softirq_action *h) @@ -1482,10 +1490,11 @@ static void net_tx_action(struct softirq_action *h) } static __inline__ int deliver_skb(struct sk_buff *skb, - struct packet_type *pt_prev) + struct packet_type *pt_prev, + struct net_device *orig_dev) { atomic_inc(&skb->users); - return pt_prev->func(skb, skb->dev, pt_prev); + return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE) @@ -1496,7 +1505,8 @@ struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br, void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent); static __inline__ int handle_bridge(struct sk_buff **pskb, - struct packet_type **pt_prev, int *ret) + struct packet_type **pt_prev, int *ret, + struct net_device *orig_dev) { struct net_bridge_port *port; @@ -1505,14 +1515,14 @@ static __inline__ int handle_bridge(struct sk_buff **pskb, return 0; if (*pt_prev) { - *ret = deliver_skb(*pskb, *pt_prev); + *ret = deliver_skb(*pskb, *pt_prev, orig_dev); *pt_prev = NULL; } return br_handle_frame_hook(port, pskb); } #else -#define handle_bridge(skb, pt_prev, ret) (0) +#define handle_bridge(skb, pt_prev, ret, orig_dev) (0) #endif #ifdef CONFIG_NET_CLS_ACT @@ -1534,17 +1544,14 @@ static int ing_filter(struct sk_buff *skb) __u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd); if (MAX_RED_LOOP < ttl++) { printk("Redir loop detected Dropping packet (%s->%s)\n", - skb->input_dev?skb->input_dev->name:"??",skb->dev->name); + skb->input_dev->name, skb->dev->name); return TC_ACT_SHOT; } skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl); skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS); - if (NULL == skb->input_dev) { - skb->input_dev = skb->dev; - printk("ing_filter: fixed %s out %s\n",skb->input_dev->name,skb->dev->name); - } + spin_lock(&dev->ingress_lock); if ((q = dev->qdisc_ingress) != NULL) result = q->enqueue(skb, q); @@ -1559,6 +1566,7 @@ static int ing_filter(struct sk_buff *skb) int netif_receive_skb(struct sk_buff *skb) { struct packet_type *ptype, *pt_prev; + struct net_device *orig_dev; int ret = NET_RX_DROP; unsigned short type; @@ -1566,10 +1574,13 @@ int netif_receive_skb(struct sk_buff *skb) if (skb->dev->poll && netpoll_rx(skb)) return NET_RX_DROP; - if (!skb->stamp.tv_sec) - net_timestamp(&skb->stamp); + if (!skb->tstamp.off_sec) + net_timestamp(skb); + + if (!skb->input_dev) + skb->input_dev = skb->dev; - skb_bond(skb); + orig_dev = skb_bond(skb); __get_cpu_var(netdev_rx_stat).total++; @@ -1590,14 +1601,14 @@ int netif_receive_skb(struct sk_buff *skb) list_for_each_entry_rcu(ptype, &ptype_all, list) { if (!ptype->dev || ptype->dev == skb->dev) { if (pt_prev) - ret = deliver_skb(skb, pt_prev); + ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = ptype; } } #ifdef CONFIG_NET_CLS_ACT if (pt_prev) { - ret = deliver_skb(skb, pt_prev); + ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = NULL; /* noone else should process this after*/ } else { skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); @@ -1616,7 +1627,7 @@ ncls: handle_diverter(skb); - if (handle_bridge(&skb, &pt_prev, &ret)) + if (handle_bridge(&skb, &pt_prev, &ret, orig_dev)) goto out; type = skb->protocol; @@ -1624,13 +1635,13 @@ ncls: if (ptype->type == type && (!ptype->dev || ptype->dev == skb->dev)) { if (pt_prev) - ret = deliver_skb(skb, pt_prev); + ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = ptype; } } if (pt_prev) { - ret = pt_prev->func(skb, skb->dev, pt_prev); + ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } else { kfree_skb(skb); /* Jamal, now you will not able to escape explaining @@ -1696,7 +1707,8 @@ static void net_rx_action(struct softirq_action *h) struct softnet_data *queue = &__get_cpu_var(softnet_data); unsigned long start_time = jiffies; int budget = netdev_budget; - + void *have; + local_irq_disable(); while (!list_empty(&queue->poll_list)) { @@ -1709,10 +1721,10 @@ static void net_rx_action(struct softirq_action *h) dev = list_entry(queue->poll_list.next, struct net_device, poll_list); - netpoll_poll_lock(dev); + have = netpoll_poll_lock(dev); if (dev->quota <= 0 || dev->poll(dev, &budget)) { - netpoll_poll_unlock(dev); + netpoll_poll_unlock(have); local_irq_disable(); list_del(&dev->poll_list); list_add_tail(&dev->poll_list, &queue->poll_list); @@ -1721,7 +1733,7 @@ static void net_rx_action(struct softirq_action *h) else dev->quota = dev->weight; } else { - netpoll_poll_unlock(dev); + netpoll_poll_unlock(have); dev_put(dev); local_irq_disable(); } @@ -2705,6 +2717,20 @@ int register_netdevice(struct net_device *dev) dev->name); dev->features &= ~NETIF_F_TSO; } + if (dev->features & NETIF_F_UFO) { + if (!(dev->features & NETIF_F_HW_CSUM)) { + printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no " + "NETIF_F_HW_CSUM feature.\n", + dev->name); + dev->features &= ~NETIF_F_UFO; + } + if (!(dev->features & NETIF_F_SG)) { + printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no " + "NETIF_F_SG feature.\n", + dev->name); + dev->features &= ~NETIF_F_UFO; + } + } /* * nil rebuild_header routine, diff --git a/net/core/dst.c b/net/core/dst.c index 334790da9f16..470c05bc4cb2 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -39,8 +39,7 @@ static unsigned long dst_gc_timer_inc = DST_GC_MAX; static void dst_run_gc(unsigned long); static void ___dst_free(struct dst_entry * dst); -static struct timer_list dst_gc_timer = - TIMER_INITIALIZER(dst_run_gc, DST_GC_MIN, 0); +static DEFINE_TIMER(dst_gc_timer, dst_run_gc, DST_GC_MIN, 0); static void dst_run_gc(unsigned long dummy) { diff --git a/net/core/ethtool.c b/net/core/ethtool.c index a3eeb88e1c81..0350586e9195 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -81,6 +81,32 @@ int ethtool_op_set_tso(struct net_device *dev, u32 data) return 0; } +int ethtool_op_get_perm_addr(struct net_device *dev, struct ethtool_perm_addr *addr, u8 *data) +{ + unsigned char len = dev->addr_len; + if ( addr->size < len ) + return -ETOOSMALL; + + addr->size = len; + memcpy(data, dev->perm_addr, len); + return 0; +} + + +u32 ethtool_op_get_ufo(struct net_device *dev) +{ + return (dev->features & NETIF_F_UFO) != 0; +} + +int ethtool_op_set_ufo(struct net_device *dev, u32 data) +{ + if (data) + dev->features |= NETIF_F_UFO; + else + dev->features &= ~NETIF_F_UFO; + return 0; +} + /* Handlers for each ethtool command */ static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) @@ -471,6 +497,11 @@ static int __ethtool_set_sg(struct net_device *dev, u32 data) return err; } + if (!data && dev->ethtool_ops->set_ufo) { + err = dev->ethtool_ops->set_ufo(dev, 0); + if (err) + return err; + } return dev->ethtool_ops->set_sg(dev, data); } @@ -557,6 +588,32 @@ static int ethtool_set_tso(struct net_device *dev, char __user *useraddr) return dev->ethtool_ops->set_tso(dev, edata.data); } +static int ethtool_get_ufo(struct net_device *dev, char __user *useraddr) +{ + struct ethtool_value edata = { ETHTOOL_GTSO }; + + if (!dev->ethtool_ops->get_ufo) + return -EOPNOTSUPP; + edata.data = dev->ethtool_ops->get_ufo(dev); + if (copy_to_user(useraddr, &edata, sizeof(edata))) + return -EFAULT; + return 0; +} +static int ethtool_set_ufo(struct net_device *dev, char __user *useraddr) +{ + struct ethtool_value edata; + + if (!dev->ethtool_ops->set_ufo) + return -EOPNOTSUPP; + if (copy_from_user(&edata, useraddr, sizeof(edata))) + return -EFAULT; + if (edata.data && !(dev->features & NETIF_F_SG)) + return -EINVAL; + if (edata.data && !(dev->features & NETIF_F_HW_CSUM)) + return -EINVAL; + return dev->ethtool_ops->set_ufo(dev, edata.data); +} + static int ethtool_self_test(struct net_device *dev, char __user *useraddr) { struct ethtool_test test; @@ -683,6 +740,39 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr) return ret; } +static int ethtool_get_perm_addr(struct net_device *dev, void __user *useraddr) +{ + struct ethtool_perm_addr epaddr; + u8 *data; + int ret; + + if (!dev->ethtool_ops->get_perm_addr) + return -EOPNOTSUPP; + + if (copy_from_user(&epaddr,useraddr,sizeof(epaddr))) + return -EFAULT; + + data = kmalloc(epaddr.size, GFP_USER); + if (!data) + return -ENOMEM; + + ret = dev->ethtool_ops->get_perm_addr(dev,&epaddr,data); + if (ret) + return ret; + + ret = -EFAULT; + if (copy_to_user(useraddr, &epaddr, sizeof(epaddr))) + goto out; + useraddr += sizeof(epaddr); + if (copy_to_user(useraddr, data, epaddr.size)) + goto out; + ret = 0; + + out: + kfree(data); + return ret; +} + /* The main entry point in this file. Called from net/core/dev.c */ int dev_ethtool(struct ifreq *ifr) @@ -806,6 +896,15 @@ int dev_ethtool(struct ifreq *ifr) case ETHTOOL_GSTATS: rc = ethtool_get_stats(dev, useraddr); break; + case ETHTOOL_GPERMADDR: + rc = ethtool_get_perm_addr(dev, useraddr); + break; + case ETHTOOL_GUFO: + rc = ethtool_get_ufo(dev, useraddr); + break; + case ETHTOOL_SUFO: + rc = ethtool_set_ufo(dev, useraddr); + break; default: rc = -EOPNOTSUPP; } @@ -826,6 +925,7 @@ int dev_ethtool(struct ifreq *ifr) EXPORT_SYMBOL(dev_ethtool); EXPORT_SYMBOL(ethtool_op_get_link); +EXPORT_SYMBOL_GPL(ethtool_op_get_perm_addr); EXPORT_SYMBOL(ethtool_op_get_sg); EXPORT_SYMBOL(ethtool_op_get_tso); EXPORT_SYMBOL(ethtool_op_get_tx_csum); @@ -833,3 +933,5 @@ EXPORT_SYMBOL(ethtool_op_set_sg); EXPORT_SYMBOL(ethtool_op_set_tso); EXPORT_SYMBOL(ethtool_op_set_tx_csum); EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum); +EXPORT_SYMBOL(ethtool_op_set_ufo); +EXPORT_SYMBOL(ethtool_op_get_ufo); diff --git a/net/core/filter.c b/net/core/filter.c index cd91a24f9720..079c2edff789 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -182,7 +182,7 @@ int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen) A = ntohl(*(u32 *)ptr); continue; } - return 0; + break; case BPF_LD|BPF_H|BPF_ABS: k = fentry->k; load_h: @@ -191,7 +191,7 @@ int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen) A = ntohs(*(u16 *)ptr); continue; } - return 0; + break; case BPF_LD|BPF_B|BPF_ABS: k = fentry->k; load_b: @@ -200,7 +200,7 @@ load_b: A = *(u8 *)ptr; continue; } - return 0; + break; case BPF_LD|BPF_W|BPF_LEN: A = skb->len; continue; diff --git a/net/core/flow.c b/net/core/flow.c index f289570b15a3..7e95b39de9fd 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -42,7 +42,7 @@ static DEFINE_PER_CPU(struct flow_cache_entry **, flow_tables) = { NULL }; #define flow_table(cpu) (per_cpu(flow_tables, cpu)) -static kmem_cache_t *flow_cachep; +static kmem_cache_t *flow_cachep __read_mostly; static int flow_lwm, flow_hwm; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 1beb782ac41b..e68700f950a5 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -61,7 +61,9 @@ static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev); void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev); static struct neigh_table *neigh_tables; +#ifdef CONFIG_PROC_FS static struct file_operations neigh_stat_seq_fops; +#endif /* Neighbour hash table buckets are protected with rwlock tbl->lock. @@ -173,39 +175,10 @@ static void pneigh_queue_purge(struct sk_buff_head *list) } } -void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev) +static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev) { int i; - write_lock_bh(&tbl->lock); - - for (i=0; i <= tbl->hash_mask; i++) { - struct neighbour *n, **np; - - np = &tbl->hash_buckets[i]; - while ((n = *np) != NULL) { - if (dev && n->dev != dev) { - np = &n->next; - continue; - } - *np = n->next; - write_lock_bh(&n->lock); - n->dead = 1; - neigh_del_timer(n); - write_unlock_bh(&n->lock); - neigh_release(n); - } - } - - write_unlock_bh(&tbl->lock); -} - -int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev) -{ - int i; - - write_lock_bh(&tbl->lock); - for (i = 0; i <= tbl->hash_mask; i++) { struct neighbour *n, **np = &tbl->hash_buckets[i]; @@ -241,7 +214,19 @@ int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev) neigh_release(n); } } +} + +void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev) +{ + write_lock_bh(&tbl->lock); + neigh_flush_dev(tbl, dev); + write_unlock_bh(&tbl->lock); +} +int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev) +{ + write_lock_bh(&tbl->lock); + neigh_flush_dev(tbl, dev); pneigh_ifdown(tbl, dev); write_unlock_bh(&tbl->lock); @@ -725,6 +710,14 @@ static __inline__ int neigh_max_probes(struct neighbour *n) p->ucast_probes + p->app_probes + p->mcast_probes); } +static inline void neigh_add_timer(struct neighbour *n, unsigned long when) +{ + if (unlikely(mod_timer(&n->timer, when))) { + printk("NEIGH: BUG, double timer add, state is %x\n", + n->nud_state); + dump_stack(); + } +} /* Called when a timer expires for a neighbour entry. */ @@ -806,11 +799,10 @@ static void neigh_timer_handler(unsigned long arg) } if (neigh->nud_state & NUD_IN_TIMER) { - neigh_hold(neigh); if (time_before(next, jiffies + HZ/2)) next = jiffies + HZ/2; - neigh->timer.expires = next; - add_timer(&neigh->timer); + if (!mod_timer(&neigh->timer, next)) + neigh_hold(neigh); } if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) { struct sk_buff *skb = skb_peek(&neigh->arp_queue); @@ -852,8 +844,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) atomic_set(&neigh->probes, neigh->parms->ucast_probes); neigh->nud_state = NUD_INCOMPLETE; neigh_hold(neigh); - neigh->timer.expires = now + 1; - add_timer(&neigh->timer); + neigh_add_timer(neigh, now + 1); } else { neigh->nud_state = NUD_FAILED; write_unlock_bh(&neigh->lock); @@ -866,8 +857,8 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) NEIGH_PRINTK2("neigh %p is delayed.\n", neigh); neigh_hold(neigh); neigh->nud_state = NUD_DELAY; - neigh->timer.expires = jiffies + neigh->parms->delay_probe_time; - add_timer(&neigh->timer); + neigh_add_timer(neigh, + jiffies + neigh->parms->delay_probe_time); } if (neigh->nud_state == NUD_INCOMPLETE) { @@ -1013,10 +1004,10 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, neigh_del_timer(neigh); if (new & NUD_IN_TIMER) { neigh_hold(neigh); - neigh->timer.expires = jiffies + + neigh_add_timer(neigh, (jiffies + ((new & NUD_REACHABLE) ? - neigh->parms->reachable_time : 0); - add_timer(&neigh->timer); + neigh->parms->reachable_time : + 0))); } neigh->nud_state = new; } @@ -1217,7 +1208,7 @@ static void neigh_proxy_process(unsigned long arg) while (skb != (struct sk_buff *)&tbl->proxy_queue) { struct sk_buff *back = skb; - long tdif = back->stamp.tv_usec - now; + long tdif = NEIGH_CB(back)->sched_next - now; skb = skb->next; if (tdif <= 0) { @@ -1248,8 +1239,9 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, kfree_skb(skb); return; } - skb->stamp.tv_sec = LOCALLY_ENQUEUED; - skb->stamp.tv_usec = sched_next; + + NEIGH_CB(skb)->sched_next = sched_next; + NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED; spin_lock(&tbl->proxy_queue.lock); if (del_timer(&tbl->proxy_timer)) { @@ -1633,12 +1625,9 @@ static int neightbl_fill_info(struct neigh_table *tbl, struct sk_buff *skb, memset(&ndst, 0, sizeof(ndst)); - for (cpu = 0; cpu < NR_CPUS; cpu++) { + for_each_cpu(cpu) { struct neigh_statistics *st; - if (!cpu_possible(cpu)) - continue; - st = per_cpu_ptr(tbl->stats, cpu); ndst.ndts_allocs += st->allocs; ndst.ndts_destroys += st->destroys; @@ -2342,8 +2331,8 @@ void neigh_app_ns(struct neighbour *n) } nlh = (struct nlmsghdr *)skb->data; nlh->nlmsg_flags = NLM_F_REQUEST; - NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH; - netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC); + NETLINK_CB(skb).dst_group = RTNLGRP_NEIGH; + netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC); } static void neigh_app_notify(struct neighbour *n) @@ -2360,8 +2349,8 @@ static void neigh_app_notify(struct neighbour *n) return; } nlh = (struct nlmsghdr *)skb->data; - NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH; - netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC); + NETLINK_CB(skb).dst_group = RTNLGRP_NEIGH; + netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC); } #endif /* CONFIG_ARPD */ diff --git a/net/core/netfilter.c b/net/core/netfilter.c deleted file mode 100644 index 076c156d5eda..000000000000 --- a/net/core/netfilter.c +++ /dev/null @@ -1,648 +0,0 @@ -/* netfilter.c: look after the filters for various protocols. - * Heavily influenced by the old firewall.c by David Bonn and Alan Cox. - * - * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any - * way. - * - * Rusty Russell (C)2000 -- This code is GPL. - * - * February 2000: Modified by James Morris to have 1 queue per protocol. - * 15-Mar-2000: Added NF_REPEAT --RR. - * 08-May-2003: Internal logging interface added by Jozsef Kadlecsik. - */ -#include <linux/config.h> -#include <linux/kernel.h> -#include <linux/netfilter.h> -#include <net/protocol.h> -#include <linux/init.h> -#include <linux/skbuff.h> -#include <linux/wait.h> -#include <linux/module.h> -#include <linux/interrupt.h> -#include <linux/if.h> -#include <linux/netdevice.h> -#include <linux/inetdevice.h> -#include <linux/tcp.h> -#include <linux/udp.h> -#include <linux/icmp.h> -#include <net/sock.h> -#include <net/route.h> -#include <linux/ip.h> - -/* In this code, we can be waiting indefinitely for userspace to - * service a packet if a hook returns NF_QUEUE. We could keep a count - * of skbuffs queued for userspace, and not deregister a hook unless - * this is zero, but that sucks. Now, we simply check when the - * packets come back: if the hook is gone, the packet is discarded. */ -#ifdef CONFIG_NETFILTER_DEBUG -#define NFDEBUG(format, args...) printk(format , ## args) -#else -#define NFDEBUG(format, args...) -#endif - -/* Sockopts only registered and called from user context, so - net locking would be overkill. Also, [gs]etsockopt calls may - sleep. */ -static DECLARE_MUTEX(nf_sockopt_mutex); - -struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS]; -static LIST_HEAD(nf_sockopts); -static DEFINE_SPINLOCK(nf_hook_lock); - -/* - * A queue handler may be registered for each protocol. Each is protected by - * long term mutex. The handler must provide an an outfn() to accept packets - * for queueing and must reinject all packets it receives, no matter what. - */ -static struct nf_queue_handler_t { - nf_queue_outfn_t outfn; - void *data; -} queue_handler[NPROTO]; -static DEFINE_RWLOCK(queue_handler_lock); - -int nf_register_hook(struct nf_hook_ops *reg) -{ - struct list_head *i; - - spin_lock_bh(&nf_hook_lock); - list_for_each(i, &nf_hooks[reg->pf][reg->hooknum]) { - if (reg->priority < ((struct nf_hook_ops *)i)->priority) - break; - } - list_add_rcu(®->list, i->prev); - spin_unlock_bh(&nf_hook_lock); - - synchronize_net(); - return 0; -} - -void nf_unregister_hook(struct nf_hook_ops *reg) -{ - spin_lock_bh(&nf_hook_lock); - list_del_rcu(®->list); - spin_unlock_bh(&nf_hook_lock); - - synchronize_net(); -} - -/* Do exclusive ranges overlap? */ -static inline int overlap(int min1, int max1, int min2, int max2) -{ - return max1 > min2 && min1 < max2; -} - -/* Functions to register sockopt ranges (exclusive). */ -int nf_register_sockopt(struct nf_sockopt_ops *reg) -{ - struct list_head *i; - int ret = 0; - - if (down_interruptible(&nf_sockopt_mutex) != 0) - return -EINTR; - - list_for_each(i, &nf_sockopts) { - struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i; - if (ops->pf == reg->pf - && (overlap(ops->set_optmin, ops->set_optmax, - reg->set_optmin, reg->set_optmax) - || overlap(ops->get_optmin, ops->get_optmax, - reg->get_optmin, reg->get_optmax))) { - NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n", - ops->set_optmin, ops->set_optmax, - ops->get_optmin, ops->get_optmax, - reg->set_optmin, reg->set_optmax, - reg->get_optmin, reg->get_optmax); - ret = -EBUSY; - goto out; - } - } - - list_add(®->list, &nf_sockopts); -out: - up(&nf_sockopt_mutex); - return ret; -} - -void nf_unregister_sockopt(struct nf_sockopt_ops *reg) -{ - /* No point being interruptible: we're probably in cleanup_module() */ - restart: - down(&nf_sockopt_mutex); - if (reg->use != 0) { - /* To be woken by nf_sockopt call... */ - /* FIXME: Stuart Young's name appears gratuitously. */ - set_current_state(TASK_UNINTERRUPTIBLE); - reg->cleanup_task = current; - up(&nf_sockopt_mutex); - schedule(); - goto restart; - } - list_del(®->list); - up(&nf_sockopt_mutex); -} - -/* Call get/setsockopt() */ -static int nf_sockopt(struct sock *sk, int pf, int val, - char __user *opt, int *len, int get) -{ - struct list_head *i; - struct nf_sockopt_ops *ops; - int ret; - - if (down_interruptible(&nf_sockopt_mutex) != 0) - return -EINTR; - - list_for_each(i, &nf_sockopts) { - ops = (struct nf_sockopt_ops *)i; - if (ops->pf == pf) { - if (get) { - if (val >= ops->get_optmin - && val < ops->get_optmax) { - ops->use++; - up(&nf_sockopt_mutex); - ret = ops->get(sk, val, opt, len); - goto out; - } - } else { - if (val >= ops->set_optmin - && val < ops->set_optmax) { - ops->use++; - up(&nf_sockopt_mutex); - ret = ops->set(sk, val, opt, *len); - goto out; - } - } - } - } - up(&nf_sockopt_mutex); - return -ENOPROTOOPT; - - out: - down(&nf_sockopt_mutex); - ops->use--; - if (ops->cleanup_task) - wake_up_process(ops->cleanup_task); - up(&nf_sockopt_mutex); - return ret; -} - -int nf_setsockopt(struct sock *sk, int pf, int val, char __user *opt, - int len) -{ - return nf_sockopt(sk, pf, val, opt, &len, 0); -} - -int nf_getsockopt(struct sock *sk, int pf, int val, char __user *opt, int *len) -{ - return nf_sockopt(sk, pf, val, opt, len, 1); -} - -static unsigned int nf_iterate(struct list_head *head, - struct sk_buff **skb, - int hook, - const struct net_device *indev, - const struct net_device *outdev, - struct list_head **i, - int (*okfn)(struct sk_buff *), - int hook_thresh) -{ - unsigned int verdict; - - /* - * The caller must not block between calls to this - * function because of risk of continuing from deleted element. - */ - list_for_each_continue_rcu(*i, head) { - struct nf_hook_ops *elem = (struct nf_hook_ops *)*i; - - if (hook_thresh > elem->priority) - continue; - - /* Optimization: we don't need to hold module - reference here, since function can't sleep. --RR */ - verdict = elem->hook(hook, skb, indev, outdev, okfn); - if (verdict != NF_ACCEPT) { -#ifdef CONFIG_NETFILTER_DEBUG - if (unlikely(verdict > NF_MAX_VERDICT)) { - NFDEBUG("Evil return from %p(%u).\n", - elem->hook, hook); - continue; - } -#endif - if (verdict != NF_REPEAT) - return verdict; - *i = (*i)->prev; - } - } - return NF_ACCEPT; -} - -int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data) -{ - int ret; - - write_lock_bh(&queue_handler_lock); - if (queue_handler[pf].outfn) - ret = -EBUSY; - else { - queue_handler[pf].outfn = outfn; - queue_handler[pf].data = data; - ret = 0; - } - write_unlock_bh(&queue_handler_lock); - - return ret; -} - -/* The caller must flush their queue before this */ -int nf_unregister_queue_handler(int pf) -{ - write_lock_bh(&queue_handler_lock); - queue_handler[pf].outfn = NULL; - queue_handler[pf].data = NULL; - write_unlock_bh(&queue_handler_lock); - - return 0; -} - -/* - * Any packet that leaves via this function must come back - * through nf_reinject(). - */ -static int nf_queue(struct sk_buff *skb, - struct list_head *elem, - int pf, unsigned int hook, - struct net_device *indev, - struct net_device *outdev, - int (*okfn)(struct sk_buff *)) -{ - int status; - struct nf_info *info; -#ifdef CONFIG_BRIDGE_NETFILTER - struct net_device *physindev = NULL; - struct net_device *physoutdev = NULL; -#endif - - /* QUEUE == DROP if noone is waiting, to be safe. */ - read_lock(&queue_handler_lock); - if (!queue_handler[pf].outfn) { - read_unlock(&queue_handler_lock); - kfree_skb(skb); - return 1; - } - - info = kmalloc(sizeof(*info), GFP_ATOMIC); - if (!info) { - if (net_ratelimit()) - printk(KERN_ERR "OOM queueing packet %p\n", - skb); - read_unlock(&queue_handler_lock); - kfree_skb(skb); - return 1; - } - - *info = (struct nf_info) { - (struct nf_hook_ops *)elem, pf, hook, indev, outdev, okfn }; - - /* If it's going away, ignore hook. */ - if (!try_module_get(info->elem->owner)) { - read_unlock(&queue_handler_lock); - kfree(info); - return 0; - } - - /* Bump dev refs so they don't vanish while packet is out */ - if (indev) dev_hold(indev); - if (outdev) dev_hold(outdev); - -#ifdef CONFIG_BRIDGE_NETFILTER - if (skb->nf_bridge) { - physindev = skb->nf_bridge->physindev; - if (physindev) dev_hold(physindev); - physoutdev = skb->nf_bridge->physoutdev; - if (physoutdev) dev_hold(physoutdev); - } -#endif - - status = queue_handler[pf].outfn(skb, info, queue_handler[pf].data); - read_unlock(&queue_handler_lock); - - if (status < 0) { - /* James M doesn't say fuck enough. */ - if (indev) dev_put(indev); - if (outdev) dev_put(outdev); -#ifdef CONFIG_BRIDGE_NETFILTER - if (physindev) dev_put(physindev); - if (physoutdev) dev_put(physoutdev); -#endif - module_put(info->elem->owner); - kfree(info); - kfree_skb(skb); - return 1; - } - return 1; -} - -/* Returns 1 if okfn() needs to be executed by the caller, - * -EPERM for NF_DROP, 0 otherwise. */ -int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb, - struct net_device *indev, - struct net_device *outdev, - int (*okfn)(struct sk_buff *), - int hook_thresh) -{ - struct list_head *elem; - unsigned int verdict; - int ret = 0; - - /* We may already have this, but read-locks nest anyway */ - rcu_read_lock(); - - elem = &nf_hooks[pf][hook]; -next_hook: - verdict = nf_iterate(&nf_hooks[pf][hook], pskb, hook, indev, - outdev, &elem, okfn, hook_thresh); - if (verdict == NF_ACCEPT || verdict == NF_STOP) { - ret = 1; - goto unlock; - } else if (verdict == NF_DROP) { - kfree_skb(*pskb); - ret = -EPERM; - } else if (verdict == NF_QUEUE) { - NFDEBUG("nf_hook: Verdict = QUEUE.\n"); - if (!nf_queue(*pskb, elem, pf, hook, indev, outdev, okfn)) - goto next_hook; - } -unlock: - rcu_read_unlock(); - return ret; -} - -void nf_reinject(struct sk_buff *skb, struct nf_info *info, - unsigned int verdict) -{ - struct list_head *elem = &info->elem->list; - struct list_head *i; - - rcu_read_lock(); - - /* Release those devices we held, or Alexey will kill me. */ - if (info->indev) dev_put(info->indev); - if (info->outdev) dev_put(info->outdev); -#ifdef CONFIG_BRIDGE_NETFILTER - if (skb->nf_bridge) { - if (skb->nf_bridge->physindev) - dev_put(skb->nf_bridge->physindev); - if (skb->nf_bridge->physoutdev) - dev_put(skb->nf_bridge->physoutdev); - } -#endif - - /* Drop reference to owner of hook which queued us. */ - module_put(info->elem->owner); - - list_for_each_rcu(i, &nf_hooks[info->pf][info->hook]) { - if (i == elem) - break; - } - - if (elem == &nf_hooks[info->pf][info->hook]) { - /* The module which sent it to userspace is gone. */ - NFDEBUG("%s: module disappeared, dropping packet.\n", - __FUNCTION__); - verdict = NF_DROP; - } - - /* Continue traversal iff userspace said ok... */ - if (verdict == NF_REPEAT) { - elem = elem->prev; - verdict = NF_ACCEPT; - } - - if (verdict == NF_ACCEPT) { - next_hook: - verdict = nf_iterate(&nf_hooks[info->pf][info->hook], - &skb, info->hook, - info->indev, info->outdev, &elem, - info->okfn, INT_MIN); - } - - switch (verdict) { - case NF_ACCEPT: - info->okfn(skb); - break; - - case NF_QUEUE: - if (!nf_queue(skb, elem, info->pf, info->hook, - info->indev, info->outdev, info->okfn)) - goto next_hook; - break; - } - rcu_read_unlock(); - - if (verdict == NF_DROP) - kfree_skb(skb); - - kfree(info); - return; -} - -#ifdef CONFIG_INET -/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ -int ip_route_me_harder(struct sk_buff **pskb) -{ - struct iphdr *iph = (*pskb)->nh.iph; - struct rtable *rt; - struct flowi fl = {}; - struct dst_entry *odst; - unsigned int hh_len; - - /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause - * packets with foreign saddr to appear on the NF_IP_LOCAL_OUT hook. - */ - if (inet_addr_type(iph->saddr) == RTN_LOCAL) { - fl.nl_u.ip4_u.daddr = iph->daddr; - fl.nl_u.ip4_u.saddr = iph->saddr; - fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); - fl.oif = (*pskb)->sk ? (*pskb)->sk->sk_bound_dev_if : 0; -#ifdef CONFIG_IP_ROUTE_FWMARK - fl.nl_u.ip4_u.fwmark = (*pskb)->nfmark; -#endif - fl.proto = iph->protocol; - if (ip_route_output_key(&rt, &fl) != 0) - return -1; - - /* Drop old route. */ - dst_release((*pskb)->dst); - (*pskb)->dst = &rt->u.dst; - } else { - /* non-local src, find valid iif to satisfy - * rp-filter when calling ip_route_input. */ - fl.nl_u.ip4_u.daddr = iph->saddr; - if (ip_route_output_key(&rt, &fl) != 0) - return -1; - - odst = (*pskb)->dst; - if (ip_route_input(*pskb, iph->daddr, iph->saddr, - RT_TOS(iph->tos), rt->u.dst.dev) != 0) { - dst_release(&rt->u.dst); - return -1; - } - dst_release(&rt->u.dst); - dst_release(odst); - } - - if ((*pskb)->dst->error) - return -1; - - /* Change in oif may mean change in hh_len. */ - hh_len = (*pskb)->dst->dev->hard_header_len; - if (skb_headroom(*pskb) < hh_len) { - struct sk_buff *nskb; - - nskb = skb_realloc_headroom(*pskb, hh_len); - if (!nskb) - return -1; - if ((*pskb)->sk) - skb_set_owner_w(nskb, (*pskb)->sk); - kfree_skb(*pskb); - *pskb = nskb; - } - - return 0; -} -EXPORT_SYMBOL(ip_route_me_harder); - -int skb_ip_make_writable(struct sk_buff **pskb, unsigned int writable_len) -{ - struct sk_buff *nskb; - - if (writable_len > (*pskb)->len) - return 0; - - /* Not exclusive use of packet? Must copy. */ - if (skb_shared(*pskb) || skb_cloned(*pskb)) - goto copy_skb; - - return pskb_may_pull(*pskb, writable_len); - -copy_skb: - nskb = skb_copy(*pskb, GFP_ATOMIC); - if (!nskb) - return 0; - BUG_ON(skb_is_nonlinear(nskb)); - - /* Rest of kernel will get very unhappy if we pass it a - suddenly-orphaned skbuff */ - if ((*pskb)->sk) - skb_set_owner_w(nskb, (*pskb)->sk); - kfree_skb(*pskb); - *pskb = nskb; - return 1; -} -EXPORT_SYMBOL(skb_ip_make_writable); -#endif /*CONFIG_INET*/ - -/* Internal logging interface, which relies on the real - LOG target modules */ - -#define NF_LOG_PREFIXLEN 128 - -static nf_logfn *nf_logging[NPROTO]; /* = NULL */ -static int reported = 0; -static DEFINE_SPINLOCK(nf_log_lock); - -int nf_log_register(int pf, nf_logfn *logfn) -{ - int ret = -EBUSY; - - /* Any setup of logging members must be done before - * substituting pointer. */ - spin_lock(&nf_log_lock); - if (!nf_logging[pf]) { - rcu_assign_pointer(nf_logging[pf], logfn); - ret = 0; - } - spin_unlock(&nf_log_lock); - return ret; -} - -void nf_log_unregister(int pf, nf_logfn *logfn) -{ - spin_lock(&nf_log_lock); - if (nf_logging[pf] == logfn) - nf_logging[pf] = NULL; - spin_unlock(&nf_log_lock); - - /* Give time to concurrent readers. */ - synchronize_net(); -} - -void nf_log_packet(int pf, - unsigned int hooknum, - const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const char *fmt, ...) -{ - va_list args; - char prefix[NF_LOG_PREFIXLEN]; - nf_logfn *logfn; - - rcu_read_lock(); - logfn = rcu_dereference(nf_logging[pf]); - if (logfn) { - va_start(args, fmt); - vsnprintf(prefix, sizeof(prefix), fmt, args); - va_end(args); - /* We must read logging before nf_logfn[pf] */ - logfn(hooknum, skb, in, out, prefix); - } else if (!reported) { - printk(KERN_WARNING "nf_log_packet: can\'t log yet, " - "no backend logging module loaded in!\n"); - reported++; - } - rcu_read_unlock(); -} -EXPORT_SYMBOL(nf_log_register); -EXPORT_SYMBOL(nf_log_unregister); -EXPORT_SYMBOL(nf_log_packet); - -/* This does not belong here, but locally generated errors need it if connection - tracking in use: without this, connection may not be in hash table, and hence - manufactured ICMP or RST packets will not be associated with it. */ -void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *); - -void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) -{ - void (*attach)(struct sk_buff *, struct sk_buff *); - - if (skb->nfct && (attach = ip_ct_attach) != NULL) { - mb(); /* Just to be sure: must be read before executing this */ - attach(new, skb); - } -} - -void __init netfilter_init(void) -{ - int i, h; - - for (i = 0; i < NPROTO; i++) { - for (h = 0; h < NF_MAX_HOOKS; h++) - INIT_LIST_HEAD(&nf_hooks[i][h]); - } -} - -EXPORT_SYMBOL(ip_ct_attach); -EXPORT_SYMBOL(nf_ct_attach); -EXPORT_SYMBOL(nf_getsockopt); -EXPORT_SYMBOL(nf_hook_slow); -EXPORT_SYMBOL(nf_hooks); -EXPORT_SYMBOL(nf_register_hook); -EXPORT_SYMBOL(nf_register_queue_handler); -EXPORT_SYMBOL(nf_register_sockopt); -EXPORT_SYMBOL(nf_reinject); -EXPORT_SYMBOL(nf_setsockopt); -EXPORT_SYMBOL(nf_unregister_hook); -EXPORT_SYMBOL(nf_unregister_queue_handler); -EXPORT_SYMBOL(nf_unregister_sockopt); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index c327c9edadc5..802fe11efad0 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -33,6 +33,7 @@ #define MAX_UDP_CHUNK 1460 #define MAX_SKBS 32 #define MAX_QUEUE_DEPTH (MAX_SKBS / 2) +#define MAX_RETRIES 20000 static DEFINE_SPINLOCK(skb_list_lock); static int nr_skbs; @@ -248,14 +249,14 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) int status; struct netpoll_info *npinfo; -repeat: - if(!np || !np->dev || !netif_running(np->dev)) { + if (!np || !np->dev || !netif_running(np->dev)) { __kfree_skb(skb); return; } - /* avoid recursion */ npinfo = np->dev->npinfo; + + /* avoid recursion */ if (npinfo->poll_owner == smp_processor_id() || np->dev->xmit_lock_owner == smp_processor_id()) { if (np->drop) @@ -265,30 +266,37 @@ repeat: return; } - spin_lock(&np->dev->xmit_lock); - np->dev->xmit_lock_owner = smp_processor_id(); + do { + npinfo->tries--; + spin_lock(&np->dev->xmit_lock); + np->dev->xmit_lock_owner = smp_processor_id(); - /* - * network drivers do not expect to be called if the queue is - * stopped. - */ - if (netif_queue_stopped(np->dev)) { + /* + * network drivers do not expect to be called if the queue is + * stopped. + */ + if (netif_queue_stopped(np->dev)) { + np->dev->xmit_lock_owner = -1; + spin_unlock(&np->dev->xmit_lock); + netpoll_poll(np); + udelay(50); + continue; + } + + status = np->dev->hard_start_xmit(skb, np->dev); np->dev->xmit_lock_owner = -1; spin_unlock(&np->dev->xmit_lock); - netpoll_poll(np); - goto repeat; - } - - status = np->dev->hard_start_xmit(skb, np->dev); - np->dev->xmit_lock_owner = -1; - spin_unlock(&np->dev->xmit_lock); + /* success */ + if(!status) { + npinfo->tries = MAX_RETRIES; /* reset */ + return; + } - /* transmit busy */ - if(status) { + /* transmit busy */ netpoll_poll(np); - goto repeat; - } + udelay(50); + } while (npinfo->tries > 0); } void netpoll_send_udp(struct netpoll *np, const char *msg, int len) @@ -349,15 +357,11 @@ static void arp_reply(struct sk_buff *skb) unsigned char *arp_ptr; int size, type = ARPOP_REPLY, ptype = ETH_P_ARP; u32 sip, tip; - unsigned long flags; struct sk_buff *send_skb; struct netpoll *np = NULL; - spin_lock_irqsave(&npinfo->rx_lock, flags); if (npinfo->rx_np && npinfo->rx_np->dev == skb->dev) np = npinfo->rx_np; - spin_unlock_irqrestore(&npinfo->rx_lock, flags); - if (!np) return; @@ -639,10 +643,12 @@ int netpoll_setup(struct netpoll *np) if (!npinfo) goto release; + npinfo->rx_flags = 0; npinfo->rx_np = NULL; - npinfo->poll_lock = SPIN_LOCK_UNLOCKED; + spin_lock_init(&npinfo->poll_lock); npinfo->poll_owner = -1; - npinfo->rx_lock = SPIN_LOCK_UNLOCKED; + npinfo->tries = MAX_RETRIES; + spin_lock_init(&npinfo->rx_lock); } else npinfo = ndev->npinfo; @@ -697,7 +703,7 @@ int netpoll_setup(struct netpoll *np) if (!np->local_ip) { rcu_read_lock(); - in_dev = __in_dev_get(ndev); + in_dev = __in_dev_get_rcu(ndev); if (!in_dev || !in_dev->ifa_list) { rcu_read_unlock(); @@ -718,9 +724,16 @@ int netpoll_setup(struct netpoll *np) npinfo->rx_np = np; spin_unlock_irqrestore(&npinfo->rx_lock, flags); } + + /* fill up the skb queue */ + refill_skbs(); + /* last thing to do is link it to the net device structure */ ndev->npinfo = npinfo; + /* avoid racing with NAPI reading npinfo */ + synchronize_rcu(); + return 0; release: diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 8eb083b6041a..7fc3e9e28c34 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -75,7 +75,7 @@ * By design there should only be *one* "controlling" process. In practice * multiple write accesses gives unpredictable result. Understood by "write" * to /proc gives result code thats should be read be the "writer". - * For pratical use this should be no problem. + * For practical use this should be no problem. * * Note when adding devices to a specific CPU there good idea to also assign * /proc/irq/XX/smp_affinity so TX-interrupts gets bound to the same CPU. @@ -96,7 +96,7 @@ * New xmit() return, do_div and misc clean up by Stephen Hemminger * <shemminger@osdl.org> 040923 * - * Rany Dunlap fixed u64 printk compiler waring + * Randy Dunlap fixed u64 printk compiler waring * * Remove FCS from BW calculation. Lennert Buytenhek <buytenh@wantstofly.org> * New time handling. Lennert Buytenhek <buytenh@wantstofly.org> 041213 @@ -137,6 +137,7 @@ #include <linux/ipv6.h> #include <linux/udp.h> #include <linux/proc_fs.h> +#include <linux/seq_file.h> #include <linux/wait.h> #include <net/checksum.h> #include <net/ipv6.h> @@ -151,7 +152,7 @@ #include <asm/timex.h> -#define VERSION "pktgen v2.62: Packet Generator for packet performance testing.\n" +#define VERSION "pktgen v2.63: Packet Generator for packet performance testing.\n" /* #define PG_DEBUG(a) a */ #define PG_DEBUG(a) @@ -177,8 +178,8 @@ #define T_REMDEV (1<<3) /* Remove all devs */ /* Locks */ -#define thread_lock() spin_lock(&_thread_lock) -#define thread_unlock() spin_unlock(&_thread_lock) +#define thread_lock() down(&pktgen_sem) +#define thread_unlock() up(&pktgen_sem) /* If lock -- can be removed after some work */ #define if_lock(t) spin_lock(&(t->if_lock)); @@ -187,6 +188,8 @@ /* Used to help with determining the pkts on receive */ #define PKTGEN_MAGIC 0xbe9be955 #define PG_PROC_DIR "pktgen" +#define PGCTRL "pgctrl" +static struct proc_dir_entry *pg_proc_dir = NULL; #define MAX_CFLOWS 65536 @@ -202,11 +205,8 @@ struct pktgen_dev { * Try to keep frequent/infrequent used vars. separated. */ - char ifname[32]; - struct proc_dir_entry *proc_ent; + char ifname[IFNAMSIZ]; char result[512]; - /* proc file names */ - char fname[80]; struct pktgen_thread* pg_thread; /* the owner */ struct pktgen_dev *next; /* Used for chaining in the thread's run-queue */ @@ -244,7 +244,7 @@ struct pktgen_dev { __u32 seq_num; int clone_skb; /* Use multiple SKBs during packet gen. If this number - * is greater than 1, then that many coppies of the same + * is greater than 1, then that many copies of the same * packet will be sent before a new packet is allocated. * For instance, if you want to send 1024 identical packets * before creating a new packet, set clone_skb to 1024. @@ -330,8 +330,6 @@ struct pktgen_thread { struct pktgen_dev *if_list; /* All device here */ struct pktgen_thread* next; char name[32]; - char fname[128]; /* name of proc file */ - struct proc_dir_entry *proc_ent; char result[512]; u32 max_before_softirq; /* We'll call do_softirq to prevent starvation. */ @@ -396,7 +394,7 @@ static inline s64 divremdi3(s64 x, s64 y, int type) /* End of hacks to deal with 64-bit math on x86 */ -/** Convert to miliseconds */ +/** Convert to milliseconds */ static inline __u64 tv_to_ms(const struct timeval* tv) { __u64 ms = tv->tv_usec / 1000; @@ -425,7 +423,7 @@ static inline __u64 pg_div64(__u64 n, __u64 base) { __u64 tmp = n; /* - * How do we know if the architectrure we are running on + * How do we know if the architecture we are running on * supports division with 64 bit base? * */ @@ -473,16 +471,6 @@ static inline __u64 tv_diff(const struct timeval* a, const struct timeval* b) static char version[] __initdata = VERSION; -static ssize_t proc_pgctrl_read(struct file* file, char __user * buf, size_t count, loff_t *ppos); -static ssize_t proc_pgctrl_write(struct file* file, const char __user * buf, size_t count, loff_t *ppos); -static int proc_if_read(char *buf , char **start, off_t offset, int len, int *eof, void *data); - -static int proc_thread_read(char *buf , char **start, off_t offset, int len, int *eof, void *data); -static int proc_if_write(struct file *file, const char __user *user_buffer, unsigned long count, void *data); -static int proc_thread_write(struct file *file, const char __user *user_buffer, unsigned long count, void *data); -static int create_proc_dir(void); -static int remove_proc_dir(void); - static int pktgen_remove_device(struct pktgen_thread* t, struct pktgen_dev *i); static int pktgen_add_device(struct pktgen_thread* t, const char* ifname); static struct pktgen_thread* pktgen_find_thread(const char* name); @@ -503,83 +491,41 @@ static int pg_delay_d = 0; static int pg_clone_skb_d = 0; static int debug = 0; -static spinlock_t _thread_lock = SPIN_LOCK_UNLOCKED; +static DECLARE_MUTEX(pktgen_sem); static struct pktgen_thread *pktgen_threads = NULL; -static char module_fname[128]; -static struct proc_dir_entry *module_proc_ent = NULL; - static struct notifier_block pktgen_notifier_block = { .notifier_call = pktgen_device_event, }; -static struct file_operations pktgen_fops = { - .read = proc_pgctrl_read, - .write = proc_pgctrl_write, - /* .ioctl = pktgen_ioctl, later maybe */ -}; - /* * /proc handling functions * */ -static struct proc_dir_entry *pg_proc_dir = NULL; -static int proc_pgctrl_read_eof=0; - -static ssize_t proc_pgctrl_read(struct file* file, char __user * buf, - size_t count, loff_t *ppos) +static int pgctrl_show(struct seq_file *seq, void *v) { - char data[200]; - int len = 0; - - if(proc_pgctrl_read_eof) { - proc_pgctrl_read_eof=0; - len = 0; - goto out; - } - - sprintf(data, "%s", VERSION); - - len = strlen(data); - - if(len > count) { - len =-EFAULT; - goto out; - } - - if (copy_to_user(buf, data, len)) { - len =-EFAULT; - goto out; - } - - *ppos += len; - proc_pgctrl_read_eof=1; /* EOF next call */ - - out: - return len; + seq_puts(seq, VERSION); + return 0; } -static ssize_t proc_pgctrl_write(struct file* file,const char __user * buf, - size_t count, loff_t *ppos) +static ssize_t pgctrl_write(struct file* file,const char __user * buf, + size_t count, loff_t *ppos) { - char *data = NULL; int err = 0; + char data[128]; if (!capable(CAP_NET_ADMIN)){ err = -EPERM; goto out; } - data = (void*)vmalloc ((unsigned int)count); + if (count > sizeof(data)) + count = sizeof(data); - if(!data) { - err = -ENOMEM; - goto out; - } if (copy_from_user(data, buf, count)) { - err =-EFAULT; - goto out_free; + err = -EFAULT; + goto out; } data[count-1] = 0; /* Make string */ @@ -594,31 +540,40 @@ static ssize_t proc_pgctrl_write(struct file* file,const char __user * buf, err = count; - out_free: - vfree (data); out: return err; } -static int proc_if_read(char *buf , char **start, off_t offset, - int len, int *eof, void *data) +static int pgctrl_open(struct inode *inode, struct file *file) +{ + return single_open(file, pgctrl_show, PDE(inode)->data); +} + +static struct file_operations pktgen_fops = { + .owner = THIS_MODULE, + .open = pgctrl_open, + .read = seq_read, + .llseek = seq_lseek, + .write = pgctrl_write, + .release = single_release, +}; + +static int pktgen_if_show(struct seq_file *seq, void *v) { - char *p; int i; - struct pktgen_dev *pkt_dev = (struct pktgen_dev*)(data); + struct pktgen_dev *pkt_dev = seq->private; __u64 sa; __u64 stopped; __u64 now = getCurUs(); - p = buf; - p += sprintf(p, "Params: count %llu min_pkt_size: %u max_pkt_size: %u\n", - (unsigned long long) pkt_dev->count, - pkt_dev->min_pkt_size, pkt_dev->max_pkt_size); + seq_printf(seq, "Params: count %llu min_pkt_size: %u max_pkt_size: %u\n", + (unsigned long long) pkt_dev->count, + pkt_dev->min_pkt_size, pkt_dev->max_pkt_size); - p += sprintf(p, " frags: %d delay: %u clone_skb: %d ifname: %s\n", - pkt_dev->nfrags, 1000*pkt_dev->delay_us+pkt_dev->delay_ns, pkt_dev->clone_skb, pkt_dev->ifname); + seq_printf(seq, " frags: %d delay: %u clone_skb: %d ifname: %s\n", + pkt_dev->nfrags, 1000*pkt_dev->delay_us+pkt_dev->delay_ns, pkt_dev->clone_skb, pkt_dev->ifname); - p += sprintf(p, " flows: %u flowlen: %u\n", pkt_dev->cflows, pkt_dev->lflow); + seq_printf(seq, " flows: %u flowlen: %u\n", pkt_dev->cflows, pkt_dev->lflow); if(pkt_dev->flags & F_IPV6) { @@ -626,19 +581,19 @@ static int proc_if_read(char *buf , char **start, off_t offset, fmt_ip6(b1, pkt_dev->in6_saddr.s6_addr); fmt_ip6(b2, pkt_dev->min_in6_saddr.s6_addr); fmt_ip6(b3, pkt_dev->max_in6_saddr.s6_addr); - p += sprintf(p, " saddr: %s min_saddr: %s max_saddr: %s\n", b1, b2, b3); + seq_printf(seq, " saddr: %s min_saddr: %s max_saddr: %s\n", b1, b2, b3); fmt_ip6(b1, pkt_dev->in6_daddr.s6_addr); fmt_ip6(b2, pkt_dev->min_in6_daddr.s6_addr); fmt_ip6(b3, pkt_dev->max_in6_daddr.s6_addr); - p += sprintf(p, " daddr: %s min_daddr: %s max_daddr: %s\n", b1, b2, b3); + seq_printf(seq, " daddr: %s min_daddr: %s max_daddr: %s\n", b1, b2, b3); } else - p += sprintf(p, " dst_min: %s dst_max: %s\n src_min: %s src_max: %s\n", - pkt_dev->dst_min, pkt_dev->dst_max, pkt_dev->src_min, pkt_dev->src_max); + seq_printf(seq," dst_min: %s dst_max: %s\n src_min: %s src_max: %s\n", + pkt_dev->dst_min, pkt_dev->dst_max, pkt_dev->src_min, pkt_dev->src_max); - p += sprintf(p, " src_mac: "); + seq_puts(seq, " src_mac: "); if ((pkt_dev->src_mac[0] == 0) && (pkt_dev->src_mac[1] == 0) && @@ -648,89 +603,89 @@ static int proc_if_read(char *buf , char **start, off_t offset, (pkt_dev->src_mac[5] == 0)) for (i = 0; i < 6; i++) - p += sprintf(p, "%02X%s", pkt_dev->odev->dev_addr[i], i == 5 ? " " : ":"); + seq_printf(seq, "%02X%s", pkt_dev->odev->dev_addr[i], i == 5 ? " " : ":"); else for (i = 0; i < 6; i++) - p += sprintf(p, "%02X%s", pkt_dev->src_mac[i], i == 5 ? " " : ":"); + seq_printf(seq, "%02X%s", pkt_dev->src_mac[i], i == 5 ? " " : ":"); - p += sprintf(p, "dst_mac: "); + seq_printf(seq, "dst_mac: "); for (i = 0; i < 6; i++) - p += sprintf(p, "%02X%s", pkt_dev->dst_mac[i], i == 5 ? "\n" : ":"); + seq_printf(seq, "%02X%s", pkt_dev->dst_mac[i], i == 5 ? "\n" : ":"); - p += sprintf(p, " udp_src_min: %d udp_src_max: %d udp_dst_min: %d udp_dst_max: %d\n", - pkt_dev->udp_src_min, pkt_dev->udp_src_max, pkt_dev->udp_dst_min, - pkt_dev->udp_dst_max); + seq_printf(seq, " udp_src_min: %d udp_src_max: %d udp_dst_min: %d udp_dst_max: %d\n", + pkt_dev->udp_src_min, pkt_dev->udp_src_max, pkt_dev->udp_dst_min, + pkt_dev->udp_dst_max); - p += sprintf(p, " src_mac_count: %d dst_mac_count: %d \n Flags: ", - pkt_dev->src_mac_count, pkt_dev->dst_mac_count); + seq_printf(seq, " src_mac_count: %d dst_mac_count: %d \n Flags: ", + pkt_dev->src_mac_count, pkt_dev->dst_mac_count); if (pkt_dev->flags & F_IPV6) - p += sprintf(p, "IPV6 "); + seq_printf(seq, "IPV6 "); if (pkt_dev->flags & F_IPSRC_RND) - p += sprintf(p, "IPSRC_RND "); + seq_printf(seq, "IPSRC_RND "); if (pkt_dev->flags & F_IPDST_RND) - p += sprintf(p, "IPDST_RND "); + seq_printf(seq, "IPDST_RND "); if (pkt_dev->flags & F_TXSIZE_RND) - p += sprintf(p, "TXSIZE_RND "); + seq_printf(seq, "TXSIZE_RND "); if (pkt_dev->flags & F_UDPSRC_RND) - p += sprintf(p, "UDPSRC_RND "); + seq_printf(seq, "UDPSRC_RND "); if (pkt_dev->flags & F_UDPDST_RND) - p += sprintf(p, "UDPDST_RND "); + seq_printf(seq, "UDPDST_RND "); if (pkt_dev->flags & F_MACSRC_RND) - p += sprintf(p, "MACSRC_RND "); + seq_printf(seq, "MACSRC_RND "); if (pkt_dev->flags & F_MACDST_RND) - p += sprintf(p, "MACDST_RND "); + seq_printf(seq, "MACDST_RND "); - p += sprintf(p, "\n"); + seq_puts(seq, "\n"); sa = pkt_dev->started_at; stopped = pkt_dev->stopped_at; if (pkt_dev->running) stopped = now; /* not really stopped, more like last-running-at */ - p += sprintf(p, "Current:\n pkts-sofar: %llu errors: %llu\n started: %lluus stopped: %lluus idle: %lluus\n", - (unsigned long long) pkt_dev->sofar, - (unsigned long long) pkt_dev->errors, - (unsigned long long) sa, - (unsigned long long) stopped, - (unsigned long long) pkt_dev->idle_acc); + seq_printf(seq, "Current:\n pkts-sofar: %llu errors: %llu\n started: %lluus stopped: %lluus idle: %lluus\n", + (unsigned long long) pkt_dev->sofar, + (unsigned long long) pkt_dev->errors, + (unsigned long long) sa, + (unsigned long long) stopped, + (unsigned long long) pkt_dev->idle_acc); - p += sprintf(p, " seq_num: %d cur_dst_mac_offset: %d cur_src_mac_offset: %d\n", - pkt_dev->seq_num, pkt_dev->cur_dst_mac_offset, pkt_dev->cur_src_mac_offset); + seq_printf(seq, " seq_num: %d cur_dst_mac_offset: %d cur_src_mac_offset: %d\n", + pkt_dev->seq_num, pkt_dev->cur_dst_mac_offset, + pkt_dev->cur_src_mac_offset); if(pkt_dev->flags & F_IPV6) { char b1[128], b2[128]; fmt_ip6(b1, pkt_dev->cur_in6_daddr.s6_addr); fmt_ip6(b2, pkt_dev->cur_in6_saddr.s6_addr); - p += sprintf(p, " cur_saddr: %s cur_daddr: %s\n", b2, b1); + seq_printf(seq, " cur_saddr: %s cur_daddr: %s\n", b2, b1); } else - p += sprintf(p, " cur_saddr: 0x%x cur_daddr: 0x%x\n", - pkt_dev->cur_saddr, pkt_dev->cur_daddr); + seq_printf(seq, " cur_saddr: 0x%x cur_daddr: 0x%x\n", + pkt_dev->cur_saddr, pkt_dev->cur_daddr); - p += sprintf(p, " cur_udp_dst: %d cur_udp_src: %d\n", - pkt_dev->cur_udp_dst, pkt_dev->cur_udp_src); + seq_printf(seq, " cur_udp_dst: %d cur_udp_src: %d\n", + pkt_dev->cur_udp_dst, pkt_dev->cur_udp_src); - p += sprintf(p, " flows: %u\n", pkt_dev->nflows); + seq_printf(seq, " flows: %u\n", pkt_dev->nflows); if (pkt_dev->result[0]) - p += sprintf(p, "Result: %s\n", pkt_dev->result); + seq_printf(seq, "Result: %s\n", pkt_dev->result); else - p += sprintf(p, "Result: Idle\n"); - *eof = 1; + seq_printf(seq, "Result: Idle\n"); - return p - buf; + return 0; } @@ -802,13 +757,14 @@ done_str: return i; } -static int proc_if_write(struct file *file, const char __user *user_buffer, - unsigned long count, void *data) +static ssize_t pktgen_if_write(struct file *file, const char __user *user_buffer, + size_t count, loff_t *offset) { + struct seq_file *seq = (struct seq_file *) file->private_data; + struct pktgen_dev *pkt_dev = seq->private; int i = 0, max, len; char name[16], valstr[32]; unsigned long value = 0; - struct pktgen_dev *pkt_dev = (struct pktgen_dev*)(data); char* pg_result = NULL; int tmp = 0; char buf[128]; @@ -849,7 +805,8 @@ static int proc_if_write(struct file *file, const char __user *user_buffer, if (copy_from_user(tb, user_buffer, count)) return -EFAULT; tb[count] = 0; - printk("pktgen: %s,%lu buffer -:%s:-\n", name, count, tb); + printk("pktgen: %s,%lu buffer -:%s:-\n", name, + (unsigned long) count, tb); } if (!strcmp(name, "min_pkt_size")) { @@ -1335,92 +1292,98 @@ static int proc_if_write(struct file *file, const char __user *user_buffer, return -EINVAL; } -static int proc_thread_read(char *buf , char **start, off_t offset, - int len, int *eof, void *data) +static int pktgen_if_open(struct inode *inode, struct file *file) { - char *p; - struct pktgen_thread *t = (struct pktgen_thread*)(data); - struct pktgen_dev *pkt_dev = NULL; + return single_open(file, pktgen_if_show, PDE(inode)->data); +} +static struct file_operations pktgen_if_fops = { + .owner = THIS_MODULE, + .open = pktgen_if_open, + .read = seq_read, + .llseek = seq_lseek, + .write = pktgen_if_write, + .release = single_release, +}; - if (!t) { - printk("pktgen: ERROR: could not find thread in proc_thread_read\n"); - return -EINVAL; - } +static int pktgen_thread_show(struct seq_file *seq, void *v) +{ + struct pktgen_thread *t = seq->private; + struct pktgen_dev *pkt_dev = NULL; - p = buf; - p += sprintf(p, "Name: %s max_before_softirq: %d\n", + BUG_ON(!t); + + seq_printf(seq, "Name: %s max_before_softirq: %d\n", t->name, t->max_before_softirq); - p += sprintf(p, "Running: "); + seq_printf(seq, "Running: "); if_lock(t); for(pkt_dev = t->if_list;pkt_dev; pkt_dev = pkt_dev->next) if(pkt_dev->running) - p += sprintf(p, "%s ", pkt_dev->ifname); + seq_printf(seq, "%s ", pkt_dev->ifname); - p += sprintf(p, "\nStopped: "); + seq_printf(seq, "\nStopped: "); for(pkt_dev = t->if_list;pkt_dev; pkt_dev = pkt_dev->next) if(!pkt_dev->running) - p += sprintf(p, "%s ", pkt_dev->ifname); + seq_printf(seq, "%s ", pkt_dev->ifname); if (t->result[0]) - p += sprintf(p, "\nResult: %s\n", t->result); + seq_printf(seq, "\nResult: %s\n", t->result); else - p += sprintf(p, "\nResult: NA\n"); - - *eof = 1; + seq_printf(seq, "\nResult: NA\n"); if_unlock(t); - return p - buf; + return 0; } -static int proc_thread_write(struct file *file, const char __user *user_buffer, - unsigned long count, void *data) +static ssize_t pktgen_thread_write(struct file *file, + const char __user *user_buffer, + size_t count, loff_t *offset) { + struct seq_file *seq = (struct seq_file *) file->private_data; + struct pktgen_thread *t = seq->private; int i = 0, max, len, ret; char name[40]; - struct pktgen_thread *t; char *pg_result; unsigned long value = 0; - + if (count < 1) { // sprintf(pg_result, "Wrong command format"); return -EINVAL; } - + max = count - i; len = count_trail_chars(&user_buffer[i], max); - if (len < 0) - return len; - + if (len < 0) + return len; + i += len; - + /* Read variable name */ len = strn_len(&user_buffer[i], sizeof(name) - 1); - if (len < 0) - return len; + if (len < 0) + return len; memset(name, 0, sizeof(name)); if (copy_from_user(name, &user_buffer[i], len)) return -EFAULT; i += len; - + max = count -i; len = count_trail_chars(&user_buffer[i], max); - if (len < 0) - return len; - + if (len < 0) + return len; + i += len; - if (debug) - printk("pktgen: t=%s, count=%lu\n", name, count); - + if (debug) + printk("pktgen: t=%s, count=%lu\n", name, + (unsigned long) count); - t = (struct pktgen_thread*)(data); if(!t) { printk("pktgen: ERROR: No thread\n"); ret = -EINVAL; @@ -1452,8 +1415,7 @@ static int proc_thread_write(struct file *file, const char __user *user_buffer, thread_lock(); t->control |= T_REMDEV; thread_unlock(); - current->state = TASK_INTERRUPTIBLE; - schedule_timeout(HZ/8); /* Propagate thread->control */ + schedule_timeout_interruptible(msecs_to_jiffies(125)); /* Propagate thread->control */ ret = count; sprintf(pg_result, "OK: rem_device_all"); goto out; @@ -1475,32 +1437,19 @@ static int proc_thread_write(struct file *file, const char __user *user_buffer, return ret; } -static int create_proc_dir(void) +static int pktgen_thread_open(struct inode *inode, struct file *file) { - int len; - /* does proc_dir already exists */ - len = strlen(PG_PROC_DIR); - - for (pg_proc_dir = proc_net->subdir; pg_proc_dir; pg_proc_dir=pg_proc_dir->next) { - if ((pg_proc_dir->namelen == len) && - (! memcmp(pg_proc_dir->name, PG_PROC_DIR, len))) - break; - } - - if (!pg_proc_dir) - pg_proc_dir = create_proc_entry(PG_PROC_DIR, S_IFDIR, proc_net); - - if (!pg_proc_dir) - return -ENODEV; - - return 0; + return single_open(file, pktgen_thread_show, PDE(inode)->data); } -static int remove_proc_dir(void) -{ - remove_proc_entry(PG_PROC_DIR, proc_net); - return 0; -} +static struct file_operations pktgen_thread_fops = { + .owner = THIS_MODULE, + .open = pktgen_thread_open, + .read = seq_read, + .llseek = seq_lseek, + .write = pktgen_thread_write, + .release = single_release, +}; /* Think find or remove for NN */ static struct pktgen_dev *__pktgen_NN_threads(const char* ifname, int remove) @@ -1679,13 +1628,12 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) struct in_device *in_dev; rcu_read_lock(); - in_dev = __in_dev_get(pkt_dev->odev); + in_dev = __in_dev_get_rcu(pkt_dev->odev); if (in_dev) { if (in_dev->ifa_list) { pkt_dev->saddr_min = in_dev->ifa_list->ifa_address; pkt_dev->saddr_max = pkt_dev->saddr_min; } - __in_dev_put(in_dev); } rcu_read_unlock(); } @@ -1715,11 +1663,10 @@ static void spin(struct pktgen_dev *pkt_dev, __u64 spin_until_us) start = now = getCurUs(); printk(KERN_INFO "sleeping for %d\n", (int)(spin_until_us - now)); while (now < spin_until_us) { - /* TODO: optimise sleeping behavior */ - if (spin_until_us - now > (1000000/HZ)+1) { - current->state = TASK_INTERRUPTIBLE; - schedule_timeout(1); - } else if (spin_until_us - now > 100) { + /* TODO: optimize sleeping behavior */ + if (spin_until_us - now > jiffies_to_usecs(1)+1) + schedule_timeout_interruptible(1); + else if (spin_until_us - now > 100) { do_softirq(); if (!pkt_dev->running) return; @@ -2375,7 +2322,7 @@ static void pktgen_stop_all_threads_ifs(void) pktgen_stop(t); t = t->next; } - thread_unlock(); + thread_unlock(); } static int thread_is_running(struct pktgen_thread *t ) @@ -2449,8 +2396,7 @@ static void pktgen_run_all_threads(void) } thread_unlock(); - current->state = TASK_INTERRUPTIBLE; - schedule_timeout(HZ/8); /* Propagate thread->control */ + schedule_timeout_interruptible(msecs_to_jiffies(125)); /* Propagate thread->control */ pktgen_wait_all_threads_run(); } @@ -2567,10 +2513,9 @@ static void pktgen_rem_thread(struct pktgen_thread *t) struct pktgen_thread *tmp = pktgen_threads; - if (strlen(t->fname)) - remove_proc_entry(t->fname, NULL); + remove_proc_entry(t->name, pg_proc_dir); - thread_lock(); + thread_lock(); if (tmp == t) pktgen_threads = tmp->next; @@ -2840,7 +2785,7 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t, const char* i if_lock(t); for(pkt_dev=t->if_list; pkt_dev; pkt_dev = pkt_dev->next ) { - if (strcmp(pkt_dev->ifname, ifname) == 0) { + if (strncmp(pkt_dev->ifname, ifname, IFNAMSIZ) == 0) { break; } } @@ -2879,74 +2824,70 @@ static int add_dev_to_thread(struct pktgen_thread *t, struct pktgen_dev *pkt_dev static int pktgen_add_device(struct pktgen_thread *t, const char* ifname) { struct pktgen_dev *pkt_dev; + struct proc_dir_entry *pe; /* We don't allow a device to be on several threads */ - if( (pkt_dev = __pktgen_NN_threads(ifname, FIND)) == NULL) { - - pkt_dev = kmalloc(sizeof(struct pktgen_dev), GFP_KERNEL); - if (!pkt_dev) - return -ENOMEM; + pkt_dev = __pktgen_NN_threads(ifname, FIND); + if (pkt_dev) { + printk("pktgen: ERROR: interface already used.\n"); + return -EBUSY; + } - memset(pkt_dev, 0, sizeof(struct pktgen_dev)); + pkt_dev = kzalloc(sizeof(struct pktgen_dev), GFP_KERNEL); + if (!pkt_dev) + return -ENOMEM; - pkt_dev->flows = vmalloc(MAX_CFLOWS*sizeof(struct flow_state)); - if (pkt_dev->flows == NULL) { - kfree(pkt_dev); - return -ENOMEM; - } - memset(pkt_dev->flows, 0, MAX_CFLOWS*sizeof(struct flow_state)); - - pkt_dev->min_pkt_size = ETH_ZLEN; - pkt_dev->max_pkt_size = ETH_ZLEN; - pkt_dev->nfrags = 0; - pkt_dev->clone_skb = pg_clone_skb_d; - pkt_dev->delay_us = pg_delay_d / 1000; - pkt_dev->delay_ns = pg_delay_d % 1000; - pkt_dev->count = pg_count_d; - pkt_dev->sofar = 0; - pkt_dev->udp_src_min = 9; /* sink port */ - pkt_dev->udp_src_max = 9; - pkt_dev->udp_dst_min = 9; - pkt_dev->udp_dst_max = 9; - - strncpy(pkt_dev->ifname, ifname, 31); - sprintf(pkt_dev->fname, "net/%s/%s", PG_PROC_DIR, ifname); - - if (! pktgen_setup_dev(pkt_dev)) { - printk("pktgen: ERROR: pktgen_setup_dev failed.\n"); - if (pkt_dev->flows) - vfree(pkt_dev->flows); - kfree(pkt_dev); - return -ENODEV; - } + pkt_dev->flows = vmalloc(MAX_CFLOWS*sizeof(struct flow_state)); + if (pkt_dev->flows == NULL) { + kfree(pkt_dev); + return -ENOMEM; + } + memset(pkt_dev->flows, 0, MAX_CFLOWS*sizeof(struct flow_state)); - pkt_dev->proc_ent = create_proc_entry(pkt_dev->fname, 0600, NULL); - if (!pkt_dev->proc_ent) { - printk("pktgen: cannot create %s procfs entry.\n", pkt_dev->fname); - if (pkt_dev->flows) - vfree(pkt_dev->flows); - kfree(pkt_dev); - return -EINVAL; - } - pkt_dev->proc_ent->read_proc = proc_if_read; - pkt_dev->proc_ent->write_proc = proc_if_write; - pkt_dev->proc_ent->data = (void*)(pkt_dev); - pkt_dev->proc_ent->owner = THIS_MODULE; + pkt_dev->min_pkt_size = ETH_ZLEN; + pkt_dev->max_pkt_size = ETH_ZLEN; + pkt_dev->nfrags = 0; + pkt_dev->clone_skb = pg_clone_skb_d; + pkt_dev->delay_us = pg_delay_d / 1000; + pkt_dev->delay_ns = pg_delay_d % 1000; + pkt_dev->count = pg_count_d; + pkt_dev->sofar = 0; + pkt_dev->udp_src_min = 9; /* sink port */ + pkt_dev->udp_src_max = 9; + pkt_dev->udp_dst_min = 9; + pkt_dev->udp_dst_max = 9; + + strncpy(pkt_dev->ifname, ifname, IFNAMSIZ); + + if (! pktgen_setup_dev(pkt_dev)) { + printk("pktgen: ERROR: pktgen_setup_dev failed.\n"); + if (pkt_dev->flows) + vfree(pkt_dev->flows); + kfree(pkt_dev); + return -ENODEV; + } + + pe = create_proc_entry(ifname, 0600, pg_proc_dir); + if (!pe) { + printk("pktgen: cannot create %s/%s procfs entry.\n", + PG_PROC_DIR, ifname); + if (pkt_dev->flows) + vfree(pkt_dev->flows); + kfree(pkt_dev); + return -EINVAL; + } + pe->proc_fops = &pktgen_if_fops; + pe->data = pkt_dev; - return add_dev_to_thread(t, pkt_dev); - } - else { - printk("pktgen: ERROR: interface already used.\n"); - return -EBUSY; - } + return add_dev_to_thread(t, pkt_dev); } static struct pktgen_thread *pktgen_find_thread(const char* name) { struct pktgen_thread *t = NULL; - thread_lock(); + thread_lock(); t = pktgen_threads; while (t) { @@ -2962,6 +2903,7 @@ static struct pktgen_thread *pktgen_find_thread(const char* name) static int pktgen_create_thread(const char* name, int cpu) { struct pktgen_thread *t = NULL; + struct proc_dir_entry *pe; if (strlen(name) > 31) { printk("pktgen: ERROR: Thread name cannot be more than 31 characters.\n"); @@ -2973,28 +2915,26 @@ static int pktgen_create_thread(const char* name, int cpu) return -EINVAL; } - t = (struct pktgen_thread*)(kmalloc(sizeof(struct pktgen_thread), GFP_KERNEL)); + t = kzalloc(sizeof(struct pktgen_thread), GFP_KERNEL); if (!t) { printk("pktgen: ERROR: out of memory, can't create new thread.\n"); return -ENOMEM; } - memset(t, 0, sizeof(struct pktgen_thread)); strcpy(t->name, name); spin_lock_init(&t->if_lock); t->cpu = cpu; - sprintf(t->fname, "net/%s/%s", PG_PROC_DIR, t->name); - t->proc_ent = create_proc_entry(t->fname, 0600, NULL); - if (!t->proc_ent) { - printk("pktgen: cannot create %s procfs entry.\n", t->fname); + pe = create_proc_entry(t->name, 0600, pg_proc_dir); + if (!pe) { + printk("pktgen: cannot create %s/%s procfs entry.\n", + PG_PROC_DIR, t->name); kfree(t); return -EINVAL; } - t->proc_ent->read_proc = proc_thread_read; - t->proc_ent->write_proc = proc_thread_write; - t->proc_ent->data = (void*)(t); - t->proc_ent->owner = THIS_MODULE; + + pe->proc_fops = &pktgen_thread_fops; + pe->data = t; t->next = pktgen_threads; pktgen_threads = t; @@ -3049,8 +2989,7 @@ static int pktgen_remove_device(struct pktgen_thread *t, struct pktgen_dev *pkt_ /* Clean up proc file system */ - if (strlen(pkt_dev->fname)) - remove_proc_entry(pkt_dev->fname, NULL); + remove_proc_entry(pkt_dev->ifname, pg_proc_dir); if (pkt_dev->flows) vfree(pkt_dev->flows); @@ -3061,31 +3000,31 @@ static int pktgen_remove_device(struct pktgen_thread *t, struct pktgen_dev *pkt_ static int __init pg_init(void) { int cpu; - printk(version); + struct proc_dir_entry *pe; - module_fname[0] = 0; + printk(version); - create_proc_dir(); + pg_proc_dir = proc_mkdir(PG_PROC_DIR, proc_net); + if (!pg_proc_dir) + return -ENODEV; + pg_proc_dir->owner = THIS_MODULE; - sprintf(module_fname, "net/%s/pgctrl", PG_PROC_DIR); - module_proc_ent = create_proc_entry(module_fname, 0600, NULL); - if (!module_proc_ent) { - printk("pktgen: ERROR: cannot create %s procfs entry.\n", module_fname); + pe = create_proc_entry(PGCTRL, 0600, pg_proc_dir); + if (pe == NULL) { + printk("pktgen: ERROR: cannot create %s procfs entry.\n", PGCTRL); + proc_net_remove(PG_PROC_DIR); return -EINVAL; } - module_proc_ent->proc_fops = &pktgen_fops; - module_proc_ent->data = NULL; + pe->proc_fops = &pktgen_fops; + pe->data = NULL; /* Register us to receive netdevice events */ register_netdevice_notifier(&pktgen_notifier_block); - for (cpu = 0; cpu < NR_CPUS ; cpu++) { + for_each_online_cpu(cpu) { char buf[30]; - if (!cpu_online(cpu)) - continue; - sprintf(buf, "kpktgend_%i", cpu); pktgen_create_thread(buf, cpu); } @@ -3110,10 +3049,8 @@ static void __exit pg_cleanup(void) unregister_netdevice_notifier(&pktgen_notifier_block); /* Clean up proc file system */ - - remove_proc_entry(module_fname, NULL); - - remove_proc_dir(); + remove_proc_entry(PGCTRL, pg_proc_dir); + proc_net_remove(PG_PROC_DIR); } diff --git a/net/core/request_sock.c b/net/core/request_sock.c index bb55675f0685..b8203de5ff07 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -32,7 +32,6 @@ * Further increasing requires to change hash table size. */ int sysctl_max_syn_backlog = 256; -EXPORT_SYMBOL(sysctl_max_syn_backlog); int reqsk_queue_alloc(struct request_sock_queue *queue, const int nr_table_entries) @@ -53,6 +52,8 @@ int reqsk_queue_alloc(struct request_sock_queue *queue, get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd)); rwlock_init(&queue->syn_wait_lock); queue->rskq_accept_head = queue->rskq_accept_head = NULL; + queue->rskq_defer_accept = 0; + lopt->nr_table_entries = nr_table_entries; write_lock_bh(&queue->syn_wait_lock); queue->listen_opt = lopt; @@ -62,3 +63,28 @@ int reqsk_queue_alloc(struct request_sock_queue *queue, } EXPORT_SYMBOL(reqsk_queue_alloc); + +void reqsk_queue_destroy(struct request_sock_queue *queue) +{ + /* make all the listen_opt local to us */ + struct listen_sock *lopt = reqsk_queue_yank_listen_sk(queue); + + if (lopt->qlen != 0) { + int i; + + for (i = 0; i < lopt->nr_table_entries; i++) { + struct request_sock *req; + + while ((req = lopt->syn_table[i]) != NULL) { + lopt->syn_table[i] = req->dl_next; + lopt->qlen--; + reqsk_free(req); + } + } + } + + BUG_TRAP(lopt->qlen == 0); + kfree(lopt); +} + +EXPORT_SYMBOL(reqsk_queue_destroy); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 4b1bb30e6381..9bed7569ce3f 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -148,7 +148,7 @@ int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo) { int err = 0; - NETLINK_CB(skb).dst_groups = group; + NETLINK_CB(skb).dst_group = group; if (echo) atomic_inc(&skb->users); netlink_broadcast(rtnl, skb, pid, group, GFP_KERNEL); @@ -458,8 +458,8 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) kfree_skb(skb); return; } - NETLINK_CB(skb).dst_groups = RTMGRP_LINK; - netlink_broadcast(rtnl, skb, 0, RTMGRP_LINK, GFP_KERNEL); + NETLINK_CB(skb).dst_group = RTNLGRP_LINK; + netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_KERNEL); } static int rtnetlink_done(struct netlink_callback *cb) @@ -708,7 +708,8 @@ void __init rtnetlink_init(void) if (!rta_buf) panic("rtnetlink_init: cannot allocate rta_buf\n"); - rtnl = netlink_kernel_create(NETLINK_ROUTE, rtnetlink_rcv); + rtnl = netlink_kernel_create(NETLINK_ROUTE, RTNLGRP_MAX, rtnetlink_rcv, + THIS_MODULE); if (rtnl == NULL) panic("rtnetlink_init: cannot initialize rtnetlink\n"); netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 7eab867ede59..95501e40100e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -68,7 +68,8 @@ #include <asm/uaccess.h> #include <asm/system.h> -static kmem_cache_t *skbuff_head_cache; +static kmem_cache_t *skbuff_head_cache __read_mostly; +static kmem_cache_t *skbuff_fclone_cache __read_mostly; /* * Keep out-of-line to prevent kernel bloat. @@ -118,9 +119,11 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here) */ /** - * alloc_skb - allocate a network buffer + * __alloc_skb - allocate a network buffer * @size: size to allocate * @gfp_mask: allocation mask + * @fclone: allocate from fclone cache instead of head cache + * and allocate a cloned (child) skb * * Allocate a new &sk_buff. The returned buffer has no headroom and a * tail room of size bytes. The object has a reference count of one. @@ -129,14 +132,20 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here) * Buffers may only be allocated from interrupts using a @gfp_mask of * %GFP_ATOMIC. */ -struct sk_buff *alloc_skb(unsigned int size, unsigned int __nocast gfp_mask) +struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, + int fclone) { struct sk_buff *skb; u8 *data; /* Get the HEAD */ - skb = kmem_cache_alloc(skbuff_head_cache, - gfp_mask & ~__GFP_DMA); + if (fclone) + skb = kmem_cache_alloc(skbuff_fclone_cache, + gfp_mask & ~__GFP_DMA); + else + skb = kmem_cache_alloc(skbuff_head_cache, + gfp_mask & ~__GFP_DMA); + if (!skb) goto out; @@ -153,12 +162,22 @@ struct sk_buff *alloc_skb(unsigned int size, unsigned int __nocast gfp_mask) skb->data = data; skb->tail = data; skb->end = data + size; + if (fclone) { + struct sk_buff *child = skb + 1; + atomic_t *fclone_ref = (atomic_t *) (child + 1); + skb->fclone = SKB_FCLONE_ORIG; + atomic_set(fclone_ref, 1); + + child->fclone = SKB_FCLONE_UNAVAILABLE; + } atomic_set(&(skb_shinfo(skb)->dataref), 1); skb_shinfo(skb)->nr_frags = 0; skb_shinfo(skb)->tso_size = 0; skb_shinfo(skb)->tso_segs = 0; skb_shinfo(skb)->frag_list = NULL; + skb_shinfo(skb)->ufo_size = 0; + skb_shinfo(skb)->ip6_frag_id = 0; out: return skb; nodata: @@ -183,7 +202,7 @@ nodata: */ struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp, unsigned int size, - unsigned int __nocast gfp_mask) + gfp_t gfp_mask) { struct sk_buff *skb; u8 *data; @@ -266,8 +285,34 @@ void skb_release_data(struct sk_buff *skb) */ void kfree_skbmem(struct sk_buff *skb) { + struct sk_buff *other; + atomic_t *fclone_ref; + skb_release_data(skb); - kmem_cache_free(skbuff_head_cache, skb); + switch (skb->fclone) { + case SKB_FCLONE_UNAVAILABLE: + kmem_cache_free(skbuff_head_cache, skb); + break; + + case SKB_FCLONE_ORIG: + fclone_ref = (atomic_t *) (skb + 2); + if (atomic_dec_and_test(fclone_ref)) + kmem_cache_free(skbuff_fclone_cache, skb); + break; + + case SKB_FCLONE_CLONE: + fclone_ref = (atomic_t *) (skb + 1); + other = skb - 1; + + /* The clone portion is available for + * fast-cloning again. + */ + skb->fclone = SKB_FCLONE_UNAVAILABLE; + + if (atomic_dec_and_test(fclone_ref)) + kmem_cache_free(skbuff_fclone_cache, other); + break; + }; } /** @@ -281,8 +326,6 @@ void kfree_skbmem(struct sk_buff *skb) void __kfree_skb(struct sk_buff *skb) { - BUG_ON(skb->list != NULL); - dst_release(skb->dst); #ifdef CONFIG_XFRM secpath_put(skb->sp); @@ -302,7 +345,6 @@ void __kfree_skb(struct sk_buff *skb) skb->tc_index = 0; #ifdef CONFIG_NET_CLS_ACT skb->tc_verd = 0; - skb->tc_classid = 0; #endif #endif @@ -323,21 +365,29 @@ void __kfree_skb(struct sk_buff *skb) * %GFP_ATOMIC. */ -struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask) +struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) { - struct sk_buff *n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); - - if (!n) - return NULL; + struct sk_buff *n; + + n = skb + 1; + if (skb->fclone == SKB_FCLONE_ORIG && + n->fclone == SKB_FCLONE_UNAVAILABLE) { + atomic_t *fclone_ref = (atomic_t *) (n + 1); + n->fclone = SKB_FCLONE_CLONE; + atomic_inc(fclone_ref); + } else { + n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); + if (!n) + return NULL; + n->fclone = SKB_FCLONE_UNAVAILABLE; + } #define C(x) n->x = skb->x n->next = n->prev = NULL; - n->list = NULL; n->sk = NULL; - C(stamp); + C(tstamp); C(dev); - C(real_dev); C(h); C(nh); C(mac); @@ -361,18 +411,17 @@ struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask) n->destructor = NULL; #ifdef CONFIG_NETFILTER C(nfmark); - C(nfcache); C(nfct); nf_conntrack_get(skb->nfct); C(nfctinfo); +#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) + C(ipvs_property); +#endif #ifdef CONFIG_BRIDGE_NETFILTER C(nf_bridge); nf_bridge_get(skb->nf_bridge); #endif #endif /*CONFIG_NETFILTER*/ -#if defined(CONFIG_HIPPI) - C(private); -#endif #ifdef CONFIG_NET_SCHED C(tc_index); #ifdef CONFIG_NET_CLS_ACT @@ -380,7 +429,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask) n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd); n->tc_verd = CLR_TC_MUNGED(n->tc_verd); C(input_dev); - C(tc_classid); #endif #endif @@ -404,10 +452,8 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) */ unsigned long offset = new->data - old->data; - new->list = NULL; new->sk = NULL; new->dev = old->dev; - new->real_dev = old->real_dev; new->priority = old->priority; new->protocol = old->protocol; new->dst = dst_clone(old->dst); @@ -419,15 +465,18 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->mac.raw = old->mac.raw + offset; memcpy(new->cb, old->cb, sizeof(old->cb)); new->local_df = old->local_df; + new->fclone = SKB_FCLONE_UNAVAILABLE; new->pkt_type = old->pkt_type; - new->stamp = old->stamp; + new->tstamp = old->tstamp; new->destructor = NULL; #ifdef CONFIG_NETFILTER new->nfmark = old->nfmark; - new->nfcache = old->nfcache; new->nfct = old->nfct; nf_conntrack_get(old->nfct); new->nfctinfo = old->nfctinfo; +#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) + new->ipvs_property = old->ipvs_property; +#endif #ifdef CONFIG_BRIDGE_NETFILTER new->nf_bridge = old->nf_bridge; nf_bridge_get(old->nf_bridge); @@ -461,7 +510,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) * header is going to be modified. Use pskb_copy() instead. */ -struct sk_buff *skb_copy(const struct sk_buff *skb, unsigned int __nocast gfp_mask) +struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) { int headerlen = skb->data - skb->head; /* @@ -500,7 +549,7 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, unsigned int __nocast gfp_ma * The returned buffer has a reference count of 1. */ -struct sk_buff *pskb_copy(struct sk_buff *skb, unsigned int __nocast gfp_mask) +struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) { /* * Allocate the copy buffer @@ -559,7 +608,7 @@ out: */ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, - unsigned int __nocast gfp_mask) + gfp_t gfp_mask) { int i; u8 *data; @@ -650,7 +699,7 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom) */ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom, int newtailroom, - unsigned int __nocast gfp_mask) + gfp_t gfp_mask) { /* * Allocate the copy buffer @@ -1344,50 +1393,43 @@ void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) __skb_queue_tail(list, newsk); spin_unlock_irqrestore(&list->lock, flags); } + /** * skb_unlink - remove a buffer from a list * @skb: buffer to remove + * @list: list to use * - * Place a packet after a given packet in a list. The list locks are taken - * and this function is atomic with respect to other list locked calls + * Remove a packet from a list. The list locks are taken and this + * function is atomic with respect to other list locked calls * - * Works even without knowing the list it is sitting on, which can be - * handy at times. It also means that THE LIST MUST EXIST when you - * unlink. Thus a list must have its contents unlinked before it is - * destroyed. + * You must know what list the SKB is on. */ -void skb_unlink(struct sk_buff *skb) +void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) { - struct sk_buff_head *list = skb->list; - - if (list) { - unsigned long flags; + unsigned long flags; - spin_lock_irqsave(&list->lock, flags); - if (skb->list == list) - __skb_unlink(skb, skb->list); - spin_unlock_irqrestore(&list->lock, flags); - } + spin_lock_irqsave(&list->lock, flags); + __skb_unlink(skb, list); + spin_unlock_irqrestore(&list->lock, flags); } - /** * skb_append - append a buffer * @old: buffer to insert after * @newsk: buffer to insert + * @list: list to use * * Place a packet after a given packet in a list. The list locks are taken * and this function is atomic with respect to other list locked calls. * A buffer cannot be placed on two lists at the same time. */ - -void skb_append(struct sk_buff *old, struct sk_buff *newsk) +void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list) { unsigned long flags; - spin_lock_irqsave(&old->list->lock, flags); - __skb_append(old, newsk); - spin_unlock_irqrestore(&old->list->lock, flags); + spin_lock_irqsave(&list->lock, flags); + __skb_append(old, newsk, list); + spin_unlock_irqrestore(&list->lock, flags); } @@ -1395,19 +1437,21 @@ void skb_append(struct sk_buff *old, struct sk_buff *newsk) * skb_insert - insert a buffer * @old: buffer to insert before * @newsk: buffer to insert + * @list: list to use + * + * Place a packet before a given packet in a list. The list locks are + * taken and this function is atomic with respect to other list locked + * calls. * - * Place a packet before a given packet in a list. The list locks are taken - * and this function is atomic with respect to other list locked calls * A buffer cannot be placed on two lists at the same time. */ - -void skb_insert(struct sk_buff *old, struct sk_buff *newsk) +void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list) { unsigned long flags; - spin_lock_irqsave(&old->list->lock, flags); - __skb_insert(newsk, old->prev, old, old->list); - spin_unlock_irqrestore(&old->list->lock, flags); + spin_lock_irqsave(&list->lock, flags); + __skb_insert(newsk, old->prev, old, list); + spin_unlock_irqrestore(&list->lock, flags); } #if 0 @@ -1654,6 +1698,78 @@ unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, return textsearch_find(config, state); } +/** + * skb_append_datato_frags: - append the user data to a skb + * @sk: sock structure + * @skb: skb structure to be appened with user data. + * @getfrag: call back function to be used for getting the user data + * @from: pointer to user message iov + * @length: length of the iov message + * + * Description: This procedure append the user data in the fragment part + * of the skb if any page alloc fails user this procedure returns -ENOMEM + */ +int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, + int getfrag(void *from, char *to, int offset, + int len, int odd, struct sk_buff *skb), + void *from, int length) +{ + int frg_cnt = 0; + skb_frag_t *frag = NULL; + struct page *page = NULL; + int copy, left; + int offset = 0; + int ret; + + do { + /* Return error if we don't have space for new frag */ + frg_cnt = skb_shinfo(skb)->nr_frags; + if (frg_cnt >= MAX_SKB_FRAGS) + return -EFAULT; + + /* allocate a new page for next frag */ + page = alloc_pages(sk->sk_allocation, 0); + + /* If alloc_page fails just return failure and caller will + * free previous allocated pages by doing kfree_skb() + */ + if (page == NULL) + return -ENOMEM; + + /* initialize the next frag */ + sk->sk_sndmsg_page = page; + sk->sk_sndmsg_off = 0; + skb_fill_page_desc(skb, frg_cnt, page, 0, 0); + skb->truesize += PAGE_SIZE; + atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc); + + /* get the new initialized frag */ + frg_cnt = skb_shinfo(skb)->nr_frags; + frag = &skb_shinfo(skb)->frags[frg_cnt - 1]; + + /* copy the user data to page */ + left = PAGE_SIZE - frag->page_offset; + copy = (length > left)? left : length; + + ret = getfrag(from, (page_address(frag->page) + + frag->page_offset + frag->size), + offset, copy, 0, skb); + if (ret < 0) + return -EFAULT; + + /* copy was successful so update the size parameters */ + sk->sk_sndmsg_off += copy; + frag->size += copy; + skb->len += copy; + skb->data_len += copy; + offset += copy; + length -= copy; + + } while (length > 0); + + return 0; +} + void __init skb_init(void) { skbuff_head_cache = kmem_cache_create("skbuff_head_cache", @@ -1663,12 +1779,21 @@ void __init skb_init(void) NULL, NULL); if (!skbuff_head_cache) panic("cannot create skbuff cache"); + + skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache", + (2*sizeof(struct sk_buff)) + + sizeof(atomic_t), + 0, + SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (!skbuff_fclone_cache) + panic("cannot create skbuff cache"); } EXPORT_SYMBOL(___pskb_trim); EXPORT_SYMBOL(__kfree_skb); EXPORT_SYMBOL(__pskb_pull_tail); -EXPORT_SYMBOL(alloc_skb); +EXPORT_SYMBOL(__alloc_skb); EXPORT_SYMBOL(pskb_copy); EXPORT_SYMBOL(pskb_expand_head); EXPORT_SYMBOL(skb_checksum); @@ -1696,3 +1821,4 @@ EXPORT_SYMBOL(skb_prepare_seq_read); EXPORT_SYMBOL(skb_seq_read); EXPORT_SYMBOL(skb_abort_seq_read); EXPORT_SYMBOL(skb_find_text); +EXPORT_SYMBOL(skb_append_datato_frags); diff --git a/net/core/sock.c b/net/core/sock.c index 12f6d9a2a522..9602ceb3bac9 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -260,7 +260,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname, if (val > sysctl_wmem_max) val = sysctl_wmem_max; - +set_sndbuf: sk->sk_userlocks |= SOCK_SNDBUF_LOCK; if ((val * 2) < SOCK_MIN_SNDBUF) sk->sk_sndbuf = SOCK_MIN_SNDBUF; @@ -274,6 +274,13 @@ int sock_setsockopt(struct socket *sock, int level, int optname, sk->sk_write_space(sk); break; + case SO_SNDBUFFORCE: + if (!capable(CAP_NET_ADMIN)) { + ret = -EPERM; + break; + } + goto set_sndbuf; + case SO_RCVBUF: /* Don't error on this BSD doesn't and if you think about it this is right. Otherwise apps have to @@ -282,7 +289,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname, if (val > sysctl_rmem_max) val = sysctl_rmem_max; - +set_rcvbuf: sk->sk_userlocks |= SOCK_RCVBUF_LOCK; /* FIXME: is this lower bound the right one? */ if ((val * 2) < SOCK_MIN_RCVBUF) @@ -291,6 +298,13 @@ int sock_setsockopt(struct socket *sock, int level, int optname, sk->sk_rcvbuf = val * 2; break; + case SO_RCVBUFFORCE: + if (!capable(CAP_NET_ADMIN)) { + ret = -EPERM; + break; + } + goto set_rcvbuf; + case SO_KEEPALIVE: #ifdef CONFIG_INET if (sk->sk_protocol == IPPROTO_TCP) @@ -327,11 +341,11 @@ int sock_setsockopt(struct socket *sock, int level, int optname, sock_reset_flag(sk, SOCK_LINGER); else { #if (BITS_PER_LONG == 32) - if (ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ) + if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ) sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT; else #endif - sk->sk_lingertime = ling.l_linger * HZ; + sk->sk_lingertime = (unsigned int)ling.l_linger * HZ; sock_set_flag(sk, SOCK_LINGER); } break; @@ -623,7 +637,7 @@ lenout: * @prot: struct proto associated with this new sock instance * @zero_it: if we should zero the newly allocated sock */ -struct sock *sk_alloc(int family, unsigned int __nocast priority, +struct sock *sk_alloc(int family, gfp_t priority, struct proto *prot, int zero_it) { struct sock *sk = NULL; @@ -646,16 +660,20 @@ struct sock *sk_alloc(int family, unsigned int __nocast priority, sock_lock_init(sk); } - if (security_sk_alloc(sk, family, priority)) { - if (slab != NULL) - kmem_cache_free(slab, sk); - else - kfree(sk); - sk = NULL; - } else - __module_get(prot->owner); + if (security_sk_alloc(sk, family, priority)) + goto out_free; + + if (!try_module_get(prot->owner)) + goto out_free; } return sk; + +out_free: + if (slab != NULL) + kmem_cache_free(slab, sk); + else + kfree(sk); + return NULL; } void sk_free(struct sock *sk) @@ -686,6 +704,80 @@ void sk_free(struct sock *sk) module_put(owner); } +struct sock *sk_clone(const struct sock *sk, const gfp_t priority) +{ + struct sock *newsk = sk_alloc(sk->sk_family, priority, sk->sk_prot, 0); + + if (newsk != NULL) { + struct sk_filter *filter; + + memcpy(newsk, sk, sk->sk_prot->obj_size); + + /* SANITY */ + sk_node_init(&newsk->sk_node); + sock_lock_init(newsk); + bh_lock_sock(newsk); + + atomic_set(&newsk->sk_rmem_alloc, 0); + atomic_set(&newsk->sk_wmem_alloc, 0); + atomic_set(&newsk->sk_omem_alloc, 0); + skb_queue_head_init(&newsk->sk_receive_queue); + skb_queue_head_init(&newsk->sk_write_queue); + + rwlock_init(&newsk->sk_dst_lock); + rwlock_init(&newsk->sk_callback_lock); + + newsk->sk_dst_cache = NULL; + newsk->sk_wmem_queued = 0; + newsk->sk_forward_alloc = 0; + newsk->sk_send_head = NULL; + newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL; + newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK; + + sock_reset_flag(newsk, SOCK_DONE); + skb_queue_head_init(&newsk->sk_error_queue); + + filter = newsk->sk_filter; + if (filter != NULL) + sk_filter_charge(newsk, filter); + + if (unlikely(xfrm_sk_clone_policy(newsk))) { + /* It is still raw copy of parent, so invalidate + * destructor and make plain sk_free() */ + newsk->sk_destruct = NULL; + sk_free(newsk); + newsk = NULL; + goto out; + } + + newsk->sk_err = 0; + newsk->sk_priority = 0; + atomic_set(&newsk->sk_refcnt, 2); + + /* + * Increment the counter in the same struct proto as the master + * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that + * is the same as sk->sk_prot->socks, as this field was copied + * with memcpy). + * + * This _changes_ the previous behaviour, where + * tcp_create_openreq_child always was incrementing the + * equivalent to tcp_prot->socks (inet_sock_nr), so this have + * to be taken into account in all callers. -acme + */ + sk_refcnt_debug_inc(newsk); + newsk->sk_socket = NULL; + newsk->sk_sleep = NULL; + + if (newsk->sk_prot->sockets_allocated) + atomic_inc(newsk->sk_prot->sockets_allocated); + } +out: + return newsk; +} + +EXPORT_SYMBOL_GPL(sk_clone); + void __init sk_init(void) { if (num_physpages <= 4096) { @@ -753,7 +845,7 @@ unsigned long sock_i_ino(struct sock *sk) * Allocate a skb from the socket's send buffer. */ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, - unsigned int __nocast priority) + gfp_t priority) { if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) { struct sk_buff * skb = alloc_skb(size, priority); @@ -769,7 +861,7 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, * Allocate a skb from the socket's receive buffer. */ struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, - unsigned int __nocast priority) + gfp_t priority) { if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) { struct sk_buff *skb = alloc_skb(size, priority); @@ -784,7 +876,7 @@ struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, /* * Allocate a memory block from the socket's option memory buffer. */ -void *sock_kmalloc(struct sock *sk, int size, unsigned int __nocast priority) +void *sock_kmalloc(struct sock *sk, int size, gfp_t priority) { if ((unsigned)size <= sysctl_optmem_max && atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) { @@ -848,7 +940,7 @@ static struct sk_buff *sock_alloc_send_pskb(struct sock *sk, int noblock, int *errcode) { struct sk_buff *skb; - unsigned int gfp_mask; + gfp_t gfp_mask; long timeo; int err; @@ -1353,11 +1445,7 @@ void sk_common_release(struct sock *sk) xfrm_sk_free_policy(sk); -#ifdef INET_REFCNT_DEBUG - if (atomic_read(&sk->sk_refcnt) != 1) - printk(KERN_DEBUG "Destruction of the socket %p delayed, c=%d\n", - sk, atomic_read(&sk->sk_refcnt)); -#endif + sk_refcnt_debug_release(sk); sock_put(sk); } @@ -1368,7 +1456,8 @@ static LIST_HEAD(proto_list); int proto_register(struct proto *prot, int alloc_slab) { - char *request_sock_slab_name; + char *request_sock_slab_name = NULL; + char *timewait_sock_slab_name; int rc = -ENOBUFS; if (alloc_slab) { @@ -1399,6 +1488,23 @@ int proto_register(struct proto *prot, int alloc_slab) goto out_free_request_sock_slab_name; } } + + if (prot->twsk_obj_size) { + static const char mask[] = "tw_sock_%s"; + + timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL); + + if (timewait_sock_slab_name == NULL) + goto out_free_request_sock_slab; + + sprintf(timewait_sock_slab_name, mask, prot->name); + prot->twsk_slab = kmem_cache_create(timewait_sock_slab_name, + prot->twsk_obj_size, + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (prot->twsk_slab == NULL) + goto out_free_timewait_sock_slab_name; + } } write_lock(&proto_list_lock); @@ -1407,6 +1513,13 @@ int proto_register(struct proto *prot, int alloc_slab) rc = 0; out: return rc; +out_free_timewait_sock_slab_name: + kfree(timewait_sock_slab_name); +out_free_request_sock_slab: + if (prot->rsk_prot && prot->rsk_prot->slab) { + kmem_cache_destroy(prot->rsk_prot->slab); + prot->rsk_prot->slab = NULL; + } out_free_request_sock_slab_name: kfree(request_sock_slab_name); out_free_sock_slab: @@ -1420,6 +1533,8 @@ EXPORT_SYMBOL(proto_register); void proto_unregister(struct proto *prot) { write_lock(&proto_list_lock); + list_del(&prot->node); + write_unlock(&proto_list_lock); if (prot->slab != NULL) { kmem_cache_destroy(prot->slab); @@ -1434,8 +1549,13 @@ void proto_unregister(struct proto *prot) prot->rsk_prot->slab = NULL; } - list_del(&prot->node); - write_unlock(&proto_list_lock); + if (prot->twsk_slab != NULL) { + const char *name = kmem_cache_name(prot->twsk_slab); + + kmem_cache_destroy(prot->twsk_slab); + kfree(name); + prot->twsk_slab = NULL; + } } EXPORT_SYMBOL(proto_unregister); @@ -1602,8 +1722,8 @@ EXPORT_SYMBOL(sock_wfree); EXPORT_SYMBOL(sock_wmalloc); EXPORT_SYMBOL(sock_i_uid); EXPORT_SYMBOL(sock_i_ino); -#ifdef CONFIG_SYSCTL EXPORT_SYMBOL(sysctl_optmem_max); +#ifdef CONFIG_SYSCTL EXPORT_SYMBOL(sysctl_rmem_max); EXPORT_SYMBOL(sysctl_wmem_max); #endif diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 8f817ad9f546..2f278c8e4743 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -9,23 +9,18 @@ #include <linux/sysctl.h> #include <linux/config.h> #include <linux/module.h> +#include <linux/socket.h> +#include <net/sock.h> #ifdef CONFIG_SYSCTL extern int netdev_max_backlog; -extern int netdev_budget; extern int weight_p; -extern int net_msg_cost; -extern int net_msg_burst; extern __u32 sysctl_wmem_max; extern __u32 sysctl_rmem_max; -extern __u32 sysctl_wmem_default; -extern __u32 sysctl_rmem_default; extern int sysctl_core_destroy_delay; -extern int sysctl_optmem_max; -extern int sysctl_somaxconn; #ifdef CONFIG_NET_DIVERT extern char sysctl_divert_version[]; diff --git a/net/core/utils.c b/net/core/utils.c index 88eb8b68e26b..7b5970fc9e40 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -16,7 +16,9 @@ #include <linux/module.h> #include <linux/jiffies.h> #include <linux/kernel.h> +#include <linux/inet.h> #include <linux/mm.h> +#include <linux/net.h> #include <linux/string.h> #include <linux/types.h> #include <linux/random.h> diff --git a/net/core/wireless.c b/net/core/wireless.c index 3ff5639c0b78..271ddb35b0b2 100644 --- a/net/core/wireless.c +++ b/net/core/wireless.c @@ -58,6 +58,13 @@ * o Add wmb() in iw_handler_set_spy() for non-coherent archs/cpus * Based on patch from Pavel Roskin <proski@gnu.org> : * o Fix kernel data leak to user space in private handler handling + * + * v7 - 18.3.05 - Jean II + * o Remove (struct iw_point *)->pointer from events and streams + * o Remove spy_offset from struct iw_handler_def + * o Start deprecating dev->get_wireless_stats, output a warning + * o If IW_QUAL_DBM is set, show dBm values in /proc/net/wireless + * o Don't loose INVALID/DBM flags when clearing UPDATED flags (iwstats) */ /***************************** INCLUDES *****************************/ @@ -446,10 +453,19 @@ static inline struct iw_statistics *get_wireless_stats(struct net_device *dev) (dev->wireless_handlers->get_wireless_stats != NULL)) return dev->wireless_handlers->get_wireless_stats(dev); - /* Old location, will be phased out in next WE */ - return (dev->get_wireless_stats ? - dev->get_wireless_stats(dev) : - (struct iw_statistics *) NULL); + /* Old location, field to be removed in next WE */ + if(dev->get_wireless_stats) { + static int printed_message; + + if (!printed_message++) + printk(KERN_DEBUG "%s (WE) : Driver using old /proc/net/wireless support, please fix driver !\n", + dev->name); + + return dev->get_wireless_stats(dev); + } + + /* Not found */ + return (struct iw_statistics *) NULL; } /* ---------------------------------------------------------------- */ @@ -541,16 +557,18 @@ static __inline__ void wireless_seq_printf_stats(struct seq_file *seq, dev->name, stats->status, stats->qual.qual, stats->qual.updated & IW_QUAL_QUAL_UPDATED ? '.' : ' ', - ((__u8) stats->qual.level), + ((__s32) stats->qual.level) - + ((stats->qual.updated & IW_QUAL_DBM) ? 0x100 : 0), stats->qual.updated & IW_QUAL_LEVEL_UPDATED ? '.' : ' ', - ((__u8) stats->qual.noise), + ((__s32) stats->qual.noise) - + ((stats->qual.updated & IW_QUAL_DBM) ? 0x100 : 0), stats->qual.updated & IW_QUAL_NOISE_UPDATED ? '.' : ' ', stats->discard.nwid, stats->discard.code, stats->discard.fragment, stats->discard.retries, stats->discard.misc, stats->miss.beacon); - stats->qual.updated = 0; + stats->qual.updated &= ~IW_QUAL_ALL_UPDATED; } } @@ -571,10 +589,6 @@ static int wireless_seq_show(struct seq_file *seq, void *v) return 0; } -extern void *dev_seq_start(struct seq_file *seq, loff_t *pos); -extern void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos); -extern void dev_seq_stop(struct seq_file *seq, void *v); - static struct seq_operations wireless_seq_ops = { .start = dev_seq_start, .next = dev_seq_next, @@ -597,6 +611,7 @@ static struct file_operations wireless_seq_fops = { int __init wireless_proc_init(void) { + /* Create /proc/net/wireless entry */ if (!proc_net_fops_create("wireless", S_IRUGO, &wireless_seq_fops)) return -ENOMEM; @@ -631,9 +646,9 @@ static inline int dev_iwstats(struct net_device *dev, struct ifreq *ifr) sizeof(struct iw_statistics))) return -EFAULT; - /* Check if we need to clear the update flag */ + /* Check if we need to clear the updated flag */ if(wrq->u.data.flags != 0) - stats->qual.updated = 0; + stats->qual.updated &= ~IW_QUAL_ALL_UPDATED; return 0; } else return -EOPNOTSUPP; @@ -1144,8 +1159,8 @@ static inline void rtmsg_iwinfo(struct net_device * dev, kfree_skb(skb); return; } - NETLINK_CB(skb).dst_groups = RTMGRP_LINK; - netlink_broadcast(rtnl, skb, 0, RTMGRP_LINK, GFP_ATOMIC); + NETLINK_CB(skb).dst_group = RTNLGRP_LINK; + netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_ATOMIC); } #endif /* WE_EVENT_NETLINK */ @@ -1165,10 +1180,11 @@ void wireless_send_event(struct net_device * dev, struct iw_event *event; /* Mallocated whole event */ int event_len; /* Its size */ int hdr_len; /* Size of the event header */ + int wrqu_off = 0; /* Offset in wrqu */ /* Don't "optimise" the following variable, it will crash */ unsigned cmd_index; /* *MUST* be unsigned */ - /* Get the description of the IOCTL */ + /* Get the description of the Event */ if(cmd <= SIOCIWLAST) { cmd_index = cmd - SIOCIWFIRST; if(cmd_index < standard_ioctl_num) @@ -1211,6 +1227,8 @@ void wireless_send_event(struct net_device * dev, /* Calculate extra_len - extra is NULL for restricted events */ if(extra != NULL) extra_len = wrqu->data.length * descr->token_size; + /* Always at an offset in wrqu */ + wrqu_off = IW_EV_POINT_OFF; #ifdef WE_EVENT_DEBUG printk(KERN_DEBUG "%s (WE) : Event 0x%04X, tokens %d, extra_len %d\n", dev->name, cmd, wrqu->data.length, extra_len); #endif /* WE_EVENT_DEBUG */ @@ -1221,7 +1239,7 @@ void wireless_send_event(struct net_device * dev, event_len = hdr_len + extra_len; #ifdef WE_EVENT_DEBUG - printk(KERN_DEBUG "%s (WE) : Event 0x%04X, hdr_len %d, event_len %d\n", dev->name, cmd, hdr_len, event_len); + printk(KERN_DEBUG "%s (WE) : Event 0x%04X, hdr_len %d, wrqu_off %d, event_len %d\n", dev->name, cmd, hdr_len, wrqu_off, event_len); #endif /* WE_EVENT_DEBUG */ /* Create temporary buffer to hold the event */ @@ -1232,7 +1250,7 @@ void wireless_send_event(struct net_device * dev, /* Fill event */ event->len = event_len; event->cmd = cmd; - memcpy(&event->u, wrqu, hdr_len - IW_EV_LCP_LEN); + memcpy(&event->u, ((char *) wrqu) + wrqu_off, hdr_len - IW_EV_LCP_LEN); if(extra != NULL) memcpy(((char *) event) + hdr_len, extra, extra_len); @@ -1253,7 +1271,7 @@ void wireless_send_event(struct net_device * dev, * Now, the driver can delegate this task to Wireless Extensions. * It needs to use those standard spy iw_handler in struct iw_handler_def, * push data to us via wireless_spy_update() and include struct iw_spy_data - * in its private part (and advertise it in iw_handler_def->spy_offset). + * in its private part (and export it in net_device->wireless_data->spy_data). * One of the main advantage of centralising spy support here is that * it becomes much easier to improve and extend it without having to touch * the drivers. One example is the addition of the Spy-Threshold events. @@ -1270,10 +1288,7 @@ static inline struct iw_spy_data * get_spydata(struct net_device *dev) /* This is the new way */ if(dev->wireless_data) return(dev->wireless_data->spy_data); - - /* This is the old way. Doesn't work for multi-headed drivers. - * It will be removed in the next version of WE. */ - return (dev->priv + dev->wireless_handlers->spy_offset); + return NULL; } /*------------------------------------------------------------------*/ @@ -1288,10 +1303,6 @@ int iw_handler_set_spy(struct net_device * dev, struct iw_spy_data * spydata = get_spydata(dev); struct sockaddr * address = (struct sockaddr *) extra; - if(!dev->wireless_data) - /* Help user know that driver needs updating */ - printk(KERN_DEBUG "%s (WE) : Driver using old/buggy spy support, please fix driver !\n", - dev->name); /* Make sure driver is not buggy or using the old API */ if(!spydata) return -EOPNOTSUPP; @@ -1322,7 +1333,7 @@ int iw_handler_set_spy(struct net_device * dev, sizeof(struct iw_quality) * IW_MAX_SPY); #ifdef WE_SPY_DEBUG - printk(KERN_DEBUG "iw_handler_set_spy() : offset %ld, spydata %p, num %d\n", dev->wireless_handlers->spy_offset, spydata, wrqu->data.length); + printk(KERN_DEBUG "iw_handler_set_spy() : wireless_data %p, spydata %p, num %d\n", dev->wireless_data, spydata, wrqu->data.length); for (i = 0; i < wrqu->data.length; i++) printk(KERN_DEBUG "%02X:%02X:%02X:%02X:%02X:%02X \n", @@ -1375,7 +1386,7 @@ int iw_handler_get_spy(struct net_device * dev, sizeof(struct iw_quality) * spydata->spy_number); /* Reset updated flags. */ for(i = 0; i < spydata->spy_number; i++) - spydata->spy_stat[i].updated = 0; + spydata->spy_stat[i].updated &= ~IW_QUAL_ALL_UPDATED; return 0; } @@ -1490,7 +1501,7 @@ void wireless_spy_update(struct net_device * dev, return; #ifdef WE_SPY_DEBUG - printk(KERN_DEBUG "wireless_spy_update() : offset %ld, spydata %p, address %02X:%02X:%02X:%02X:%02X:%02X\n", dev->wireless_handlers->spy_offset, spydata, address[0], address[1], address[2], address[3], address[4], address[5]); + printk(KERN_DEBUG "wireless_spy_update() : wireless_data %p, spydata %p, address %02X:%02X:%02X:%02X:%02X:%02X\n", dev->wireless_data, spydata, address[0], address[1], address[2], address[3], address[4], address[5]); #endif /* WE_SPY_DEBUG */ /* Update all records that match */ |