summaryrefslogtreecommitdiff
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/Makefile1
-rw-r--r--net/core/datagram.c87
-rw-r--r--net/core/dev.c108
-rw-r--r--net/core/dst.c3
-rw-r--r--net/core/ethtool.c102
-rw-r--r--net/core/filter.c6
-rw-r--r--net/core/flow.c2
-rw-r--r--net/core/neighbour.c91
-rw-r--r--net/core/netfilter.c648
-rw-r--r--net/core/netpoll.c69
-rw-r--r--net/core/pktgen.c531
-rw-r--r--net/core/request_sock.c28
-rw-r--r--net/core/rtnetlink.c9
-rw-r--r--net/core/skbuff.c248
-rw-r--r--net/core/sock.c172
-rw-r--r--net/core/sysctl_net_core.c9
-rw-r--r--net/core/utils.c2
-rw-r--r--net/core/wireless.c71
18 files changed, 930 insertions, 1257 deletions
diff --git a/net/core/Makefile b/net/core/Makefile
index f5f5e58943e8..630da0f0579e 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -12,7 +12,6 @@ obj-y += dev.o ethtool.o dev_mcast.o dst.o \
obj-$(CONFIG_XFRM) += flow.o
obj-$(CONFIG_SYSFS) += net-sysfs.o
-obj-$(CONFIG_NETFILTER) += netfilter.o
obj-$(CONFIG_NET_DIVERT) += dv.o
obj-$(CONFIG_NET_PKTGEN) += pktgen.o
obj-$(CONFIG_NET_RADIO) += wireless.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
index fcee054b6f75..d219435d086c 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -43,7 +43,6 @@
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/inet.h>
-#include <linux/tcp.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/poll.h>
@@ -51,9 +50,10 @@
#include <net/protocol.h>
#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <net/checksum.h>
+#include <net/checksum.h>
+#include <net/sock.h>
+#include <net/tcp_states.h>
/*
* Is a socket 'connection oriented' ?
@@ -211,74 +211,49 @@ void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
struct iovec *to, int len)
{
- int start = skb_headlen(skb);
- int i, copy = start - offset;
+ int i, err, fraglen, end = 0;
+ struct sk_buff *next = skb_shinfo(skb)->frag_list;
- /* Copy header. */
- if (copy > 0) {
- if (copy > len)
- copy = len;
- if (memcpy_toiovec(to, skb->data + offset, copy))
- goto fault;
- if ((len -= copy) == 0)
- return 0;
- offset += copy;
- }
+ if (!len)
+ return 0;
- /* Copy paged appendix. Hmm... why does this look so complicated? */
- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
- int end;
+next_skb:
+ fraglen = skb_headlen(skb);
+ i = -1;
- BUG_TRAP(start <= offset + len);
+ while (1) {
+ int start = end;
- end = start + skb_shinfo(skb)->frags[i].size;
- if ((copy = end - offset) > 0) {
- int err;
- u8 *vaddr;
- skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
- struct page *page = frag->page;
+ if ((end += fraglen) > offset) {
+ int copy = end - offset, o = offset - start;
if (copy > len)
copy = len;
- vaddr = kmap(page);
- err = memcpy_toiovec(to, vaddr + frag->page_offset +
- offset - start, copy);
- kunmap(page);
+ if (i == -1)
+ err = memcpy_toiovec(to, skb->data + o, copy);
+ else {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+ struct page *page = frag->page;
+ void *p = kmap(page) + frag->page_offset + o;
+ err = memcpy_toiovec(to, p, copy);
+ kunmap(page);
+ }
if (err)
goto fault;
if (!(len -= copy))
return 0;
offset += copy;
}
- start = end;
+ if (++i >= skb_shinfo(skb)->nr_frags)
+ break;
+ fraglen = skb_shinfo(skb)->frags[i].size;
}
-
- if (skb_shinfo(skb)->frag_list) {
- struct sk_buff *list = skb_shinfo(skb)->frag_list;
-
- for (; list; list = list->next) {
- int end;
-
- BUG_TRAP(start <= offset + len);
-
- end = start + list->len;
- if ((copy = end - offset) > 0) {
- if (copy > len)
- copy = len;
- if (skb_copy_datagram_iovec(list,
- offset - start,
- to, copy))
- goto fault;
- if ((len -= copy) == 0)
- return 0;
- offset += copy;
- }
- start = end;
- }
+ if (next) {
+ skb = next;
+ BUG_ON(skb_shinfo(skb)->frag_list);
+ next = skb->next;
+ goto next_skb;
}
- if (!len)
- return 0;
-
fault:
return -EFAULT;
}
diff --git a/net/core/dev.c b/net/core/dev.c
index 52a3bf7ae177..8d1541595277 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -267,10 +267,6 @@ void dev_add_pack(struct packet_type *pt)
spin_unlock_bh(&ptype_lock);
}
-extern void linkwatch_run_queue(void);
-
-
-
/**
* __dev_remove_pack - remove packet handler
* @pt: packet type declaration
@@ -578,6 +574,8 @@ struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
return dev;
}
+EXPORT_SYMBOL(dev_getbyhwaddr);
+
struct net_device *dev_getfirstbyhwtype(unsigned short type)
{
struct net_device *dev;
@@ -1009,13 +1007,22 @@ void net_disable_timestamp(void)
atomic_dec(&netstamp_needed);
}
-static inline void net_timestamp(struct timeval *stamp)
+void __net_timestamp(struct sk_buff *skb)
+{
+ struct timeval tv;
+
+ do_gettimeofday(&tv);
+ skb_set_timestamp(skb, &tv);
+}
+EXPORT_SYMBOL(__net_timestamp);
+
+static inline void net_timestamp(struct sk_buff *skb)
{
if (atomic_read(&netstamp_needed))
- do_gettimeofday(stamp);
+ __net_timestamp(skb);
else {
- stamp->tv_sec = 0;
- stamp->tv_usec = 0;
+ skb->tstamp.off_sec = 0;
+ skb->tstamp.off_usec = 0;
}
}
@@ -1027,7 +1034,8 @@ static inline void net_timestamp(struct timeval *stamp)
void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
{
struct packet_type *ptype;
- net_timestamp(&skb->stamp);
+
+ net_timestamp(skb);
rcu_read_lock();
list_for_each_entry_rcu(ptype, &ptype_all, list) {
@@ -1058,7 +1066,7 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
skb2->h.raw = skb2->nh.raw;
skb2->pkt_type = PACKET_OUTGOING;
- ptype->func(skb2, skb->dev, ptype);
+ ptype->func(skb2, skb->dev, ptype, skb->dev);
}
}
rcu_read_unlock();
@@ -1123,10 +1131,8 @@ static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
#define illegal_highdma(dev, skb) (0)
#endif
-extern void skb_release_data(struct sk_buff *);
-
/* Keep head the same: replace data */
-int __skb_linearize(struct sk_buff *skb, unsigned int __nocast gfp_mask)
+int __skb_linearize(struct sk_buff *skb, gfp_t gfp_mask)
{
unsigned int size;
u8 *data;
@@ -1253,6 +1259,8 @@ int dev_queue_xmit(struct sk_buff *skb)
if (skb_checksum_help(skb, 0))
goto out_kfree_skb;
+ spin_lock_prefetch(&dev->queue_lock);
+
/* Disable soft irqs for various locks below. Also
* stops preemption for RCU.
*/
@@ -1379,8 +1387,8 @@ int netif_rx(struct sk_buff *skb)
if (netpoll_rx(skb))
return NET_RX_DROP;
- if (!skb->stamp.tv_sec)
- net_timestamp(&skb->stamp);
+ if (!skb->tstamp.off_sec)
+ net_timestamp(skb);
/*
* The code is rearranged so that the path is the most
@@ -1425,14 +1433,14 @@ int netif_rx_ni(struct sk_buff *skb)
EXPORT_SYMBOL(netif_rx_ni);
-static __inline__ void skb_bond(struct sk_buff *skb)
+static inline struct net_device *skb_bond(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
- if (dev->master) {
- skb->real_dev = skb->dev;
+ if (dev->master)
skb->dev = dev->master;
- }
+
+ return dev;
}
static void net_tx_action(struct softirq_action *h)
@@ -1482,10 +1490,11 @@ static void net_tx_action(struct softirq_action *h)
}
static __inline__ int deliver_skb(struct sk_buff *skb,
- struct packet_type *pt_prev)
+ struct packet_type *pt_prev,
+ struct net_device *orig_dev)
{
atomic_inc(&skb->users);
- return pt_prev->func(skb, skb->dev, pt_prev);
+ return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
}
#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
@@ -1496,7 +1505,8 @@ struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);
static __inline__ int handle_bridge(struct sk_buff **pskb,
- struct packet_type **pt_prev, int *ret)
+ struct packet_type **pt_prev, int *ret,
+ struct net_device *orig_dev)
{
struct net_bridge_port *port;
@@ -1505,14 +1515,14 @@ static __inline__ int handle_bridge(struct sk_buff **pskb,
return 0;
if (*pt_prev) {
- *ret = deliver_skb(*pskb, *pt_prev);
+ *ret = deliver_skb(*pskb, *pt_prev, orig_dev);
*pt_prev = NULL;
}
return br_handle_frame_hook(port, pskb);
}
#else
-#define handle_bridge(skb, pt_prev, ret) (0)
+#define handle_bridge(skb, pt_prev, ret, orig_dev) (0)
#endif
#ifdef CONFIG_NET_CLS_ACT
@@ -1534,17 +1544,14 @@ static int ing_filter(struct sk_buff *skb)
__u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
if (MAX_RED_LOOP < ttl++) {
printk("Redir loop detected Dropping packet (%s->%s)\n",
- skb->input_dev?skb->input_dev->name:"??",skb->dev->name);
+ skb->input_dev->name, skb->dev->name);
return TC_ACT_SHOT;
}
skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);
skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);
- if (NULL == skb->input_dev) {
- skb->input_dev = skb->dev;
- printk("ing_filter: fixed %s out %s\n",skb->input_dev->name,skb->dev->name);
- }
+
spin_lock(&dev->ingress_lock);
if ((q = dev->qdisc_ingress) != NULL)
result = q->enqueue(skb, q);
@@ -1559,6 +1566,7 @@ static int ing_filter(struct sk_buff *skb)
int netif_receive_skb(struct sk_buff *skb)
{
struct packet_type *ptype, *pt_prev;
+ struct net_device *orig_dev;
int ret = NET_RX_DROP;
unsigned short type;
@@ -1566,10 +1574,13 @@ int netif_receive_skb(struct sk_buff *skb)
if (skb->dev->poll && netpoll_rx(skb))
return NET_RX_DROP;
- if (!skb->stamp.tv_sec)
- net_timestamp(&skb->stamp);
+ if (!skb->tstamp.off_sec)
+ net_timestamp(skb);
+
+ if (!skb->input_dev)
+ skb->input_dev = skb->dev;
- skb_bond(skb);
+ orig_dev = skb_bond(skb);
__get_cpu_var(netdev_rx_stat).total++;
@@ -1590,14 +1601,14 @@ int netif_receive_skb(struct sk_buff *skb)
list_for_each_entry_rcu(ptype, &ptype_all, list) {
if (!ptype->dev || ptype->dev == skb->dev) {
if (pt_prev)
- ret = deliver_skb(skb, pt_prev);
+ ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = ptype;
}
}
#ifdef CONFIG_NET_CLS_ACT
if (pt_prev) {
- ret = deliver_skb(skb, pt_prev);
+ ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = NULL; /* noone else should process this after*/
} else {
skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
@@ -1616,7 +1627,7 @@ ncls:
handle_diverter(skb);
- if (handle_bridge(&skb, &pt_prev, &ret))
+ if (handle_bridge(&skb, &pt_prev, &ret, orig_dev))
goto out;
type = skb->protocol;
@@ -1624,13 +1635,13 @@ ncls:
if (ptype->type == type &&
(!ptype->dev || ptype->dev == skb->dev)) {
if (pt_prev)
- ret = deliver_skb(skb, pt_prev);
+ ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = ptype;
}
}
if (pt_prev) {
- ret = pt_prev->func(skb, skb->dev, pt_prev);
+ ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
} else {
kfree_skb(skb);
/* Jamal, now you will not able to escape explaining
@@ -1696,7 +1707,8 @@ static void net_rx_action(struct softirq_action *h)
struct softnet_data *queue = &__get_cpu_var(softnet_data);
unsigned long start_time = jiffies;
int budget = netdev_budget;
-
+ void *have;
+
local_irq_disable();
while (!list_empty(&queue->poll_list)) {
@@ -1709,10 +1721,10 @@ static void net_rx_action(struct softirq_action *h)
dev = list_entry(queue->poll_list.next,
struct net_device, poll_list);
- netpoll_poll_lock(dev);
+ have = netpoll_poll_lock(dev);
if (dev->quota <= 0 || dev->poll(dev, &budget)) {
- netpoll_poll_unlock(dev);
+ netpoll_poll_unlock(have);
local_irq_disable();
list_del(&dev->poll_list);
list_add_tail(&dev->poll_list, &queue->poll_list);
@@ -1721,7 +1733,7 @@ static void net_rx_action(struct softirq_action *h)
else
dev->quota = dev->weight;
} else {
- netpoll_poll_unlock(dev);
+ netpoll_poll_unlock(have);
dev_put(dev);
local_irq_disable();
}
@@ -2705,6 +2717,20 @@ int register_netdevice(struct net_device *dev)
dev->name);
dev->features &= ~NETIF_F_TSO;
}
+ if (dev->features & NETIF_F_UFO) {
+ if (!(dev->features & NETIF_F_HW_CSUM)) {
+ printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
+ "NETIF_F_HW_CSUM feature.\n",
+ dev->name);
+ dev->features &= ~NETIF_F_UFO;
+ }
+ if (!(dev->features & NETIF_F_SG)) {
+ printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
+ "NETIF_F_SG feature.\n",
+ dev->name);
+ dev->features &= ~NETIF_F_UFO;
+ }
+ }
/*
* nil rebuild_header routine,
diff --git a/net/core/dst.c b/net/core/dst.c
index 334790da9f16..470c05bc4cb2 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -39,8 +39,7 @@ static unsigned long dst_gc_timer_inc = DST_GC_MAX;
static void dst_run_gc(unsigned long);
static void ___dst_free(struct dst_entry * dst);
-static struct timer_list dst_gc_timer =
- TIMER_INITIALIZER(dst_run_gc, DST_GC_MIN, 0);
+static DEFINE_TIMER(dst_gc_timer, dst_run_gc, DST_GC_MIN, 0);
static void dst_run_gc(unsigned long dummy)
{
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index a3eeb88e1c81..0350586e9195 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -81,6 +81,32 @@ int ethtool_op_set_tso(struct net_device *dev, u32 data)
return 0;
}
+int ethtool_op_get_perm_addr(struct net_device *dev, struct ethtool_perm_addr *addr, u8 *data)
+{
+ unsigned char len = dev->addr_len;
+ if ( addr->size < len )
+ return -ETOOSMALL;
+
+ addr->size = len;
+ memcpy(data, dev->perm_addr, len);
+ return 0;
+}
+
+
+u32 ethtool_op_get_ufo(struct net_device *dev)
+{
+ return (dev->features & NETIF_F_UFO) != 0;
+}
+
+int ethtool_op_set_ufo(struct net_device *dev, u32 data)
+{
+ if (data)
+ dev->features |= NETIF_F_UFO;
+ else
+ dev->features &= ~NETIF_F_UFO;
+ return 0;
+}
+
/* Handlers for each ethtool command */
static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
@@ -471,6 +497,11 @@ static int __ethtool_set_sg(struct net_device *dev, u32 data)
return err;
}
+ if (!data && dev->ethtool_ops->set_ufo) {
+ err = dev->ethtool_ops->set_ufo(dev, 0);
+ if (err)
+ return err;
+ }
return dev->ethtool_ops->set_sg(dev, data);
}
@@ -557,6 +588,32 @@ static int ethtool_set_tso(struct net_device *dev, char __user *useraddr)
return dev->ethtool_ops->set_tso(dev, edata.data);
}
+static int ethtool_get_ufo(struct net_device *dev, char __user *useraddr)
+{
+ struct ethtool_value edata = { ETHTOOL_GTSO };
+
+ if (!dev->ethtool_ops->get_ufo)
+ return -EOPNOTSUPP;
+ edata.data = dev->ethtool_ops->get_ufo(dev);
+ if (copy_to_user(useraddr, &edata, sizeof(edata)))
+ return -EFAULT;
+ return 0;
+}
+static int ethtool_set_ufo(struct net_device *dev, char __user *useraddr)
+{
+ struct ethtool_value edata;
+
+ if (!dev->ethtool_ops->set_ufo)
+ return -EOPNOTSUPP;
+ if (copy_from_user(&edata, useraddr, sizeof(edata)))
+ return -EFAULT;
+ if (edata.data && !(dev->features & NETIF_F_SG))
+ return -EINVAL;
+ if (edata.data && !(dev->features & NETIF_F_HW_CSUM))
+ return -EINVAL;
+ return dev->ethtool_ops->set_ufo(dev, edata.data);
+}
+
static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
{
struct ethtool_test test;
@@ -683,6 +740,39 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr)
return ret;
}
+static int ethtool_get_perm_addr(struct net_device *dev, void __user *useraddr)
+{
+ struct ethtool_perm_addr epaddr;
+ u8 *data;
+ int ret;
+
+ if (!dev->ethtool_ops->get_perm_addr)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&epaddr,useraddr,sizeof(epaddr)))
+ return -EFAULT;
+
+ data = kmalloc(epaddr.size, GFP_USER);
+ if (!data)
+ return -ENOMEM;
+
+ ret = dev->ethtool_ops->get_perm_addr(dev,&epaddr,data);
+ if (ret)
+ return ret;
+
+ ret = -EFAULT;
+ if (copy_to_user(useraddr, &epaddr, sizeof(epaddr)))
+ goto out;
+ useraddr += sizeof(epaddr);
+ if (copy_to_user(useraddr, data, epaddr.size))
+ goto out;
+ ret = 0;
+
+ out:
+ kfree(data);
+ return ret;
+}
+
/* The main entry point in this file. Called from net/core/dev.c */
int dev_ethtool(struct ifreq *ifr)
@@ -806,6 +896,15 @@ int dev_ethtool(struct ifreq *ifr)
case ETHTOOL_GSTATS:
rc = ethtool_get_stats(dev, useraddr);
break;
+ case ETHTOOL_GPERMADDR:
+ rc = ethtool_get_perm_addr(dev, useraddr);
+ break;
+ case ETHTOOL_GUFO:
+ rc = ethtool_get_ufo(dev, useraddr);
+ break;
+ case ETHTOOL_SUFO:
+ rc = ethtool_set_ufo(dev, useraddr);
+ break;
default:
rc = -EOPNOTSUPP;
}
@@ -826,6 +925,7 @@ int dev_ethtool(struct ifreq *ifr)
EXPORT_SYMBOL(dev_ethtool);
EXPORT_SYMBOL(ethtool_op_get_link);
+EXPORT_SYMBOL_GPL(ethtool_op_get_perm_addr);
EXPORT_SYMBOL(ethtool_op_get_sg);
EXPORT_SYMBOL(ethtool_op_get_tso);
EXPORT_SYMBOL(ethtool_op_get_tx_csum);
@@ -833,3 +933,5 @@ EXPORT_SYMBOL(ethtool_op_set_sg);
EXPORT_SYMBOL(ethtool_op_set_tso);
EXPORT_SYMBOL(ethtool_op_set_tx_csum);
EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum);
+EXPORT_SYMBOL(ethtool_op_set_ufo);
+EXPORT_SYMBOL(ethtool_op_get_ufo);
diff --git a/net/core/filter.c b/net/core/filter.c
index cd91a24f9720..079c2edff789 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -182,7 +182,7 @@ int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
A = ntohl(*(u32 *)ptr);
continue;
}
- return 0;
+ break;
case BPF_LD|BPF_H|BPF_ABS:
k = fentry->k;
load_h:
@@ -191,7 +191,7 @@ int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
A = ntohs(*(u16 *)ptr);
continue;
}
- return 0;
+ break;
case BPF_LD|BPF_B|BPF_ABS:
k = fentry->k;
load_b:
@@ -200,7 +200,7 @@ load_b:
A = *(u8 *)ptr;
continue;
}
- return 0;
+ break;
case BPF_LD|BPF_W|BPF_LEN:
A = skb->len;
continue;
diff --git a/net/core/flow.c b/net/core/flow.c
index f289570b15a3..7e95b39de9fd 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -42,7 +42,7 @@ static DEFINE_PER_CPU(struct flow_cache_entry **, flow_tables) = { NULL };
#define flow_table(cpu) (per_cpu(flow_tables, cpu))
-static kmem_cache_t *flow_cachep;
+static kmem_cache_t *flow_cachep __read_mostly;
static int flow_lwm, flow_hwm;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 1beb782ac41b..e68700f950a5 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -61,7 +61,9 @@ static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev);
static struct neigh_table *neigh_tables;
+#ifdef CONFIG_PROC_FS
static struct file_operations neigh_stat_seq_fops;
+#endif
/*
Neighbour hash table buckets are protected with rwlock tbl->lock.
@@ -173,39 +175,10 @@ static void pneigh_queue_purge(struct sk_buff_head *list)
}
}
-void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
+static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
{
int i;
- write_lock_bh(&tbl->lock);
-
- for (i=0; i <= tbl->hash_mask; i++) {
- struct neighbour *n, **np;
-
- np = &tbl->hash_buckets[i];
- while ((n = *np) != NULL) {
- if (dev && n->dev != dev) {
- np = &n->next;
- continue;
- }
- *np = n->next;
- write_lock_bh(&n->lock);
- n->dead = 1;
- neigh_del_timer(n);
- write_unlock_bh(&n->lock);
- neigh_release(n);
- }
- }
-
- write_unlock_bh(&tbl->lock);
-}
-
-int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
-{
- int i;
-
- write_lock_bh(&tbl->lock);
-
for (i = 0; i <= tbl->hash_mask; i++) {
struct neighbour *n, **np = &tbl->hash_buckets[i];
@@ -241,7 +214,19 @@ int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
neigh_release(n);
}
}
+}
+
+void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
+{
+ write_lock_bh(&tbl->lock);
+ neigh_flush_dev(tbl, dev);
+ write_unlock_bh(&tbl->lock);
+}
+int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
+{
+ write_lock_bh(&tbl->lock);
+ neigh_flush_dev(tbl, dev);
pneigh_ifdown(tbl, dev);
write_unlock_bh(&tbl->lock);
@@ -725,6 +710,14 @@ static __inline__ int neigh_max_probes(struct neighbour *n)
p->ucast_probes + p->app_probes + p->mcast_probes);
}
+static inline void neigh_add_timer(struct neighbour *n, unsigned long when)
+{
+ if (unlikely(mod_timer(&n->timer, when))) {
+ printk("NEIGH: BUG, double timer add, state is %x\n",
+ n->nud_state);
+ dump_stack();
+ }
+}
/* Called when a timer expires for a neighbour entry. */
@@ -806,11 +799,10 @@ static void neigh_timer_handler(unsigned long arg)
}
if (neigh->nud_state & NUD_IN_TIMER) {
- neigh_hold(neigh);
if (time_before(next, jiffies + HZ/2))
next = jiffies + HZ/2;
- neigh->timer.expires = next;
- add_timer(&neigh->timer);
+ if (!mod_timer(&neigh->timer, next))
+ neigh_hold(neigh);
}
if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
struct sk_buff *skb = skb_peek(&neigh->arp_queue);
@@ -852,8 +844,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
atomic_set(&neigh->probes, neigh->parms->ucast_probes);
neigh->nud_state = NUD_INCOMPLETE;
neigh_hold(neigh);
- neigh->timer.expires = now + 1;
- add_timer(&neigh->timer);
+ neigh_add_timer(neigh, now + 1);
} else {
neigh->nud_state = NUD_FAILED;
write_unlock_bh(&neigh->lock);
@@ -866,8 +857,8 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
neigh_hold(neigh);
neigh->nud_state = NUD_DELAY;
- neigh->timer.expires = jiffies + neigh->parms->delay_probe_time;
- add_timer(&neigh->timer);
+ neigh_add_timer(neigh,
+ jiffies + neigh->parms->delay_probe_time);
}
if (neigh->nud_state == NUD_INCOMPLETE) {
@@ -1013,10 +1004,10 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
neigh_del_timer(neigh);
if (new & NUD_IN_TIMER) {
neigh_hold(neigh);
- neigh->timer.expires = jiffies +
+ neigh_add_timer(neigh, (jiffies +
((new & NUD_REACHABLE) ?
- neigh->parms->reachable_time : 0);
- add_timer(&neigh->timer);
+ neigh->parms->reachable_time :
+ 0)));
}
neigh->nud_state = new;
}
@@ -1217,7 +1208,7 @@ static void neigh_proxy_process(unsigned long arg)
while (skb != (struct sk_buff *)&tbl->proxy_queue) {
struct sk_buff *back = skb;
- long tdif = back->stamp.tv_usec - now;
+ long tdif = NEIGH_CB(back)->sched_next - now;
skb = skb->next;
if (tdif <= 0) {
@@ -1248,8 +1239,9 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
kfree_skb(skb);
return;
}
- skb->stamp.tv_sec = LOCALLY_ENQUEUED;
- skb->stamp.tv_usec = sched_next;
+
+ NEIGH_CB(skb)->sched_next = sched_next;
+ NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
spin_lock(&tbl->proxy_queue.lock);
if (del_timer(&tbl->proxy_timer)) {
@@ -1633,12 +1625,9 @@ static int neightbl_fill_info(struct neigh_table *tbl, struct sk_buff *skb,
memset(&ndst, 0, sizeof(ndst));
- for (cpu = 0; cpu < NR_CPUS; cpu++) {
+ for_each_cpu(cpu) {
struct neigh_statistics *st;
- if (!cpu_possible(cpu))
- continue;
-
st = per_cpu_ptr(tbl->stats, cpu);
ndst.ndts_allocs += st->allocs;
ndst.ndts_destroys += st->destroys;
@@ -2342,8 +2331,8 @@ void neigh_app_ns(struct neighbour *n)
}
nlh = (struct nlmsghdr *)skb->data;
nlh->nlmsg_flags = NLM_F_REQUEST;
- NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH;
- netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC);
+ NETLINK_CB(skb).dst_group = RTNLGRP_NEIGH;
+ netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC);
}
static void neigh_app_notify(struct neighbour *n)
@@ -2360,8 +2349,8 @@ static void neigh_app_notify(struct neighbour *n)
return;
}
nlh = (struct nlmsghdr *)skb->data;
- NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH;
- netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC);
+ NETLINK_CB(skb).dst_group = RTNLGRP_NEIGH;
+ netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC);
}
#endif /* CONFIG_ARPD */
diff --git a/net/core/netfilter.c b/net/core/netfilter.c
deleted file mode 100644
index 076c156d5eda..000000000000
--- a/net/core/netfilter.c
+++ /dev/null
@@ -1,648 +0,0 @@
-/* netfilter.c: look after the filters for various protocols.
- * Heavily influenced by the old firewall.c by David Bonn and Alan Cox.
- *
- * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
- * way.
- *
- * Rusty Russell (C)2000 -- This code is GPL.
- *
- * February 2000: Modified by James Morris to have 1 queue per protocol.
- * 15-Mar-2000: Added NF_REPEAT --RR.
- * 08-May-2003: Internal logging interface added by Jozsef Kadlecsik.
- */
-#include <linux/config.h>
-#include <linux/kernel.h>
-#include <linux/netfilter.h>
-#include <net/protocol.h>
-#include <linux/init.h>
-#include <linux/skbuff.h>
-#include <linux/wait.h>
-#include <linux/module.h>
-#include <linux/interrupt.h>
-#include <linux/if.h>
-#include <linux/netdevice.h>
-#include <linux/inetdevice.h>
-#include <linux/tcp.h>
-#include <linux/udp.h>
-#include <linux/icmp.h>
-#include <net/sock.h>
-#include <net/route.h>
-#include <linux/ip.h>
-
-/* In this code, we can be waiting indefinitely for userspace to
- * service a packet if a hook returns NF_QUEUE. We could keep a count
- * of skbuffs queued for userspace, and not deregister a hook unless
- * this is zero, but that sucks. Now, we simply check when the
- * packets come back: if the hook is gone, the packet is discarded. */
-#ifdef CONFIG_NETFILTER_DEBUG
-#define NFDEBUG(format, args...) printk(format , ## args)
-#else
-#define NFDEBUG(format, args...)
-#endif
-
-/* Sockopts only registered and called from user context, so
- net locking would be overkill. Also, [gs]etsockopt calls may
- sleep. */
-static DECLARE_MUTEX(nf_sockopt_mutex);
-
-struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
-static LIST_HEAD(nf_sockopts);
-static DEFINE_SPINLOCK(nf_hook_lock);
-
-/*
- * A queue handler may be registered for each protocol. Each is protected by
- * long term mutex. The handler must provide an an outfn() to accept packets
- * for queueing and must reinject all packets it receives, no matter what.
- */
-static struct nf_queue_handler_t {
- nf_queue_outfn_t outfn;
- void *data;
-} queue_handler[NPROTO];
-static DEFINE_RWLOCK(queue_handler_lock);
-
-int nf_register_hook(struct nf_hook_ops *reg)
-{
- struct list_head *i;
-
- spin_lock_bh(&nf_hook_lock);
- list_for_each(i, &nf_hooks[reg->pf][reg->hooknum]) {
- if (reg->priority < ((struct nf_hook_ops *)i)->priority)
- break;
- }
- list_add_rcu(&reg->list, i->prev);
- spin_unlock_bh(&nf_hook_lock);
-
- synchronize_net();
- return 0;
-}
-
-void nf_unregister_hook(struct nf_hook_ops *reg)
-{
- spin_lock_bh(&nf_hook_lock);
- list_del_rcu(&reg->list);
- spin_unlock_bh(&nf_hook_lock);
-
- synchronize_net();
-}
-
-/* Do exclusive ranges overlap? */
-static inline int overlap(int min1, int max1, int min2, int max2)
-{
- return max1 > min2 && min1 < max2;
-}
-
-/* Functions to register sockopt ranges (exclusive). */
-int nf_register_sockopt(struct nf_sockopt_ops *reg)
-{
- struct list_head *i;
- int ret = 0;
-
- if (down_interruptible(&nf_sockopt_mutex) != 0)
- return -EINTR;
-
- list_for_each(i, &nf_sockopts) {
- struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i;
- if (ops->pf == reg->pf
- && (overlap(ops->set_optmin, ops->set_optmax,
- reg->set_optmin, reg->set_optmax)
- || overlap(ops->get_optmin, ops->get_optmax,
- reg->get_optmin, reg->get_optmax))) {
- NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
- ops->set_optmin, ops->set_optmax,
- ops->get_optmin, ops->get_optmax,
- reg->set_optmin, reg->set_optmax,
- reg->get_optmin, reg->get_optmax);
- ret = -EBUSY;
- goto out;
- }
- }
-
- list_add(&reg->list, &nf_sockopts);
-out:
- up(&nf_sockopt_mutex);
- return ret;
-}
-
-void nf_unregister_sockopt(struct nf_sockopt_ops *reg)
-{
- /* No point being interruptible: we're probably in cleanup_module() */
- restart:
- down(&nf_sockopt_mutex);
- if (reg->use != 0) {
- /* To be woken by nf_sockopt call... */
- /* FIXME: Stuart Young's name appears gratuitously. */
- set_current_state(TASK_UNINTERRUPTIBLE);
- reg->cleanup_task = current;
- up(&nf_sockopt_mutex);
- schedule();
- goto restart;
- }
- list_del(&reg->list);
- up(&nf_sockopt_mutex);
-}
-
-/* Call get/setsockopt() */
-static int nf_sockopt(struct sock *sk, int pf, int val,
- char __user *opt, int *len, int get)
-{
- struct list_head *i;
- struct nf_sockopt_ops *ops;
- int ret;
-
- if (down_interruptible(&nf_sockopt_mutex) != 0)
- return -EINTR;
-
- list_for_each(i, &nf_sockopts) {
- ops = (struct nf_sockopt_ops *)i;
- if (ops->pf == pf) {
- if (get) {
- if (val >= ops->get_optmin
- && val < ops->get_optmax) {
- ops->use++;
- up(&nf_sockopt_mutex);
- ret = ops->get(sk, val, opt, len);
- goto out;
- }
- } else {
- if (val >= ops->set_optmin
- && val < ops->set_optmax) {
- ops->use++;
- up(&nf_sockopt_mutex);
- ret = ops->set(sk, val, opt, *len);
- goto out;
- }
- }
- }
- }
- up(&nf_sockopt_mutex);
- return -ENOPROTOOPT;
-
- out:
- down(&nf_sockopt_mutex);
- ops->use--;
- if (ops->cleanup_task)
- wake_up_process(ops->cleanup_task);
- up(&nf_sockopt_mutex);
- return ret;
-}
-
-int nf_setsockopt(struct sock *sk, int pf, int val, char __user *opt,
- int len)
-{
- return nf_sockopt(sk, pf, val, opt, &len, 0);
-}
-
-int nf_getsockopt(struct sock *sk, int pf, int val, char __user *opt, int *len)
-{
- return nf_sockopt(sk, pf, val, opt, len, 1);
-}
-
-static unsigned int nf_iterate(struct list_head *head,
- struct sk_buff **skb,
- int hook,
- const struct net_device *indev,
- const struct net_device *outdev,
- struct list_head **i,
- int (*okfn)(struct sk_buff *),
- int hook_thresh)
-{
- unsigned int verdict;
-
- /*
- * The caller must not block between calls to this
- * function because of risk of continuing from deleted element.
- */
- list_for_each_continue_rcu(*i, head) {
- struct nf_hook_ops *elem = (struct nf_hook_ops *)*i;
-
- if (hook_thresh > elem->priority)
- continue;
-
- /* Optimization: we don't need to hold module
- reference here, since function can't sleep. --RR */
- verdict = elem->hook(hook, skb, indev, outdev, okfn);
- if (verdict != NF_ACCEPT) {
-#ifdef CONFIG_NETFILTER_DEBUG
- if (unlikely(verdict > NF_MAX_VERDICT)) {
- NFDEBUG("Evil return from %p(%u).\n",
- elem->hook, hook);
- continue;
- }
-#endif
- if (verdict != NF_REPEAT)
- return verdict;
- *i = (*i)->prev;
- }
- }
- return NF_ACCEPT;
-}
-
-int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data)
-{
- int ret;
-
- write_lock_bh(&queue_handler_lock);
- if (queue_handler[pf].outfn)
- ret = -EBUSY;
- else {
- queue_handler[pf].outfn = outfn;
- queue_handler[pf].data = data;
- ret = 0;
- }
- write_unlock_bh(&queue_handler_lock);
-
- return ret;
-}
-
-/* The caller must flush their queue before this */
-int nf_unregister_queue_handler(int pf)
-{
- write_lock_bh(&queue_handler_lock);
- queue_handler[pf].outfn = NULL;
- queue_handler[pf].data = NULL;
- write_unlock_bh(&queue_handler_lock);
-
- return 0;
-}
-
-/*
- * Any packet that leaves via this function must come back
- * through nf_reinject().
- */
-static int nf_queue(struct sk_buff *skb,
- struct list_head *elem,
- int pf, unsigned int hook,
- struct net_device *indev,
- struct net_device *outdev,
- int (*okfn)(struct sk_buff *))
-{
- int status;
- struct nf_info *info;
-#ifdef CONFIG_BRIDGE_NETFILTER
- struct net_device *physindev = NULL;
- struct net_device *physoutdev = NULL;
-#endif
-
- /* QUEUE == DROP if noone is waiting, to be safe. */
- read_lock(&queue_handler_lock);
- if (!queue_handler[pf].outfn) {
- read_unlock(&queue_handler_lock);
- kfree_skb(skb);
- return 1;
- }
-
- info = kmalloc(sizeof(*info), GFP_ATOMIC);
- if (!info) {
- if (net_ratelimit())
- printk(KERN_ERR "OOM queueing packet %p\n",
- skb);
- read_unlock(&queue_handler_lock);
- kfree_skb(skb);
- return 1;
- }
-
- *info = (struct nf_info) {
- (struct nf_hook_ops *)elem, pf, hook, indev, outdev, okfn };
-
- /* If it's going away, ignore hook. */
- if (!try_module_get(info->elem->owner)) {
- read_unlock(&queue_handler_lock);
- kfree(info);
- return 0;
- }
-
- /* Bump dev refs so they don't vanish while packet is out */
- if (indev) dev_hold(indev);
- if (outdev) dev_hold(outdev);
-
-#ifdef CONFIG_BRIDGE_NETFILTER
- if (skb->nf_bridge) {
- physindev = skb->nf_bridge->physindev;
- if (physindev) dev_hold(physindev);
- physoutdev = skb->nf_bridge->physoutdev;
- if (physoutdev) dev_hold(physoutdev);
- }
-#endif
-
- status = queue_handler[pf].outfn(skb, info, queue_handler[pf].data);
- read_unlock(&queue_handler_lock);
-
- if (status < 0) {
- /* James M doesn't say fuck enough. */
- if (indev) dev_put(indev);
- if (outdev) dev_put(outdev);
-#ifdef CONFIG_BRIDGE_NETFILTER
- if (physindev) dev_put(physindev);
- if (physoutdev) dev_put(physoutdev);
-#endif
- module_put(info->elem->owner);
- kfree(info);
- kfree_skb(skb);
- return 1;
- }
- return 1;
-}
-
-/* Returns 1 if okfn() needs to be executed by the caller,
- * -EPERM for NF_DROP, 0 otherwise. */
-int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb,
- struct net_device *indev,
- struct net_device *outdev,
- int (*okfn)(struct sk_buff *),
- int hook_thresh)
-{
- struct list_head *elem;
- unsigned int verdict;
- int ret = 0;
-
- /* We may already have this, but read-locks nest anyway */
- rcu_read_lock();
-
- elem = &nf_hooks[pf][hook];
-next_hook:
- verdict = nf_iterate(&nf_hooks[pf][hook], pskb, hook, indev,
- outdev, &elem, okfn, hook_thresh);
- if (verdict == NF_ACCEPT || verdict == NF_STOP) {
- ret = 1;
- goto unlock;
- } else if (verdict == NF_DROP) {
- kfree_skb(*pskb);
- ret = -EPERM;
- } else if (verdict == NF_QUEUE) {
- NFDEBUG("nf_hook: Verdict = QUEUE.\n");
- if (!nf_queue(*pskb, elem, pf, hook, indev, outdev, okfn))
- goto next_hook;
- }
-unlock:
- rcu_read_unlock();
- return ret;
-}
-
-void nf_reinject(struct sk_buff *skb, struct nf_info *info,
- unsigned int verdict)
-{
- struct list_head *elem = &info->elem->list;
- struct list_head *i;
-
- rcu_read_lock();
-
- /* Release those devices we held, or Alexey will kill me. */
- if (info->indev) dev_put(info->indev);
- if (info->outdev) dev_put(info->outdev);
-#ifdef CONFIG_BRIDGE_NETFILTER
- if (skb->nf_bridge) {
- if (skb->nf_bridge->physindev)
- dev_put(skb->nf_bridge->physindev);
- if (skb->nf_bridge->physoutdev)
- dev_put(skb->nf_bridge->physoutdev);
- }
-#endif
-
- /* Drop reference to owner of hook which queued us. */
- module_put(info->elem->owner);
-
- list_for_each_rcu(i, &nf_hooks[info->pf][info->hook]) {
- if (i == elem)
- break;
- }
-
- if (elem == &nf_hooks[info->pf][info->hook]) {
- /* The module which sent it to userspace is gone. */
- NFDEBUG("%s: module disappeared, dropping packet.\n",
- __FUNCTION__);
- verdict = NF_DROP;
- }
-
- /* Continue traversal iff userspace said ok... */
- if (verdict == NF_REPEAT) {
- elem = elem->prev;
- verdict = NF_ACCEPT;
- }
-
- if (verdict == NF_ACCEPT) {
- next_hook:
- verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
- &skb, info->hook,
- info->indev, info->outdev, &elem,
- info->okfn, INT_MIN);
- }
-
- switch (verdict) {
- case NF_ACCEPT:
- info->okfn(skb);
- break;
-
- case NF_QUEUE:
- if (!nf_queue(skb, elem, info->pf, info->hook,
- info->indev, info->outdev, info->okfn))
- goto next_hook;
- break;
- }
- rcu_read_unlock();
-
- if (verdict == NF_DROP)
- kfree_skb(skb);
-
- kfree(info);
- return;
-}
-
-#ifdef CONFIG_INET
-/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
-int ip_route_me_harder(struct sk_buff **pskb)
-{
- struct iphdr *iph = (*pskb)->nh.iph;
- struct rtable *rt;
- struct flowi fl = {};
- struct dst_entry *odst;
- unsigned int hh_len;
-
- /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause
- * packets with foreign saddr to appear on the NF_IP_LOCAL_OUT hook.
- */
- if (inet_addr_type(iph->saddr) == RTN_LOCAL) {
- fl.nl_u.ip4_u.daddr = iph->daddr;
- fl.nl_u.ip4_u.saddr = iph->saddr;
- fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
- fl.oif = (*pskb)->sk ? (*pskb)->sk->sk_bound_dev_if : 0;
-#ifdef CONFIG_IP_ROUTE_FWMARK
- fl.nl_u.ip4_u.fwmark = (*pskb)->nfmark;
-#endif
- fl.proto = iph->protocol;
- if (ip_route_output_key(&rt, &fl) != 0)
- return -1;
-
- /* Drop old route. */
- dst_release((*pskb)->dst);
- (*pskb)->dst = &rt->u.dst;
- } else {
- /* non-local src, find valid iif to satisfy
- * rp-filter when calling ip_route_input. */
- fl.nl_u.ip4_u.daddr = iph->saddr;
- if (ip_route_output_key(&rt, &fl) != 0)
- return -1;
-
- odst = (*pskb)->dst;
- if (ip_route_input(*pskb, iph->daddr, iph->saddr,
- RT_TOS(iph->tos), rt->u.dst.dev) != 0) {
- dst_release(&rt->u.dst);
- return -1;
- }
- dst_release(&rt->u.dst);
- dst_release(odst);
- }
-
- if ((*pskb)->dst->error)
- return -1;
-
- /* Change in oif may mean change in hh_len. */
- hh_len = (*pskb)->dst->dev->hard_header_len;
- if (skb_headroom(*pskb) < hh_len) {
- struct sk_buff *nskb;
-
- nskb = skb_realloc_headroom(*pskb, hh_len);
- if (!nskb)
- return -1;
- if ((*pskb)->sk)
- skb_set_owner_w(nskb, (*pskb)->sk);
- kfree_skb(*pskb);
- *pskb = nskb;
- }
-
- return 0;
-}
-EXPORT_SYMBOL(ip_route_me_harder);
-
-int skb_ip_make_writable(struct sk_buff **pskb, unsigned int writable_len)
-{
- struct sk_buff *nskb;
-
- if (writable_len > (*pskb)->len)
- return 0;
-
- /* Not exclusive use of packet? Must copy. */
- if (skb_shared(*pskb) || skb_cloned(*pskb))
- goto copy_skb;
-
- return pskb_may_pull(*pskb, writable_len);
-
-copy_skb:
- nskb = skb_copy(*pskb, GFP_ATOMIC);
- if (!nskb)
- return 0;
- BUG_ON(skb_is_nonlinear(nskb));
-
- /* Rest of kernel will get very unhappy if we pass it a
- suddenly-orphaned skbuff */
- if ((*pskb)->sk)
- skb_set_owner_w(nskb, (*pskb)->sk);
- kfree_skb(*pskb);
- *pskb = nskb;
- return 1;
-}
-EXPORT_SYMBOL(skb_ip_make_writable);
-#endif /*CONFIG_INET*/
-
-/* Internal logging interface, which relies on the real
- LOG target modules */
-
-#define NF_LOG_PREFIXLEN 128
-
-static nf_logfn *nf_logging[NPROTO]; /* = NULL */
-static int reported = 0;
-static DEFINE_SPINLOCK(nf_log_lock);
-
-int nf_log_register(int pf, nf_logfn *logfn)
-{
- int ret = -EBUSY;
-
- /* Any setup of logging members must be done before
- * substituting pointer. */
- spin_lock(&nf_log_lock);
- if (!nf_logging[pf]) {
- rcu_assign_pointer(nf_logging[pf], logfn);
- ret = 0;
- }
- spin_unlock(&nf_log_lock);
- return ret;
-}
-
-void nf_log_unregister(int pf, nf_logfn *logfn)
-{
- spin_lock(&nf_log_lock);
- if (nf_logging[pf] == logfn)
- nf_logging[pf] = NULL;
- spin_unlock(&nf_log_lock);
-
- /* Give time to concurrent readers. */
- synchronize_net();
-}
-
-void nf_log_packet(int pf,
- unsigned int hooknum,
- const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const char *fmt, ...)
-{
- va_list args;
- char prefix[NF_LOG_PREFIXLEN];
- nf_logfn *logfn;
-
- rcu_read_lock();
- logfn = rcu_dereference(nf_logging[pf]);
- if (logfn) {
- va_start(args, fmt);
- vsnprintf(prefix, sizeof(prefix), fmt, args);
- va_end(args);
- /* We must read logging before nf_logfn[pf] */
- logfn(hooknum, skb, in, out, prefix);
- } else if (!reported) {
- printk(KERN_WARNING "nf_log_packet: can\'t log yet, "
- "no backend logging module loaded in!\n");
- reported++;
- }
- rcu_read_unlock();
-}
-EXPORT_SYMBOL(nf_log_register);
-EXPORT_SYMBOL(nf_log_unregister);
-EXPORT_SYMBOL(nf_log_packet);
-
-/* This does not belong here, but locally generated errors need it if connection
- tracking in use: without this, connection may not be in hash table, and hence
- manufactured ICMP or RST packets will not be associated with it. */
-void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *);
-
-void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
-{
- void (*attach)(struct sk_buff *, struct sk_buff *);
-
- if (skb->nfct && (attach = ip_ct_attach) != NULL) {
- mb(); /* Just to be sure: must be read before executing this */
- attach(new, skb);
- }
-}
-
-void __init netfilter_init(void)
-{
- int i, h;
-
- for (i = 0; i < NPROTO; i++) {
- for (h = 0; h < NF_MAX_HOOKS; h++)
- INIT_LIST_HEAD(&nf_hooks[i][h]);
- }
-}
-
-EXPORT_SYMBOL(ip_ct_attach);
-EXPORT_SYMBOL(nf_ct_attach);
-EXPORT_SYMBOL(nf_getsockopt);
-EXPORT_SYMBOL(nf_hook_slow);
-EXPORT_SYMBOL(nf_hooks);
-EXPORT_SYMBOL(nf_register_hook);
-EXPORT_SYMBOL(nf_register_queue_handler);
-EXPORT_SYMBOL(nf_register_sockopt);
-EXPORT_SYMBOL(nf_reinject);
-EXPORT_SYMBOL(nf_setsockopt);
-EXPORT_SYMBOL(nf_unregister_hook);
-EXPORT_SYMBOL(nf_unregister_queue_handler);
-EXPORT_SYMBOL(nf_unregister_sockopt);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index c327c9edadc5..802fe11efad0 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -33,6 +33,7 @@
#define MAX_UDP_CHUNK 1460
#define MAX_SKBS 32
#define MAX_QUEUE_DEPTH (MAX_SKBS / 2)
+#define MAX_RETRIES 20000
static DEFINE_SPINLOCK(skb_list_lock);
static int nr_skbs;
@@ -248,14 +249,14 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
int status;
struct netpoll_info *npinfo;
-repeat:
- if(!np || !np->dev || !netif_running(np->dev)) {
+ if (!np || !np->dev || !netif_running(np->dev)) {
__kfree_skb(skb);
return;
}
- /* avoid recursion */
npinfo = np->dev->npinfo;
+
+ /* avoid recursion */
if (npinfo->poll_owner == smp_processor_id() ||
np->dev->xmit_lock_owner == smp_processor_id()) {
if (np->drop)
@@ -265,30 +266,37 @@ repeat:
return;
}
- spin_lock(&np->dev->xmit_lock);
- np->dev->xmit_lock_owner = smp_processor_id();
+ do {
+ npinfo->tries--;
+ spin_lock(&np->dev->xmit_lock);
+ np->dev->xmit_lock_owner = smp_processor_id();
- /*
- * network drivers do not expect to be called if the queue is
- * stopped.
- */
- if (netif_queue_stopped(np->dev)) {
+ /*
+ * network drivers do not expect to be called if the queue is
+ * stopped.
+ */
+ if (netif_queue_stopped(np->dev)) {
+ np->dev->xmit_lock_owner = -1;
+ spin_unlock(&np->dev->xmit_lock);
+ netpoll_poll(np);
+ udelay(50);
+ continue;
+ }
+
+ status = np->dev->hard_start_xmit(skb, np->dev);
np->dev->xmit_lock_owner = -1;
spin_unlock(&np->dev->xmit_lock);
- netpoll_poll(np);
- goto repeat;
- }
-
- status = np->dev->hard_start_xmit(skb, np->dev);
- np->dev->xmit_lock_owner = -1;
- spin_unlock(&np->dev->xmit_lock);
+ /* success */
+ if(!status) {
+ npinfo->tries = MAX_RETRIES; /* reset */
+ return;
+ }
- /* transmit busy */
- if(status) {
+ /* transmit busy */
netpoll_poll(np);
- goto repeat;
- }
+ udelay(50);
+ } while (npinfo->tries > 0);
}
void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
@@ -349,15 +357,11 @@ static void arp_reply(struct sk_buff *skb)
unsigned char *arp_ptr;
int size, type = ARPOP_REPLY, ptype = ETH_P_ARP;
u32 sip, tip;
- unsigned long flags;
struct sk_buff *send_skb;
struct netpoll *np = NULL;
- spin_lock_irqsave(&npinfo->rx_lock, flags);
if (npinfo->rx_np && npinfo->rx_np->dev == skb->dev)
np = npinfo->rx_np;
- spin_unlock_irqrestore(&npinfo->rx_lock, flags);
-
if (!np)
return;
@@ -639,10 +643,12 @@ int netpoll_setup(struct netpoll *np)
if (!npinfo)
goto release;
+ npinfo->rx_flags = 0;
npinfo->rx_np = NULL;
- npinfo->poll_lock = SPIN_LOCK_UNLOCKED;
+ spin_lock_init(&npinfo->poll_lock);
npinfo->poll_owner = -1;
- npinfo->rx_lock = SPIN_LOCK_UNLOCKED;
+ npinfo->tries = MAX_RETRIES;
+ spin_lock_init(&npinfo->rx_lock);
} else
npinfo = ndev->npinfo;
@@ -697,7 +703,7 @@ int netpoll_setup(struct netpoll *np)
if (!np->local_ip) {
rcu_read_lock();
- in_dev = __in_dev_get(ndev);
+ in_dev = __in_dev_get_rcu(ndev);
if (!in_dev || !in_dev->ifa_list) {
rcu_read_unlock();
@@ -718,9 +724,16 @@ int netpoll_setup(struct netpoll *np)
npinfo->rx_np = np;
spin_unlock_irqrestore(&npinfo->rx_lock, flags);
}
+
+ /* fill up the skb queue */
+ refill_skbs();
+
/* last thing to do is link it to the net device structure */
ndev->npinfo = npinfo;
+ /* avoid racing with NAPI reading npinfo */
+ synchronize_rcu();
+
return 0;
release:
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 8eb083b6041a..7fc3e9e28c34 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -75,7 +75,7 @@
* By design there should only be *one* "controlling" process. In practice
* multiple write accesses gives unpredictable result. Understood by "write"
* to /proc gives result code thats should be read be the "writer".
- * For pratical use this should be no problem.
+ * For practical use this should be no problem.
*
* Note when adding devices to a specific CPU there good idea to also assign
* /proc/irq/XX/smp_affinity so TX-interrupts gets bound to the same CPU.
@@ -96,7 +96,7 @@
* New xmit() return, do_div and misc clean up by Stephen Hemminger
* <shemminger@osdl.org> 040923
*
- * Rany Dunlap fixed u64 printk compiler waring
+ * Randy Dunlap fixed u64 printk compiler waring
*
* Remove FCS from BW calculation. Lennert Buytenhek <buytenh@wantstofly.org>
* New time handling. Lennert Buytenhek <buytenh@wantstofly.org> 041213
@@ -137,6 +137,7 @@
#include <linux/ipv6.h>
#include <linux/udp.h>
#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
#include <linux/wait.h>
#include <net/checksum.h>
#include <net/ipv6.h>
@@ -151,7 +152,7 @@
#include <asm/timex.h>
-#define VERSION "pktgen v2.62: Packet Generator for packet performance testing.\n"
+#define VERSION "pktgen v2.63: Packet Generator for packet performance testing.\n"
/* #define PG_DEBUG(a) a */
#define PG_DEBUG(a)
@@ -177,8 +178,8 @@
#define T_REMDEV (1<<3) /* Remove all devs */
/* Locks */
-#define thread_lock() spin_lock(&_thread_lock)
-#define thread_unlock() spin_unlock(&_thread_lock)
+#define thread_lock() down(&pktgen_sem)
+#define thread_unlock() up(&pktgen_sem)
/* If lock -- can be removed after some work */
#define if_lock(t) spin_lock(&(t->if_lock));
@@ -187,6 +188,8 @@
/* Used to help with determining the pkts on receive */
#define PKTGEN_MAGIC 0xbe9be955
#define PG_PROC_DIR "pktgen"
+#define PGCTRL "pgctrl"
+static struct proc_dir_entry *pg_proc_dir = NULL;
#define MAX_CFLOWS 65536
@@ -202,11 +205,8 @@ struct pktgen_dev {
* Try to keep frequent/infrequent used vars. separated.
*/
- char ifname[32];
- struct proc_dir_entry *proc_ent;
+ char ifname[IFNAMSIZ];
char result[512];
- /* proc file names */
- char fname[80];
struct pktgen_thread* pg_thread; /* the owner */
struct pktgen_dev *next; /* Used for chaining in the thread's run-queue */
@@ -244,7 +244,7 @@ struct pktgen_dev {
__u32 seq_num;
int clone_skb; /* Use multiple SKBs during packet gen. If this number
- * is greater than 1, then that many coppies of the same
+ * is greater than 1, then that many copies of the same
* packet will be sent before a new packet is allocated.
* For instance, if you want to send 1024 identical packets
* before creating a new packet, set clone_skb to 1024.
@@ -330,8 +330,6 @@ struct pktgen_thread {
struct pktgen_dev *if_list; /* All device here */
struct pktgen_thread* next;
char name[32];
- char fname[128]; /* name of proc file */
- struct proc_dir_entry *proc_ent;
char result[512];
u32 max_before_softirq; /* We'll call do_softirq to prevent starvation. */
@@ -396,7 +394,7 @@ static inline s64 divremdi3(s64 x, s64 y, int type)
/* End of hacks to deal with 64-bit math on x86 */
-/** Convert to miliseconds */
+/** Convert to milliseconds */
static inline __u64 tv_to_ms(const struct timeval* tv)
{
__u64 ms = tv->tv_usec / 1000;
@@ -425,7 +423,7 @@ static inline __u64 pg_div64(__u64 n, __u64 base)
{
__u64 tmp = n;
/*
- * How do we know if the architectrure we are running on
+ * How do we know if the architecture we are running on
* supports division with 64 bit base?
*
*/
@@ -473,16 +471,6 @@ static inline __u64 tv_diff(const struct timeval* a, const struct timeval* b)
static char version[] __initdata = VERSION;
-static ssize_t proc_pgctrl_read(struct file* file, char __user * buf, size_t count, loff_t *ppos);
-static ssize_t proc_pgctrl_write(struct file* file, const char __user * buf, size_t count, loff_t *ppos);
-static int proc_if_read(char *buf , char **start, off_t offset, int len, int *eof, void *data);
-
-static int proc_thread_read(char *buf , char **start, off_t offset, int len, int *eof, void *data);
-static int proc_if_write(struct file *file, const char __user *user_buffer, unsigned long count, void *data);
-static int proc_thread_write(struct file *file, const char __user *user_buffer, unsigned long count, void *data);
-static int create_proc_dir(void);
-static int remove_proc_dir(void);
-
static int pktgen_remove_device(struct pktgen_thread* t, struct pktgen_dev *i);
static int pktgen_add_device(struct pktgen_thread* t, const char* ifname);
static struct pktgen_thread* pktgen_find_thread(const char* name);
@@ -503,83 +491,41 @@ static int pg_delay_d = 0;
static int pg_clone_skb_d = 0;
static int debug = 0;
-static spinlock_t _thread_lock = SPIN_LOCK_UNLOCKED;
+static DECLARE_MUTEX(pktgen_sem);
static struct pktgen_thread *pktgen_threads = NULL;
-static char module_fname[128];
-static struct proc_dir_entry *module_proc_ent = NULL;
-
static struct notifier_block pktgen_notifier_block = {
.notifier_call = pktgen_device_event,
};
-static struct file_operations pktgen_fops = {
- .read = proc_pgctrl_read,
- .write = proc_pgctrl_write,
- /* .ioctl = pktgen_ioctl, later maybe */
-};
-
/*
* /proc handling functions
*
*/
-static struct proc_dir_entry *pg_proc_dir = NULL;
-static int proc_pgctrl_read_eof=0;
-
-static ssize_t proc_pgctrl_read(struct file* file, char __user * buf,
- size_t count, loff_t *ppos)
+static int pgctrl_show(struct seq_file *seq, void *v)
{
- char data[200];
- int len = 0;
-
- if(proc_pgctrl_read_eof) {
- proc_pgctrl_read_eof=0;
- len = 0;
- goto out;
- }
-
- sprintf(data, "%s", VERSION);
-
- len = strlen(data);
-
- if(len > count) {
- len =-EFAULT;
- goto out;
- }
-
- if (copy_to_user(buf, data, len)) {
- len =-EFAULT;
- goto out;
- }
-
- *ppos += len;
- proc_pgctrl_read_eof=1; /* EOF next call */
-
- out:
- return len;
+ seq_puts(seq, VERSION);
+ return 0;
}
-static ssize_t proc_pgctrl_write(struct file* file,const char __user * buf,
- size_t count, loff_t *ppos)
+static ssize_t pgctrl_write(struct file* file,const char __user * buf,
+ size_t count, loff_t *ppos)
{
- char *data = NULL;
int err = 0;
+ char data[128];
if (!capable(CAP_NET_ADMIN)){
err = -EPERM;
goto out;
}
- data = (void*)vmalloc ((unsigned int)count);
+ if (count > sizeof(data))
+ count = sizeof(data);
- if(!data) {
- err = -ENOMEM;
- goto out;
- }
if (copy_from_user(data, buf, count)) {
- err =-EFAULT;
- goto out_free;
+ err = -EFAULT;
+ goto out;
}
data[count-1] = 0; /* Make string */
@@ -594,31 +540,40 @@ static ssize_t proc_pgctrl_write(struct file* file,const char __user * buf,
err = count;
- out_free:
- vfree (data);
out:
return err;
}
-static int proc_if_read(char *buf , char **start, off_t offset,
- int len, int *eof, void *data)
+static int pgctrl_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, pgctrl_show, PDE(inode)->data);
+}
+
+static struct file_operations pktgen_fops = {
+ .owner = THIS_MODULE,
+ .open = pgctrl_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .write = pgctrl_write,
+ .release = single_release,
+};
+
+static int pktgen_if_show(struct seq_file *seq, void *v)
{
- char *p;
int i;
- struct pktgen_dev *pkt_dev = (struct pktgen_dev*)(data);
+ struct pktgen_dev *pkt_dev = seq->private;
__u64 sa;
__u64 stopped;
__u64 now = getCurUs();
- p = buf;
- p += sprintf(p, "Params: count %llu min_pkt_size: %u max_pkt_size: %u\n",
- (unsigned long long) pkt_dev->count,
- pkt_dev->min_pkt_size, pkt_dev->max_pkt_size);
+ seq_printf(seq, "Params: count %llu min_pkt_size: %u max_pkt_size: %u\n",
+ (unsigned long long) pkt_dev->count,
+ pkt_dev->min_pkt_size, pkt_dev->max_pkt_size);
- p += sprintf(p, " frags: %d delay: %u clone_skb: %d ifname: %s\n",
- pkt_dev->nfrags, 1000*pkt_dev->delay_us+pkt_dev->delay_ns, pkt_dev->clone_skb, pkt_dev->ifname);
+ seq_printf(seq, " frags: %d delay: %u clone_skb: %d ifname: %s\n",
+ pkt_dev->nfrags, 1000*pkt_dev->delay_us+pkt_dev->delay_ns, pkt_dev->clone_skb, pkt_dev->ifname);
- p += sprintf(p, " flows: %u flowlen: %u\n", pkt_dev->cflows, pkt_dev->lflow);
+ seq_printf(seq, " flows: %u flowlen: %u\n", pkt_dev->cflows, pkt_dev->lflow);
if(pkt_dev->flags & F_IPV6) {
@@ -626,19 +581,19 @@ static int proc_if_read(char *buf , char **start, off_t offset,
fmt_ip6(b1, pkt_dev->in6_saddr.s6_addr);
fmt_ip6(b2, pkt_dev->min_in6_saddr.s6_addr);
fmt_ip6(b3, pkt_dev->max_in6_saddr.s6_addr);
- p += sprintf(p, " saddr: %s min_saddr: %s max_saddr: %s\n", b1, b2, b3);
+ seq_printf(seq, " saddr: %s min_saddr: %s max_saddr: %s\n", b1, b2, b3);
fmt_ip6(b1, pkt_dev->in6_daddr.s6_addr);
fmt_ip6(b2, pkt_dev->min_in6_daddr.s6_addr);
fmt_ip6(b3, pkt_dev->max_in6_daddr.s6_addr);
- p += sprintf(p, " daddr: %s min_daddr: %s max_daddr: %s\n", b1, b2, b3);
+ seq_printf(seq, " daddr: %s min_daddr: %s max_daddr: %s\n", b1, b2, b3);
}
else
- p += sprintf(p, " dst_min: %s dst_max: %s\n src_min: %s src_max: %s\n",
- pkt_dev->dst_min, pkt_dev->dst_max, pkt_dev->src_min, pkt_dev->src_max);
+ seq_printf(seq," dst_min: %s dst_max: %s\n src_min: %s src_max: %s\n",
+ pkt_dev->dst_min, pkt_dev->dst_max, pkt_dev->src_min, pkt_dev->src_max);
- p += sprintf(p, " src_mac: ");
+ seq_puts(seq, " src_mac: ");
if ((pkt_dev->src_mac[0] == 0) &&
(pkt_dev->src_mac[1] == 0) &&
@@ -648,89 +603,89 @@ static int proc_if_read(char *buf , char **start, off_t offset,
(pkt_dev->src_mac[5] == 0))
for (i = 0; i < 6; i++)
- p += sprintf(p, "%02X%s", pkt_dev->odev->dev_addr[i], i == 5 ? " " : ":");
+ seq_printf(seq, "%02X%s", pkt_dev->odev->dev_addr[i], i == 5 ? " " : ":");
else
for (i = 0; i < 6; i++)
- p += sprintf(p, "%02X%s", pkt_dev->src_mac[i], i == 5 ? " " : ":");
+ seq_printf(seq, "%02X%s", pkt_dev->src_mac[i], i == 5 ? " " : ":");
- p += sprintf(p, "dst_mac: ");
+ seq_printf(seq, "dst_mac: ");
for (i = 0; i < 6; i++)
- p += sprintf(p, "%02X%s", pkt_dev->dst_mac[i], i == 5 ? "\n" : ":");
+ seq_printf(seq, "%02X%s", pkt_dev->dst_mac[i], i == 5 ? "\n" : ":");
- p += sprintf(p, " udp_src_min: %d udp_src_max: %d udp_dst_min: %d udp_dst_max: %d\n",
- pkt_dev->udp_src_min, pkt_dev->udp_src_max, pkt_dev->udp_dst_min,
- pkt_dev->udp_dst_max);
+ seq_printf(seq, " udp_src_min: %d udp_src_max: %d udp_dst_min: %d udp_dst_max: %d\n",
+ pkt_dev->udp_src_min, pkt_dev->udp_src_max, pkt_dev->udp_dst_min,
+ pkt_dev->udp_dst_max);
- p += sprintf(p, " src_mac_count: %d dst_mac_count: %d \n Flags: ",
- pkt_dev->src_mac_count, pkt_dev->dst_mac_count);
+ seq_printf(seq, " src_mac_count: %d dst_mac_count: %d \n Flags: ",
+ pkt_dev->src_mac_count, pkt_dev->dst_mac_count);
if (pkt_dev->flags & F_IPV6)
- p += sprintf(p, "IPV6 ");
+ seq_printf(seq, "IPV6 ");
if (pkt_dev->flags & F_IPSRC_RND)
- p += sprintf(p, "IPSRC_RND ");
+ seq_printf(seq, "IPSRC_RND ");
if (pkt_dev->flags & F_IPDST_RND)
- p += sprintf(p, "IPDST_RND ");
+ seq_printf(seq, "IPDST_RND ");
if (pkt_dev->flags & F_TXSIZE_RND)
- p += sprintf(p, "TXSIZE_RND ");
+ seq_printf(seq, "TXSIZE_RND ");
if (pkt_dev->flags & F_UDPSRC_RND)
- p += sprintf(p, "UDPSRC_RND ");
+ seq_printf(seq, "UDPSRC_RND ");
if (pkt_dev->flags & F_UDPDST_RND)
- p += sprintf(p, "UDPDST_RND ");
+ seq_printf(seq, "UDPDST_RND ");
if (pkt_dev->flags & F_MACSRC_RND)
- p += sprintf(p, "MACSRC_RND ");
+ seq_printf(seq, "MACSRC_RND ");
if (pkt_dev->flags & F_MACDST_RND)
- p += sprintf(p, "MACDST_RND ");
+ seq_printf(seq, "MACDST_RND ");
- p += sprintf(p, "\n");
+ seq_puts(seq, "\n");
sa = pkt_dev->started_at;
stopped = pkt_dev->stopped_at;
if (pkt_dev->running)
stopped = now; /* not really stopped, more like last-running-at */
- p += sprintf(p, "Current:\n pkts-sofar: %llu errors: %llu\n started: %lluus stopped: %lluus idle: %lluus\n",
- (unsigned long long) pkt_dev->sofar,
- (unsigned long long) pkt_dev->errors,
- (unsigned long long) sa,
- (unsigned long long) stopped,
- (unsigned long long) pkt_dev->idle_acc);
+ seq_printf(seq, "Current:\n pkts-sofar: %llu errors: %llu\n started: %lluus stopped: %lluus idle: %lluus\n",
+ (unsigned long long) pkt_dev->sofar,
+ (unsigned long long) pkt_dev->errors,
+ (unsigned long long) sa,
+ (unsigned long long) stopped,
+ (unsigned long long) pkt_dev->idle_acc);
- p += sprintf(p, " seq_num: %d cur_dst_mac_offset: %d cur_src_mac_offset: %d\n",
- pkt_dev->seq_num, pkt_dev->cur_dst_mac_offset, pkt_dev->cur_src_mac_offset);
+ seq_printf(seq, " seq_num: %d cur_dst_mac_offset: %d cur_src_mac_offset: %d\n",
+ pkt_dev->seq_num, pkt_dev->cur_dst_mac_offset,
+ pkt_dev->cur_src_mac_offset);
if(pkt_dev->flags & F_IPV6) {
char b1[128], b2[128];
fmt_ip6(b1, pkt_dev->cur_in6_daddr.s6_addr);
fmt_ip6(b2, pkt_dev->cur_in6_saddr.s6_addr);
- p += sprintf(p, " cur_saddr: %s cur_daddr: %s\n", b2, b1);
+ seq_printf(seq, " cur_saddr: %s cur_daddr: %s\n", b2, b1);
}
else
- p += sprintf(p, " cur_saddr: 0x%x cur_daddr: 0x%x\n",
- pkt_dev->cur_saddr, pkt_dev->cur_daddr);
+ seq_printf(seq, " cur_saddr: 0x%x cur_daddr: 0x%x\n",
+ pkt_dev->cur_saddr, pkt_dev->cur_daddr);
- p += sprintf(p, " cur_udp_dst: %d cur_udp_src: %d\n",
- pkt_dev->cur_udp_dst, pkt_dev->cur_udp_src);
+ seq_printf(seq, " cur_udp_dst: %d cur_udp_src: %d\n",
+ pkt_dev->cur_udp_dst, pkt_dev->cur_udp_src);
- p += sprintf(p, " flows: %u\n", pkt_dev->nflows);
+ seq_printf(seq, " flows: %u\n", pkt_dev->nflows);
if (pkt_dev->result[0])
- p += sprintf(p, "Result: %s\n", pkt_dev->result);
+ seq_printf(seq, "Result: %s\n", pkt_dev->result);
else
- p += sprintf(p, "Result: Idle\n");
- *eof = 1;
+ seq_printf(seq, "Result: Idle\n");
- return p - buf;
+ return 0;
}
@@ -802,13 +757,14 @@ done_str:
return i;
}
-static int proc_if_write(struct file *file, const char __user *user_buffer,
- unsigned long count, void *data)
+static ssize_t pktgen_if_write(struct file *file, const char __user *user_buffer,
+ size_t count, loff_t *offset)
{
+ struct seq_file *seq = (struct seq_file *) file->private_data;
+ struct pktgen_dev *pkt_dev = seq->private;
int i = 0, max, len;
char name[16], valstr[32];
unsigned long value = 0;
- struct pktgen_dev *pkt_dev = (struct pktgen_dev*)(data);
char* pg_result = NULL;
int tmp = 0;
char buf[128];
@@ -849,7 +805,8 @@ static int proc_if_write(struct file *file, const char __user *user_buffer,
if (copy_from_user(tb, user_buffer, count))
return -EFAULT;
tb[count] = 0;
- printk("pktgen: %s,%lu buffer -:%s:-\n", name, count, tb);
+ printk("pktgen: %s,%lu buffer -:%s:-\n", name,
+ (unsigned long) count, tb);
}
if (!strcmp(name, "min_pkt_size")) {
@@ -1335,92 +1292,98 @@ static int proc_if_write(struct file *file, const char __user *user_buffer,
return -EINVAL;
}
-static int proc_thread_read(char *buf , char **start, off_t offset,
- int len, int *eof, void *data)
+static int pktgen_if_open(struct inode *inode, struct file *file)
{
- char *p;
- struct pktgen_thread *t = (struct pktgen_thread*)(data);
- struct pktgen_dev *pkt_dev = NULL;
+ return single_open(file, pktgen_if_show, PDE(inode)->data);
+}
+static struct file_operations pktgen_if_fops = {
+ .owner = THIS_MODULE,
+ .open = pktgen_if_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .write = pktgen_if_write,
+ .release = single_release,
+};
- if (!t) {
- printk("pktgen: ERROR: could not find thread in proc_thread_read\n");
- return -EINVAL;
- }
+static int pktgen_thread_show(struct seq_file *seq, void *v)
+{
+ struct pktgen_thread *t = seq->private;
+ struct pktgen_dev *pkt_dev = NULL;
- p = buf;
- p += sprintf(p, "Name: %s max_before_softirq: %d\n",
+ BUG_ON(!t);
+
+ seq_printf(seq, "Name: %s max_before_softirq: %d\n",
t->name, t->max_before_softirq);
- p += sprintf(p, "Running: ");
+ seq_printf(seq, "Running: ");
if_lock(t);
for(pkt_dev = t->if_list;pkt_dev; pkt_dev = pkt_dev->next)
if(pkt_dev->running)
- p += sprintf(p, "%s ", pkt_dev->ifname);
+ seq_printf(seq, "%s ", pkt_dev->ifname);
- p += sprintf(p, "\nStopped: ");
+ seq_printf(seq, "\nStopped: ");
for(pkt_dev = t->if_list;pkt_dev; pkt_dev = pkt_dev->next)
if(!pkt_dev->running)
- p += sprintf(p, "%s ", pkt_dev->ifname);
+ seq_printf(seq, "%s ", pkt_dev->ifname);
if (t->result[0])
- p += sprintf(p, "\nResult: %s\n", t->result);
+ seq_printf(seq, "\nResult: %s\n", t->result);
else
- p += sprintf(p, "\nResult: NA\n");
-
- *eof = 1;
+ seq_printf(seq, "\nResult: NA\n");
if_unlock(t);
- return p - buf;
+ return 0;
}
-static int proc_thread_write(struct file *file, const char __user *user_buffer,
- unsigned long count, void *data)
+static ssize_t pktgen_thread_write(struct file *file,
+ const char __user *user_buffer,
+ size_t count, loff_t *offset)
{
+ struct seq_file *seq = (struct seq_file *) file->private_data;
+ struct pktgen_thread *t = seq->private;
int i = 0, max, len, ret;
char name[40];
- struct pktgen_thread *t;
char *pg_result;
unsigned long value = 0;
-
+
if (count < 1) {
// sprintf(pg_result, "Wrong command format");
return -EINVAL;
}
-
+
max = count - i;
len = count_trail_chars(&user_buffer[i], max);
- if (len < 0)
- return len;
-
+ if (len < 0)
+ return len;
+
i += len;
-
+
/* Read variable name */
len = strn_len(&user_buffer[i], sizeof(name) - 1);
- if (len < 0)
- return len;
+ if (len < 0)
+ return len;
memset(name, 0, sizeof(name));
if (copy_from_user(name, &user_buffer[i], len))
return -EFAULT;
i += len;
-
+
max = count -i;
len = count_trail_chars(&user_buffer[i], max);
- if (len < 0)
- return len;
-
+ if (len < 0)
+ return len;
+
i += len;
- if (debug)
- printk("pktgen: t=%s, count=%lu\n", name, count);
-
+ if (debug)
+ printk("pktgen: t=%s, count=%lu\n", name,
+ (unsigned long) count);
- t = (struct pktgen_thread*)(data);
if(!t) {
printk("pktgen: ERROR: No thread\n");
ret = -EINVAL;
@@ -1452,8 +1415,7 @@ static int proc_thread_write(struct file *file, const char __user *user_buffer,
thread_lock();
t->control |= T_REMDEV;
thread_unlock();
- current->state = TASK_INTERRUPTIBLE;
- schedule_timeout(HZ/8); /* Propagate thread->control */
+ schedule_timeout_interruptible(msecs_to_jiffies(125)); /* Propagate thread->control */
ret = count;
sprintf(pg_result, "OK: rem_device_all");
goto out;
@@ -1475,32 +1437,19 @@ static int proc_thread_write(struct file *file, const char __user *user_buffer,
return ret;
}
-static int create_proc_dir(void)
+static int pktgen_thread_open(struct inode *inode, struct file *file)
{
- int len;
- /* does proc_dir already exists */
- len = strlen(PG_PROC_DIR);
-
- for (pg_proc_dir = proc_net->subdir; pg_proc_dir; pg_proc_dir=pg_proc_dir->next) {
- if ((pg_proc_dir->namelen == len) &&
- (! memcmp(pg_proc_dir->name, PG_PROC_DIR, len)))
- break;
- }
-
- if (!pg_proc_dir)
- pg_proc_dir = create_proc_entry(PG_PROC_DIR, S_IFDIR, proc_net);
-
- if (!pg_proc_dir)
- return -ENODEV;
-
- return 0;
+ return single_open(file, pktgen_thread_show, PDE(inode)->data);
}
-static int remove_proc_dir(void)
-{
- remove_proc_entry(PG_PROC_DIR, proc_net);
- return 0;
-}
+static struct file_operations pktgen_thread_fops = {
+ .owner = THIS_MODULE,
+ .open = pktgen_thread_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .write = pktgen_thread_write,
+ .release = single_release,
+};
/* Think find or remove for NN */
static struct pktgen_dev *__pktgen_NN_threads(const char* ifname, int remove)
@@ -1679,13 +1628,12 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
struct in_device *in_dev;
rcu_read_lock();
- in_dev = __in_dev_get(pkt_dev->odev);
+ in_dev = __in_dev_get_rcu(pkt_dev->odev);
if (in_dev) {
if (in_dev->ifa_list) {
pkt_dev->saddr_min = in_dev->ifa_list->ifa_address;
pkt_dev->saddr_max = pkt_dev->saddr_min;
}
- __in_dev_put(in_dev);
}
rcu_read_unlock();
}
@@ -1715,11 +1663,10 @@ static void spin(struct pktgen_dev *pkt_dev, __u64 spin_until_us)
start = now = getCurUs();
printk(KERN_INFO "sleeping for %d\n", (int)(spin_until_us - now));
while (now < spin_until_us) {
- /* TODO: optimise sleeping behavior */
- if (spin_until_us - now > (1000000/HZ)+1) {
- current->state = TASK_INTERRUPTIBLE;
- schedule_timeout(1);
- } else if (spin_until_us - now > 100) {
+ /* TODO: optimize sleeping behavior */
+ if (spin_until_us - now > jiffies_to_usecs(1)+1)
+ schedule_timeout_interruptible(1);
+ else if (spin_until_us - now > 100) {
do_softirq();
if (!pkt_dev->running)
return;
@@ -2375,7 +2322,7 @@ static void pktgen_stop_all_threads_ifs(void)
pktgen_stop(t);
t = t->next;
}
- thread_unlock();
+ thread_unlock();
}
static int thread_is_running(struct pktgen_thread *t )
@@ -2449,8 +2396,7 @@ static void pktgen_run_all_threads(void)
}
thread_unlock();
- current->state = TASK_INTERRUPTIBLE;
- schedule_timeout(HZ/8); /* Propagate thread->control */
+ schedule_timeout_interruptible(msecs_to_jiffies(125)); /* Propagate thread->control */
pktgen_wait_all_threads_run();
}
@@ -2567,10 +2513,9 @@ static void pktgen_rem_thread(struct pktgen_thread *t)
struct pktgen_thread *tmp = pktgen_threads;
- if (strlen(t->fname))
- remove_proc_entry(t->fname, NULL);
+ remove_proc_entry(t->name, pg_proc_dir);
- thread_lock();
+ thread_lock();
if (tmp == t)
pktgen_threads = tmp->next;
@@ -2840,7 +2785,7 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t, const char* i
if_lock(t);
for(pkt_dev=t->if_list; pkt_dev; pkt_dev = pkt_dev->next ) {
- if (strcmp(pkt_dev->ifname, ifname) == 0) {
+ if (strncmp(pkt_dev->ifname, ifname, IFNAMSIZ) == 0) {
break;
}
}
@@ -2879,74 +2824,70 @@ static int add_dev_to_thread(struct pktgen_thread *t, struct pktgen_dev *pkt_dev
static int pktgen_add_device(struct pktgen_thread *t, const char* ifname)
{
struct pktgen_dev *pkt_dev;
+ struct proc_dir_entry *pe;
/* We don't allow a device to be on several threads */
- if( (pkt_dev = __pktgen_NN_threads(ifname, FIND)) == NULL) {
-
- pkt_dev = kmalloc(sizeof(struct pktgen_dev), GFP_KERNEL);
- if (!pkt_dev)
- return -ENOMEM;
+ pkt_dev = __pktgen_NN_threads(ifname, FIND);
+ if (pkt_dev) {
+ printk("pktgen: ERROR: interface already used.\n");
+ return -EBUSY;
+ }
- memset(pkt_dev, 0, sizeof(struct pktgen_dev));
+ pkt_dev = kzalloc(sizeof(struct pktgen_dev), GFP_KERNEL);
+ if (!pkt_dev)
+ return -ENOMEM;
- pkt_dev->flows = vmalloc(MAX_CFLOWS*sizeof(struct flow_state));
- if (pkt_dev->flows == NULL) {
- kfree(pkt_dev);
- return -ENOMEM;
- }
- memset(pkt_dev->flows, 0, MAX_CFLOWS*sizeof(struct flow_state));
-
- pkt_dev->min_pkt_size = ETH_ZLEN;
- pkt_dev->max_pkt_size = ETH_ZLEN;
- pkt_dev->nfrags = 0;
- pkt_dev->clone_skb = pg_clone_skb_d;
- pkt_dev->delay_us = pg_delay_d / 1000;
- pkt_dev->delay_ns = pg_delay_d % 1000;
- pkt_dev->count = pg_count_d;
- pkt_dev->sofar = 0;
- pkt_dev->udp_src_min = 9; /* sink port */
- pkt_dev->udp_src_max = 9;
- pkt_dev->udp_dst_min = 9;
- pkt_dev->udp_dst_max = 9;
-
- strncpy(pkt_dev->ifname, ifname, 31);
- sprintf(pkt_dev->fname, "net/%s/%s", PG_PROC_DIR, ifname);
-
- if (! pktgen_setup_dev(pkt_dev)) {
- printk("pktgen: ERROR: pktgen_setup_dev failed.\n");
- if (pkt_dev->flows)
- vfree(pkt_dev->flows);
- kfree(pkt_dev);
- return -ENODEV;
- }
+ pkt_dev->flows = vmalloc(MAX_CFLOWS*sizeof(struct flow_state));
+ if (pkt_dev->flows == NULL) {
+ kfree(pkt_dev);
+ return -ENOMEM;
+ }
+ memset(pkt_dev->flows, 0, MAX_CFLOWS*sizeof(struct flow_state));
- pkt_dev->proc_ent = create_proc_entry(pkt_dev->fname, 0600, NULL);
- if (!pkt_dev->proc_ent) {
- printk("pktgen: cannot create %s procfs entry.\n", pkt_dev->fname);
- if (pkt_dev->flows)
- vfree(pkt_dev->flows);
- kfree(pkt_dev);
- return -EINVAL;
- }
- pkt_dev->proc_ent->read_proc = proc_if_read;
- pkt_dev->proc_ent->write_proc = proc_if_write;
- pkt_dev->proc_ent->data = (void*)(pkt_dev);
- pkt_dev->proc_ent->owner = THIS_MODULE;
+ pkt_dev->min_pkt_size = ETH_ZLEN;
+ pkt_dev->max_pkt_size = ETH_ZLEN;
+ pkt_dev->nfrags = 0;
+ pkt_dev->clone_skb = pg_clone_skb_d;
+ pkt_dev->delay_us = pg_delay_d / 1000;
+ pkt_dev->delay_ns = pg_delay_d % 1000;
+ pkt_dev->count = pg_count_d;
+ pkt_dev->sofar = 0;
+ pkt_dev->udp_src_min = 9; /* sink port */
+ pkt_dev->udp_src_max = 9;
+ pkt_dev->udp_dst_min = 9;
+ pkt_dev->udp_dst_max = 9;
+
+ strncpy(pkt_dev->ifname, ifname, IFNAMSIZ);
+
+ if (! pktgen_setup_dev(pkt_dev)) {
+ printk("pktgen: ERROR: pktgen_setup_dev failed.\n");
+ if (pkt_dev->flows)
+ vfree(pkt_dev->flows);
+ kfree(pkt_dev);
+ return -ENODEV;
+ }
+
+ pe = create_proc_entry(ifname, 0600, pg_proc_dir);
+ if (!pe) {
+ printk("pktgen: cannot create %s/%s procfs entry.\n",
+ PG_PROC_DIR, ifname);
+ if (pkt_dev->flows)
+ vfree(pkt_dev->flows);
+ kfree(pkt_dev);
+ return -EINVAL;
+ }
+ pe->proc_fops = &pktgen_if_fops;
+ pe->data = pkt_dev;
- return add_dev_to_thread(t, pkt_dev);
- }
- else {
- printk("pktgen: ERROR: interface already used.\n");
- return -EBUSY;
- }
+ return add_dev_to_thread(t, pkt_dev);
}
static struct pktgen_thread *pktgen_find_thread(const char* name)
{
struct pktgen_thread *t = NULL;
- thread_lock();
+ thread_lock();
t = pktgen_threads;
while (t) {
@@ -2962,6 +2903,7 @@ static struct pktgen_thread *pktgen_find_thread(const char* name)
static int pktgen_create_thread(const char* name, int cpu)
{
struct pktgen_thread *t = NULL;
+ struct proc_dir_entry *pe;
if (strlen(name) > 31) {
printk("pktgen: ERROR: Thread name cannot be more than 31 characters.\n");
@@ -2973,28 +2915,26 @@ static int pktgen_create_thread(const char* name, int cpu)
return -EINVAL;
}
- t = (struct pktgen_thread*)(kmalloc(sizeof(struct pktgen_thread), GFP_KERNEL));
+ t = kzalloc(sizeof(struct pktgen_thread), GFP_KERNEL);
if (!t) {
printk("pktgen: ERROR: out of memory, can't create new thread.\n");
return -ENOMEM;
}
- memset(t, 0, sizeof(struct pktgen_thread));
strcpy(t->name, name);
spin_lock_init(&t->if_lock);
t->cpu = cpu;
- sprintf(t->fname, "net/%s/%s", PG_PROC_DIR, t->name);
- t->proc_ent = create_proc_entry(t->fname, 0600, NULL);
- if (!t->proc_ent) {
- printk("pktgen: cannot create %s procfs entry.\n", t->fname);
+ pe = create_proc_entry(t->name, 0600, pg_proc_dir);
+ if (!pe) {
+ printk("pktgen: cannot create %s/%s procfs entry.\n",
+ PG_PROC_DIR, t->name);
kfree(t);
return -EINVAL;
}
- t->proc_ent->read_proc = proc_thread_read;
- t->proc_ent->write_proc = proc_thread_write;
- t->proc_ent->data = (void*)(t);
- t->proc_ent->owner = THIS_MODULE;
+
+ pe->proc_fops = &pktgen_thread_fops;
+ pe->data = t;
t->next = pktgen_threads;
pktgen_threads = t;
@@ -3049,8 +2989,7 @@ static int pktgen_remove_device(struct pktgen_thread *t, struct pktgen_dev *pkt_
/* Clean up proc file system */
- if (strlen(pkt_dev->fname))
- remove_proc_entry(pkt_dev->fname, NULL);
+ remove_proc_entry(pkt_dev->ifname, pg_proc_dir);
if (pkt_dev->flows)
vfree(pkt_dev->flows);
@@ -3061,31 +3000,31 @@ static int pktgen_remove_device(struct pktgen_thread *t, struct pktgen_dev *pkt_
static int __init pg_init(void)
{
int cpu;
- printk(version);
+ struct proc_dir_entry *pe;
- module_fname[0] = 0;
+ printk(version);
- create_proc_dir();
+ pg_proc_dir = proc_mkdir(PG_PROC_DIR, proc_net);
+ if (!pg_proc_dir)
+ return -ENODEV;
+ pg_proc_dir->owner = THIS_MODULE;
- sprintf(module_fname, "net/%s/pgctrl", PG_PROC_DIR);
- module_proc_ent = create_proc_entry(module_fname, 0600, NULL);
- if (!module_proc_ent) {
- printk("pktgen: ERROR: cannot create %s procfs entry.\n", module_fname);
+ pe = create_proc_entry(PGCTRL, 0600, pg_proc_dir);
+ if (pe == NULL) {
+ printk("pktgen: ERROR: cannot create %s procfs entry.\n", PGCTRL);
+ proc_net_remove(PG_PROC_DIR);
return -EINVAL;
}
- module_proc_ent->proc_fops = &pktgen_fops;
- module_proc_ent->data = NULL;
+ pe->proc_fops = &pktgen_fops;
+ pe->data = NULL;
/* Register us to receive netdevice events */
register_netdevice_notifier(&pktgen_notifier_block);
- for (cpu = 0; cpu < NR_CPUS ; cpu++) {
+ for_each_online_cpu(cpu) {
char buf[30];
- if (!cpu_online(cpu))
- continue;
-
sprintf(buf, "kpktgend_%i", cpu);
pktgen_create_thread(buf, cpu);
}
@@ -3110,10 +3049,8 @@ static void __exit pg_cleanup(void)
unregister_netdevice_notifier(&pktgen_notifier_block);
/* Clean up proc file system */
-
- remove_proc_entry(module_fname, NULL);
-
- remove_proc_dir();
+ remove_proc_entry(PGCTRL, pg_proc_dir);
+ proc_net_remove(PG_PROC_DIR);
}
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index bb55675f0685..b8203de5ff07 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -32,7 +32,6 @@
* Further increasing requires to change hash table size.
*/
int sysctl_max_syn_backlog = 256;
-EXPORT_SYMBOL(sysctl_max_syn_backlog);
int reqsk_queue_alloc(struct request_sock_queue *queue,
const int nr_table_entries)
@@ -53,6 +52,8 @@ int reqsk_queue_alloc(struct request_sock_queue *queue,
get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd));
rwlock_init(&queue->syn_wait_lock);
queue->rskq_accept_head = queue->rskq_accept_head = NULL;
+ queue->rskq_defer_accept = 0;
+ lopt->nr_table_entries = nr_table_entries;
write_lock_bh(&queue->syn_wait_lock);
queue->listen_opt = lopt;
@@ -62,3 +63,28 @@ int reqsk_queue_alloc(struct request_sock_queue *queue,
}
EXPORT_SYMBOL(reqsk_queue_alloc);
+
+void reqsk_queue_destroy(struct request_sock_queue *queue)
+{
+ /* make all the listen_opt local to us */
+ struct listen_sock *lopt = reqsk_queue_yank_listen_sk(queue);
+
+ if (lopt->qlen != 0) {
+ int i;
+
+ for (i = 0; i < lopt->nr_table_entries; i++) {
+ struct request_sock *req;
+
+ while ((req = lopt->syn_table[i]) != NULL) {
+ lopt->syn_table[i] = req->dl_next;
+ lopt->qlen--;
+ reqsk_free(req);
+ }
+ }
+ }
+
+ BUG_TRAP(lopt->qlen == 0);
+ kfree(lopt);
+}
+
+EXPORT_SYMBOL(reqsk_queue_destroy);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 4b1bb30e6381..9bed7569ce3f 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -148,7 +148,7 @@ int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo)
{
int err = 0;
- NETLINK_CB(skb).dst_groups = group;
+ NETLINK_CB(skb).dst_group = group;
if (echo)
atomic_inc(&skb->users);
netlink_broadcast(rtnl, skb, pid, group, GFP_KERNEL);
@@ -458,8 +458,8 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
kfree_skb(skb);
return;
}
- NETLINK_CB(skb).dst_groups = RTMGRP_LINK;
- netlink_broadcast(rtnl, skb, 0, RTMGRP_LINK, GFP_KERNEL);
+ NETLINK_CB(skb).dst_group = RTNLGRP_LINK;
+ netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_KERNEL);
}
static int rtnetlink_done(struct netlink_callback *cb)
@@ -708,7 +708,8 @@ void __init rtnetlink_init(void)
if (!rta_buf)
panic("rtnetlink_init: cannot allocate rta_buf\n");
- rtnl = netlink_kernel_create(NETLINK_ROUTE, rtnetlink_rcv);
+ rtnl = netlink_kernel_create(NETLINK_ROUTE, RTNLGRP_MAX, rtnetlink_rcv,
+ THIS_MODULE);
if (rtnl == NULL)
panic("rtnetlink_init: cannot initialize rtnetlink\n");
netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7eab867ede59..95501e40100e 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -68,7 +68,8 @@
#include <asm/uaccess.h>
#include <asm/system.h>
-static kmem_cache_t *skbuff_head_cache;
+static kmem_cache_t *skbuff_head_cache __read_mostly;
+static kmem_cache_t *skbuff_fclone_cache __read_mostly;
/*
* Keep out-of-line to prevent kernel bloat.
@@ -118,9 +119,11 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here)
*/
/**
- * alloc_skb - allocate a network buffer
+ * __alloc_skb - allocate a network buffer
* @size: size to allocate
* @gfp_mask: allocation mask
+ * @fclone: allocate from fclone cache instead of head cache
+ * and allocate a cloned (child) skb
*
* Allocate a new &sk_buff. The returned buffer has no headroom and a
* tail room of size bytes. The object has a reference count of one.
@@ -129,14 +132,20 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here)
* Buffers may only be allocated from interrupts using a @gfp_mask of
* %GFP_ATOMIC.
*/
-struct sk_buff *alloc_skb(unsigned int size, unsigned int __nocast gfp_mask)
+struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
+ int fclone)
{
struct sk_buff *skb;
u8 *data;
/* Get the HEAD */
- skb = kmem_cache_alloc(skbuff_head_cache,
- gfp_mask & ~__GFP_DMA);
+ if (fclone)
+ skb = kmem_cache_alloc(skbuff_fclone_cache,
+ gfp_mask & ~__GFP_DMA);
+ else
+ skb = kmem_cache_alloc(skbuff_head_cache,
+ gfp_mask & ~__GFP_DMA);
+
if (!skb)
goto out;
@@ -153,12 +162,22 @@ struct sk_buff *alloc_skb(unsigned int size, unsigned int __nocast gfp_mask)
skb->data = data;
skb->tail = data;
skb->end = data + size;
+ if (fclone) {
+ struct sk_buff *child = skb + 1;
+ atomic_t *fclone_ref = (atomic_t *) (child + 1);
+ skb->fclone = SKB_FCLONE_ORIG;
+ atomic_set(fclone_ref, 1);
+
+ child->fclone = SKB_FCLONE_UNAVAILABLE;
+ }
atomic_set(&(skb_shinfo(skb)->dataref), 1);
skb_shinfo(skb)->nr_frags = 0;
skb_shinfo(skb)->tso_size = 0;
skb_shinfo(skb)->tso_segs = 0;
skb_shinfo(skb)->frag_list = NULL;
+ skb_shinfo(skb)->ufo_size = 0;
+ skb_shinfo(skb)->ip6_frag_id = 0;
out:
return skb;
nodata:
@@ -183,7 +202,7 @@ nodata:
*/
struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
unsigned int size,
- unsigned int __nocast gfp_mask)
+ gfp_t gfp_mask)
{
struct sk_buff *skb;
u8 *data;
@@ -266,8 +285,34 @@ void skb_release_data(struct sk_buff *skb)
*/
void kfree_skbmem(struct sk_buff *skb)
{
+ struct sk_buff *other;
+ atomic_t *fclone_ref;
+
skb_release_data(skb);
- kmem_cache_free(skbuff_head_cache, skb);
+ switch (skb->fclone) {
+ case SKB_FCLONE_UNAVAILABLE:
+ kmem_cache_free(skbuff_head_cache, skb);
+ break;
+
+ case SKB_FCLONE_ORIG:
+ fclone_ref = (atomic_t *) (skb + 2);
+ if (atomic_dec_and_test(fclone_ref))
+ kmem_cache_free(skbuff_fclone_cache, skb);
+ break;
+
+ case SKB_FCLONE_CLONE:
+ fclone_ref = (atomic_t *) (skb + 1);
+ other = skb - 1;
+
+ /* The clone portion is available for
+ * fast-cloning again.
+ */
+ skb->fclone = SKB_FCLONE_UNAVAILABLE;
+
+ if (atomic_dec_and_test(fclone_ref))
+ kmem_cache_free(skbuff_fclone_cache, other);
+ break;
+ };
}
/**
@@ -281,8 +326,6 @@ void kfree_skbmem(struct sk_buff *skb)
void __kfree_skb(struct sk_buff *skb)
{
- BUG_ON(skb->list != NULL);
-
dst_release(skb->dst);
#ifdef CONFIG_XFRM
secpath_put(skb->sp);
@@ -302,7 +345,6 @@ void __kfree_skb(struct sk_buff *skb)
skb->tc_index = 0;
#ifdef CONFIG_NET_CLS_ACT
skb->tc_verd = 0;
- skb->tc_classid = 0;
#endif
#endif
@@ -323,21 +365,29 @@ void __kfree_skb(struct sk_buff *skb)
* %GFP_ATOMIC.
*/
-struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask)
+struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
{
- struct sk_buff *n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
-
- if (!n)
- return NULL;
+ struct sk_buff *n;
+
+ n = skb + 1;
+ if (skb->fclone == SKB_FCLONE_ORIG &&
+ n->fclone == SKB_FCLONE_UNAVAILABLE) {
+ atomic_t *fclone_ref = (atomic_t *) (n + 1);
+ n->fclone = SKB_FCLONE_CLONE;
+ atomic_inc(fclone_ref);
+ } else {
+ n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
+ if (!n)
+ return NULL;
+ n->fclone = SKB_FCLONE_UNAVAILABLE;
+ }
#define C(x) n->x = skb->x
n->next = n->prev = NULL;
- n->list = NULL;
n->sk = NULL;
- C(stamp);
+ C(tstamp);
C(dev);
- C(real_dev);
C(h);
C(nh);
C(mac);
@@ -361,18 +411,17 @@ struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask)
n->destructor = NULL;
#ifdef CONFIG_NETFILTER
C(nfmark);
- C(nfcache);
C(nfct);
nf_conntrack_get(skb->nfct);
C(nfctinfo);
+#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
+ C(ipvs_property);
+#endif
#ifdef CONFIG_BRIDGE_NETFILTER
C(nf_bridge);
nf_bridge_get(skb->nf_bridge);
#endif
#endif /*CONFIG_NETFILTER*/
-#if defined(CONFIG_HIPPI)
- C(private);
-#endif
#ifdef CONFIG_NET_SCHED
C(tc_index);
#ifdef CONFIG_NET_CLS_ACT
@@ -380,7 +429,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask)
n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd);
n->tc_verd = CLR_TC_MUNGED(n->tc_verd);
C(input_dev);
- C(tc_classid);
#endif
#endif
@@ -404,10 +452,8 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
*/
unsigned long offset = new->data - old->data;
- new->list = NULL;
new->sk = NULL;
new->dev = old->dev;
- new->real_dev = old->real_dev;
new->priority = old->priority;
new->protocol = old->protocol;
new->dst = dst_clone(old->dst);
@@ -419,15 +465,18 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
new->mac.raw = old->mac.raw + offset;
memcpy(new->cb, old->cb, sizeof(old->cb));
new->local_df = old->local_df;
+ new->fclone = SKB_FCLONE_UNAVAILABLE;
new->pkt_type = old->pkt_type;
- new->stamp = old->stamp;
+ new->tstamp = old->tstamp;
new->destructor = NULL;
#ifdef CONFIG_NETFILTER
new->nfmark = old->nfmark;
- new->nfcache = old->nfcache;
new->nfct = old->nfct;
nf_conntrack_get(old->nfct);
new->nfctinfo = old->nfctinfo;
+#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
+ new->ipvs_property = old->ipvs_property;
+#endif
#ifdef CONFIG_BRIDGE_NETFILTER
new->nf_bridge = old->nf_bridge;
nf_bridge_get(old->nf_bridge);
@@ -461,7 +510,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
* header is going to be modified. Use pskb_copy() instead.
*/
-struct sk_buff *skb_copy(const struct sk_buff *skb, unsigned int __nocast gfp_mask)
+struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
{
int headerlen = skb->data - skb->head;
/*
@@ -500,7 +549,7 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, unsigned int __nocast gfp_ma
* The returned buffer has a reference count of 1.
*/
-struct sk_buff *pskb_copy(struct sk_buff *skb, unsigned int __nocast gfp_mask)
+struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
{
/*
* Allocate the copy buffer
@@ -559,7 +608,7 @@ out:
*/
int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
- unsigned int __nocast gfp_mask)
+ gfp_t gfp_mask)
{
int i;
u8 *data;
@@ -650,7 +699,7 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
*/
struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
int newheadroom, int newtailroom,
- unsigned int __nocast gfp_mask)
+ gfp_t gfp_mask)
{
/*
* Allocate the copy buffer
@@ -1344,50 +1393,43 @@ void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
__skb_queue_tail(list, newsk);
spin_unlock_irqrestore(&list->lock, flags);
}
+
/**
* skb_unlink - remove a buffer from a list
* @skb: buffer to remove
+ * @list: list to use
*
- * Place a packet after a given packet in a list. The list locks are taken
- * and this function is atomic with respect to other list locked calls
+ * Remove a packet from a list. The list locks are taken and this
+ * function is atomic with respect to other list locked calls
*
- * Works even without knowing the list it is sitting on, which can be
- * handy at times. It also means that THE LIST MUST EXIST when you
- * unlink. Thus a list must have its contents unlinked before it is
- * destroyed.
+ * You must know what list the SKB is on.
*/
-void skb_unlink(struct sk_buff *skb)
+void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
{
- struct sk_buff_head *list = skb->list;
-
- if (list) {
- unsigned long flags;
+ unsigned long flags;
- spin_lock_irqsave(&list->lock, flags);
- if (skb->list == list)
- __skb_unlink(skb, skb->list);
- spin_unlock_irqrestore(&list->lock, flags);
- }
+ spin_lock_irqsave(&list->lock, flags);
+ __skb_unlink(skb, list);
+ spin_unlock_irqrestore(&list->lock, flags);
}
-
/**
* skb_append - append a buffer
* @old: buffer to insert after
* @newsk: buffer to insert
+ * @list: list to use
*
* Place a packet after a given packet in a list. The list locks are taken
* and this function is atomic with respect to other list locked calls.
* A buffer cannot be placed on two lists at the same time.
*/
-
-void skb_append(struct sk_buff *old, struct sk_buff *newsk)
+void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list)
{
unsigned long flags;
- spin_lock_irqsave(&old->list->lock, flags);
- __skb_append(old, newsk);
- spin_unlock_irqrestore(&old->list->lock, flags);
+ spin_lock_irqsave(&list->lock, flags);
+ __skb_append(old, newsk, list);
+ spin_unlock_irqrestore(&list->lock, flags);
}
@@ -1395,19 +1437,21 @@ void skb_append(struct sk_buff *old, struct sk_buff *newsk)
* skb_insert - insert a buffer
* @old: buffer to insert before
* @newsk: buffer to insert
+ * @list: list to use
+ *
+ * Place a packet before a given packet in a list. The list locks are
+ * taken and this function is atomic with respect to other list locked
+ * calls.
*
- * Place a packet before a given packet in a list. The list locks are taken
- * and this function is atomic with respect to other list locked calls
* A buffer cannot be placed on two lists at the same time.
*/
-
-void skb_insert(struct sk_buff *old, struct sk_buff *newsk)
+void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list)
{
unsigned long flags;
- spin_lock_irqsave(&old->list->lock, flags);
- __skb_insert(newsk, old->prev, old, old->list);
- spin_unlock_irqrestore(&old->list->lock, flags);
+ spin_lock_irqsave(&list->lock, flags);
+ __skb_insert(newsk, old->prev, old, list);
+ spin_unlock_irqrestore(&list->lock, flags);
}
#if 0
@@ -1654,6 +1698,78 @@ unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
return textsearch_find(config, state);
}
+/**
+ * skb_append_datato_frags: - append the user data to a skb
+ * @sk: sock structure
+ * @skb: skb structure to be appened with user data.
+ * @getfrag: call back function to be used for getting the user data
+ * @from: pointer to user message iov
+ * @length: length of the iov message
+ *
+ * Description: This procedure append the user data in the fragment part
+ * of the skb if any page alloc fails user this procedure returns -ENOMEM
+ */
+int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
+ int getfrag(void *from, char *to, int offset,
+ int len, int odd, struct sk_buff *skb),
+ void *from, int length)
+{
+ int frg_cnt = 0;
+ skb_frag_t *frag = NULL;
+ struct page *page = NULL;
+ int copy, left;
+ int offset = 0;
+ int ret;
+
+ do {
+ /* Return error if we don't have space for new frag */
+ frg_cnt = skb_shinfo(skb)->nr_frags;
+ if (frg_cnt >= MAX_SKB_FRAGS)
+ return -EFAULT;
+
+ /* allocate a new page for next frag */
+ page = alloc_pages(sk->sk_allocation, 0);
+
+ /* If alloc_page fails just return failure and caller will
+ * free previous allocated pages by doing kfree_skb()
+ */
+ if (page == NULL)
+ return -ENOMEM;
+
+ /* initialize the next frag */
+ sk->sk_sndmsg_page = page;
+ sk->sk_sndmsg_off = 0;
+ skb_fill_page_desc(skb, frg_cnt, page, 0, 0);
+ skb->truesize += PAGE_SIZE;
+ atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
+
+ /* get the new initialized frag */
+ frg_cnt = skb_shinfo(skb)->nr_frags;
+ frag = &skb_shinfo(skb)->frags[frg_cnt - 1];
+
+ /* copy the user data to page */
+ left = PAGE_SIZE - frag->page_offset;
+ copy = (length > left)? left : length;
+
+ ret = getfrag(from, (page_address(frag->page) +
+ frag->page_offset + frag->size),
+ offset, copy, 0, skb);
+ if (ret < 0)
+ return -EFAULT;
+
+ /* copy was successful so update the size parameters */
+ sk->sk_sndmsg_off += copy;
+ frag->size += copy;
+ skb->len += copy;
+ skb->data_len += copy;
+ offset += copy;
+ length -= copy;
+
+ } while (length > 0);
+
+ return 0;
+}
+
void __init skb_init(void)
{
skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
@@ -1663,12 +1779,21 @@ void __init skb_init(void)
NULL, NULL);
if (!skbuff_head_cache)
panic("cannot create skbuff cache");
+
+ skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
+ (2*sizeof(struct sk_buff)) +
+ sizeof(atomic_t),
+ 0,
+ SLAB_HWCACHE_ALIGN,
+ NULL, NULL);
+ if (!skbuff_fclone_cache)
+ panic("cannot create skbuff cache");
}
EXPORT_SYMBOL(___pskb_trim);
EXPORT_SYMBOL(__kfree_skb);
EXPORT_SYMBOL(__pskb_pull_tail);
-EXPORT_SYMBOL(alloc_skb);
+EXPORT_SYMBOL(__alloc_skb);
EXPORT_SYMBOL(pskb_copy);
EXPORT_SYMBOL(pskb_expand_head);
EXPORT_SYMBOL(skb_checksum);
@@ -1696,3 +1821,4 @@ EXPORT_SYMBOL(skb_prepare_seq_read);
EXPORT_SYMBOL(skb_seq_read);
EXPORT_SYMBOL(skb_abort_seq_read);
EXPORT_SYMBOL(skb_find_text);
+EXPORT_SYMBOL(skb_append_datato_frags);
diff --git a/net/core/sock.c b/net/core/sock.c
index 12f6d9a2a522..9602ceb3bac9 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -260,7 +260,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
if (val > sysctl_wmem_max)
val = sysctl_wmem_max;
-
+set_sndbuf:
sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
if ((val * 2) < SOCK_MIN_SNDBUF)
sk->sk_sndbuf = SOCK_MIN_SNDBUF;
@@ -274,6 +274,13 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
sk->sk_write_space(sk);
break;
+ case SO_SNDBUFFORCE:
+ if (!capable(CAP_NET_ADMIN)) {
+ ret = -EPERM;
+ break;
+ }
+ goto set_sndbuf;
+
case SO_RCVBUF:
/* Don't error on this BSD doesn't and if you think
about it this is right. Otherwise apps have to
@@ -282,7 +289,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
if (val > sysctl_rmem_max)
val = sysctl_rmem_max;
-
+set_rcvbuf:
sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
/* FIXME: is this lower bound the right one? */
if ((val * 2) < SOCK_MIN_RCVBUF)
@@ -291,6 +298,13 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
sk->sk_rcvbuf = val * 2;
break;
+ case SO_RCVBUFFORCE:
+ if (!capable(CAP_NET_ADMIN)) {
+ ret = -EPERM;
+ break;
+ }
+ goto set_rcvbuf;
+
case SO_KEEPALIVE:
#ifdef CONFIG_INET
if (sk->sk_protocol == IPPROTO_TCP)
@@ -327,11 +341,11 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
sock_reset_flag(sk, SOCK_LINGER);
else {
#if (BITS_PER_LONG == 32)
- if (ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
+ if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
else
#endif
- sk->sk_lingertime = ling.l_linger * HZ;
+ sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
sock_set_flag(sk, SOCK_LINGER);
}
break;
@@ -623,7 +637,7 @@ lenout:
* @prot: struct proto associated with this new sock instance
* @zero_it: if we should zero the newly allocated sock
*/
-struct sock *sk_alloc(int family, unsigned int __nocast priority,
+struct sock *sk_alloc(int family, gfp_t priority,
struct proto *prot, int zero_it)
{
struct sock *sk = NULL;
@@ -646,16 +660,20 @@ struct sock *sk_alloc(int family, unsigned int __nocast priority,
sock_lock_init(sk);
}
- if (security_sk_alloc(sk, family, priority)) {
- if (slab != NULL)
- kmem_cache_free(slab, sk);
- else
- kfree(sk);
- sk = NULL;
- } else
- __module_get(prot->owner);
+ if (security_sk_alloc(sk, family, priority))
+ goto out_free;
+
+ if (!try_module_get(prot->owner))
+ goto out_free;
}
return sk;
+
+out_free:
+ if (slab != NULL)
+ kmem_cache_free(slab, sk);
+ else
+ kfree(sk);
+ return NULL;
}
void sk_free(struct sock *sk)
@@ -686,6 +704,80 @@ void sk_free(struct sock *sk)
module_put(owner);
}
+struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
+{
+ struct sock *newsk = sk_alloc(sk->sk_family, priority, sk->sk_prot, 0);
+
+ if (newsk != NULL) {
+ struct sk_filter *filter;
+
+ memcpy(newsk, sk, sk->sk_prot->obj_size);
+
+ /* SANITY */
+ sk_node_init(&newsk->sk_node);
+ sock_lock_init(newsk);
+ bh_lock_sock(newsk);
+
+ atomic_set(&newsk->sk_rmem_alloc, 0);
+ atomic_set(&newsk->sk_wmem_alloc, 0);
+ atomic_set(&newsk->sk_omem_alloc, 0);
+ skb_queue_head_init(&newsk->sk_receive_queue);
+ skb_queue_head_init(&newsk->sk_write_queue);
+
+ rwlock_init(&newsk->sk_dst_lock);
+ rwlock_init(&newsk->sk_callback_lock);
+
+ newsk->sk_dst_cache = NULL;
+ newsk->sk_wmem_queued = 0;
+ newsk->sk_forward_alloc = 0;
+ newsk->sk_send_head = NULL;
+ newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
+ newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
+
+ sock_reset_flag(newsk, SOCK_DONE);
+ skb_queue_head_init(&newsk->sk_error_queue);
+
+ filter = newsk->sk_filter;
+ if (filter != NULL)
+ sk_filter_charge(newsk, filter);
+
+ if (unlikely(xfrm_sk_clone_policy(newsk))) {
+ /* It is still raw copy of parent, so invalidate
+ * destructor and make plain sk_free() */
+ newsk->sk_destruct = NULL;
+ sk_free(newsk);
+ newsk = NULL;
+ goto out;
+ }
+
+ newsk->sk_err = 0;
+ newsk->sk_priority = 0;
+ atomic_set(&newsk->sk_refcnt, 2);
+
+ /*
+ * Increment the counter in the same struct proto as the master
+ * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
+ * is the same as sk->sk_prot->socks, as this field was copied
+ * with memcpy).
+ *
+ * This _changes_ the previous behaviour, where
+ * tcp_create_openreq_child always was incrementing the
+ * equivalent to tcp_prot->socks (inet_sock_nr), so this have
+ * to be taken into account in all callers. -acme
+ */
+ sk_refcnt_debug_inc(newsk);
+ newsk->sk_socket = NULL;
+ newsk->sk_sleep = NULL;
+
+ if (newsk->sk_prot->sockets_allocated)
+ atomic_inc(newsk->sk_prot->sockets_allocated);
+ }
+out:
+ return newsk;
+}
+
+EXPORT_SYMBOL_GPL(sk_clone);
+
void __init sk_init(void)
{
if (num_physpages <= 4096) {
@@ -753,7 +845,7 @@ unsigned long sock_i_ino(struct sock *sk)
* Allocate a skb from the socket's send buffer.
*/
struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
- unsigned int __nocast priority)
+ gfp_t priority)
{
if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
struct sk_buff * skb = alloc_skb(size, priority);
@@ -769,7 +861,7 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
* Allocate a skb from the socket's receive buffer.
*/
struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
- unsigned int __nocast priority)
+ gfp_t priority)
{
if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
struct sk_buff *skb = alloc_skb(size, priority);
@@ -784,7 +876,7 @@ struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
/*
* Allocate a memory block from the socket's option memory buffer.
*/
-void *sock_kmalloc(struct sock *sk, int size, unsigned int __nocast priority)
+void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
{
if ((unsigned)size <= sysctl_optmem_max &&
atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
@@ -848,7 +940,7 @@ static struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
int noblock, int *errcode)
{
struct sk_buff *skb;
- unsigned int gfp_mask;
+ gfp_t gfp_mask;
long timeo;
int err;
@@ -1353,11 +1445,7 @@ void sk_common_release(struct sock *sk)
xfrm_sk_free_policy(sk);
-#ifdef INET_REFCNT_DEBUG
- if (atomic_read(&sk->sk_refcnt) != 1)
- printk(KERN_DEBUG "Destruction of the socket %p delayed, c=%d\n",
- sk, atomic_read(&sk->sk_refcnt));
-#endif
+ sk_refcnt_debug_release(sk);
sock_put(sk);
}
@@ -1368,7 +1456,8 @@ static LIST_HEAD(proto_list);
int proto_register(struct proto *prot, int alloc_slab)
{
- char *request_sock_slab_name;
+ char *request_sock_slab_name = NULL;
+ char *timewait_sock_slab_name;
int rc = -ENOBUFS;
if (alloc_slab) {
@@ -1399,6 +1488,23 @@ int proto_register(struct proto *prot, int alloc_slab)
goto out_free_request_sock_slab_name;
}
}
+
+ if (prot->twsk_obj_size) {
+ static const char mask[] = "tw_sock_%s";
+
+ timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
+
+ if (timewait_sock_slab_name == NULL)
+ goto out_free_request_sock_slab;
+
+ sprintf(timewait_sock_slab_name, mask, prot->name);
+ prot->twsk_slab = kmem_cache_create(timewait_sock_slab_name,
+ prot->twsk_obj_size,
+ 0, SLAB_HWCACHE_ALIGN,
+ NULL, NULL);
+ if (prot->twsk_slab == NULL)
+ goto out_free_timewait_sock_slab_name;
+ }
}
write_lock(&proto_list_lock);
@@ -1407,6 +1513,13 @@ int proto_register(struct proto *prot, int alloc_slab)
rc = 0;
out:
return rc;
+out_free_timewait_sock_slab_name:
+ kfree(timewait_sock_slab_name);
+out_free_request_sock_slab:
+ if (prot->rsk_prot && prot->rsk_prot->slab) {
+ kmem_cache_destroy(prot->rsk_prot->slab);
+ prot->rsk_prot->slab = NULL;
+ }
out_free_request_sock_slab_name:
kfree(request_sock_slab_name);
out_free_sock_slab:
@@ -1420,6 +1533,8 @@ EXPORT_SYMBOL(proto_register);
void proto_unregister(struct proto *prot)
{
write_lock(&proto_list_lock);
+ list_del(&prot->node);
+ write_unlock(&proto_list_lock);
if (prot->slab != NULL) {
kmem_cache_destroy(prot->slab);
@@ -1434,8 +1549,13 @@ void proto_unregister(struct proto *prot)
prot->rsk_prot->slab = NULL;
}
- list_del(&prot->node);
- write_unlock(&proto_list_lock);
+ if (prot->twsk_slab != NULL) {
+ const char *name = kmem_cache_name(prot->twsk_slab);
+
+ kmem_cache_destroy(prot->twsk_slab);
+ kfree(name);
+ prot->twsk_slab = NULL;
+ }
}
EXPORT_SYMBOL(proto_unregister);
@@ -1602,8 +1722,8 @@ EXPORT_SYMBOL(sock_wfree);
EXPORT_SYMBOL(sock_wmalloc);
EXPORT_SYMBOL(sock_i_uid);
EXPORT_SYMBOL(sock_i_ino);
-#ifdef CONFIG_SYSCTL
EXPORT_SYMBOL(sysctl_optmem_max);
+#ifdef CONFIG_SYSCTL
EXPORT_SYMBOL(sysctl_rmem_max);
EXPORT_SYMBOL(sysctl_wmem_max);
#endif
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 8f817ad9f546..2f278c8e4743 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -9,23 +9,18 @@
#include <linux/sysctl.h>
#include <linux/config.h>
#include <linux/module.h>
+#include <linux/socket.h>
+#include <net/sock.h>
#ifdef CONFIG_SYSCTL
extern int netdev_max_backlog;
-extern int netdev_budget;
extern int weight_p;
-extern int net_msg_cost;
-extern int net_msg_burst;
extern __u32 sysctl_wmem_max;
extern __u32 sysctl_rmem_max;
-extern __u32 sysctl_wmem_default;
-extern __u32 sysctl_rmem_default;
extern int sysctl_core_destroy_delay;
-extern int sysctl_optmem_max;
-extern int sysctl_somaxconn;
#ifdef CONFIG_NET_DIVERT
extern char sysctl_divert_version[];
diff --git a/net/core/utils.c b/net/core/utils.c
index 88eb8b68e26b..7b5970fc9e40 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -16,7 +16,9 @@
#include <linux/module.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
+#include <linux/inet.h>
#include <linux/mm.h>
+#include <linux/net.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/random.h>
diff --git a/net/core/wireless.c b/net/core/wireless.c
index 3ff5639c0b78..271ddb35b0b2 100644
--- a/net/core/wireless.c
+++ b/net/core/wireless.c
@@ -58,6 +58,13 @@
* o Add wmb() in iw_handler_set_spy() for non-coherent archs/cpus
* Based on patch from Pavel Roskin <proski@gnu.org> :
* o Fix kernel data leak to user space in private handler handling
+ *
+ * v7 - 18.3.05 - Jean II
+ * o Remove (struct iw_point *)->pointer from events and streams
+ * o Remove spy_offset from struct iw_handler_def
+ * o Start deprecating dev->get_wireless_stats, output a warning
+ * o If IW_QUAL_DBM is set, show dBm values in /proc/net/wireless
+ * o Don't loose INVALID/DBM flags when clearing UPDATED flags (iwstats)
*/
/***************************** INCLUDES *****************************/
@@ -446,10 +453,19 @@ static inline struct iw_statistics *get_wireless_stats(struct net_device *dev)
(dev->wireless_handlers->get_wireless_stats != NULL))
return dev->wireless_handlers->get_wireless_stats(dev);
- /* Old location, will be phased out in next WE */
- return (dev->get_wireless_stats ?
- dev->get_wireless_stats(dev) :
- (struct iw_statistics *) NULL);
+ /* Old location, field to be removed in next WE */
+ if(dev->get_wireless_stats) {
+ static int printed_message;
+
+ if (!printed_message++)
+ printk(KERN_DEBUG "%s (WE) : Driver using old /proc/net/wireless support, please fix driver !\n",
+ dev->name);
+
+ return dev->get_wireless_stats(dev);
+ }
+
+ /* Not found */
+ return (struct iw_statistics *) NULL;
}
/* ---------------------------------------------------------------- */
@@ -541,16 +557,18 @@ static __inline__ void wireless_seq_printf_stats(struct seq_file *seq,
dev->name, stats->status, stats->qual.qual,
stats->qual.updated & IW_QUAL_QUAL_UPDATED
? '.' : ' ',
- ((__u8) stats->qual.level),
+ ((__s32) stats->qual.level) -
+ ((stats->qual.updated & IW_QUAL_DBM) ? 0x100 : 0),
stats->qual.updated & IW_QUAL_LEVEL_UPDATED
? '.' : ' ',
- ((__u8) stats->qual.noise),
+ ((__s32) stats->qual.noise) -
+ ((stats->qual.updated & IW_QUAL_DBM) ? 0x100 : 0),
stats->qual.updated & IW_QUAL_NOISE_UPDATED
? '.' : ' ',
stats->discard.nwid, stats->discard.code,
stats->discard.fragment, stats->discard.retries,
stats->discard.misc, stats->miss.beacon);
- stats->qual.updated = 0;
+ stats->qual.updated &= ~IW_QUAL_ALL_UPDATED;
}
}
@@ -571,10 +589,6 @@ static int wireless_seq_show(struct seq_file *seq, void *v)
return 0;
}
-extern void *dev_seq_start(struct seq_file *seq, loff_t *pos);
-extern void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos);
-extern void dev_seq_stop(struct seq_file *seq, void *v);
-
static struct seq_operations wireless_seq_ops = {
.start = dev_seq_start,
.next = dev_seq_next,
@@ -597,6 +611,7 @@ static struct file_operations wireless_seq_fops = {
int __init wireless_proc_init(void)
{
+ /* Create /proc/net/wireless entry */
if (!proc_net_fops_create("wireless", S_IRUGO, &wireless_seq_fops))
return -ENOMEM;
@@ -631,9 +646,9 @@ static inline int dev_iwstats(struct net_device *dev, struct ifreq *ifr)
sizeof(struct iw_statistics)))
return -EFAULT;
- /* Check if we need to clear the update flag */
+ /* Check if we need to clear the updated flag */
if(wrq->u.data.flags != 0)
- stats->qual.updated = 0;
+ stats->qual.updated &= ~IW_QUAL_ALL_UPDATED;
return 0;
} else
return -EOPNOTSUPP;
@@ -1144,8 +1159,8 @@ static inline void rtmsg_iwinfo(struct net_device * dev,
kfree_skb(skb);
return;
}
- NETLINK_CB(skb).dst_groups = RTMGRP_LINK;
- netlink_broadcast(rtnl, skb, 0, RTMGRP_LINK, GFP_ATOMIC);
+ NETLINK_CB(skb).dst_group = RTNLGRP_LINK;
+ netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_ATOMIC);
}
#endif /* WE_EVENT_NETLINK */
@@ -1165,10 +1180,11 @@ void wireless_send_event(struct net_device * dev,
struct iw_event *event; /* Mallocated whole event */
int event_len; /* Its size */
int hdr_len; /* Size of the event header */
+ int wrqu_off = 0; /* Offset in wrqu */
/* Don't "optimise" the following variable, it will crash */
unsigned cmd_index; /* *MUST* be unsigned */
- /* Get the description of the IOCTL */
+ /* Get the description of the Event */
if(cmd <= SIOCIWLAST) {
cmd_index = cmd - SIOCIWFIRST;
if(cmd_index < standard_ioctl_num)
@@ -1211,6 +1227,8 @@ void wireless_send_event(struct net_device * dev,
/* Calculate extra_len - extra is NULL for restricted events */
if(extra != NULL)
extra_len = wrqu->data.length * descr->token_size;
+ /* Always at an offset in wrqu */
+ wrqu_off = IW_EV_POINT_OFF;
#ifdef WE_EVENT_DEBUG
printk(KERN_DEBUG "%s (WE) : Event 0x%04X, tokens %d, extra_len %d\n", dev->name, cmd, wrqu->data.length, extra_len);
#endif /* WE_EVENT_DEBUG */
@@ -1221,7 +1239,7 @@ void wireless_send_event(struct net_device * dev,
event_len = hdr_len + extra_len;
#ifdef WE_EVENT_DEBUG
- printk(KERN_DEBUG "%s (WE) : Event 0x%04X, hdr_len %d, event_len %d\n", dev->name, cmd, hdr_len, event_len);
+ printk(KERN_DEBUG "%s (WE) : Event 0x%04X, hdr_len %d, wrqu_off %d, event_len %d\n", dev->name, cmd, hdr_len, wrqu_off, event_len);
#endif /* WE_EVENT_DEBUG */
/* Create temporary buffer to hold the event */
@@ -1232,7 +1250,7 @@ void wireless_send_event(struct net_device * dev,
/* Fill event */
event->len = event_len;
event->cmd = cmd;
- memcpy(&event->u, wrqu, hdr_len - IW_EV_LCP_LEN);
+ memcpy(&event->u, ((char *) wrqu) + wrqu_off, hdr_len - IW_EV_LCP_LEN);
if(extra != NULL)
memcpy(((char *) event) + hdr_len, extra, extra_len);
@@ -1253,7 +1271,7 @@ void wireless_send_event(struct net_device * dev,
* Now, the driver can delegate this task to Wireless Extensions.
* It needs to use those standard spy iw_handler in struct iw_handler_def,
* push data to us via wireless_spy_update() and include struct iw_spy_data
- * in its private part (and advertise it in iw_handler_def->spy_offset).
+ * in its private part (and export it in net_device->wireless_data->spy_data).
* One of the main advantage of centralising spy support here is that
* it becomes much easier to improve and extend it without having to touch
* the drivers. One example is the addition of the Spy-Threshold events.
@@ -1270,10 +1288,7 @@ static inline struct iw_spy_data * get_spydata(struct net_device *dev)
/* This is the new way */
if(dev->wireless_data)
return(dev->wireless_data->spy_data);
-
- /* This is the old way. Doesn't work for multi-headed drivers.
- * It will be removed in the next version of WE. */
- return (dev->priv + dev->wireless_handlers->spy_offset);
+ return NULL;
}
/*------------------------------------------------------------------*/
@@ -1288,10 +1303,6 @@ int iw_handler_set_spy(struct net_device * dev,
struct iw_spy_data * spydata = get_spydata(dev);
struct sockaddr * address = (struct sockaddr *) extra;
- if(!dev->wireless_data)
- /* Help user know that driver needs updating */
- printk(KERN_DEBUG "%s (WE) : Driver using old/buggy spy support, please fix driver !\n",
- dev->name);
/* Make sure driver is not buggy or using the old API */
if(!spydata)
return -EOPNOTSUPP;
@@ -1322,7 +1333,7 @@ int iw_handler_set_spy(struct net_device * dev,
sizeof(struct iw_quality) * IW_MAX_SPY);
#ifdef WE_SPY_DEBUG
- printk(KERN_DEBUG "iw_handler_set_spy() : offset %ld, spydata %p, num %d\n", dev->wireless_handlers->spy_offset, spydata, wrqu->data.length);
+ printk(KERN_DEBUG "iw_handler_set_spy() : wireless_data %p, spydata %p, num %d\n", dev->wireless_data, spydata, wrqu->data.length);
for (i = 0; i < wrqu->data.length; i++)
printk(KERN_DEBUG
"%02X:%02X:%02X:%02X:%02X:%02X \n",
@@ -1375,7 +1386,7 @@ int iw_handler_get_spy(struct net_device * dev,
sizeof(struct iw_quality) * spydata->spy_number);
/* Reset updated flags. */
for(i = 0; i < spydata->spy_number; i++)
- spydata->spy_stat[i].updated = 0;
+ spydata->spy_stat[i].updated &= ~IW_QUAL_ALL_UPDATED;
return 0;
}
@@ -1490,7 +1501,7 @@ void wireless_spy_update(struct net_device * dev,
return;
#ifdef WE_SPY_DEBUG
- printk(KERN_DEBUG "wireless_spy_update() : offset %ld, spydata %p, address %02X:%02X:%02X:%02X:%02X:%02X\n", dev->wireless_handlers->spy_offset, spydata, address[0], address[1], address[2], address[3], address[4], address[5]);
+ printk(KERN_DEBUG "wireless_spy_update() : wireless_data %p, spydata %p, address %02X:%02X:%02X:%02X:%02X:%02X\n", dev->wireless_data, spydata, address[0], address[1], address[2], address[3], address[4], address[5]);
#endif /* WE_SPY_DEBUG */
/* Update all records that match */