From 351638e7deeed2ec8ce451b53d33921b3da68f83 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 28 May 2013 01:30:21 +0000 Subject: net: pass info struct via netdevice notifier So far, only net_device * could be passed along with netdevice notifier event. This patch provides a possibility to pass custom structure able to provide info that event listener needs to know. Signed-off-by: Jiri Pirko v2->v3: fix typo on simeth shortened dev_getter shortened notifier_info struct name v1->v2: fix notifier_call parameter in call_netdevice_notifier() Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/macvtap.c') diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 59e9605de316..68efb91a5633 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -1053,7 +1053,7 @@ EXPORT_SYMBOL_GPL(macvtap_get_socket); static int macvtap_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct macvlan_dev *vlan; struct device *classdev; dev_t devt; -- cgit v1.2.3 From ed0483fa06e0efb86a82e382a00dbad02b62807c Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 5 Jun 2013 23:54:33 +0000 Subject: macvtap: fix a possible race between queue selection and changing queues Complier may generate codes that re-read the vlan->numvtaps during macvtap_get_queue(). This may lead a race if vlan->numvtaps were changed in the same time and which can lead unexpected result (e.g. very huge value). We need prevent the compiler from generating such codes by adding an ACCESS_ONCE() to make sure vlan->numvtaps were only read once. Acked-by: Michael S. Tsirkin Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/macvtap.c') diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 68efb91a5633..5e485e307424 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -172,7 +172,7 @@ static struct macvtap_queue *macvtap_get_queue(struct net_device *dev, { struct macvlan_dev *vlan = netdev_priv(dev); struct macvtap_queue *tap = NULL; - int numvtaps = vlan->numvtaps; + int numvtaps = ACCESS_ONCE(vlan->numvtaps); __u32 rxq; if (!numvtaps) -- cgit v1.2.3 From 89cee917deb5bf0dc9da269a38622588d06cb6be Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 5 Jun 2013 23:54:34 +0000 Subject: macvtap: do not add self to waitqueue if doing a nonblock read There's no need to add self to waitqueue if doing a nonblock read. This could help to avoid the spinlock contention. Acked-by: Michael S. Tsirkin Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/net/macvtap.c') diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 5e485e307424..8949631c080c 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -845,7 +845,9 @@ static ssize_t macvtap_do_read(struct macvtap_queue *q, struct kiocb *iocb, ssize_t ret = 0; while (len) { - prepare_to_wait(sk_sleep(&q->sk), &wait, TASK_INTERRUPTIBLE); + if (!noblock) + prepare_to_wait(sk_sleep(&q->sk), &wait, + TASK_INTERRUPTIBLE); /* Read frames from the queue */ skb = skb_dequeue(&q->sk.sk_receive_queue); @@ -867,7 +869,8 @@ static ssize_t macvtap_do_read(struct macvtap_queue *q, struct kiocb *iocb, break; } - finish_wait(sk_sleep(&q->sk), &wait); + if (!noblock) + finish_wait(sk_sleep(&q->sk), &wait); return ret; } -- cgit v1.2.3 From 8f475a318aaa37f0168a8ab99f2faa251e5638de Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 5 Jun 2013 23:54:36 +0000 Subject: macvtap: introduce macvtap_get_vlan() Factor out the device holding logic to a macvtap_get_vlan(), this will be also used by multiqueue API. Signed-off-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) (limited to 'drivers/net/macvtap.c') diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 8949631c080c..d18130b3fe0d 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -893,6 +893,24 @@ out: return ret; } +static struct macvlan_dev *macvtap_get_vlan(struct macvtap_queue *q) +{ + struct macvlan_dev *vlan; + + rcu_read_lock_bh(); + vlan = rcu_dereference_bh(q->vlan); + if (vlan) + dev_hold(vlan->dev); + rcu_read_unlock_bh(); + + return vlan; +} + +static void macvtap_put_vlan(struct macvlan_dev *vlan) +{ + dev_put(vlan->dev); +} + /* * provide compatibility with generic tun/tap interface */ @@ -924,12 +942,7 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd, return ret; case TUNGETIFF: - rcu_read_lock_bh(); - vlan = rcu_dereference_bh(q->vlan); - if (vlan) - dev_hold(vlan->dev); - rcu_read_unlock_bh(); - + vlan = macvtap_get_vlan(q); if (!vlan) return -ENOLINK; @@ -937,7 +950,7 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd, if (copy_to_user(&ifr->ifr_name, vlan->dev->name, IFNAMSIZ) || put_user(q->flags, &ifr->ifr_flags)) ret = -EFAULT; - dev_put(vlan->dev); + macvtap_put_vlan(vlan); return ret; case TUNGETFEATURES: -- cgit v1.2.3 From 376b1aabe1f53aad80615d05ddb8c43670be6a5c Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 5 Jun 2013 23:54:38 +0000 Subject: macvtap: eliminate linear search Linear search were used in both get_slot() and macvtap_get_queue(), this is because: - macvtap didn't reshuffle the array of taps when create or destroy a queue, so when adding a new queue, macvtap must do linear search to find a location for the new queue. This will also complicate the TUNSETQUEUE implementation for multiqueue API. - the queue itself didn't track the queue index, so the we must do a linear search in the array to find the location of a existed queue. The solution is straightforward: reshuffle the array and introduce a queue_index to macvtap_queue. Signed-off-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 64 ++++++++++++++++----------------------------------- 1 file changed, 20 insertions(+), 44 deletions(-) (limited to 'drivers/net/macvtap.c') diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index d18130b3fe0d..5ccba99b15f4 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -44,6 +44,7 @@ struct macvtap_queue { struct macvlan_dev __rcu *vlan; struct file *file; unsigned int flags; + u16 queue_index; }; static struct proto macvtap_proto = { @@ -84,30 +85,10 @@ static const struct proto_ops macvtap_socket_ops; */ static DEFINE_SPINLOCK(macvtap_lock); -/* - * get_slot: return a [unused/occupied] slot in vlan->taps[]: - * - if 'q' is NULL, return the first empty slot; - * - otherwise, return the slot this pointer occupies. - */ -static int get_slot(struct macvlan_dev *vlan, struct macvtap_queue *q) -{ - int i; - - for (i = 0; i < MAX_MACVTAP_QUEUES; i++) { - if (rcu_dereference_protected(vlan->taps[i], - lockdep_is_held(&macvtap_lock)) == q) - return i; - } - - /* Should never happen */ - BUG_ON(1); -} - static int macvtap_set_queue(struct net_device *dev, struct file *file, struct macvtap_queue *q) { struct macvlan_dev *vlan = netdev_priv(dev); - int index; int err = -EBUSY; spin_lock(&macvtap_lock); @@ -115,12 +96,12 @@ static int macvtap_set_queue(struct net_device *dev, struct file *file, goto out; err = 0; - index = get_slot(vlan, NULL); rcu_assign_pointer(q->vlan, vlan); - rcu_assign_pointer(vlan->taps[index], q); + rcu_assign_pointer(vlan->taps[vlan->numvtaps], q); sock_hold(&q->sk); q->file = file; + q->queue_index = vlan->numvtaps; file->private_data = q; vlan->numvtaps++; @@ -140,15 +121,22 @@ out: */ static void macvtap_put_queue(struct macvtap_queue *q) { + struct macvtap_queue *nq; struct macvlan_dev *vlan; spin_lock(&macvtap_lock); vlan = rcu_dereference_protected(q->vlan, lockdep_is_held(&macvtap_lock)); if (vlan) { - int index = get_slot(vlan, q); + int index = q->queue_index; + BUG_ON(index >= vlan->numvtaps); + + nq = rcu_dereference_protected(vlan->taps[vlan->numvtaps - 1], + lockdep_is_held(&macvtap_lock)); + rcu_assign_pointer(vlan->taps[index], nq); + nq->queue_index = index; - RCU_INIT_POINTER(vlan->taps[index], NULL); + RCU_INIT_POINTER(vlan->taps[vlan->numvtaps - 1], NULL); RCU_INIT_POINTER(q->vlan, NULL); sock_put(&q->sk); --vlan->numvtaps; @@ -182,8 +170,7 @@ static struct macvtap_queue *macvtap_get_queue(struct net_device *dev, rxq = skb_get_rxhash(skb); if (rxq) { tap = rcu_dereference(vlan->taps[rxq % numvtaps]); - if (tap) - goto out; + goto out; } if (likely(skb_rx_queue_recorded(skb))) { @@ -193,17 +180,10 @@ static struct macvtap_queue *macvtap_get_queue(struct net_device *dev, rxq -= numvtaps; tap = rcu_dereference(vlan->taps[rxq]); - if (tap) - goto out; - } - - /* Everything failed - find first available queue */ - for (rxq = 0; rxq < MAX_MACVTAP_QUEUES; rxq++) { - tap = rcu_dereference(vlan->taps[rxq]); - if (tap) - break; + goto out; } + tap = rcu_dereference(vlan->taps[0]); out: return tap; } @@ -219,19 +199,15 @@ static void macvtap_del_queues(struct net_device *dev) struct macvtap_queue *q, *qlist[MAX_MACVTAP_QUEUES]; int i, j = 0; - /* macvtap_put_queue can free some slots, so go through all slots */ spin_lock(&macvtap_lock); - for (i = 0; i < MAX_MACVTAP_QUEUES && vlan->numvtaps; i++) { + for (i = 0; i < vlan->numvtaps; i++) { q = rcu_dereference_protected(vlan->taps[i], lockdep_is_held(&macvtap_lock)); - if (q) { - qlist[j++] = q; - RCU_INIT_POINTER(vlan->taps[i], NULL); - RCU_INIT_POINTER(q->vlan, NULL); - vlan->numvtaps--; - } + BUG_ON(q == NULL); + qlist[j++] = q; + RCU_INIT_POINTER(vlan->taps[i], NULL); + RCU_INIT_POINTER(q->vlan, NULL); } - BUG_ON(vlan->numvtaps != 0); /* guarantee that any future macvtap_set_queue will fail */ vlan->numvtaps = MAX_MACVTAP_QUEUES; spin_unlock(&macvtap_lock); -- cgit v1.2.3 From 815f236d622721b54f3963ba59dad98b02cdeabf Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 5 Jun 2013 23:54:39 +0000 Subject: macvtap: add TUNSETQUEUE ioctl This patch adds TUNSETQUEUE ioctl to let userspace can temporarily disable or enable a queue of macvtap. This is used to be compatible at API layer of tuntap to simplify the userspace to manage the queues. This is done through introducing a linked list to track all taps while using vlan->taps array to only track active taps. Signed-off-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 135 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 116 insertions(+), 19 deletions(-) (limited to 'drivers/net/macvtap.c') diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 5ccba99b15f4..d2d1d5578b61 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -45,6 +45,8 @@ struct macvtap_queue { struct file *file; unsigned int flags; u16 queue_index; + bool enabled; + struct list_head next; }; static struct proto macvtap_proto = { @@ -85,14 +87,36 @@ static const struct proto_ops macvtap_socket_ops; */ static DEFINE_SPINLOCK(macvtap_lock); -static int macvtap_set_queue(struct net_device *dev, struct file *file, +static int macvtap_enable_queue(struct net_device *dev, struct file *file, struct macvtap_queue *q) +{ + struct macvlan_dev *vlan = netdev_priv(dev); + int err = -EINVAL; + + spin_lock(&macvtap_lock); + + if (q->enabled) + goto out; + + err = 0; + rcu_assign_pointer(vlan->taps[vlan->numvtaps], q); + q->queue_index = vlan->numvtaps; + q->enabled = true; + + vlan->numvtaps++; +out: + spin_unlock(&macvtap_lock); + return err; +} + +static int macvtap_set_queue(struct net_device *dev, struct file *file, + struct macvtap_queue *q) { struct macvlan_dev *vlan = netdev_priv(dev); int err = -EBUSY; spin_lock(&macvtap_lock); - if (vlan->numvtaps == MAX_MACVTAP_QUEUES) + if (vlan->numqueues == MAX_MACVTAP_QUEUES) goto out; err = 0; @@ -102,15 +126,57 @@ static int macvtap_set_queue(struct net_device *dev, struct file *file, q->file = file; q->queue_index = vlan->numvtaps; + q->enabled = true; file->private_data = q; + list_add_tail(&q->next, &vlan->queue_list); vlan->numvtaps++; + vlan->numqueues++; out: spin_unlock(&macvtap_lock); return err; } +static int __macvtap_disable_queue(struct macvtap_queue *q) +{ + struct macvlan_dev *vlan; + struct macvtap_queue *nq; + + vlan = rcu_dereference_protected(q->vlan, + lockdep_is_held(&macvtap_lock)); + + if (!q->enabled) + return -EINVAL; + + if (vlan) { + int index = q->queue_index; + BUG_ON(index >= vlan->numvtaps); + nq = rcu_dereference_protected(vlan->taps[vlan->numvtaps - 1], + lockdep_is_held(&macvtap_lock)); + nq->queue_index = index; + + rcu_assign_pointer(vlan->taps[index], nq); + RCU_INIT_POINTER(vlan->taps[vlan->numvtaps - 1], NULL); + q->enabled = false; + + vlan->numvtaps--; + } + + return 0; +} + +static int macvtap_disable_queue(struct macvtap_queue *q) +{ + int err; + + spin_lock(&macvtap_lock); + err = __macvtap_disable_queue(q); + spin_unlock(&macvtap_lock); + + return err; +} + /* * The file owning the queue got closed, give up both * the reference that the files holds as well as the @@ -121,25 +187,19 @@ out: */ static void macvtap_put_queue(struct macvtap_queue *q) { - struct macvtap_queue *nq; struct macvlan_dev *vlan; spin_lock(&macvtap_lock); vlan = rcu_dereference_protected(q->vlan, lockdep_is_held(&macvtap_lock)); if (vlan) { - int index = q->queue_index; - BUG_ON(index >= vlan->numvtaps); - - nq = rcu_dereference_protected(vlan->taps[vlan->numvtaps - 1], - lockdep_is_held(&macvtap_lock)); - rcu_assign_pointer(vlan->taps[index], nq); - nq->queue_index = index; + if (q->enabled) + BUG_ON(__macvtap_disable_queue(q)); - RCU_INIT_POINTER(vlan->taps[vlan->numvtaps - 1], NULL); + vlan->numqueues--; RCU_INIT_POINTER(q->vlan, NULL); sock_put(&q->sk); - --vlan->numvtaps; + list_del_init(&q->next); } spin_unlock(&macvtap_lock); @@ -160,6 +220,11 @@ static struct macvtap_queue *macvtap_get_queue(struct net_device *dev, { struct macvlan_dev *vlan = netdev_priv(dev); struct macvtap_queue *tap = NULL; + /* Access to taps array is protected by rcu, but access to numvtaps + * isn't. Below we use it to lookup a queue, but treat it as a hint + * and validate that the result isn't NULL - in case we are + * racing against queue removal. + */ int numvtaps = ACCESS_ONCE(vlan->numvtaps); __u32 rxq; @@ -196,18 +261,22 @@ out: static void macvtap_del_queues(struct net_device *dev) { struct macvlan_dev *vlan = netdev_priv(dev); - struct macvtap_queue *q, *qlist[MAX_MACVTAP_QUEUES]; + struct macvtap_queue *q, *tmp, *qlist[MAX_MACVTAP_QUEUES]; int i, j = 0; spin_lock(&macvtap_lock); - for (i = 0; i < vlan->numvtaps; i++) { - q = rcu_dereference_protected(vlan->taps[i], - lockdep_is_held(&macvtap_lock)); - BUG_ON(q == NULL); + list_for_each_entry_safe(q, tmp, &vlan->queue_list, next) { + list_del_init(&q->next); qlist[j++] = q; - RCU_INIT_POINTER(vlan->taps[i], NULL); RCU_INIT_POINTER(q->vlan, NULL); + if (q->enabled) + vlan->numvtaps--; + vlan->numqueues--; } + for (i = 0; i < vlan->numvtaps; i++) + RCU_INIT_POINTER(vlan->taps[i], NULL); + BUG_ON(vlan->numvtaps); + BUG_ON(vlan->numqueues); /* guarantee that any future macvtap_set_queue will fail */ vlan->numvtaps = MAX_MACVTAP_QUEUES; spin_unlock(&macvtap_lock); @@ -298,6 +367,9 @@ static int macvtap_newlink(struct net *src_net, struct nlattr *tb[], struct nlattr *data[]) { + struct macvlan_dev *vlan = netdev_priv(dev); + INIT_LIST_HEAD(&vlan->queue_list); + /* Don't put anything that may fail after macvlan_common_newlink * because we can't undo what it does. */ @@ -887,6 +959,25 @@ static void macvtap_put_vlan(struct macvlan_dev *vlan) dev_put(vlan->dev); } +static int macvtap_ioctl_set_queue(struct file *file, unsigned int flags) +{ + struct macvtap_queue *q = file->private_data; + struct macvlan_dev *vlan; + int ret; + + vlan = macvtap_get_vlan(q); + if (!vlan) + return -EINVAL; + + if (flags & IFF_ATTACH_QUEUE) + ret = macvtap_enable_queue(vlan->dev, file, q); + else if (flags & IFF_DETACH_QUEUE) + ret = macvtap_disable_queue(q); + + macvtap_put_vlan(vlan); + return ret; +} + /* * provide compatibility with generic tun/tap interface */ @@ -910,7 +1001,8 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd, return -EFAULT; ret = 0; - if ((u & ~IFF_VNET_HDR) != (IFF_NO_PI | IFF_TAP)) + if ((u & ~(IFF_VNET_HDR | IFF_MULTI_QUEUE)) != + (IFF_NO_PI | IFF_TAP)) ret = -EINVAL; else q->flags = u; @@ -929,6 +1021,11 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd, macvtap_put_vlan(vlan); return ret; + case TUNSETQUEUE: + if (get_user(u, &ifr->ifr_flags)) + return -EFAULT; + return macvtap_ioctl_set_queue(file, u); + case TUNGETFEATURES: if (put_user(IFF_TAP | IFF_NO_PI | IFF_VNET_HDR, up)) return -EFAULT; -- cgit v1.2.3 From df09b36f2225b84571b3d5eda2e2683412320713 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 5 Jun 2013 23:54:40 +0000 Subject: macvtap: enable multiqueue flag To notify the userspace about our capability of multiqueue. Signed-off-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'drivers/net/macvtap.c') diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index d2d1d5578b61..992151ce08a4 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -31,10 +31,6 @@ * macvtap_proto is used to allocate queues through the sock allocation * mechanism. * - * TODO: multiqueue support is currently not implemented, even though - * macvtap is basically prepared for that. We will need to add this - * here as well as in virtio-net and qemu to get line rate on 10gbit - * adapters from a guest. */ struct macvtap_queue { struct sock sk; @@ -1027,7 +1023,8 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd, return macvtap_ioctl_set_queue(file, u); case TUNGETFEATURES: - if (put_user(IFF_TAP | IFF_NO_PI | IFF_VNET_HDR, up)) + if (put_user(IFF_TAP | IFF_NO_PI | IFF_VNET_HDR | + IFF_MULTI_QUEUE, up)) return -EFAULT; return 0; -- cgit v1.2.3 From d9a90a3105eb6ca89aab74223e6526ab4a5e44b5 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 13 Jun 2013 14:23:35 +0800 Subject: macvtap: slient sparse warnings This patch silents the following sparse warnings: drivers/net/macvtap.c:98:9: warning: incorrect type in assignment (different address spaces) drivers/net/macvtap.c:98:9: expected struct macvtap_queue * drivers/net/macvtap.c:98:9: got struct macvtap_queue [noderef] * drivers/net/macvtap.c:120:9: warning: incorrect type in assignment (different address spaces) drivers/net/macvtap.c:120:9: expected struct macvtap_queue * drivers/net/macvtap.c:120:9: got struct macvtap_queue [noderef] * drivers/net/macvtap.c:151:22: error: incompatible types in comparison expression (different address spaces) drivers/net/macvtap.c:233:23: error: incompatible types in comparison expression (different address spaces) drivers/net/macvtap.c:243:23: error: incompatible types in comparison expression (different address spaces) drivers/net/macvtap.c:247:15: error: incompatible types in comparison expression (different address spaces) CC [M] drivers/net/macvtap.o drivers/net/macvlan.c:232:24: error: incompatible types in comparison expression (different address spaces) Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/macvtap.c') diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 992151ce08a4..edcbf1c0d6ed 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -429,7 +429,7 @@ static int macvtap_open(struct inode *inode, struct file *file) if (!q) goto out; - q->sock.wq = &q->wq; + RCU_INIT_POINTER(q->sock.wq, &q->wq); init_waitqueue_head(&q->wq.wait); q->sock.type = SOCK_RAW; q->sock.state = SS_CONNECTED; -- cgit v1.2.3 From f57855a54fb1e7529b32821f021d351e01dcfe39 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 13 Jun 2013 14:23:36 +0800 Subject: macvtap: fix uninitialized return value macvtap_ioctl_set_queue() Return -EINVAL on illegal flag instead of uninitialized value. This fixes the kbuild test warning. Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/net/macvtap.c') diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index edcbf1c0d6ed..5a76f20776af 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -969,6 +969,8 @@ static int macvtap_ioctl_set_queue(struct file *file, unsigned int flags) ret = macvtap_enable_queue(vlan->dev, file, q); else if (flags & IFF_DETACH_QUEUE) ret = macvtap_disable_queue(q); + else + ret = -EINVAL; macvtap_put_vlan(vlan); return ret; -- cgit v1.2.3 From 441ac0fcaadc76ad09771812382345001dd2b813 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 25 Jun 2013 16:04:19 -0400 Subject: macvtap: Convert to using rtnl lock Macvtap uses a private lock to protect the relationship between macvtap_queue and macvlan_dev. The private lock is not needed since the relationship is managed by user via open(), release(), and dellink() calls. dellink() already happens under rtnl, so we can safely convert open() and release(), and use it in ioctl() as well. Suggested by Eric Dumazet. Signed-off-by: Vlad Yasevich Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 62 +++++++++++++++++++++------------------------------ 1 file changed, 25 insertions(+), 37 deletions(-) (limited to 'drivers/net/macvtap.c') diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 5a76f20776af..efbf2eb6ae78 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -69,7 +69,7 @@ static const struct proto_ops macvtap_socket_ops; * RCU usage: * The macvtap_queue and the macvlan_dev are loosely coupled, the * pointers from one to the other can only be read while rcu_read_lock - * or macvtap_lock is held. + * or rtnl is held. * * Both the file and the macvlan_dev hold a reference on the macvtap_queue * through sock_hold(&q->sk). When the macvlan_dev goes away first, @@ -81,7 +81,6 @@ static const struct proto_ops macvtap_socket_ops; * file or the dev. The data structure is freed through __sk_free * when both our references and any pending SKBs are gone. */ -static DEFINE_SPINLOCK(macvtap_lock); static int macvtap_enable_queue(struct net_device *dev, struct file *file, struct macvtap_queue *q) @@ -89,7 +88,7 @@ static int macvtap_enable_queue(struct net_device *dev, struct file *file, struct macvlan_dev *vlan = netdev_priv(dev); int err = -EINVAL; - spin_lock(&macvtap_lock); + ASSERT_RTNL(); if (q->enabled) goto out; @@ -101,7 +100,6 @@ static int macvtap_enable_queue(struct net_device *dev, struct file *file, vlan->numvtaps++; out: - spin_unlock(&macvtap_lock); return err; } @@ -111,7 +109,7 @@ static int macvtap_set_queue(struct net_device *dev, struct file *file, struct macvlan_dev *vlan = netdev_priv(dev); int err = -EBUSY; - spin_lock(&macvtap_lock); + rtnl_lock(); if (vlan->numqueues == MAX_MACVTAP_QUEUES) goto out; @@ -130,26 +128,25 @@ static int macvtap_set_queue(struct net_device *dev, struct file *file, vlan->numqueues++; out: - spin_unlock(&macvtap_lock); + rtnl_unlock(); return err; } -static int __macvtap_disable_queue(struct macvtap_queue *q) +static int macvtap_disable_queue(struct macvtap_queue *q) { struct macvlan_dev *vlan; struct macvtap_queue *nq; - vlan = rcu_dereference_protected(q->vlan, - lockdep_is_held(&macvtap_lock)); - + ASSERT_RTNL(); if (!q->enabled) return -EINVAL; + vlan = rtnl_dereference(q->vlan); + if (vlan) { int index = q->queue_index; BUG_ON(index >= vlan->numvtaps); - nq = rcu_dereference_protected(vlan->taps[vlan->numvtaps - 1], - lockdep_is_held(&macvtap_lock)); + nq = rtnl_dereference(vlan->taps[vlan->numvtaps - 1]); nq->queue_index = index; rcu_assign_pointer(vlan->taps[index], nq); @@ -162,17 +159,6 @@ static int __macvtap_disable_queue(struct macvtap_queue *q) return 0; } -static int macvtap_disable_queue(struct macvtap_queue *q) -{ - int err; - - spin_lock(&macvtap_lock); - err = __macvtap_disable_queue(q); - spin_unlock(&macvtap_lock); - - return err; -} - /* * The file owning the queue got closed, give up both * the reference that the files holds as well as the @@ -185,12 +171,12 @@ static void macvtap_put_queue(struct macvtap_queue *q) { struct macvlan_dev *vlan; - spin_lock(&macvtap_lock); - vlan = rcu_dereference_protected(q->vlan, - lockdep_is_held(&macvtap_lock)); + rtnl_lock(); + vlan = rtnl_dereference(q->vlan); + if (vlan) { if (q->enabled) - BUG_ON(__macvtap_disable_queue(q)); + BUG_ON(macvtap_disable_queue(q)); vlan->numqueues--; RCU_INIT_POINTER(q->vlan, NULL); @@ -198,7 +184,7 @@ static void macvtap_put_queue(struct macvtap_queue *q) list_del_init(&q->next); } - spin_unlock(&macvtap_lock); + rtnl_unlock(); synchronize_rcu(); sock_put(&q->sk); @@ -260,7 +246,7 @@ static void macvtap_del_queues(struct net_device *dev) struct macvtap_queue *q, *tmp, *qlist[MAX_MACVTAP_QUEUES]; int i, j = 0; - spin_lock(&macvtap_lock); + ASSERT_RTNL(); list_for_each_entry_safe(q, tmp, &vlan->queue_list, next) { list_del_init(&q->next); qlist[j++] = q; @@ -275,9 +261,6 @@ static void macvtap_del_queues(struct net_device *dev) BUG_ON(vlan->numqueues); /* guarantee that any future macvtap_set_queue will fail */ vlan->numvtaps = MAX_MACVTAP_QUEUES; - spin_unlock(&macvtap_lock); - - synchronize_rcu(); for (--j; j >= 0; j--) sock_put(&qlist[j]->sk); @@ -941,11 +924,10 @@ static struct macvlan_dev *macvtap_get_vlan(struct macvtap_queue *q) { struct macvlan_dev *vlan; - rcu_read_lock_bh(); - vlan = rcu_dereference_bh(q->vlan); + ASSERT_RTNL(); + vlan = rtnl_dereference(q->vlan); if (vlan) dev_hold(vlan->dev); - rcu_read_unlock_bh(); return vlan; } @@ -1008,21 +990,27 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd, return ret; case TUNGETIFF: + rtnl_lock(); vlan = macvtap_get_vlan(q); - if (!vlan) + if (!vlan) { + rtnl_unlock(); return -ENOLINK; + } ret = 0; if (copy_to_user(&ifr->ifr_name, vlan->dev->name, IFNAMSIZ) || put_user(q->flags, &ifr->ifr_flags)) ret = -EFAULT; macvtap_put_vlan(vlan); + rtnl_unlock(); return ret; case TUNSETQUEUE: if (get_user(u, &ifr->ifr_flags)) return -EFAULT; - return macvtap_ioctl_set_queue(file, u); + rtnl_lock(); + ret = macvtap_ioctl_set_queue(file, u); + rtnl_unlock(); case TUNGETFEATURES: if (put_user(IFF_TAP | IFF_NO_PI | IFF_VNET_HDR | -- cgit v1.2.3 From ac4e4af1e59e16a018527ffa58d9d3f30bb96ca9 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 25 Jun 2013 16:04:20 -0400 Subject: macvtap: Consistently use rcu functions Currently macvtap uses rcu_bh functions in its user facing fuction macvtap_get_user() and macvtap_put_user(). However, its packet handlers use normal rcu as the rcu_read_lock() is taken in netif_receive_skb(). We can safely discontinue the usage or rcu with bh disabled. Signed-off-by: Vlad Yasevich Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'drivers/net/macvtap.c') diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index efbf2eb6ae78..d7856a8f589a 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -754,8 +754,8 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, skb_probe_transport_header(skb, ETH_HLEN); - rcu_read_lock_bh(); - vlan = rcu_dereference_bh(q->vlan); + rcu_read_lock(); + vlan = rcu_dereference(q->vlan); /* copy skb_ubuf_info for callback when skb has no error */ if (zerocopy) { skb_shinfo(skb)->destructor_arg = m->msg_control; @@ -766,7 +766,7 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, macvlan_start_xmit(skb, vlan->dev); else kfree_skb(skb); - rcu_read_unlock_bh(); + rcu_read_unlock(); return total_len; @@ -774,11 +774,11 @@ err_kfree: kfree_skb(skb); err: - rcu_read_lock_bh(); - vlan = rcu_dereference_bh(q->vlan); + rcu_read_lock(); + vlan = rcu_dereference(q->vlan); if (vlan) vlan->dev->stats.tx_dropped++; - rcu_read_unlock_bh(); + rcu_read_unlock(); return err; } @@ -854,11 +854,11 @@ static ssize_t macvtap_put_user(struct macvtap_queue *q, copied += len; done: - rcu_read_lock_bh(); - vlan = rcu_dereference_bh(q->vlan); + rcu_read_lock(); + vlan = rcu_dereference(q->vlan); if (vlan) macvlan_count_rx(vlan, copied - vnet_hdr_len, ret == 0, 0); - rcu_read_unlock_bh(); + rcu_read_unlock(); return ret ? ret : copied; } -- cgit v1.2.3 From 2be5c76794b0e570aa87b012df5ac864ce668a74 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 25 Jun 2013 16:04:21 -0400 Subject: macvtap: Let TUNSETOFFLOAD actually controll offload features. When the user issues TUNSETOFFLOAD ioctl, macvtap does not do anything other then to verify arguments. This patch adds functionality to allow users to actually control offload features. NETIF_F_GSO and NETIF_F_GRO are always on, but the rest of the features can be controlled. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 64 insertions(+), 1 deletion(-) (limited to 'drivers/net/macvtap.c') diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index d7856a8f589a..7eab01975ed1 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -65,6 +65,9 @@ static struct cdev macvtap_cdev; static const struct proto_ops macvtap_socket_ops; +#define TUN_OFFLOADS (NETIF_F_HW_CSUM | NETIF_F_TSO_ECN | NETIF_F_TSO | \ + NETIF_F_TSO6 | NETIF_F_UFO) +#define RX_OFFLOADS (NETIF_F_GRO | NETIF_F_LRO) /* * RCU usage: * The macvtap_queue and the macvlan_dev are loosely coupled, the @@ -349,6 +352,11 @@ static int macvtap_newlink(struct net *src_net, struct macvlan_dev *vlan = netdev_priv(dev); INIT_LIST_HEAD(&vlan->queue_list); + /* Since macvlan supports all offloads by default, make + * tap support all offloads also. + */ + vlan->tap_features = TUN_OFFLOADS; + /* Don't put anything that may fail after macvlan_common_newlink * because we can't undo what it does. */ @@ -958,6 +966,58 @@ static int macvtap_ioctl_set_queue(struct file *file, unsigned int flags) return ret; } +static int set_offload(struct macvtap_queue *q, unsigned long arg) +{ + struct macvlan_dev *vlan; + netdev_features_t features; + netdev_features_t feature_mask = 0; + + vlan = rtnl_dereference(q->vlan); + if (!vlan) + return -ENOLINK; + + features = vlan->dev->features; + + if (arg & TUN_F_CSUM) { + feature_mask = NETIF_F_HW_CSUM; + + if (arg & (TUN_F_TSO4 | TUN_F_TSO6)) { + if (arg & TUN_F_TSO_ECN) + feature_mask |= NETIF_F_TSO_ECN; + if (arg & TUN_F_TSO4) + feature_mask |= NETIF_F_TSO; + if (arg & TUN_F_TSO6) + feature_mask |= NETIF_F_TSO6; + } + + if (arg & TUN_F_UFO) + feature_mask |= NETIF_F_UFO; + } + + /* tun/tap driver inverts the usage for TSO offloads, where + * setting the TSO bit means that the userspace wants to + * accept TSO frames and turning it off means that user space + * does not support TSO. + * For macvtap, we have to invert it to mean the same thing. + * When user space turns off TSO, we turn off GSO/LRO so that + * user-space will not receive TSO frames. + */ + if (feature_mask & (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_UFO)) + features |= RX_OFFLOADS; + else + features &= ~RX_OFFLOADS; + + /* tap_features are the same as features on tun/tap and + * reflect user expectations. + */ + vlan->tap_features = vlan->dev->features & + (feature_mask | ~TUN_OFFLOADS); + vlan->set_features = features; + netdev_update_features(vlan->dev); + + return 0; +} + /* * provide compatibility with generic tun/tap interface */ @@ -1050,7 +1110,10 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd, got enabled for forwarded frames */ if (!(q->flags & IFF_VNET_HDR)) return -EINVAL; - return 0; + rtnl_lock(); + ret = set_offload(q, arg); + rtnl_unlock(); + return ret; default: return -EINVAL; -- cgit v1.2.3 From 3e4f8b787370978733ca6cae452720a4f0c296b8 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 25 Jun 2013 16:04:22 -0400 Subject: macvtap: Perform GSO on forwarding path. When macvtap forwards skb to its tap, it needs to check if GSO needs to be performed. This is sometimes necessary when the HW device performed GRO, but the guest reading from the tap does not support it (ex: Windows 7). Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) (limited to 'drivers/net/macvtap.c') diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 7eab01975ed1..5bfaecdd2354 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -276,14 +276,44 @@ static void macvtap_del_queues(struct net_device *dev) */ static int macvtap_forward(struct net_device *dev, struct sk_buff *skb) { + struct macvlan_dev *vlan = netdev_priv(dev); struct macvtap_queue *q = macvtap_get_queue(dev, skb); + netdev_features_t features; if (!q) goto drop; if (skb_queue_len(&q->sk.sk_receive_queue) >= dev->tx_queue_len) goto drop; - skb_queue_tail(&q->sk.sk_receive_queue, skb); + skb->dev = dev; + /* Apply the forward feature mask so that we perform segmentation + * according to users wishes. + */ + features = netif_skb_features(skb) & vlan->tap_features; + if (netif_needs_gso(skb, features)) { + struct sk_buff *segs = __skb_gso_segment(skb, features, false); + + if (IS_ERR(segs)) + goto drop; + + if (!segs) { + skb_queue_tail(&q->sk.sk_receive_queue, skb); + goto wake_up; + } + + kfree_skb(skb); + while (segs) { + struct sk_buff *nskb = segs->next; + + segs->next = NULL; + skb_queue_tail(&q->sk.sk_receive_queue, segs); + segs = nskb; + } + } else { + skb_queue_tail(&q->sk.sk_receive_queue, skb); + } + +wake_up: wake_up_interruptible_poll(sk_sleep(&q->sk), POLLIN | POLLRDNORM | POLLRDBAND); return NET_RX_SUCCESS; -- cgit v1.2.3 From 61d46bf979d5cd7c164709a80ad5676a35494aae Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 10 Jul 2013 13:43:28 +0800 Subject: macvtap: correctly linearize skb when zerocopy is used Userspace may produce vectors greater than MAX_SKB_FRAGS. When we try to linearize parts of the skb to let the rest of iov to be fit in the frags, we need count copylen into linear when calling macvtap_alloc_skb() instead of partly counting it into data_len. Since this breaks zerocopy_sg_from_iovec() since its inner counter assumes nr_frags should be zero at beginning. This cause nr_frags to be increased wrongly without setting the correct frags. This bug were introduced from b92946e2919134ebe2a4083e4302236295ea2a73 (macvtap: zerocopy: validate vectors before building skb). Cc: Michael S. Tsirkin Signed-off-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers/net/macvtap.c') diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index f2c4a3b218fc..876c72246ae9 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -712,6 +712,7 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, int vnet_hdr_len = 0; int copylen = 0; bool zerocopy = false; + size_t linear; if (q->flags & IFF_VNET_HDR) { vnet_hdr_len = q->vnet_hdr_sz; @@ -766,11 +767,14 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, copylen = vnet_hdr.hdr_len; if (!copylen) copylen = GOODCOPY_LEN; - } else + linear = copylen; + } else { copylen = len; + linear = vnet_hdr.hdr_len; + } skb = macvtap_alloc_skb(&q->sk, NET_IP_ALIGN, copylen, - vnet_hdr.hdr_len, noblock, &err); + linear, noblock, &err); if (!skb) goto err; -- cgit v1.2.3 From 82a19eb8c02ab98bfe0bf6fa4915de370acb2858 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 16 Jul 2013 13:36:33 +0800 Subject: macvtap: fix the missing ret value of TUNSETQUEUE Commit 441ac0fcaadc76ad09771812382345001dd2b813 (macvtap: Convert to using rtnl lock) forget to return what macvtap_ioctl_set_queue() returns to its caller. This may break multiqueue API by always falling through to TUNGETFEATURES. Cc: Vlad Yasevich Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/net/macvtap.c') diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 876c72246ae9..0e5492ec753a 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -1107,6 +1107,7 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd, rtnl_lock(); ret = macvtap_ioctl_set_queue(file, u); rtnl_unlock(); + return ret; case TUNGETFEATURES: if (put_user(IFF_TAP | IFF_NO_PI | IFF_VNET_HDR | -- cgit v1.2.3 From 0fbe0d47b1ee2015c288abd4e7b32224f8bfa212 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 16 Jul 2013 13:36:34 +0800 Subject: macvtap: do not assume 802.1Q when send vlan packets The hard-coded 8021.q proto will break 802.1ad traffic. So switch to use vlan->proto. Cc: Basil Gor Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/macvtap.c') diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 0e5492ec753a..a7c5654a2e5c 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -873,7 +873,7 @@ static ssize_t macvtap_put_user(struct macvtap_queue *q, __be16 h_vlan_proto; __be16 h_vlan_TCI; } veth; - veth.h_vlan_proto = htons(ETH_P_8021Q); + veth.h_vlan_proto = skb->vlan_proto; veth.h_vlan_TCI = htons(vlan_tx_tag_get(skb)); vlan_offset = offsetof(struct vlan_ethhdr, h_vlan_proto); -- cgit v1.2.3 From ece793fcfc417b3925844be88a6a6dc82ae8f7c6 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 18 Jul 2013 10:55:16 +0800 Subject: macvtap: do not zerocopy if iov needs more pages than MAX_SKB_FRAGS We try to linearize part of the skb when the number of iov is greater than MAX_SKB_FRAGS. This is not enough since each single vector may occupy more than one pages, so zerocopy_sg_fromiovec() may still fail and may break the guest network. Solve this problem by calculate the pages needed for iov before trying to do zerocopy and switch to use copy instead of zerocopy if it needs more than MAX_SKB_FRAGS. This is done through introducing a new helper to count the pages for iov, and call uarg->callback() manually when switching from zerocopy to copy to notify vhost. We can do further optimization on top. This bug were introduced from b92946e2919134ebe2a4083e4302236295ea2a73 (macvtap: zerocopy: validate vectors before building skb). Cc: Michael S. Tsirkin Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 62 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 37 insertions(+), 25 deletions(-) (limited to 'drivers/net/macvtap.c') diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index a7c5654a2e5c..a98fb0ed6aef 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -698,6 +698,28 @@ static int macvtap_skb_to_vnet_hdr(const struct sk_buff *skb, return 0; } +static unsigned long iov_pages(const struct iovec *iv, int offset, + unsigned long nr_segs) +{ + unsigned long seg, base; + int pages = 0, len, size; + + while (nr_segs && (offset >= iv->iov_len)) { + offset -= iv->iov_len; + ++iv; + --nr_segs; + } + + for (seg = 0; seg < nr_segs; seg++) { + base = (unsigned long)iv[seg].iov_base + offset; + len = iv[seg].iov_len - offset; + size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT; + pages += size; + offset = 0; + } + + return pages; +} /* Get packet from user space buffer */ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, @@ -744,31 +766,15 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, if (unlikely(count > UIO_MAXIOV)) goto err; - if (m && m->msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY)) - zerocopy = true; - - if (zerocopy) { - /* Userspace may produce vectors with count greater than - * MAX_SKB_FRAGS, so we need to linearize parts of the skb - * to let the rest of data to be fit in the frags. - */ - if (count > MAX_SKB_FRAGS) { - copylen = iov_length(iv, count - MAX_SKB_FRAGS); - if (copylen < vnet_hdr_len) - copylen = 0; - else - copylen -= vnet_hdr_len; - } - /* There are 256 bytes to be copied in skb, so there is enough - * room for skb expand head in case it is used. - * The rest buffer is mapped from userspace. - */ - if (copylen < vnet_hdr.hdr_len) - copylen = vnet_hdr.hdr_len; - if (!copylen) - copylen = GOODCOPY_LEN; + if (m && m->msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY)) { + copylen = vnet_hdr.hdr_len ? vnet_hdr.hdr_len : GOODCOPY_LEN; linear = copylen; - } else { + if (iov_pages(iv, vnet_hdr_len + copylen, count) + <= MAX_SKB_FRAGS) + zerocopy = true; + } + + if (!zerocopy) { copylen = len; linear = vnet_hdr.hdr_len; } @@ -780,9 +786,15 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, if (zerocopy) err = zerocopy_sg_from_iovec(skb, iv, vnet_hdr_len, count); - else + else { err = skb_copy_datagram_from_iovec(skb, 0, iv, vnet_hdr_len, len); + if (!err && m && m->msg_control) { + struct ubuf_info *uarg = m->msg_control; + uarg->callback(uarg, false); + } + } + if (err) goto err_kfree; -- cgit v1.2.3