From 274038f8c94c493e2977983e2aecb5f5f0778479 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 11 Jun 2013 14:41:24 +0400 Subject: tun: Report "persist" flag to userspace The TUN_PERSIST flag is not reported at all -- both TUNGETIFF, and sysfs "flags" attribute skip one. Knowing whether a device is persistent or not is critical for checkpoint-restore, thus I propose to add the read-only IFF_PERSIST one for this. Setting this new IFF_PERSIST is hardly possible, as TUNSETIFF doesn't check for unknown flags being zero and thus there can be trash. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- drivers/net/tun.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/net/tun.c') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 89776c592151..b90a73165d30 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1530,6 +1530,9 @@ static int tun_flags(struct tun_struct *tun) if (tun->flags & TUN_TAP_MQ) flags |= IFF_MULTI_QUEUE; + if (tun->flags & TUN_PERSIST) + flags |= IFF_PERSIST; + return flags; } -- cgit v1.2.3 From 944a1376b6ca1f00cc67f7d319afd55d5d9afba5 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 11 Jun 2013 17:01:08 +0400 Subject: tun: Turn tun_flow_init() into void fn This routine doesn't fail since 9fdc6bef (tuntap: dont use a private kmem_cache) so it makes sense to compact the code a little bit. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- drivers/net/tun.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'drivers/net/tun.c') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index b90a73165d30..a34427065460 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -841,7 +841,7 @@ static const struct net_device_ops tap_netdev_ops = { #endif }; -static int tun_flow_init(struct tun_struct *tun) +static void tun_flow_init(struct tun_struct *tun) { int i; @@ -852,8 +852,6 @@ static int tun_flow_init(struct tun_struct *tun) setup_timer(&tun->flow_gc_timer, tun_flow_cleanup, (unsigned long)tun); mod_timer(&tun->flow_gc_timer, round_jiffies_up(jiffies + tun->ageing_time)); - - return 0; } static void tun_flow_uninit(struct tun_struct *tun) @@ -1662,10 +1660,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) goto err_free_dev; tun_net_init(dev); - - err = tun_flow_init(tun); - if (err < 0) - goto err_free_dev; + tun_flow_init(tun); dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | TUN_USER_FEATURES; -- cgit v1.2.3 From 3dd5c3308e8b671e8e8882ba972f51cefbe9fd0d Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 10 Jul 2013 13:43:27 +0800 Subject: tuntap: correctly linearize skb when zerocopy is used Userspace may produce vectors greater than MAX_SKB_FRAGS. When we try to linearize parts of the skb to let the rest of iov to be fit in the frags, we need count copylen into linear when calling tun_alloc_skb() instead of partly counting it into data_len. Since this breaks zerocopy_sg_from_iovec() since its inner counter assumes nr_frags should be zero at beginning. This cause nr_frags to be increased wrongly without setting the correct frags. This bug were introduced from 0690899b4d4501b3505be069b9a687e68ccbe15b (tun: experimental zero copy tx support) Cc: Michael S. Tsirkin Signed-off-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/net/tun.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'drivers/net/tun.c') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 7eab5fcd064f..5cdcf92eb310 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1042,7 +1042,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, { struct tun_pi pi = { 0, cpu_to_be16(ETH_P_IP) }; struct sk_buff *skb; - size_t len = total_len, align = NET_SKB_PAD; + size_t len = total_len, align = NET_SKB_PAD, linear; struct virtio_net_hdr gso = { 0 }; int offset = 0; int copylen; @@ -1106,10 +1106,13 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, copylen = gso.hdr_len; if (!copylen) copylen = GOODCOPY_LEN; - } else + linear = copylen; + } else { copylen = len; + linear = gso.hdr_len; + } - skb = tun_alloc_skb(tfile, align, copylen, gso.hdr_len, noblock); + skb = tun_alloc_skb(tfile, align, copylen, linear, noblock); if (IS_ERR(skb)) { if (PTR_ERR(skb) != -EAGAIN) tun->dev->stats.rx_dropped++; -- cgit v1.2.3 From 885291761dba2bfe04df4c0f7bb75e4c920ab82e Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 18 Jul 2013 10:55:15 +0800 Subject: tuntap: do not zerocopy if iov needs more pages than MAX_SKB_FRAGS We try to linearize part of the skb when the number of iov is greater than MAX_SKB_FRAGS. This is not enough since each single vector may occupy more than one pages, so zerocopy_sg_fromiovec() may still fail and may break the guest network. Solve this problem by calculate the pages needed for iov before trying to do zerocopy and switch to use copy instead of zerocopy if it needs more than MAX_SKB_FRAGS. This is done through introducing a new helper to count the pages for iov, and call uarg->callback() manually when switching from zerocopy to copy to notify vhost. We can do further optimization on top. The bug were introduced from commit 0690899b4d4501b3505be069b9a687e68ccbe15b (tun: experimental zero copy tx support) Cc: Michael S. Tsirkin Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/tun.c | 62 ++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 38 insertions(+), 24 deletions(-) (limited to 'drivers/net/tun.c') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 5cdcf92eb310..db690a372260 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1035,6 +1035,29 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from, return 0; } +static unsigned long iov_pages(const struct iovec *iv, int offset, + unsigned long nr_segs) +{ + unsigned long seg, base; + int pages = 0, len, size; + + while (nr_segs && (offset >= iv->iov_len)) { + offset -= iv->iov_len; + ++iv; + --nr_segs; + } + + for (seg = 0; seg < nr_segs; seg++) { + base = (unsigned long)iv[seg].iov_base + offset; + len = iv[seg].iov_len - offset; + size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT; + pages += size; + offset = 0; + } + + return pages; +} + /* Get packet from user space buffer */ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, void *msg_control, const struct iovec *iv, @@ -1082,32 +1105,18 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, return -EINVAL; } - if (msg_control) - zerocopy = true; - - if (zerocopy) { - /* Userspace may produce vectors with count greater than - * MAX_SKB_FRAGS, so we need to linearize parts of the skb - * to let the rest of data to be fit in the frags. - */ - if (count > MAX_SKB_FRAGS) { - copylen = iov_length(iv, count - MAX_SKB_FRAGS); - if (copylen < offset) - copylen = 0; - else - copylen -= offset; - } else - copylen = 0; - /* There are 256 bytes to be copied in skb, so there is enough - * room for skb expand head in case it is used. + if (msg_control) { + /* There are 256 bytes to be copied in skb, so there is + * enough room for skb expand head in case it is used. * The rest of the buffer is mapped from userspace. */ - if (copylen < gso.hdr_len) - copylen = gso.hdr_len; - if (!copylen) - copylen = GOODCOPY_LEN; + copylen = gso.hdr_len ? gso.hdr_len : GOODCOPY_LEN; linear = copylen; - } else { + if (iov_pages(iv, offset + copylen, count) <= MAX_SKB_FRAGS) + zerocopy = true; + } + + if (!zerocopy) { copylen = len; linear = gso.hdr_len; } @@ -1121,8 +1130,13 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, if (zerocopy) err = zerocopy_sg_from_iovec(skb, iv, offset, count); - else + else { err = skb_copy_datagram_from_iovec(skb, 0, iv, offset, len); + if (!err && msg_control) { + struct ubuf_info *uarg = msg_control; + uarg->callback(uarg, false); + } + } if (err) { tun->dev->stats.rx_dropped++; -- cgit v1.2.3