From e174961ca1a0b28f7abf0be47973ad57cb74e5f0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 27 Oct 2008 15:59:26 -0700 Subject: net: convert print_mac to %pM This converts pretty much everything to print_mac. There were a few things that had conflicts which I have just dropped for now, no harm done. I've built an allyesconfig with this and looked at the files that weren't built very carefully, but it's a huge patch. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers/net/virtio_net.c') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 0196a0df9021..68f908a57ac3 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -325,9 +325,7 @@ static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb) sg_init_table(sg, 2+MAX_SKB_FRAGS); - pr_debug("%s: xmit %p " MAC_FMT "\n", vi->dev->name, skb, - dest[0], dest[1], dest[2], - dest[3], dest[4], dest[5]); + pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest); /* Encode metadata header at front. */ hdr = skb_vnet_hdr(skb); -- cgit v1.2.3 From 8f15ea42b64941001a401cf855a0869e24f3a845 Mon Sep 17 00:00:00 2001 From: Wang Chen Date: Wed, 12 Nov 2008 23:38:36 -0800 Subject: netdevice: safe convert to netdev_priv() #part-3 We have some reasons to kill netdev->priv: 1. netdev->priv is equal to netdev_priv(). 2. netdev_priv() wraps the calculation of netdev->priv's offset, obviously netdev_priv() is more flexible than netdev->priv. But we cann't kill netdev->priv, because so many drivers reference to it directly. This patch is a safe convert for netdev->priv to netdev_priv(netdev). Since all of the netdev->priv is only for read. But it is too big to be sent in one mail. I split it to 4 parts and make every part smaller than 100,000 bytes, which is max size allowed by vger. Signed-off-by: Wang Chen Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/net/virtio_net.c') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 68f908a57ac3..ede0a80045b3 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -125,7 +125,8 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb, unsigned int i; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - give_a_page(dev->priv, skb_shinfo(skb)->frags[i].page); + give_a_page(netdev_priv(dev), + skb_shinfo(skb)->frags[i].page); skb->data_len = 0; skb_shinfo(skb)->nr_frags = 0; } -- cgit v1.2.3 From 0a888fd1f6320d1d9318c58de9bca3cef41546d6 Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Sun, 16 Nov 2008 22:39:18 -0800 Subject: virtio_net: Recycle some more rx buffer pages Each time we re-fill the recv queue with buffers, we allocate one too many skbs and free it again when adding fails. We should recycle the pages allocated in this case. A previous version of this patch made trim_pages() trim trailing unused pages from skbs with some paged data, but this actually caused a barely measurable slowdown. Signed-off-by: Mark McLoughlin Signed-off-by: Rusty Russell (use netdev_priv) Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'drivers/net/virtio_net.c') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index ede0a80045b3..f5207ef21a91 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -82,6 +82,16 @@ static void give_a_page(struct virtnet_info *vi, struct page *page) vi->pages = page; } +static void trim_pages(struct virtnet_info *vi, struct sk_buff *skb) +{ + unsigned int i; + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) + give_a_page(vi, skb_shinfo(skb)->frags[i].page); + skb_shinfo(skb)->nr_frags = 0; + skb->data_len = 0; +} + static struct page *get_a_page(struct virtnet_info *vi, gfp_t gfp_mask) { struct page *p = vi->pages; @@ -121,15 +131,8 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb, } len -= sizeof(struct virtio_net_hdr); - if (len <= MAX_PACKET_LEN) { - unsigned int i; - - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - give_a_page(netdev_priv(dev), - skb_shinfo(skb)->frags[i].page); - skb->data_len = 0; - skb_shinfo(skb)->nr_frags = 0; - } + if (len <= MAX_PACKET_LEN) + trim_pages(netdev_priv(dev), skb); err = pskb_trim(skb, len); if (err) { @@ -233,6 +236,7 @@ static void try_fill_recv(struct virtnet_info *vi) err = vi->rvq->vq_ops->add_buf(vi->rvq, sg, 0, num, skb); if (err) { skb_unlink(skb, &vi->recv); + trim_pages(vi, skb); kfree_skb(skb); break; } -- cgit v1.2.3 From 0276b4972e932ea8bf2941dcd37e9caac5652ed7 Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Sun, 16 Nov 2008 22:40:36 -0800 Subject: virtio_net: hook up the set-tso ethtool op Seems like an oversight that we have set-tx-csum and set-sg hooked up, but not set-tso. Also leads to the strange situation that if you e.g. disable tx-csum, then tso doesn't get disabled. Signed-off-by: Mark McLoughlin Signed-off-by: Rusty Russell Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/net/virtio_net.c') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index f5207ef21a91..27559c987d47 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -481,6 +481,7 @@ static int virtnet_set_tx_csum(struct net_device *dev, u32 data) static struct ethtool_ops virtnet_ethtool_ops = { .set_tx_csum = virtnet_set_tx_csum, .set_sg = ethtool_op_set_sg, + .set_tso = ethtool_op_set_tso, }; static int virtnet_probe(struct virtio_device *vdev) -- cgit v1.2.3 From 3f2c31d90327f21d76d296af34aa4ca547932ff4 Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Sun, 16 Nov 2008 22:41:34 -0800 Subject: virtio_net: VIRTIO_NET_F_MSG_RXBUF (imprive rcv buffer allocation) If segmentation offload is enabled by the host, we currently allocate maximum sized packet buffers and pass them to the host. This uses up 20 ring entries, allowing us to supply only 20 packet buffers to the host with a 256 entry ring. This is a huge overhead when receiving small packets, and is most keenly felt when receiving MTU sized packets from off-host. The VIRTIO_NET_F_MRG_RXBUF feature flag is set by hosts which support using receive buffers which are smaller than the maximum packet size. In order to transfer large packets to the guest, the host merges together multiple receive buffers to form a larger logical buffer. The number of merged buffers is returned to the guest via a field in the virtio_net_hdr. Make use of this support by supplying single page receive buffers to the host. On receive, we extract the virtio_net_hdr, copy 128 bytes of the payload to the skb's linear data buffer and adjust the fragment offset to point to the remaining data. This ensures proper alignment and allows us to not use any paged data for small packets. If the payload occupies multiple pages, we simply append those pages as fragments and free the associated skbs. This scheme allows us to be efficient in our use of ring entries while still supporting large packets. Benchmarking using netperf from an external machine to a guest over a 10Gb/s network shows a 100% improvement from ~1Gb/s to ~2Gb/s. With a local host->guest benchmark with GSO disabled on the host side, throughput was seen to increase from 700Mb/s to 1.7Gb/s. Based on a patch from Herbert Xu. Signed-off-by: Mark McLoughlin Signed-off-by: Rusty Russell (use netdev_priv) Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 173 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 153 insertions(+), 20 deletions(-) (limited to 'drivers/net/virtio_net.c') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 27559c987d47..e6b5d6ef9ea8 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -34,6 +34,7 @@ module_param(gso, bool, 0444); /* FIXME: MTU in config. */ #define MAX_PACKET_LEN (ETH_HLEN+ETH_DATA_LEN) +#define GOOD_COPY_LEN 128 struct virtnet_info { @@ -58,6 +59,9 @@ struct virtnet_info /* I like... big packets and I cannot lie! */ bool big_packets; + /* Host will merge rx buffers for big packets (shake it! shake it!) */ + bool mergeable_rx_bufs; + /* Receive & send queues. */ struct sk_buff_head recv; struct sk_buff_head send; @@ -66,16 +70,11 @@ struct virtnet_info struct page *pages; }; -static inline struct virtio_net_hdr *skb_vnet_hdr(struct sk_buff *skb) +static inline void *skb_vnet_hdr(struct sk_buff *skb) { return (struct virtio_net_hdr *)skb->cb; } -static inline void vnet_hdr_to_sg(struct scatterlist *sg, struct sk_buff *skb) -{ - sg_init_one(sg, skb_vnet_hdr(skb), sizeof(struct virtio_net_hdr)); -} - static void give_a_page(struct virtnet_info *vi, struct page *page) { page->private = (unsigned long)vi->pages; @@ -121,25 +120,97 @@ static void skb_xmit_done(struct virtqueue *svq) static void receive_skb(struct net_device *dev, struct sk_buff *skb, unsigned len) { + struct virtnet_info *vi = netdev_priv(dev); struct virtio_net_hdr *hdr = skb_vnet_hdr(skb); int err; + int i; if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) { pr_debug("%s: short packet %i\n", dev->name, len); dev->stats.rx_length_errors++; goto drop; } - len -= sizeof(struct virtio_net_hdr); - if (len <= MAX_PACKET_LEN) - trim_pages(netdev_priv(dev), skb); + if (vi->mergeable_rx_bufs) { + struct virtio_net_hdr_mrg_rxbuf *mhdr = skb_vnet_hdr(skb); + unsigned int copy; + char *p = page_address(skb_shinfo(skb)->frags[0].page); - err = pskb_trim(skb, len); - if (err) { - pr_debug("%s: pskb_trim failed %i %d\n", dev->name, len, err); - dev->stats.rx_dropped++; - goto drop; + if (len > PAGE_SIZE) + len = PAGE_SIZE; + len -= sizeof(struct virtio_net_hdr_mrg_rxbuf); + + memcpy(hdr, p, sizeof(*mhdr)); + p += sizeof(*mhdr); + + copy = len; + if (copy > skb_tailroom(skb)) + copy = skb_tailroom(skb); + + memcpy(skb_put(skb, copy), p, copy); + + len -= copy; + + if (!len) { + give_a_page(vi, skb_shinfo(skb)->frags[0].page); + skb_shinfo(skb)->nr_frags--; + } else { + skb_shinfo(skb)->frags[0].page_offset += + sizeof(*mhdr) + copy; + skb_shinfo(skb)->frags[0].size = len; + skb->data_len += len; + skb->len += len; + } + + while (--mhdr->num_buffers) { + struct sk_buff *nskb; + + i = skb_shinfo(skb)->nr_frags; + if (i >= MAX_SKB_FRAGS) { + pr_debug("%s: packet too long %d\n", dev->name, + len); + dev->stats.rx_length_errors++; + goto drop; + } + + nskb = vi->rvq->vq_ops->get_buf(vi->rvq, &len); + if (!nskb) { + pr_debug("%s: rx error: %d buffers missing\n", + dev->name, mhdr->num_buffers); + dev->stats.rx_length_errors++; + goto drop; + } + + __skb_unlink(nskb, &vi->recv); + vi->num--; + + skb_shinfo(skb)->frags[i] = skb_shinfo(nskb)->frags[0]; + skb_shinfo(nskb)->nr_frags = 0; + kfree_skb(nskb); + + if (len > PAGE_SIZE) + len = PAGE_SIZE; + + skb_shinfo(skb)->frags[i].size = len; + skb_shinfo(skb)->nr_frags++; + skb->data_len += len; + skb->len += len; + } + } else { + len -= sizeof(struct virtio_net_hdr); + + if (len <= MAX_PACKET_LEN) + trim_pages(vi, skb); + + err = pskb_trim(skb, len); + if (err) { + pr_debug("%s: pskb_trim failed %i %d\n", dev->name, + len, err); + dev->stats.rx_dropped++; + goto drop; + } } + skb->truesize += skb->data_len; dev->stats.rx_bytes += skb->len; dev->stats.rx_packets++; @@ -198,7 +269,7 @@ drop: dev_kfree_skb(skb); } -static void try_fill_recv(struct virtnet_info *vi) +static void try_fill_recv_maxbufs(struct virtnet_info *vi) { struct sk_buff *skb; struct scatterlist sg[2+MAX_SKB_FRAGS]; @@ -206,12 +277,16 @@ static void try_fill_recv(struct virtnet_info *vi) sg_init_table(sg, 2+MAX_SKB_FRAGS); for (;;) { + struct virtio_net_hdr *hdr; + skb = netdev_alloc_skb(vi->dev, MAX_PACKET_LEN); if (unlikely(!skb)) break; skb_put(skb, MAX_PACKET_LEN); - vnet_hdr_to_sg(sg, skb); + + hdr = skb_vnet_hdr(skb); + sg_init_one(sg, hdr, sizeof(*hdr)); if (vi->big_packets) { for (i = 0; i < MAX_SKB_FRAGS; i++) { @@ -247,6 +322,54 @@ static void try_fill_recv(struct virtnet_info *vi) vi->rvq->vq_ops->kick(vi->rvq); } +static void try_fill_recv(struct virtnet_info *vi) +{ + struct sk_buff *skb; + struct scatterlist sg[1]; + int err; + + if (!vi->mergeable_rx_bufs) { + try_fill_recv_maxbufs(vi); + return; + } + + for (;;) { + skb_frag_t *f; + + skb = netdev_alloc_skb(vi->dev, GOOD_COPY_LEN + NET_IP_ALIGN); + if (unlikely(!skb)) + break; + + skb_reserve(skb, NET_IP_ALIGN); + + f = &skb_shinfo(skb)->frags[0]; + f->page = get_a_page(vi, GFP_ATOMIC); + if (!f->page) { + kfree_skb(skb); + break; + } + + f->page_offset = 0; + f->size = PAGE_SIZE; + + skb_shinfo(skb)->nr_frags++; + + sg_init_one(sg, page_address(f->page), PAGE_SIZE); + skb_queue_head(&vi->recv, skb); + + err = vi->rvq->vq_ops->add_buf(vi->rvq, sg, 0, 1, skb); + if (err) { + skb_unlink(skb, &vi->recv); + kfree_skb(skb); + break; + } + vi->num++; + } + if (unlikely(vi->num > vi->max)) + vi->max = vi->num; + vi->rvq->vq_ops->kick(vi->rvq); +} + static void skb_recv_done(struct virtqueue *rvq) { struct virtnet_info *vi = rvq->vdev->priv; @@ -325,15 +448,14 @@ static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb) { int num, err; struct scatterlist sg[2+MAX_SKB_FRAGS]; - struct virtio_net_hdr *hdr; + struct virtio_net_hdr_mrg_rxbuf *mhdr = skb_vnet_hdr(skb); + struct virtio_net_hdr *hdr = skb_vnet_hdr(skb); const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest; sg_init_table(sg, 2+MAX_SKB_FRAGS); pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest); - /* Encode metadata header at front. */ - hdr = skb_vnet_hdr(skb); if (skb->ip_summed == CHECKSUM_PARTIAL) { hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; hdr->csum_start = skb->csum_start - skb_headroom(skb); @@ -361,7 +483,14 @@ static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb) hdr->gso_size = hdr->hdr_len = 0; } - vnet_hdr_to_sg(sg, skb); + mhdr->num_buffers = 0; + + /* Encode metadata header at front. */ + if (vi->mergeable_rx_bufs) + sg_init_one(sg, mhdr, sizeof(*mhdr)); + else + sg_init_one(sg, hdr, sizeof(*hdr)); + num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1; err = vi->svq->vq_ops->add_buf(vi->svq, sg, num, 0, skb); @@ -551,6 +680,9 @@ static int virtnet_probe(struct virtio_device *vdev) || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN)) vi->big_packets = true; + if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) + vi->mergeable_rx_bufs = true; + /* We expect two virtqueues, receive then send. */ vi->rvq = vdev->config->find_vq(vdev, 0, skb_recv_done); if (IS_ERR(vi->rvq)) { @@ -643,6 +775,7 @@ static unsigned int features[] = { VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, VIRTIO_NET_F_GUEST_ECN, /* We don't yet handle UFO input. */ + VIRTIO_NET_F_MRG_RXBUF, VIRTIO_F_NOTIFY_ON_EMPTY, }; -- cgit v1.2.3 From 39da5814db81e8fe9782ae5ea24c0fdfcf2adc96 Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Wed, 26 Nov 2008 13:58:11 +0000 Subject: virtio_net: large tx MTU support We don't really have a max tx packet size limit, so allow configuring the device with up to 64k tx MTU. Signed-off-by: Mark McLoughlin Signed-off-by: Rusty Russell --- drivers/net/virtio_net.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'drivers/net/virtio_net.c') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index e6b5d6ef9ea8..71ca29cc184d 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -613,6 +613,17 @@ static struct ethtool_ops virtnet_ethtool_ops = { .set_tso = ethtool_op_set_tso, }; +#define MIN_MTU 68 +#define MAX_MTU 65535 + +static int virtnet_change_mtu(struct net_device *dev, int new_mtu) +{ + if (new_mtu < MIN_MTU || new_mtu > MAX_MTU) + return -EINVAL; + dev->mtu = new_mtu; + return 0; +} + static int virtnet_probe(struct virtio_device *vdev) { int err; @@ -628,6 +639,7 @@ static int virtnet_probe(struct virtio_device *vdev) dev->open = virtnet_open; dev->stop = virtnet_close; dev->hard_start_xmit = start_xmit; + dev->change_mtu = virtnet_change_mtu; dev->features = NETIF_F_HIGHDMA; #ifdef CONFIG_NET_POLL_CONTROLLER dev->poll_controller = virtnet_netpoll; -- cgit v1.2.3 From 908a7a16b852ffd618a9127be8d62432182d81b4 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 22 Dec 2008 20:43:12 -0800 Subject: net: Remove unused netdev arg from some NAPI interfaces. When the napi api was changed to separate its 1:1 binding to the net_device struct, the netif_rx_[prep|schedule|complete] api failed to remove the now vestigual net_device structure parameter. This patch cleans up that api by properly removing it.. Signed-off-by: Neil Horman Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/net/virtio_net.c') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 71ca29cc184d..b7004ff36451 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -374,9 +374,9 @@ static void skb_recv_done(struct virtqueue *rvq) { struct virtnet_info *vi = rvq->vdev->priv; /* Schedule NAPI, Suppress further interrupts if successful. */ - if (netif_rx_schedule_prep(vi->dev, &vi->napi)) { + if (netif_rx_schedule_prep(&vi->napi)) { rvq->vq_ops->disable_cb(rvq); - __netif_rx_schedule(vi->dev, &vi->napi); + __netif_rx_schedule(&vi->napi); } } @@ -402,11 +402,11 @@ again: /* Out of packets? */ if (received < budget) { - netif_rx_complete(vi->dev, napi); + netif_rx_complete(napi); if (unlikely(!vi->rvq->vq_ops->enable_cb(vi->rvq)) && napi_schedule_prep(napi)) { vi->rvq->vq_ops->disable_cb(vi->rvq); - __netif_rx_schedule(vi->dev, napi); + __netif_rx_schedule(napi); goto again; } } @@ -580,9 +580,9 @@ static int virtnet_open(struct net_device *dev) * won't get another interrupt, so process any outstanding packets * now. virtnet_poll wants re-enable the queue, so we disable here. * We synchronize against interrupts via NAPI_STATE_SCHED */ - if (netif_rx_schedule_prep(dev, &vi->napi)) { + if (netif_rx_schedule_prep(&vi->napi)) { vi->rvq->vq_ops->disable_cb(vi->rvq); - __netif_rx_schedule(dev, &vi->napi); + __netif_rx_schedule(&vi->napi); } return 0; } -- cgit v1.2.3