From ca2f09f2b2c6c25047cfc545d057c4edfcfe561c Mon Sep 17 00:00:00 2001 From: Paul Durrant Date: Fri, 6 Dec 2013 16:36:07 +0000 Subject: xen-netback: improve guest-receive-side flow control The way that flow control works without this patch is that, in start_xmit() the code uses xenvif_count_skb_slots() to predict how many slots xenvif_gop_skb() will consume and then adds this to a 'req_cons_peek' counter which it then uses to determine if the shared ring has that amount of space available by checking whether 'req_prod' has passed that value. If the ring doesn't have space the tx queue is stopped. xenvif_gop_skb() will then consume slots and update 'req_cons' and issue responses, updating 'rsp_prod' as it goes. The frontend will consume those responses and post new requests, by updating req_prod. So, req_prod chases req_cons which chases rsp_prod, and can never exceed that value. Thus if xenvif_count_skb_slots() ever returns a number of slots greater than xenvif_gop_skb() uses, req_cons_peek will get to a value that req_prod cannot possibly achieve (since it's limited by the 'real' req_cons) and, if this happens enough times, req_cons_peek gets more than a ring size ahead of req_cons and the tx queue then remains stopped forever waiting for an unachievable amount of space to become available in the ring. Having two routines trying to calculate the same value is always going to be fragile, so this patch does away with that. All we essentially need to do is make sure that we have 'enough stuff' on our internal queue without letting it build up uncontrollably. So start_xmit() makes a cheap optimistic check of how much space is needed for an skb and only turns the queue off if that is unachievable. net_rx_action() is the place where we could do with an accurate predicition but, since that has proven tricky to calculate, a cheap worse-case (but not too bad) estimate is all we really need since the only thing we *must* prevent is xenvif_gop_skb() consuming more slots than are available. Without this patch I can trivially stall netback permanently by just doing a large guest to guest file copy between two Windows Server 2008R2 VMs on a single host. Patch tested with frontends in: - Windows Server 2008R2 - CentOS 6.0 - Debian Squeeze - Debian Wheezy - SLES11 Signed-off-by: Paul Durrant Cc: Wei Liu Cc: Ian Campbell Cc: David Vrabel Cc: Annie Li Cc: Konrad Rzeszutek Wilk Acked-by: Wei Liu Signed-off-by: David S. Miller --- drivers/net/xen-netback/interface.c | 47 +++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 23 deletions(-) (limited to 'drivers/net/xen-netback/interface.c') diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 870f1fa58370..1dcb9606e6e0 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -46,11 +46,6 @@ int xenvif_schedulable(struct xenvif *vif) return netif_running(vif->dev) && netif_carrier_ok(vif->dev); } -static int xenvif_rx_schedulable(struct xenvif *vif) -{ - return xenvif_schedulable(vif) && !xenvif_rx_ring_full(vif); -} - static irqreturn_t xenvif_tx_interrupt(int irq, void *dev_id) { struct xenvif *vif = dev_id; @@ -104,8 +99,8 @@ static irqreturn_t xenvif_rx_interrupt(int irq, void *dev_id) { struct xenvif *vif = dev_id; - if (xenvif_rx_schedulable(vif)) - netif_wake_queue(vif->dev); + vif->rx_event = true; + xenvif_kick_thread(vif); return IRQ_HANDLED; } @@ -121,24 +116,35 @@ static irqreturn_t xenvif_interrupt(int irq, void *dev_id) static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct xenvif *vif = netdev_priv(dev); + int min_slots_needed; BUG_ON(skb->dev != dev); /* Drop the packet if vif is not ready */ - if (vif->task == NULL) + if (vif->task == NULL || !xenvif_schedulable(vif)) goto drop; - /* Drop the packet if the target domain has no receive buffers. */ - if (!xenvif_rx_schedulable(vif)) - goto drop; + /* At best we'll need one slot for the header and one for each + * frag. + */ + min_slots_needed = 1 + skb_shinfo(skb)->nr_frags; - /* Reserve ring slots for the worst-case number of fragments. */ - vif->rx_req_cons_peek += xenvif_count_skb_slots(vif, skb); + /* If the skb is GSO then we'll also need an extra slot for the + * metadata. + */ + if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4 || + skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) + min_slots_needed++; - if (vif->can_queue && xenvif_must_stop_queue(vif)) - netif_stop_queue(dev); + /* If the skb can't possibly fit in the remaining slots + * then turn off the queue to give the ring a chance to + * drain. + */ + if (!xenvif_rx_ring_slots_available(vif, min_slots_needed)) + xenvif_stop_queue(vif); - xenvif_queue_tx_skb(vif, skb); + skb_queue_tail(&vif->rx_queue, skb); + xenvif_kick_thread(vif); return NETDEV_TX_OK; @@ -148,12 +154,6 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } -void xenvif_notify_tx_completion(struct xenvif *vif) -{ - if (netif_queue_stopped(vif->dev) && xenvif_rx_schedulable(vif)) - netif_wake_queue(vif->dev); -} - static struct net_device_stats *xenvif_get_stats(struct net_device *dev) { struct xenvif *vif = netdev_priv(dev); @@ -378,6 +378,8 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref, if (err < 0) goto err; + init_waitqueue_head(&vif->wq); + if (tx_evtchn == rx_evtchn) { /* feature-split-event-channels == 0 */ err = bind_interdomain_evtchn_to_irqhandler( @@ -410,7 +412,6 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref, disable_irq(vif->rx_irq); } - init_waitqueue_head(&vif->wq); task = kthread_create(xenvif_kthread, (void *)vif, "%s", vif->dev->name); if (IS_ERR(task)) { -- cgit v1.2.3 From ac3d5ac277352fe6e27809286768e9f1f8aa388d Mon Sep 17 00:00:00 2001 From: Paul Durrant Date: Mon, 23 Dec 2013 09:27:17 +0000 Subject: xen-netback: fix guest-receive-side array sizes The sizes chosen for the metadata and grant_copy_op arrays on the guest receive size are wrong; - The meta array is needlessly twice the ring size, when we only ever consume a single array element per RX ring slot - The grant_copy_op array is way too small. It's sized based on a bogus assumption: that at most two copy ops will be used per ring slot. This may have been true at some point in the past but it's clear from looking at start_new_rx_buffer() that a new ring slot is only consumed if a frag would overflow the current slot (plus some other conditions) so the actual limit is MAX_SKB_FRAGS grant_copy_ops per ring slot. This patch fixes those two sizing issues and, because grant_copy_ops grows so much, it pulls it out into a separate chunk of vmalloc()ed memory. Signed-off-by: Paul Durrant Acked-by: Wei Liu Cc: Ian Campbell Cc: David Vrabel Signed-off-by: David S. Miller --- drivers/net/xen-netback/interface.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'drivers/net/xen-netback/interface.c') diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 870f1fa58370..34ca4e58a43d 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -307,6 +307,15 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid, SET_NETDEV_DEV(dev, parent); vif = netdev_priv(dev); + + vif->grant_copy_op = vmalloc(sizeof(struct gnttab_copy) * + MAX_GRANT_COPY_OPS); + if (vif->grant_copy_op == NULL) { + pr_warn("Could not allocate grant copy space for %s\n", name); + free_netdev(dev); + return ERR_PTR(-ENOMEM); + } + vif->domid = domid; vif->handle = handle; vif->can_sg = 1; @@ -487,6 +496,7 @@ void xenvif_free(struct xenvif *vif) unregister_netdev(vif->dev); + vfree(vif->grant_copy_op); free_netdev(vif->dev); module_put(THIS_MODULE); -- cgit v1.2.3 From f35f76ee76df008131bbe01a2297de0c55ee2297 Mon Sep 17 00:00:00 2001 From: Josh Boyer Date: Sun, 5 Jan 2014 10:24:01 -0500 Subject: xen-netback: Include header for vmalloc Commit ac3d5ac27735 ("xen-netback: fix guest-receive-side array sizes") added calls to vmalloc and vfree in the interface.c file without including . This causes build failures if the -Werror=implicit-function-declaration flag is passed. Signed-off-by: Josh Boyer Acked-by: Wei Liu Signed-off-by: David S. Miller --- drivers/net/xen-netback/interface.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/net/xen-netback/interface.c') diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 34ca4e58a43d..fff8cddfed81 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include -- cgit v1.2.3