From 50c2e4dd6749725338621fff456b26d3a592259f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sun, 12 Jul 2015 01:20:55 +0300 Subject: net/xen-netback: off by one in BUG_ON() condition The > should be >=. I also added spaces around the '-' operations so the code is a little more consistent and matches the condition better. Fixes: f53c3fe8dad7 ('xen-netback: Introduce TX grant mapping') Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- drivers/net/xen-netback/netback.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/net/xen-netback/netback.c') diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 880d0d63e872..7d50711476fe 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -1566,13 +1566,13 @@ static inline void xenvif_tx_dealloc_action(struct xenvif_queue *queue) smp_rmb(); while (dc != dp) { - BUG_ON(gop - queue->tx_unmap_ops > MAX_PENDING_REQS); + BUG_ON(gop - queue->tx_unmap_ops >= MAX_PENDING_REQS); pending_idx = queue->dealloc_ring[pending_index(dc++)]; - pending_idx_release[gop-queue->tx_unmap_ops] = + pending_idx_release[gop - queue->tx_unmap_ops] = pending_idx; - queue->pages_to_unmap[gop-queue->tx_unmap_ops] = + queue->pages_to_unmap[gop - queue->tx_unmap_ops] = queue->mmap_pages[pending_idx]; gnttab_set_unmap_op(gop, idx_to_kaddr(queue, pending_idx), -- cgit v1.2.3 From 2475b22526d70234ecfe4a1ff88aed69badefba9 Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Mon, 3 Aug 2015 15:38:03 +0100 Subject: xen-netback: Allocate fraglist early to avoid complex rollback Determine if a fraglist is needed in the tx path, and allocate it if necessary before setting up the copy and map operations. Otherwise, undoing the copy and map operations is tricky. This fixes a use-after-free: if allocating the fraglist failed, the copy and map operations that had been set up were still executed, writing over the data area of a freed skb. Signed-off-by: Ross Lagerwall Signed-off-by: David S. Miller --- drivers/net/xen-netback/netback.c | 61 +++++++++++++++++++++------------------ 1 file changed, 33 insertions(+), 28 deletions(-) (limited to 'drivers/net/xen-netback/netback.c') diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 7d50711476fe..1b406e706a01 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -810,23 +810,17 @@ static inline struct sk_buff *xenvif_alloc_skb(unsigned int size) static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif_queue *queue, struct sk_buff *skb, struct xen_netif_tx_request *txp, - struct gnttab_map_grant_ref *gop) + struct gnttab_map_grant_ref *gop, + unsigned int frag_overflow, + struct sk_buff *nskb) { struct skb_shared_info *shinfo = skb_shinfo(skb); skb_frag_t *frags = shinfo->frags; u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx; int start; pending_ring_idx_t index; - unsigned int nr_slots, frag_overflow = 0; + unsigned int nr_slots; - /* At this point shinfo->nr_frags is in fact the number of - * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX. - */ - if (shinfo->nr_frags > MAX_SKB_FRAGS) { - frag_overflow = shinfo->nr_frags - MAX_SKB_FRAGS; - BUG_ON(frag_overflow > MAX_SKB_FRAGS); - shinfo->nr_frags = MAX_SKB_FRAGS; - } nr_slots = shinfo->nr_frags; /* Skip first skb fragment if it is on same page as header fragment. */ @@ -841,13 +835,6 @@ static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif_queue *que } if (frag_overflow) { - struct sk_buff *nskb = xenvif_alloc_skb(0); - if (unlikely(nskb == NULL)) { - if (net_ratelimit()) - netdev_err(queue->vif->dev, - "Can't allocate the frag_list skb.\n"); - return NULL; - } shinfo = skb_shinfo(nskb); frags = shinfo->frags; @@ -1175,9 +1162,10 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, unsigned *copy_ops, unsigned *map_ops) { - struct gnttab_map_grant_ref *gop = queue->tx_map_ops, *request_gop; - struct sk_buff *skb; + struct gnttab_map_grant_ref *gop = queue->tx_map_ops; + struct sk_buff *skb, *nskb; int ret; + unsigned int frag_overflow; while (skb_queue_len(&queue->tx_queue) < budget) { struct xen_netif_tx_request txreq; @@ -1265,6 +1253,29 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, break; } + skb_shinfo(skb)->nr_frags = ret; + if (data_len < txreq.size) + skb_shinfo(skb)->nr_frags++; + /* At this point shinfo->nr_frags is in fact the number of + * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX. + */ + frag_overflow = 0; + nskb = NULL; + if (skb_shinfo(skb)->nr_frags > MAX_SKB_FRAGS) { + frag_overflow = skb_shinfo(skb)->nr_frags - MAX_SKB_FRAGS; + BUG_ON(frag_overflow > MAX_SKB_FRAGS); + skb_shinfo(skb)->nr_frags = MAX_SKB_FRAGS; + nskb = xenvif_alloc_skb(0); + if (unlikely(nskb == NULL)) { + kfree_skb(skb); + xenvif_tx_err(queue, &txreq, idx); + if (net_ratelimit()) + netdev_err(queue->vif->dev, + "Can't allocate the frag_list skb.\n"); + break; + } + } + if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) { struct xen_netif_extra_info *gso; gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1]; @@ -1272,6 +1283,7 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, if (xenvif_set_skb_gso(queue->vif, skb, gso)) { /* Failure in xenvif_set_skb_gso is fatal. */ kfree_skb(skb); + kfree_skb(nskb); break; } } @@ -1294,9 +1306,7 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, (*copy_ops)++; - skb_shinfo(skb)->nr_frags = ret; if (data_len < txreq.size) { - skb_shinfo(skb)->nr_frags++; frag_set_pending_idx(&skb_shinfo(skb)->frags[0], pending_idx); xenvif_tx_create_map_op(queue, pending_idx, &txreq, gop); @@ -1310,13 +1320,8 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, queue->pending_cons++; - request_gop = xenvif_get_requests(queue, skb, txfrags, gop); - if (request_gop == NULL) { - kfree_skb(skb); - xenvif_tx_err(queue, &txreq, idx); - break; - } - gop = request_gop; + gop = xenvif_get_requests(queue, skb, txfrags, gop, + frag_overflow, nskb); __skb_queue_tail(&queue->tx_queue, skb); -- cgit v1.2.3 From 57b229063ae6dc65036209018dc7f4290cc026bb Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Tue, 4 Aug 2015 15:40:59 +0100 Subject: xen/netback: Wake dealloc thread after completing zerocopy work Waking the dealloc thread before decrementing inflight_packets is racy because it means the thread may go to sleep before inflight_packets is decremented. If kthread_stop() has already been called, the dealloc thread may wait forever with nothing to wake it. Instead, wake the thread only after decrementing inflight_packets. Signed-off-by: Ross Lagerwall Signed-off-by: David S. Miller --- drivers/net/xen-netback/netback.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/net/xen-netback/netback.c') diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 1b406e706a01..3f44b522b831 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -1541,7 +1541,6 @@ void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success) smp_wmb(); queue->dealloc_prod++; } while (ubuf); - wake_up(&queue->dealloc_wq); spin_unlock_irqrestore(&queue->callback_lock, flags); if (likely(zerocopy_success)) -- cgit v1.2.3 From 210c34dcd8d912dcc740f1f17625a7293af5cb56 Mon Sep 17 00:00:00 2001 From: Paul Durrant Date: Wed, 2 Sep 2015 17:58:36 +0100 Subject: xen-netback: add support for multicast control Xen's PV network protocol includes messages to add/remove ethernet multicast addresses to/from a filter list in the backend. This allows the frontend to request the backend only forward multicast packets which are of interest thus preventing unnecessary noise on the shared ring. The canonical netif header in git://xenbits.xen.org/xen.git specifies the message format (two more XEN_NETIF_EXTRA_TYPEs) so the minimal necessary changes have been pulled into include/xen/interface/io/netif.h. To prevent the frontend from extending the multicast filter list arbitrarily a limit (XEN_NETBK_MCAST_MAX) has been set to 64 entries. This limit is not specified by the protocol and so may change in future. If the limit is reached then the next XEN_NETIF_EXTRA_TYPE_MCAST_ADD sent by the frontend will be failed with NETIF_RSP_ERROR. Signed-off-by: Paul Durrant Cc: Ian Campbell Cc: Wei Liu Acked-by: Wei Liu Signed-off-by: David S. Miller --- drivers/net/xen-netback/netback.c | 99 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) (limited to 'drivers/net/xen-netback/netback.c') diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 3f44b522b831..42569b994ea8 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -1157,6 +1157,80 @@ static bool tx_credit_exceeded(struct xenvif_queue *queue, unsigned size) return false; } +/* No locking is required in xenvif_mcast_add/del() as they are + * only ever invoked from NAPI poll. An RCU list is used because + * xenvif_mcast_match() is called asynchronously, during start_xmit. + */ + +static int xenvif_mcast_add(struct xenvif *vif, const u8 *addr) +{ + struct xenvif_mcast_addr *mcast; + + if (vif->fe_mcast_count == XEN_NETBK_MCAST_MAX) { + if (net_ratelimit()) + netdev_err(vif->dev, + "Too many multicast addresses\n"); + return -ENOSPC; + } + + mcast = kzalloc(sizeof(*mcast), GFP_ATOMIC); + if (!mcast) + return -ENOMEM; + + ether_addr_copy(mcast->addr, addr); + list_add_tail_rcu(&mcast->entry, &vif->fe_mcast_addr); + vif->fe_mcast_count++; + + return 0; +} + +static void xenvif_mcast_del(struct xenvif *vif, const u8 *addr) +{ + struct xenvif_mcast_addr *mcast; + + list_for_each_entry_rcu(mcast, &vif->fe_mcast_addr, entry) { + if (ether_addr_equal(addr, mcast->addr)) { + --vif->fe_mcast_count; + list_del_rcu(&mcast->entry); + kfree_rcu(mcast, rcu); + break; + } + } +} + +bool xenvif_mcast_match(struct xenvif *vif, const u8 *addr) +{ + struct xenvif_mcast_addr *mcast; + + rcu_read_lock(); + list_for_each_entry_rcu(mcast, &vif->fe_mcast_addr, entry) { + if (ether_addr_equal(addr, mcast->addr)) { + rcu_read_unlock(); + return true; + } + } + rcu_read_unlock(); + + return false; +} + +void xenvif_mcast_addr_list_free(struct xenvif *vif) +{ + /* No need for locking or RCU here. NAPI poll and TX queue + * are stopped. + */ + while (!list_empty(&vif->fe_mcast_addr)) { + struct xenvif_mcast_addr *mcast; + + mcast = list_first_entry(&vif->fe_mcast_addr, + struct xenvif_mcast_addr, + entry); + --vif->fe_mcast_count; + list_del(&mcast->entry); + kfree(mcast); + } +} + static void xenvif_tx_build_gops(struct xenvif_queue *queue, int budget, unsigned *copy_ops, @@ -1215,6 +1289,31 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, break; } + if (extras[XEN_NETIF_EXTRA_TYPE_MCAST_ADD - 1].type) { + struct xen_netif_extra_info *extra; + + extra = &extras[XEN_NETIF_EXTRA_TYPE_MCAST_ADD - 1]; + ret = xenvif_mcast_add(queue->vif, extra->u.mcast.addr); + + make_tx_response(queue, &txreq, + (ret == 0) ? + XEN_NETIF_RSP_OKAY : + XEN_NETIF_RSP_ERROR); + push_tx_responses(queue); + continue; + } + + if (extras[XEN_NETIF_EXTRA_TYPE_MCAST_DEL - 1].type) { + struct xen_netif_extra_info *extra; + + extra = &extras[XEN_NETIF_EXTRA_TYPE_MCAST_DEL - 1]; + xenvif_mcast_del(queue->vif, extra->u.mcast.addr); + + make_tx_response(queue, &txreq, XEN_NETIF_RSP_OKAY); + push_tx_responses(queue); + continue; + } + ret = xenvif_count_requests(queue, &txreq, txfrags, work_to_do); if (unlikely(ret < 0)) break; -- cgit v1.2.3 From 0df4f266b3af90442bbeb5e685a84a80745beba0 Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Fri, 7 Aug 2015 17:34:37 +0100 Subject: xen: Use correctly the Xen memory terminologies Based on include/xen/mm.h [1], Linux is mistakenly using MFN when GFN is meant, I suspect this is because the first support for Xen was for PV. This resulted in some misimplementation of helpers on ARM and confused developers about the expected behavior. For instance, with pfn_to_mfn, we expect to get an MFN based on the name. Although, if we look at the implementation on x86, it's returning a GFN. For clarity and avoid new confusion, replace any reference to mfn with gfn in any helpers used by PV drivers. The x86 code will still keep some reference of pfn_to_mfn which may be used by all kind of guests No changes as been made in the hypercall field, even though they may be invalid, in order to keep the same as the defintion in xen repo. Note that page_to_mfn has been renamed to xen_page_to_gfn to avoid a name to close to the KVM function gfn_to_page. Take also the opportunity to simplify simple construction such as pfn_to_mfn(page_to_pfn(page)) into xen_page_to_gfn. More complex clean up will come in follow-up patches. [1] http://xenbits.xen.org/gitweb/?p=xen.git;a=commitdiff;h=e758ed14f390342513405dd766e874934573e6cb Signed-off-by: Julien Grall Reviewed-by: Stefano Stabellini Acked-by: Dmitry Torokhov Acked-by: Wei Liu Signed-off-by: David Vrabel --- drivers/net/xen-netback/netback.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net/xen-netback/netback.c') diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 3f44b522b831..7c64c74711e8 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -314,7 +314,7 @@ static void xenvif_gop_frag_copy(struct xenvif_queue *queue, struct sk_buff *skb } else { copy_gop->source.domid = DOMID_SELF; copy_gop->source.u.gmfn = - virt_to_mfn(page_address(page)); + virt_to_gfn(page_address(page)); } copy_gop->source.offset = offset; @@ -1296,7 +1296,7 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, queue->tx_copy_ops[*copy_ops].source.offset = txreq.offset; queue->tx_copy_ops[*copy_ops].dest.u.gmfn = - virt_to_mfn(skb->data); + virt_to_gfn(skb->data); queue->tx_copy_ops[*copy_ops].dest.domid = DOMID_SELF; queue->tx_copy_ops[*copy_ops].dest.offset = offset_in_page(skb->data); -- cgit v1.2.3 From 1d5d48523900a4b0f25d6b52f1a93c84bd671186 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Tue, 8 Sep 2015 14:25:14 +0100 Subject: xen-netback: require fewer guest Rx slots when not using GSO Commit f48da8b14d04ca87ffcffe68829afd45f926ec6a (xen-netback: fix unlimited guest Rx internal queue and carrier flapping) introduced a regression. The PV frontend in IPXE only places 4 requests on the guest Rx ring. Since netback required at least (MAX_SKB_FRAGS + 1) slots, IPXE could not receive any packets. a) If GSO is not enabled on the VIF, fewer guest Rx slots are required for the largest possible packet. Calculate the required slots based on the maximum GSO size or the MTU. This calculation of the number of required slots relies on 1650d5455bd2 (xen-netback: always fully coalesce guest Rx packets) which present in 4.0-rc1 and later. b) Reduce the Rx stall detection to checking for at least one available Rx request. This is fine since we're predominately concerned with detecting interfaces which are down and thus have zero available Rx requests. Signed-off-by: David Vrabel Reviewed-by: Wei Liu Signed-off-by: David S. Miller --- drivers/net/xen-netback/netback.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) (limited to 'drivers/net/xen-netback/netback.c') diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 42569b994ea8..b588b1a08cd4 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -149,9 +149,20 @@ static inline pending_ring_idx_t pending_index(unsigned i) return i & (MAX_PENDING_REQS-1); } -bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue, int needed) +static int xenvif_rx_ring_slots_needed(struct xenvif *vif) +{ + if (vif->gso_mask) + return DIV_ROUND_UP(vif->dev->gso_max_size, PAGE_SIZE) + 1; + else + return DIV_ROUND_UP(vif->dev->mtu, PAGE_SIZE); +} + +static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue) { RING_IDX prod, cons; + int needed; + + needed = xenvif_rx_ring_slots_needed(queue->vif); do { prod = queue->rx.sring->req_prod; @@ -513,7 +524,7 @@ static void xenvif_rx_action(struct xenvif_queue *queue) skb_queue_head_init(&rxq); - while (xenvif_rx_ring_slots_available(queue, XEN_NETBK_RX_SLOTS_MAX) + while (xenvif_rx_ring_slots_available(queue) && (skb = xenvif_rx_dequeue(queue)) != NULL) { queue->last_rx_time = jiffies; @@ -1938,8 +1949,7 @@ static bool xenvif_rx_queue_stalled(struct xenvif_queue *queue) prod = queue->rx.sring->req_prod; cons = queue->rx.req_cons; - return !queue->stalled - && prod - cons < XEN_NETBK_RX_SLOTS_MAX + return !queue->stalled && prod - cons < 1 && time_after(jiffies, queue->last_rx_time + queue->vif->stall_timeout); } @@ -1951,14 +1961,13 @@ static bool xenvif_rx_queue_ready(struct xenvif_queue *queue) prod = queue->rx.sring->req_prod; cons = queue->rx.req_cons; - return queue->stalled - && prod - cons >= XEN_NETBK_RX_SLOTS_MAX; + return queue->stalled && prod - cons >= 1; } static bool xenvif_have_rx_work(struct xenvif_queue *queue) { return (!skb_queue_empty(&queue->rx_queue) - && xenvif_rx_ring_slots_available(queue, XEN_NETBK_RX_SLOTS_MAX)) + && xenvif_rx_ring_slots_available(queue)) || (queue->vif->stall_timeout && (xenvif_rx_queue_stalled(queue) || xenvif_rx_queue_ready(queue))) -- cgit v1.2.3 From 4c82ac3c37363e8c4ded6a5fe1ec5fa756b34df3 Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Thu, 10 Sep 2015 11:18:57 +0100 Subject: xen-netback: respect user provided max_queues Originally that parameter was always reset to num_online_cpus during module initialisation, which renders it useless. The fix is to only set max_queues to num_online_cpus when user has not provided a value. Reported-by: Johnny Strom Signed-off-by: Wei Liu Reviewed-by: David Vrabel Acked-by: Ian Campbell Signed-off-by: David S. Miller --- drivers/net/xen-netback/netback.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/net/xen-netback/netback.c') diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index b588b1a08cd4..abc1381264fc 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -2114,8 +2114,11 @@ static int __init netback_init(void) if (!xen_domain()) return -ENODEV; - /* Allow as many queues as there are CPUs, by default */ - xenvif_max_queues = num_online_cpus(); + /* Allow as many queues as there are CPUs if user has not + * specified a value. + */ + if (xenvif_max_queues == 0) + xenvif_max_queues = num_online_cpus(); if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) { pr_info("fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n", -- cgit v1.2.3