From 4a74dc65e3ad825a66dfbcb256f98c550f96445b Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 5 Mar 2013 20:13:54 +0000 Subject: sfc: Allow efx_channel_type::receive_skb() to reject a packet Instead of having efx_ptp_rx() call netif_receive_skb() for an invalid PTP packet, make it return false for rejected packets and have efx_rx_deliver() pass them up. Signed-off-by: Ben Hutchings --- drivers/net/ethernet/sfc/rx.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet/sfc/rx.c') diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index bb579a6128c8..f31c23ea2a07 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -575,12 +575,14 @@ static void efx_rx_deliver(struct efx_channel *channel, /* Record the rx_queue */ skb_record_rx_queue(skb, channel->rx_queue.core_index); - /* Pass the packet up */ if (channel->type->receive_skb) - channel->type->receive_skb(channel, skb); - else - netif_receive_skb(skb); + if (channel->type->receive_skb(channel, skb)) + goto handled; + + /* Pass the packet up */ + netif_receive_skb(skb); +handled: /* Update allocation strategy method */ channel->rx_alloc_level += RX_ALLOC_FACTOR_SKB; } -- cgit v1.2.3 From 97d48a10c670f87bba9e5b2241e32f2eccd3fef0 Mon Sep 17 00:00:00 2001 From: Alexandre Rames Date: Fri, 11 Jan 2013 12:26:21 +0000 Subject: sfc: Remove rx_alloc_method SKB [bwh: Remove more dead code, and make efx_ptp_rx() pull the data it needs into the header area.] Signed-off-by: Ben Hutchings --- drivers/net/ethernet/sfc/rx.c | 330 ++++++++++++------------------------------ 1 file changed, 95 insertions(+), 235 deletions(-) (limited to 'drivers/net/ethernet/sfc/rx.c') diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index f31c23ea2a07..e7aa28eb9327 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -33,46 +33,6 @@ /* Size of buffer allocated for skb header area. */ #define EFX_SKB_HEADERS 64u -/* - * rx_alloc_method - RX buffer allocation method - * - * This driver supports two methods for allocating and using RX buffers: - * each RX buffer may be backed by an skb or by an order-n page. - * - * When GRO is in use then the second method has a lower overhead, - * since we don't have to allocate then free skbs on reassembled frames. - * - * Values: - * - RX_ALLOC_METHOD_AUTO = 0 - * - RX_ALLOC_METHOD_SKB = 1 - * - RX_ALLOC_METHOD_PAGE = 2 - * - * The heuristic for %RX_ALLOC_METHOD_AUTO is a simple hysteresis count - * controlled by the parameters below. - * - * - Since pushing and popping descriptors are separated by the rx_queue - * size, so the watermarks should be ~rxd_size. - * - The performance win by using page-based allocation for GRO is less - * than the performance hit of using page-based allocation of non-GRO, - * so the watermarks should reflect this. - * - * Per channel we maintain a single variable, updated by each channel: - * - * rx_alloc_level += (gro_performed ? RX_ALLOC_FACTOR_GRO : - * RX_ALLOC_FACTOR_SKB) - * Per NAPI poll interval, we constrain rx_alloc_level to 0..MAX (which - * limits the hysteresis), and update the allocation strategy: - * - * rx_alloc_method = (rx_alloc_level > RX_ALLOC_LEVEL_GRO ? 
- * RX_ALLOC_METHOD_PAGE : RX_ALLOC_METHOD_SKB) - */ -static int rx_alloc_method = RX_ALLOC_METHOD_AUTO; - -#define RX_ALLOC_LEVEL_GRO 0x2000 -#define RX_ALLOC_LEVEL_MAX 0x3000 -#define RX_ALLOC_FACTOR_GRO 1 -#define RX_ALLOC_FACTOR_SKB (-2) - /* This is the percentage fill level below which new RX descriptors * will be added to the RX descriptor ring. */ @@ -99,10 +59,7 @@ static inline unsigned int efx_rx_buf_size(struct efx_nic *efx) static u8 *efx_rx_buf_eh(struct efx_nic *efx, struct efx_rx_buffer *buf) { - if (buf->flags & EFX_RX_BUF_PAGE) - return page_address(buf->u.page) + efx_rx_buf_offset(efx, buf); - else - return (u8 *)buf->u.skb->data + efx->type->rx_buffer_hash_size; + return page_address(buf->page) + efx_rx_buf_offset(efx, buf); } static inline u32 efx_rx_buf_hash(const u8 *eh) @@ -120,56 +77,7 @@ static inline u32 efx_rx_buf_hash(const u8 *eh) } /** - * efx_init_rx_buffers_skb - create EFX_RX_BATCH skb-based RX buffers - * - * @rx_queue: Efx RX queue - * - * This allocates EFX_RX_BATCH skbs, maps them for DMA, and populates a - * struct efx_rx_buffer for each one. Return a negative error code or 0 - * on success. May fail having only inserted fewer than EFX_RX_BATCH - * buffers. - */ -static int efx_init_rx_buffers_skb(struct efx_rx_queue *rx_queue) -{ - struct efx_nic *efx = rx_queue->efx; - struct net_device *net_dev = efx->net_dev; - struct efx_rx_buffer *rx_buf; - struct sk_buff *skb; - int skb_len = efx->rx_buffer_len; - unsigned index, count; - - for (count = 0; count < EFX_RX_BATCH; ++count) { - index = rx_queue->added_count & rx_queue->ptr_mask; - rx_buf = efx_rx_buffer(rx_queue, index); - - rx_buf->u.skb = skb = netdev_alloc_skb(net_dev, skb_len); - if (unlikely(!skb)) - return -ENOMEM; - - /* Adjust the SKB for padding */ - skb_reserve(skb, NET_IP_ALIGN); - rx_buf->len = skb_len - NET_IP_ALIGN; - rx_buf->flags = 0; - - rx_buf->dma_addr = dma_map_single(&efx->pci_dev->dev, - skb->data, rx_buf->len, - DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(&efx->pci_dev->dev, - rx_buf->dma_addr))) { - dev_kfree_skb_any(skb); - rx_buf->u.skb = NULL; - return -EIO; - } - - ++rx_queue->added_count; - ++rx_queue->alloc_skb_count; - } - - return 0; -} - -/** - * efx_init_rx_buffers_page - create EFX_RX_BATCH page-based RX buffers + * efx_init_rx_buffers - create EFX_RX_BATCH page-based RX buffers * * @rx_queue: Efx RX queue * @@ -178,7 +86,7 @@ static int efx_init_rx_buffers_skb(struct efx_rx_queue *rx_queue) * code or 0 on success. If a single page can be split between two buffers, * then the page will either be inserted fully, or not at at all. 
*/ -static int efx_init_rx_buffers_page(struct efx_rx_queue *rx_queue) +static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue) { struct efx_nic *efx = rx_queue->efx; struct efx_rx_buffer *rx_buf; @@ -214,12 +122,11 @@ static int efx_init_rx_buffers_page(struct efx_rx_queue *rx_queue) index = rx_queue->added_count & rx_queue->ptr_mask; rx_buf = efx_rx_buffer(rx_queue, index); rx_buf->dma_addr = dma_addr + EFX_PAGE_IP_ALIGN; - rx_buf->u.page = page; + rx_buf->page = page; rx_buf->page_offset = page_offset + EFX_PAGE_IP_ALIGN; rx_buf->len = efx->rx_buffer_len - EFX_PAGE_IP_ALIGN; - rx_buf->flags = EFX_RX_BUF_PAGE; + rx_buf->flags = 0; ++rx_queue->added_count; - ++rx_queue->alloc_page_count; ++state->refcnt; if ((~count & 1) && (efx->rx_buffer_len <= EFX_RX_HALF_PAGE)) { @@ -239,10 +146,10 @@ static void efx_unmap_rx_buffer(struct efx_nic *efx, struct efx_rx_buffer *rx_buf, unsigned int used_len) { - if ((rx_buf->flags & EFX_RX_BUF_PAGE) && rx_buf->u.page) { + if (rx_buf->page) { struct efx_rx_page_state *state; - state = page_address(rx_buf->u.page); + state = page_address(rx_buf->page); if (--state->refcnt == 0) { dma_unmap_page(&efx->pci_dev->dev, state->dma_addr, @@ -253,21 +160,15 @@ static void efx_unmap_rx_buffer(struct efx_nic *efx, rx_buf->dma_addr, used_len, DMA_FROM_DEVICE); } - } else if (!(rx_buf->flags & EFX_RX_BUF_PAGE) && rx_buf->u.skb) { - dma_unmap_single(&efx->pci_dev->dev, rx_buf->dma_addr, - rx_buf->len, DMA_FROM_DEVICE); } } static void efx_free_rx_buffer(struct efx_nic *efx, struct efx_rx_buffer *rx_buf) { - if ((rx_buf->flags & EFX_RX_BUF_PAGE) && rx_buf->u.page) { - __free_pages(rx_buf->u.page, efx->rx_buffer_order); - rx_buf->u.page = NULL; - } else if (!(rx_buf->flags & EFX_RX_BUF_PAGE) && rx_buf->u.skb) { - dev_kfree_skb_any(rx_buf->u.skb); - rx_buf->u.skb = NULL; + if (rx_buf->page) { + __free_pages(rx_buf->page, efx->rx_buffer_order); + rx_buf->page = NULL; } } @@ -283,7 +184,7 @@ static void efx_fini_rx_buffer(struct efx_rx_queue *rx_queue, static void efx_resurrect_rx_buffer(struct efx_rx_queue *rx_queue, struct efx_rx_buffer *rx_buf) { - struct efx_rx_page_state *state = page_address(rx_buf->u.page); + struct efx_rx_page_state *state = page_address(rx_buf->page); struct efx_rx_buffer *new_buf; unsigned fill_level, index; @@ -298,14 +199,13 @@ static void efx_resurrect_rx_buffer(struct efx_rx_queue *rx_queue, } ++state->refcnt; - get_page(rx_buf->u.page); + get_page(rx_buf->page); index = rx_queue->added_count & rx_queue->ptr_mask; new_buf = efx_rx_buffer(rx_queue, index); new_buf->dma_addr = rx_buf->dma_addr ^ (PAGE_SIZE >> 1); - new_buf->u.page = rx_buf->u.page; + new_buf->page = rx_buf->page; new_buf->len = rx_buf->len; - new_buf->flags = EFX_RX_BUF_PAGE; ++rx_queue->added_count; } @@ -319,18 +219,17 @@ static void efx_recycle_rx_buffer(struct efx_channel *channel, struct efx_rx_buffer *new_buf; unsigned index; - rx_buf->flags &= EFX_RX_BUF_PAGE; + rx_buf->flags = 0; - if ((rx_buf->flags & EFX_RX_BUF_PAGE) && - efx->rx_buffer_len <= EFX_RX_HALF_PAGE && - page_count(rx_buf->u.page) == 1) + if (efx->rx_buffer_len <= EFX_RX_HALF_PAGE && + page_count(rx_buf->page) == 1) efx_resurrect_rx_buffer(rx_queue, rx_buf); index = rx_queue->added_count & rx_queue->ptr_mask; new_buf = efx_rx_buffer(rx_queue, index); memcpy(new_buf, rx_buf, sizeof(*new_buf)); - rx_buf->u.page = NULL; + rx_buf->page = NULL; ++rx_queue->added_count; } @@ -348,7 +247,6 @@ static void efx_recycle_rx_buffer(struct efx_channel *channel, */ void efx_fast_push_rx_descriptors(struct 
efx_rx_queue *rx_queue) { - struct efx_channel *channel = efx_rx_queue_channel(rx_queue); unsigned fill_level; int space, rc = 0; @@ -369,16 +267,13 @@ void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue) netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev, "RX queue %d fast-filling descriptor ring from" - " level %d to level %d using %s allocation\n", + " level %d to level %d\n", efx_rx_queue_index(rx_queue), fill_level, - rx_queue->max_fill, - channel->rx_alloc_push_pages ? "page" : "skb"); + rx_queue->max_fill); + do { - if (channel->rx_alloc_push_pages) - rc = efx_init_rx_buffers_page(rx_queue); - else - rc = efx_init_rx_buffers_skb(rx_queue); + rc = efx_init_rx_buffers(rx_queue); if (unlikely(rc)) { /* Ensure that we don't leave the rx queue empty */ if (rx_queue->added_count == rx_queue->removed_count) @@ -408,7 +303,7 @@ void efx_rx_slow_fill(unsigned long context) static void efx_rx_packet__check_len(struct efx_rx_queue *rx_queue, struct efx_rx_buffer *rx_buf, - int len, bool *leak_packet) + int len) { struct efx_nic *efx = rx_queue->efx; unsigned max_len = rx_buf->len - efx->type->rx_buffer_padding; @@ -428,11 +323,6 @@ static void efx_rx_packet__check_len(struct efx_rx_queue *rx_queue, "RX event (0x%x > 0x%x+0x%x). Leaking\n", efx_rx_queue_index(rx_queue), len, max_len, efx->type->rx_buffer_padding); - /* If this buffer was skb-allocated, then the meta - * data at the end of the skb will be trashed. So - * we have no choice but to leak the fragment. - */ - *leak_packet = !(rx_buf->flags & EFX_RX_BUF_PAGE); efx_schedule_reset(efx, RESET_TYPE_RX_RECOVERY); } else { if (net_ratelimit()) @@ -454,51 +344,78 @@ static void efx_rx_packet_gro(struct efx_channel *channel, { struct napi_struct *napi = &channel->napi_str; gro_result_t gro_result; + struct efx_nic *efx = channel->efx; + struct page *page = rx_buf->page; + struct sk_buff *skb; - if (rx_buf->flags & EFX_RX_BUF_PAGE) { - struct efx_nic *efx = channel->efx; - struct page *page = rx_buf->u.page; - struct sk_buff *skb; - - rx_buf->u.page = NULL; + rx_buf->page = NULL; - skb = napi_get_frags(napi); - if (!skb) { - put_page(page); - return; - } + skb = napi_get_frags(napi); + if (!skb) { + put_page(page); + return; + } - if (efx->net_dev->features & NETIF_F_RXHASH) - skb->rxhash = efx_rx_buf_hash(eh); + if (efx->net_dev->features & NETIF_F_RXHASH) + skb->rxhash = efx_rx_buf_hash(eh); - skb_fill_page_desc(skb, 0, page, - efx_rx_buf_offset(efx, rx_buf), rx_buf->len); + skb_fill_page_desc(skb, 0, page, + efx_rx_buf_offset(efx, rx_buf), rx_buf->len); - skb->len = rx_buf->len; - skb->data_len = rx_buf->len; - skb->truesize += rx_buf->len; - skb->ip_summed = ((rx_buf->flags & EFX_RX_PKT_CSUMMED) ? - CHECKSUM_UNNECESSARY : CHECKSUM_NONE); + skb->len = rx_buf->len; + skb->data_len = rx_buf->len; + skb->truesize += rx_buf->len; + skb->ip_summed = ((rx_buf->flags & EFX_RX_PKT_CSUMMED) ? 
+ CHECKSUM_UNNECESSARY : CHECKSUM_NONE); - skb_record_rx_queue(skb, channel->rx_queue.core_index); + skb_record_rx_queue(skb, channel->rx_queue.core_index); gro_result = napi_gro_frags(napi); - } else { - struct sk_buff *skb = rx_buf->u.skb; - EFX_BUG_ON_PARANOID(!(rx_buf->flags & EFX_RX_PKT_CSUMMED)); - rx_buf->u.skb = NULL; - skb->ip_summed = CHECKSUM_UNNECESSARY; + if (gro_result != GRO_DROP) + channel->irq_mod_score += 2; +} - gro_result = napi_gro_receive(napi, skb); - } +/* Allocate and construct an SKB around a struct page.*/ +static struct sk_buff *efx_rx_mk_skb(struct efx_channel *channel, + struct efx_rx_buffer *rx_buf, + u8 *eh, int hdr_len) +{ + struct efx_nic *efx = channel->efx; + struct sk_buff *skb; - if (gro_result == GRO_NORMAL) { - channel->rx_alloc_level += RX_ALLOC_FACTOR_SKB; - } else if (gro_result != GRO_DROP) { - channel->rx_alloc_level += RX_ALLOC_FACTOR_GRO; - channel->irq_mod_score += 2; + /* Allocate an SKB to store the headers */ + skb = netdev_alloc_skb(efx->net_dev, hdr_len + EFX_PAGE_SKB_ALIGN); + if (unlikely(skb == NULL)) + return NULL; + + EFX_BUG_ON_PARANOID(rx_buf->len < hdr_len); + + skb_reserve(skb, EFX_PAGE_SKB_ALIGN); + + skb->len = rx_buf->len; + skb->truesize = rx_buf->len + sizeof(struct sk_buff); + memcpy(skb->data, eh, hdr_len); + skb->tail += hdr_len; + + /* Append the remaining page onto the frag list */ + if (rx_buf->len > hdr_len) { + skb->data_len = skb->len - hdr_len; + skb_fill_page_desc(skb, 0, rx_buf->page, + efx_rx_buf_offset(efx, rx_buf) + hdr_len, + skb->data_len); + } else { + __free_pages(rx_buf->page, efx->rx_buffer_order); + skb->data_len = 0; } + + /* Ownership has transferred from the rx_buf to skb */ + rx_buf->page = NULL; + + /* Move past the ethernet header */ + skb->protocol = eth_type_trans(skb, efx->net_dev); + + return skb; } void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index, @@ -507,7 +424,6 @@ void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index, struct efx_nic *efx = rx_queue->efx; struct efx_channel *channel = efx_rx_queue_channel(rx_queue); struct efx_rx_buffer *rx_buf; - bool leak_packet = false; rx_buf = efx_rx_buffer(rx_queue, index); rx_buf->flags |= flags; @@ -519,7 +435,7 @@ void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index, rx_queue->removed_count++; /* Validate the length encoded in the event vs the descriptor pushed */ - efx_rx_packet__check_len(rx_queue, rx_buf, len, &leak_packet); + efx_rx_packet__check_len(rx_queue, rx_buf, len); netif_vdbg(efx, rx_status, efx->net_dev, "RX queue %d received id %x at %llx+%x %s%s\n", @@ -530,10 +446,7 @@ void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index, /* Discard packet, if instructed to do so */ if (unlikely(rx_buf->flags & EFX_RX_PKT_DISCARD)) { - if (unlikely(leak_packet)) - channel->n_skbuff_leaks++; - else - efx_recycle_rx_buffer(channel, rx_buf); + efx_recycle_rx_buffer(channel, rx_buf); /* Don't hold off the previous receive */ rx_buf = NULL; @@ -560,31 +473,28 @@ out: channel->rx_pkt = rx_buf; } -static void efx_rx_deliver(struct efx_channel *channel, +static void efx_rx_deliver(struct efx_channel *channel, u8 *eh, struct efx_rx_buffer *rx_buf) { struct sk_buff *skb; + u16 hdr_len = min_t(u16, rx_buf->len, EFX_SKB_HEADERS); - /* We now own the SKB */ - skb = rx_buf->u.skb; - rx_buf->u.skb = NULL; + skb = efx_rx_mk_skb(channel, rx_buf, eh, hdr_len); + if (unlikely(skb == NULL)) { + efx_free_rx_buffer(channel->efx, rx_buf); + return; + } + skb_record_rx_queue(skb, 
channel->rx_queue.core_index); /* Set the SKB flags */ skb_checksum_none_assert(skb); - /* Record the rx_queue */ - skb_record_rx_queue(skb, channel->rx_queue.core_index); - if (channel->type->receive_skb) if (channel->type->receive_skb(channel, skb)) - goto handled; + return; /* Pass the packet up */ netif_receive_skb(skb); - -handled: - /* Update allocation strategy method */ - channel->rx_alloc_level += RX_ALLOC_FACTOR_SKB; } /* Handle a received packet. Second half: Touches packet payload. */ @@ -602,60 +512,13 @@ void __efx_rx_packet(struct efx_channel *channel, struct efx_rx_buffer *rx_buf) return; } - if (!(rx_buf->flags & EFX_RX_BUF_PAGE)) { - struct sk_buff *skb = rx_buf->u.skb; - - prefetch(skb_shinfo(skb)); - - skb_reserve(skb, efx->type->rx_buffer_hash_size); - skb_put(skb, rx_buf->len); - - if (efx->net_dev->features & NETIF_F_RXHASH) - skb->rxhash = efx_rx_buf_hash(eh); - - /* Move past the ethernet header. rx_buf->data still points - * at the ethernet header */ - skb->protocol = eth_type_trans(skb, efx->net_dev); - - skb_record_rx_queue(skb, channel->rx_queue.core_index); - } - if (unlikely(!(efx->net_dev->features & NETIF_F_RXCSUM))) rx_buf->flags &= ~EFX_RX_PKT_CSUMMED; - if (likely(rx_buf->flags & (EFX_RX_BUF_PAGE | EFX_RX_PKT_CSUMMED)) && - !channel->type->receive_skb) + if (!channel->type->receive_skb) efx_rx_packet_gro(channel, rx_buf, eh); else - efx_rx_deliver(channel, rx_buf); -} - -void efx_rx_strategy(struct efx_channel *channel) -{ - enum efx_rx_alloc_method method = rx_alloc_method; - - if (channel->type->receive_skb) { - channel->rx_alloc_push_pages = false; - return; - } - - /* Only makes sense to use page based allocation if GRO is enabled */ - if (!(channel->efx->net_dev->features & NETIF_F_GRO)) { - method = RX_ALLOC_METHOD_SKB; - } else if (method == RX_ALLOC_METHOD_AUTO) { - /* Constrain the rx_alloc_level */ - if (channel->rx_alloc_level < 0) - channel->rx_alloc_level = 0; - else if (channel->rx_alloc_level > RX_ALLOC_LEVEL_MAX) - channel->rx_alloc_level = RX_ALLOC_LEVEL_MAX; - - /* Decide on the allocation method */ - method = ((channel->rx_alloc_level > RX_ALLOC_LEVEL_GRO) ? - RX_ALLOC_METHOD_PAGE : RX_ALLOC_METHOD_SKB); - } - - /* Push the option */ - channel->rx_alloc_push_pages = (method == RX_ALLOC_METHOD_PAGE); + efx_rx_deliver(channel, eh, rx_buf); } int efx_probe_rx_queue(struct efx_rx_queue *rx_queue) @@ -756,9 +619,6 @@ void efx_remove_rx_queue(struct efx_rx_queue *rx_queue) } -module_param(rx_alloc_method, int, 0644); -MODULE_PARM_DESC(rx_alloc_method, "Allocation method used for RX buffers"); - module_param(rx_refill_threshold, uint, 0444); MODULE_PARM_DESC(rx_refill_threshold, "RX descriptor ring refill threshold (%)"); -- cgit v1.2.3 From 272baeeb6a98f5f746c2eeab4973c2df89e9d7ea Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 29 Jan 2013 23:33:14 +0000 Subject: sfc: Properly distinguish RX buffer and DMA lengths Replace efx_nic::rx_buffer_len with efx_nic::rx_dma_len, the maximum RX DMA length. Signed-off-by: Ben Hutchings --- drivers/net/ethernet/sfc/rx.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) (limited to 'drivers/net/ethernet/sfc/rx.c') diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index e7aa28eb9327..31361db28f91 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -27,8 +27,9 @@ /* Number of RX descriptors pushed at once. 
*/ #define EFX_RX_BATCH 8 -/* Maximum size of a buffer sharing a page */ -#define EFX_RX_HALF_PAGE ((PAGE_SIZE >> 1) - sizeof(struct efx_rx_page_state)) +/* Maximum length for an RX descriptor sharing a page */ +#define EFX_RX_HALF_PAGE ((PAGE_SIZE >> 1) - sizeof(struct efx_rx_page_state) \ + - EFX_PAGE_IP_ALIGN) /* Size of buffer allocated for skb header area. */ #define EFX_SKB_HEADERS 64u @@ -52,10 +53,6 @@ static inline unsigned int efx_rx_buf_offset(struct efx_nic *efx, { return buf->page_offset + efx->type->rx_buffer_hash_size; } -static inline unsigned int efx_rx_buf_size(struct efx_nic *efx) -{ - return PAGE_SIZE << efx->rx_buffer_order; -} static u8 *efx_rx_buf_eh(struct efx_nic *efx, struct efx_rx_buffer *buf) { @@ -105,7 +102,7 @@ static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue) if (unlikely(page == NULL)) return -ENOMEM; dma_addr = dma_map_page(&efx->pci_dev->dev, page, 0, - efx_rx_buf_size(efx), + PAGE_SIZE << efx->rx_buffer_order, DMA_FROM_DEVICE); if (unlikely(dma_mapping_error(&efx->pci_dev->dev, dma_addr))) { __free_pages(page, efx->rx_buffer_order); @@ -124,12 +121,12 @@ static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue) rx_buf->dma_addr = dma_addr + EFX_PAGE_IP_ALIGN; rx_buf->page = page; rx_buf->page_offset = page_offset + EFX_PAGE_IP_ALIGN; - rx_buf->len = efx->rx_buffer_len - EFX_PAGE_IP_ALIGN; + rx_buf->len = efx->rx_dma_len; rx_buf->flags = 0; ++rx_queue->added_count; ++state->refcnt; - if ((~count & 1) && (efx->rx_buffer_len <= EFX_RX_HALF_PAGE)) { + if ((~count & 1) && (efx->rx_dma_len <= EFX_RX_HALF_PAGE)) { /* Use the second half of the page */ get_page(page); dma_addr += (PAGE_SIZE >> 1); @@ -153,7 +150,7 @@ static void efx_unmap_rx_buffer(struct efx_nic *efx, if (--state->refcnt == 0) { dma_unmap_page(&efx->pci_dev->dev, state->dma_addr, - efx_rx_buf_size(efx), + PAGE_SIZE << efx->rx_buffer_order, DMA_FROM_DEVICE); } else if (used_len) { dma_sync_single_for_cpu(&efx->pci_dev->dev, @@ -221,7 +218,7 @@ static void efx_recycle_rx_buffer(struct efx_channel *channel, rx_buf->flags = 0; - if (efx->rx_buffer_len <= EFX_RX_HALF_PAGE && + if (efx->rx_dma_len <= EFX_RX_HALF_PAGE && page_count(rx_buf->page) == 1) efx_resurrect_rx_buffer(rx_queue, rx_buf); -- cgit v1.2.3 From ff734ef4bca05fd5cd51b83d2e2a9f008b64f9a3 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 29 Jan 2013 23:33:14 +0000 Subject: sfc: Wrap __efx_rx_packet() with efx_rx_flush_packet() The pipeline mechanism will need to change a bit for scattered packets. Add a wrapper to insulate efx_process_channel() from this. Signed-off-by: Ben Hutchings --- drivers/net/ethernet/sfc/rx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/net/ethernet/sfc/rx.c') diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index 31361db28f91..60f4eb7cebc6 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -465,8 +465,7 @@ void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index, */ rx_buf->len = len - efx->type->rx_buffer_hash_size; out: - if (channel->rx_pkt) - __efx_rx_packet(channel, channel->rx_pkt); + efx_rx_flush_packet(channel); channel->rx_pkt = rx_buf; } -- cgit v1.2.3 From b184f16b7feb9ede7d658ee6f2c77434d580d764 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 29 Jan 2013 23:33:15 +0000 Subject: sfc: Replace efx_rx_buf_eh() with simpler efx_rx_buf_va() efx_rx_buf_va() returns the virtual address of the current start of the buffer. The callers must add the hash prefix size themselves. 
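As an illustrative sketch (both lines are taken verbatim from the hunks
below), a call that previously hid the hash prefix inside the helper:

	prefetch(efx_rx_buf_eh(efx, rx_buf));

now skips the prefix explicitly at the call site:

	prefetch(efx_rx_buf_va(rx_buf) + efx->type->rx_buffer_hash_size);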
Signed-off-by: Ben Hutchings
---
 drivers/net/ethernet/sfc/rx.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'drivers/net/ethernet/sfc/rx.c')

diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c
index 60f4eb7cebc6..23d67d1f136f 100644
--- a/drivers/net/ethernet/sfc/rx.c
+++ b/drivers/net/ethernet/sfc/rx.c
@@ -54,9 +54,9 @@ static inline unsigned int efx_rx_buf_offset(struct efx_nic *efx,
 	return buf->page_offset + efx->type->rx_buffer_hash_size;
 }
 
-static u8 *efx_rx_buf_eh(struct efx_nic *efx, struct efx_rx_buffer *buf)
+static inline u8 *efx_rx_buf_va(struct efx_rx_buffer *buf)
 {
-	return page_address(buf->page) + efx_rx_buf_offset(efx, buf);
+	return page_address(buf->page) + buf->page_offset;
 }
 
 static inline u32 efx_rx_buf_hash(const u8 *eh)
@@ -458,7 +458,7 @@ void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index,
 	/* Prefetch nice and early so data will (hopefully) be in cache by
 	 * the time we look at it.
 	 */
-	prefetch(efx_rx_buf_eh(efx, rx_buf));
+	prefetch(efx_rx_buf_va(rx_buf) + efx->type->rx_buffer_hash_size);
 
 	/* Pipeline receives so that we give time for packet headers to be
 	 * prefetched into cache.
@@ -497,7 +497,7 @@ static void efx_rx_deliver(struct efx_channel *channel, u8 *eh,
 void __efx_rx_packet(struct efx_channel *channel, struct efx_rx_buffer *rx_buf)
 {
 	struct efx_nic *efx = channel->efx;
-	u8 *eh = efx_rx_buf_eh(efx, rx_buf);
+	u8 *eh = efx_rx_buf_va(rx_buf) + efx->type->rx_buffer_hash_size;
 
 	/* If we're in loopback test, then pass the packet directly to the
 	 * loopback layer, and free the rx_buf here
-- cgit v1.2.3

From 5036b7c7b9137bd084f28438396432348f20e0bc Mon Sep 17 00:00:00 2001
From: Ben Hutchings
Date: Tue, 29 Jan 2013 23:33:15 +0000
Subject: sfc: Explicitly prefetch RX hash prefix, not just Ethernet header

Currently we prefetch from the Ethernet header, but we will also read
the hash prefix.  In practice they should be in the same cache line
and this won't hurt, but it is still pointless to add on the hash
prefix size.

Signed-off-by: Ben Hutchings
---
 drivers/net/ethernet/sfc/rx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/net/ethernet/sfc/rx.c')

diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c
index 23d67d1f136f..8e78a2fe7364 100644
--- a/drivers/net/ethernet/sfc/rx.c
+++ b/drivers/net/ethernet/sfc/rx.c
@@ -458,7 +458,7 @@ void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index,
 	/* Prefetch nice and early so data will (hopefully) be in cache by
 	 * the time we look at it.
 	 */
-	prefetch(efx_rx_buf_va(rx_buf) + efx->type->rx_buffer_hash_size);
+	prefetch(efx_rx_buf_va(rx_buf));
 
 	/* Pipeline receives so that we give time for packet headers to be
 	 * prefetched into cache.
-- cgit v1.2.3

From b74e3e8cd6f952faf8797fca81a5a2ceace6b9aa Mon Sep 17 00:00:00 2001
From: Ben Hutchings
Date: Tue, 29 Jan 2013 23:33:15 +0000
Subject: sfc: Update RX buffer address together with length

Adjust rx_buf->page_offset when we eat the RX hash prefix.  Remove
efx_rx_buf_offset(), which is now redundant.
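The core of the change, as it appears in the efx_rx_packet() hunk
below: the hash prefix is consumed once, by moving the buffer start,
instead of being skipped by every reader:

	rx_buf->page_offset += efx->type->rx_buffer_hash_size;
	rx_buf->len = len - efx->type->rx_buffer_hash_size;

After this, efx_rx_buf_va() points directly at the Ethernet header.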
Signed-off-by: Ben Hutchings
---
 drivers/net/ethernet/sfc/rx.c | 18 ++++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

(limited to 'drivers/net/ethernet/sfc/rx.c')

diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c
index 8e78a2fe7364..04518722ac1d 100644
--- a/drivers/net/ethernet/sfc/rx.c
+++ b/drivers/net/ethernet/sfc/rx.c
@@ -47,13 +47,6 @@ static unsigned int rx_refill_threshold;
  */
 #define EFX_RXD_HEAD_ROOM 2
 
-/* Offset of ethernet header within page */
-static inline unsigned int efx_rx_buf_offset(struct efx_nic *efx,
-					     struct efx_rx_buffer *buf)
-{
-	return buf->page_offset + efx->type->rx_buffer_hash_size;
-}
-
 static inline u8 *efx_rx_buf_va(struct efx_rx_buffer *buf)
 {
 	return page_address(buf->page) + buf->page_offset;
@@ -356,8 +349,7 @@ static void efx_rx_packet_gro(struct efx_channel *channel,
 	if (efx->net_dev->features & NETIF_F_RXHASH)
 		skb->rxhash = efx_rx_buf_hash(eh);
 
-	skb_fill_page_desc(skb, 0, page,
-			   efx_rx_buf_offset(efx, rx_buf), rx_buf->len);
+	skb_fill_page_desc(skb, 0, page, rx_buf->page_offset, rx_buf->len);
 
 	skb->len = rx_buf->len;
 	skb->data_len = rx_buf->len;
@@ -399,7 +391,7 @@ static struct sk_buff *efx_rx_mk_skb(struct efx_channel *channel,
 	if (rx_buf->len > hdr_len) {
 		skb->data_len = skb->len - hdr_len;
 		skb_fill_page_desc(skb, 0, rx_buf->page,
-				   efx_rx_buf_offset(efx, rx_buf) + hdr_len,
+				   rx_buf->page_offset + hdr_len,
 				   skb->data_len);
 	} else {
 		__free_pages(rx_buf->page, efx->rx_buffer_order);
@@ -460,10 +452,12 @@ void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index,
 	 */
 	prefetch(efx_rx_buf_va(rx_buf));
 
+	rx_buf->page_offset += efx->type->rx_buffer_hash_size;
+	rx_buf->len = len - efx->type->rx_buffer_hash_size;
+
 	/* Pipeline receives so that we give time for packet headers to be
 	 * prefetched into cache.
 	 */
-	rx_buf->len = len - efx->type->rx_buffer_hash_size;
 out:
 	efx_rx_flush_packet(channel);
 	channel->rx_pkt = rx_buf;
@@ -497,7 +491,7 @@ static void efx_rx_deliver(struct efx_channel *channel, u8 *eh,
 void __efx_rx_packet(struct efx_channel *channel, struct efx_rx_buffer *rx_buf)
 {
 	struct efx_nic *efx = channel->efx;
-	u8 *eh = efx_rx_buf_va(rx_buf) + efx->type->rx_buffer_hash_size;
+	u8 *eh = efx_rx_buf_va(rx_buf);
 
 	/* If we're in loopback test, then pass the packet directly to the
 	 * loopback layer, and free the rx_buf here
-- cgit v1.2.3

From 85740cdf0b84224a9fce62dc9150008ef8d6ab4e Mon Sep 17 00:00:00 2001
From: Ben Hutchings
Date: Tue, 29 Jan 2013 23:33:15 +0000
Subject: sfc: Enable RX DMA scattering where possible

Enable RX DMA scattering iff an RX buffer large enough for the current
MTU will not fit into a single page and the NIC supports DMA
scattering for kernel-mode RX queues.

On Falcon and Siena, the RX_USR_BUF_SIZE field is used as the DMA
limit for all RX queues with scatter enabled.  Set it to 1824,
matching what Onload uses now.

Maintain a statistic for frames truncated due to lack of descriptors
(rx_nodesc_trunc).  This is distinct from rx_frm_trunc which may be
incremented when scattering is disabled and implies an over-length
frame.

Whenever an MTU change causes scattering to be turned on or off,
update filters that point to the PF queues, but leave others
unchanged, as VF drivers assume scattering is off.

Add n_frags parameters to various functions, and make them iterate:
- efx_rx_packet()
- efx_recycle_rx_buffers()
- efx_rx_mk_skb()
- efx_rx_deliver()

Make efx_handle_rx_event() responsible for updating
efx_rx_queue::removed_count.
Change the RX pipeline state to a starting ring index and number of fragments, and make __efx_rx_packet() responsible for clearing it. Based on earlier versions by David Riddoch and Jon Cooper. Signed-off-by: Ben Hutchings --- drivers/net/ethernet/sfc/rx.c | 211 +++++++++++++++++++++++++++--------------- 1 file changed, 138 insertions(+), 73 deletions(-) (limited to 'drivers/net/ethernet/sfc/rx.c') diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index 04518722ac1d..88aa1ff01e3f 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -39,13 +39,17 @@ */ static unsigned int rx_refill_threshold; +/* Each packet can consume up to ceil(max_frame_len / buffer_size) buffers */ +#define EFX_RX_MAX_FRAGS DIV_ROUND_UP(EFX_MAX_FRAME_LEN(EFX_MAX_MTU), \ + EFX_RX_USR_BUF_SIZE) + /* * RX maximum head room required. * - * This must be at least 1 to prevent overflow and at least 2 to allow - * pipelined receives. + * This must be at least 1 to prevent overflow, plus one packet-worth + * to allow pipelined receives. */ -#define EFX_RXD_HEAD_ROOM 2 +#define EFX_RXD_HEAD_ROOM (1 + EFX_RX_MAX_FRAGS) static inline u8 *efx_rx_buf_va(struct efx_rx_buffer *buf) { @@ -66,6 +70,15 @@ static inline u32 efx_rx_buf_hash(const u8 *eh) #endif } +static inline struct efx_rx_buffer * +efx_rx_buf_next(struct efx_rx_queue *rx_queue, struct efx_rx_buffer *rx_buf) +{ + if (unlikely(rx_buf == efx_rx_buffer(rx_queue, rx_queue->ptr_mask))) + return efx_rx_buffer(rx_queue, 0); + else + return rx_buf + 1; +} + /** * efx_init_rx_buffers - create EFX_RX_BATCH page-based RX buffers * @@ -199,28 +212,34 @@ static void efx_resurrect_rx_buffer(struct efx_rx_queue *rx_queue, ++rx_queue->added_count; } -/* Recycle the given rx buffer directly back into the rx_queue. There is - * always room to add this buffer, because we've just popped a buffer. */ -static void efx_recycle_rx_buffer(struct efx_channel *channel, - struct efx_rx_buffer *rx_buf) +/* Recycle buffers directly back into the rx_queue. There is always + * room to add these buffer, because we've just popped them. + */ +static void efx_recycle_rx_buffers(struct efx_channel *channel, + struct efx_rx_buffer *rx_buf, + unsigned int n_frags) { struct efx_nic *efx = channel->efx; struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel); struct efx_rx_buffer *new_buf; unsigned index; - rx_buf->flags = 0; + do { + rx_buf->flags = 0; - if (efx->rx_dma_len <= EFX_RX_HALF_PAGE && - page_count(rx_buf->page) == 1) - efx_resurrect_rx_buffer(rx_queue, rx_buf); + if (efx->rx_dma_len <= EFX_RX_HALF_PAGE && + page_count(rx_buf->page) == 1) + efx_resurrect_rx_buffer(rx_queue, rx_buf); - index = rx_queue->added_count & rx_queue->ptr_mask; - new_buf = efx_rx_buffer(rx_queue, index); + index = rx_queue->added_count & rx_queue->ptr_mask; + new_buf = efx_rx_buffer(rx_queue, index); - memcpy(new_buf, rx_buf, sizeof(*new_buf)); - rx_buf->page = NULL; - ++rx_queue->added_count; + memcpy(new_buf, rx_buf, sizeof(*new_buf)); + rx_buf->page = NULL; + ++rx_queue->added_count; + + rx_buf = efx_rx_buf_next(rx_queue, rx_buf); + } while (--n_frags); } /** @@ -328,46 +347,56 @@ static void efx_rx_packet__check_len(struct efx_rx_queue *rx_queue, /* Pass a received packet up through GRO. GRO can handle pages * regardless of checksum state and skbs with a good checksum. 
*/ -static void efx_rx_packet_gro(struct efx_channel *channel, - struct efx_rx_buffer *rx_buf, - const u8 *eh) +static void +efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf, + unsigned int n_frags, u8 *eh) { struct napi_struct *napi = &channel->napi_str; gro_result_t gro_result; struct efx_nic *efx = channel->efx; - struct page *page = rx_buf->page; struct sk_buff *skb; - rx_buf->page = NULL; - skb = napi_get_frags(napi); - if (!skb) { - put_page(page); + if (unlikely(!skb)) { + while (n_frags--) { + put_page(rx_buf->page); + rx_buf->page = NULL; + rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf); + } return; } if (efx->net_dev->features & NETIF_F_RXHASH) skb->rxhash = efx_rx_buf_hash(eh); - - skb_fill_page_desc(skb, 0, page, rx_buf->page_offset, rx_buf->len); - - skb->len = rx_buf->len; - skb->data_len = rx_buf->len; - skb->truesize += rx_buf->len; skb->ip_summed = ((rx_buf->flags & EFX_RX_PKT_CSUMMED) ? CHECKSUM_UNNECESSARY : CHECKSUM_NONE); - skb_record_rx_queue(skb, channel->rx_queue.core_index); + for (;;) { + skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, + rx_buf->page, rx_buf->page_offset, + rx_buf->len); + rx_buf->page = NULL; + skb->len += rx_buf->len; + if (skb_shinfo(skb)->nr_frags == n_frags) + break; - gro_result = napi_gro_frags(napi); + rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf); + } + + skb->data_len = skb->len; + skb->truesize += n_frags * efx->rx_buffer_truesize; + + skb_record_rx_queue(skb, channel->rx_queue.core_index); + gro_result = napi_gro_frags(napi); if (gro_result != GRO_DROP) channel->irq_mod_score += 2; } -/* Allocate and construct an SKB around a struct page.*/ +/* Allocate and construct an SKB around page fragments */ static struct sk_buff *efx_rx_mk_skb(struct efx_channel *channel, struct efx_rx_buffer *rx_buf, + unsigned int n_frags, u8 *eh, int hdr_len) { struct efx_nic *efx = channel->efx; @@ -381,25 +410,32 @@ static struct sk_buff *efx_rx_mk_skb(struct efx_channel *channel, EFX_BUG_ON_PARANOID(rx_buf->len < hdr_len); skb_reserve(skb, EFX_PAGE_SKB_ALIGN); + memcpy(__skb_put(skb, hdr_len), eh, hdr_len); - skb->len = rx_buf->len; - skb->truesize = rx_buf->len + sizeof(struct sk_buff); - memcpy(skb->data, eh, hdr_len); - skb->tail += hdr_len; - - /* Append the remaining page onto the frag list */ + /* Append the remaining page(s) onto the frag list */ if (rx_buf->len > hdr_len) { - skb->data_len = skb->len - hdr_len; - skb_fill_page_desc(skb, 0, rx_buf->page, - rx_buf->page_offset + hdr_len, - skb->data_len); + rx_buf->page_offset += hdr_len; + rx_buf->len -= hdr_len; + + for (;;) { + skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, + rx_buf->page, rx_buf->page_offset, + rx_buf->len); + rx_buf->page = NULL; + skb->len += rx_buf->len; + skb->data_len += rx_buf->len; + if (skb_shinfo(skb)->nr_frags == n_frags) + break; + + rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf); + } } else { __free_pages(rx_buf->page, efx->rx_buffer_order); - skb->data_len = 0; + rx_buf->page = NULL; + n_frags = 0; } - /* Ownership has transferred from the rx_buf to skb */ - rx_buf->page = NULL; + skb->truesize += n_frags * efx->rx_buffer_truesize; /* Move past the ethernet header */ skb->protocol = eth_type_trans(skb, efx->net_dev); @@ -408,7 +444,7 @@ static struct sk_buff *efx_rx_mk_skb(struct efx_channel *channel, } void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index, - unsigned int len, u16 flags) + unsigned int n_frags, unsigned int len, u16 flags) { struct efx_nic *efx = rx_queue->efx; struct efx_channel 
*channel = efx_rx_queue_channel(rx_queue); @@ -417,35 +453,43 @@ void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index, rx_buf = efx_rx_buffer(rx_queue, index); rx_buf->flags |= flags; - /* This allows the refill path to post another buffer. - * EFX_RXD_HEAD_ROOM ensures that the slot we are using - * isn't overwritten yet. - */ - rx_queue->removed_count++; - - /* Validate the length encoded in the event vs the descriptor pushed */ - efx_rx_packet__check_len(rx_queue, rx_buf, len); + /* Validate the number of fragments and completed length */ + if (n_frags == 1) { + efx_rx_packet__check_len(rx_queue, rx_buf, len); + } else if (unlikely(n_frags > EFX_RX_MAX_FRAGS) || + unlikely(len <= (n_frags - 1) * EFX_RX_USR_BUF_SIZE) || + unlikely(len > n_frags * EFX_RX_USR_BUF_SIZE) || + unlikely(!efx->rx_scatter)) { + /* If this isn't an explicit discard request, either + * the hardware or the driver is broken. + */ + WARN_ON(!(len == 0 && rx_buf->flags & EFX_RX_PKT_DISCARD)); + rx_buf->flags |= EFX_RX_PKT_DISCARD; + } netif_vdbg(efx, rx_status, efx->net_dev, - "RX queue %d received id %x at %llx+%x %s%s\n", + "RX queue %d received ids %x-%x len %d %s%s\n", efx_rx_queue_index(rx_queue), index, - (unsigned long long)rx_buf->dma_addr, len, + (index + n_frags - 1) & rx_queue->ptr_mask, len, (rx_buf->flags & EFX_RX_PKT_CSUMMED) ? " [SUMMED]" : "", (rx_buf->flags & EFX_RX_PKT_DISCARD) ? " [DISCARD]" : ""); - /* Discard packet, if instructed to do so */ + /* Discard packet, if instructed to do so. Process the + * previous receive first. + */ if (unlikely(rx_buf->flags & EFX_RX_PKT_DISCARD)) { - efx_recycle_rx_buffer(channel, rx_buf); - - /* Don't hold off the previous receive */ - rx_buf = NULL; - goto out; + efx_rx_flush_packet(channel); + efx_recycle_rx_buffers(channel, rx_buf, n_frags); + return; } + if (n_frags == 1) + rx_buf->len = len; + /* Release and/or sync DMA mapping - assumes all RX buffers * consumed in-order per RX queue */ - efx_unmap_rx_buffer(efx, rx_buf, len); + efx_unmap_rx_buffer(efx, rx_buf, rx_buf->len); /* Prefetch nice and early so data will (hopefully) be in cache by * the time we look at it. @@ -453,23 +497,40 @@ void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index, prefetch(efx_rx_buf_va(rx_buf)); rx_buf->page_offset += efx->type->rx_buffer_hash_size; - rx_buf->len = len - efx->type->rx_buffer_hash_size; + rx_buf->len -= efx->type->rx_buffer_hash_size; + + if (n_frags > 1) { + /* Release/sync DMA mapping for additional fragments. + * Fix length for last fragment. + */ + unsigned int tail_frags = n_frags - 1; + + for (;;) { + rx_buf = efx_rx_buf_next(rx_queue, rx_buf); + if (--tail_frags == 0) + break; + efx_unmap_rx_buffer(efx, rx_buf, EFX_RX_USR_BUF_SIZE); + } + rx_buf->len = len - (n_frags - 1) * EFX_RX_USR_BUF_SIZE; + efx_unmap_rx_buffer(efx, rx_buf, rx_buf->len); + } /* Pipeline receives so that we give time for packet headers to be * prefetched into cache. 
*/ -out: efx_rx_flush_packet(channel); - channel->rx_pkt = rx_buf; + channel->rx_pkt_n_frags = n_frags; + channel->rx_pkt_index = index; } static void efx_rx_deliver(struct efx_channel *channel, u8 *eh, - struct efx_rx_buffer *rx_buf) + struct efx_rx_buffer *rx_buf, + unsigned int n_frags) { struct sk_buff *skb; u16 hdr_len = min_t(u16, rx_buf->len, EFX_SKB_HEADERS); - skb = efx_rx_mk_skb(channel, rx_buf, eh, hdr_len); + skb = efx_rx_mk_skb(channel, rx_buf, n_frags, eh, hdr_len); if (unlikely(skb == NULL)) { efx_free_rx_buffer(channel->efx, rx_buf); return; @@ -488,9 +549,11 @@ static void efx_rx_deliver(struct efx_channel *channel, u8 *eh, } /* Handle a received packet. Second half: Touches packet payload. */ -void __efx_rx_packet(struct efx_channel *channel, struct efx_rx_buffer *rx_buf) +void __efx_rx_packet(struct efx_channel *channel) { struct efx_nic *efx = channel->efx; + struct efx_rx_buffer *rx_buf = + efx_rx_buffer(&channel->rx_queue, channel->rx_pkt_index); u8 *eh = efx_rx_buf_va(rx_buf); /* If we're in loopback test, then pass the packet directly to the @@ -499,16 +562,18 @@ void __efx_rx_packet(struct efx_channel *channel, struct efx_rx_buffer *rx_buf) if (unlikely(efx->loopback_selftest)) { efx_loopback_rx_packet(efx, eh, rx_buf->len); efx_free_rx_buffer(efx, rx_buf); - return; + goto out; } if (unlikely(!(efx->net_dev->features & NETIF_F_RXCSUM))) rx_buf->flags &= ~EFX_RX_PKT_CSUMMED; if (!channel->type->receive_skb) - efx_rx_packet_gro(channel, rx_buf, eh); + efx_rx_packet_gro(channel, rx_buf, channel->rx_pkt_n_frags, eh); else - efx_rx_deliver(channel, eh, rx_buf); + efx_rx_deliver(channel, eh, rx_buf, channel->rx_pkt_n_frags); +out: + channel->rx_pkt_n_frags = 0; } int efx_probe_rx_queue(struct efx_rx_queue *rx_queue) -- cgit v1.2.3 From 2768935a46603bb9bdd121864b1f2b2e8a71cccc Mon Sep 17 00:00:00 2001 From: Daniel Pieczko Date: Wed, 13 Feb 2013 10:54:41 +0000 Subject: sfc: reuse pages to avoid DMA mapping/unmapping costs On POWER systems, DMA mapping/unmapping operations are very expensive. These changes reduce these costs by trying to reuse DMA mapped pages. After all the buffers associated with a page have been processed and passed up, the page is placed into a ring (if there is room). For each page that is required for a refill operation, a page in the ring is examined to determine if its page count has fallen to 1, ie. the kernel has released its reference to these packets. If this is the case, the page can be immediately added back into the RX descriptor ring, without having to re-map it for DMA. If the kernel is still holding a reference to this page, it is removed from the ring and unmapped for DMA. Then a new page, which can immediately be used by RX buffers in the descriptor ring, is allocated and DMA mapped. The time a page needs to spend in the recycle ring before the kernel has released its page references is based on the number of buffers that use this page. As large pages can hold more RX buffers, the RX recycle ring can be shorter. This reduces memory usage on POWER systems, while maintaining the performance gain achieved by recycling pages, following the driver change to pack more than two RX buffers into large pages. When an IOMMU is not present, the recycle ring can be small to reduce memory usage, since DMA mapping operations are inexpensive. 
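As a sketch of the reuse test (see efx_reuse_page() in the diff below),
a page that still holds its DMA mapping can go straight back into use
once the recycle ring holds the only remaining reference to it:

	/* If page_count is 1 then we hold the only reference to this page. */
	if (page_count(page) == 1) {
		++rx_queue->page_recycle_count;
		return page;	/* reuse the existing DMA mapping */
	}
	/* Otherwise unmap it, drop our reference and allocate afresh. */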
With a small recycle ring, attempting to refill the descriptor queue with more buffers than the equivalent size of the recycle ring could ultimately lead to memory leaks if page entries in the recycle ring were overwritten. To prevent this, the check to see if the recycle ring is full is changed to check if the next entry to be written is NULL. [bwh: Combine and rebase several commits so this is complete before the following buffer-packing changes. Remove module parameter.] Signed-off-by: Ben Hutchings --- drivers/net/ethernet/sfc/rx.c | 299 +++++++++++++++++++++++++++++------------- 1 file changed, 205 insertions(+), 94 deletions(-) (limited to 'drivers/net/ethernet/sfc/rx.c') diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index 88aa1ff01e3f..eea56f3ec81c 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include "net_driver.h" @@ -27,6 +28,13 @@ /* Number of RX descriptors pushed at once. */ #define EFX_RX_BATCH 8 +/* Number of RX buffers to recycle pages for. When creating the RX page recycle + * ring, this number is divided by the number of buffers per page to calculate + * the number of pages to store in the RX page recycle ring. + */ +#define EFX_RECYCLE_RING_SIZE_IOMMU 4096 +#define EFX_RECYCLE_RING_SIZE_NOIOMMU (2 * EFX_RX_BATCH) + /* Maximum length for an RX descriptor sharing a page */ #define EFX_RX_HALF_PAGE ((PAGE_SIZE >> 1) - sizeof(struct efx_rx_page_state) \ - EFX_PAGE_IP_ALIGN) @@ -79,6 +87,56 @@ efx_rx_buf_next(struct efx_rx_queue *rx_queue, struct efx_rx_buffer *rx_buf) return rx_buf + 1; } +static inline void efx_sync_rx_buffer(struct efx_nic *efx, + struct efx_rx_buffer *rx_buf, + unsigned int len) +{ + dma_sync_single_for_cpu(&efx->pci_dev->dev, rx_buf->dma_addr, len, + DMA_FROM_DEVICE); +} + +/* Return true if this is the last RX buffer using a page. */ +static inline bool efx_rx_is_last_buffer(struct efx_nic *efx, + struct efx_rx_buffer *rx_buf) +{ + return (rx_buf->page_offset >= (PAGE_SIZE >> 1) || + efx->rx_dma_len > EFX_RX_HALF_PAGE); +} + +/* Check the RX page recycle ring for a page that can be reused. */ +static struct page *efx_reuse_page(struct efx_rx_queue *rx_queue) +{ + struct efx_nic *efx = rx_queue->efx; + struct page *page; + struct efx_rx_page_state *state; + unsigned index; + + index = rx_queue->page_remove & rx_queue->page_ptr_mask; + page = rx_queue->page_ring[index]; + if (page == NULL) + return NULL; + + rx_queue->page_ring[index] = NULL; + /* page_remove cannot exceed page_add. */ + if (rx_queue->page_remove != rx_queue->page_add) + ++rx_queue->page_remove; + + /* If page_count is 1 then we hold the only reference to this page. 
*/ + if (page_count(page) == 1) { + ++rx_queue->page_recycle_count; + return page; + } else { + state = page_address(page); + dma_unmap_page(&efx->pci_dev->dev, state->dma_addr, + PAGE_SIZE << efx->rx_buffer_order, + DMA_FROM_DEVICE); + put_page(page); + ++rx_queue->page_recycle_failed; + } + + return NULL; +} + /** * efx_init_rx_buffers - create EFX_RX_BATCH page-based RX buffers * @@ -103,20 +161,28 @@ static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue) BUILD_BUG_ON(EFX_RX_BATCH & 1); for (count = 0; count < EFX_RX_BATCH; ++count) { - page = alloc_pages(__GFP_COLD | __GFP_COMP | GFP_ATOMIC, - efx->rx_buffer_order); - if (unlikely(page == NULL)) - return -ENOMEM; - dma_addr = dma_map_page(&efx->pci_dev->dev, page, 0, - PAGE_SIZE << efx->rx_buffer_order, - DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(&efx->pci_dev->dev, dma_addr))) { - __free_pages(page, efx->rx_buffer_order); - return -EIO; + page = efx_reuse_page(rx_queue); + if (page == NULL) { + page = alloc_pages(__GFP_COLD | __GFP_COMP | GFP_ATOMIC, + efx->rx_buffer_order); + if (unlikely(page == NULL)) + return -ENOMEM; + dma_addr = + dma_map_page(&efx->pci_dev->dev, page, 0, + PAGE_SIZE << efx->rx_buffer_order, + DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(&efx->pci_dev->dev, + dma_addr))) { + __free_pages(page, efx->rx_buffer_order); + return -EIO; + } + state = page_address(page); + state->dma_addr = dma_addr; + } else { + state = page_address(page); + dma_addr = state->dma_addr; } - state = page_address(page); - state->refcnt = 0; - state->dma_addr = dma_addr; + get_page(page); dma_addr += sizeof(struct efx_rx_page_state); page_offset = sizeof(struct efx_rx_page_state); @@ -128,9 +194,7 @@ static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue) rx_buf->page = page; rx_buf->page_offset = page_offset + EFX_PAGE_IP_ALIGN; rx_buf->len = efx->rx_dma_len; - rx_buf->flags = 0; ++rx_queue->added_count; - ++state->refcnt; if ((~count & 1) && (efx->rx_dma_len <= EFX_RX_HALF_PAGE)) { /* Use the second half of the page */ @@ -145,99 +209,91 @@ static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue) return 0; } +/* Unmap a DMA-mapped page. This function is only called for the final RX + * buffer in a page. 
+ */ static void efx_unmap_rx_buffer(struct efx_nic *efx, - struct efx_rx_buffer *rx_buf, - unsigned int used_len) + struct efx_rx_buffer *rx_buf) { - if (rx_buf->page) { - struct efx_rx_page_state *state; - - state = page_address(rx_buf->page); - if (--state->refcnt == 0) { - dma_unmap_page(&efx->pci_dev->dev, - state->dma_addr, - PAGE_SIZE << efx->rx_buffer_order, - DMA_FROM_DEVICE); - } else if (used_len) { - dma_sync_single_for_cpu(&efx->pci_dev->dev, - rx_buf->dma_addr, used_len, - DMA_FROM_DEVICE); - } + struct page *page = rx_buf->page; + + if (page) { + struct efx_rx_page_state *state = page_address(page); + dma_unmap_page(&efx->pci_dev->dev, + state->dma_addr, + PAGE_SIZE << efx->rx_buffer_order, + DMA_FROM_DEVICE); } } -static void efx_free_rx_buffer(struct efx_nic *efx, - struct efx_rx_buffer *rx_buf) +static void efx_free_rx_buffer(struct efx_rx_buffer *rx_buf) { if (rx_buf->page) { - __free_pages(rx_buf->page, efx->rx_buffer_order); + put_page(rx_buf->page); rx_buf->page = NULL; } } -static void efx_fini_rx_buffer(struct efx_rx_queue *rx_queue, - struct efx_rx_buffer *rx_buf) +/* Attempt to recycle the page if there is an RX recycle ring; the page can + * only be added if this is the final RX buffer, to prevent pages being used in + * the descriptor ring and appearing in the recycle ring simultaneously. + */ +static void efx_recycle_rx_page(struct efx_channel *channel, + struct efx_rx_buffer *rx_buf) { - efx_unmap_rx_buffer(rx_queue->efx, rx_buf, 0); - efx_free_rx_buffer(rx_queue->efx, rx_buf); -} + struct page *page = rx_buf->page; + struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel); + struct efx_nic *efx = rx_queue->efx; + unsigned index; -/* Attempt to resurrect the other receive buffer that used to share this page, - * which had previously been passed up to the kernel and freed. */ -static void efx_resurrect_rx_buffer(struct efx_rx_queue *rx_queue, - struct efx_rx_buffer *rx_buf) -{ - struct efx_rx_page_state *state = page_address(rx_buf->page); - struct efx_rx_buffer *new_buf; - unsigned fill_level, index; - - /* +1 because efx_rx_packet() incremented removed_count. +1 because - * we'd like to insert an additional descriptor whilst leaving - * EFX_RXD_HEAD_ROOM for the non-recycle path */ - fill_level = (rx_queue->added_count - rx_queue->removed_count + 2); - if (unlikely(fill_level > rx_queue->max_fill)) { - /* We could place "state" on a list, and drain the list in - * efx_fast_push_rx_descriptors(). For now, this will do. */ + /* Only recycle the page after processing the final buffer. */ + if (!efx_rx_is_last_buffer(efx, rx_buf)) return; - } - ++state->refcnt; - get_page(rx_buf->page); + index = rx_queue->page_add & rx_queue->page_ptr_mask; + if (rx_queue->page_ring[index] == NULL) { + unsigned read_index = rx_queue->page_remove & + rx_queue->page_ptr_mask; - index = rx_queue->added_count & rx_queue->ptr_mask; - new_buf = efx_rx_buffer(rx_queue, index); - new_buf->dma_addr = rx_buf->dma_addr ^ (PAGE_SIZE >> 1); - new_buf->page = rx_buf->page; - new_buf->len = rx_buf->len; - ++rx_queue->added_count; + /* The next slot in the recycle ring is available, but + * increment page_remove if the read pointer currently + * points here. + */ + if (read_index == index) + ++rx_queue->page_remove; + rx_queue->page_ring[index] = page; + ++rx_queue->page_add; + return; + } + ++rx_queue->page_recycle_full; + efx_unmap_rx_buffer(efx, rx_buf); + put_page(rx_buf->page); } -/* Recycle buffers directly back into the rx_queue. 
There is always - * room to add these buffer, because we've just popped them. - */ +static void efx_fini_rx_buffer(struct efx_rx_queue *rx_queue, + struct efx_rx_buffer *rx_buf) +{ + /* Release the page reference we hold for the buffer. */ + if (rx_buf->page) + put_page(rx_buf->page); + + /* If this is the last buffer in a page, unmap and free it. */ + if (efx_rx_is_last_buffer(rx_queue->efx, rx_buf)) { + efx_unmap_rx_buffer(rx_queue->efx, rx_buf); + efx_free_rx_buffer(rx_buf); + } + rx_buf->page = NULL; +} + +/* Recycle the pages that are used by buffers that have just been received. */ static void efx_recycle_rx_buffers(struct efx_channel *channel, struct efx_rx_buffer *rx_buf, unsigned int n_frags) { - struct efx_nic *efx = channel->efx; struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel); - struct efx_rx_buffer *new_buf; - unsigned index; do { - rx_buf->flags = 0; - - if (efx->rx_dma_len <= EFX_RX_HALF_PAGE && - page_count(rx_buf->page) == 1) - efx_resurrect_rx_buffer(rx_queue, rx_buf); - - index = rx_queue->added_count & rx_queue->ptr_mask; - new_buf = efx_rx_buffer(rx_queue, index); - - memcpy(new_buf, rx_buf, sizeof(*new_buf)); - rx_buf->page = NULL; - ++rx_queue->added_count; - + efx_recycle_rx_page(channel, rx_buf); rx_buf = efx_rx_buf_next(rx_queue, rx_buf); } while (--n_frags); } @@ -451,7 +507,7 @@ void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index, struct efx_rx_buffer *rx_buf; rx_buf = efx_rx_buffer(rx_queue, index); - rx_buf->flags |= flags; + rx_buf->flags = flags; /* Validate the number of fragments and completed length */ if (n_frags == 1) { @@ -479,6 +535,7 @@ void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index, */ if (unlikely(rx_buf->flags & EFX_RX_PKT_DISCARD)) { efx_rx_flush_packet(channel); + put_page(rx_buf->page); efx_recycle_rx_buffers(channel, rx_buf, n_frags); return; } @@ -486,10 +543,10 @@ void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index, if (n_frags == 1) rx_buf->len = len; - /* Release and/or sync DMA mapping - assumes all RX buffers - * consumed in-order per RX queue + /* Release and/or sync the DMA mapping - assumes all RX buffers + * consumed in-order per RX queue. */ - efx_unmap_rx_buffer(efx, rx_buf, rx_buf->len); + efx_sync_rx_buffer(efx, rx_buf, rx_buf->len); /* Prefetch nice and early so data will (hopefully) be in cache by * the time we look at it. @@ -509,12 +566,16 @@ void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index, rx_buf = efx_rx_buf_next(rx_queue, rx_buf); if (--tail_frags == 0) break; - efx_unmap_rx_buffer(efx, rx_buf, EFX_RX_USR_BUF_SIZE); + efx_sync_rx_buffer(efx, rx_buf, EFX_RX_USR_BUF_SIZE); } rx_buf->len = len - (n_frags - 1) * EFX_RX_USR_BUF_SIZE; - efx_unmap_rx_buffer(efx, rx_buf, rx_buf->len); + efx_sync_rx_buffer(efx, rx_buf, rx_buf->len); } + /* All fragments have been DMA-synced, so recycle buffers and pages. */ + rx_buf = efx_rx_buffer(rx_queue, index); + efx_recycle_rx_buffers(channel, rx_buf, n_frags); + /* Pipeline receives so that we give time for packet headers to be * prefetched into cache. 
*/ @@ -532,7 +593,7 @@ static void efx_rx_deliver(struct efx_channel *channel, u8 *eh, skb = efx_rx_mk_skb(channel, rx_buf, n_frags, eh, hdr_len); if (unlikely(skb == NULL)) { - efx_free_rx_buffer(channel->efx, rx_buf); + efx_free_rx_buffer(rx_buf); return; } skb_record_rx_queue(skb, channel->rx_queue.core_index); @@ -561,7 +622,7 @@ void __efx_rx_packet(struct efx_channel *channel) */ if (unlikely(efx->loopback_selftest)) { efx_loopback_rx_packet(efx, eh, rx_buf->len); - efx_free_rx_buffer(efx, rx_buf); + efx_free_rx_buffer(rx_buf); goto out; } @@ -603,9 +664,32 @@ int efx_probe_rx_queue(struct efx_rx_queue *rx_queue) kfree(rx_queue->buffer); rx_queue->buffer = NULL; } + return rc; } +void efx_init_rx_recycle_ring(struct efx_nic *efx, + struct efx_rx_queue *rx_queue) +{ + unsigned int bufs_in_recycle_ring, page_ring_size; + + /* Set the RX recycle ring size */ +#ifdef CONFIG_PPC64 + bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU; +#else + if (efx->pci_dev->dev.iommu_group) + bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU; + else + bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_NOIOMMU; +#endif /* CONFIG_PPC64 */ + + page_ring_size = roundup_pow_of_two(bufs_in_recycle_ring / + efx->rx_bufs_per_page); + rx_queue->page_ring = kcalloc(page_ring_size, + sizeof(*rx_queue->page_ring), GFP_KERNEL); + rx_queue->page_ptr_mask = page_ring_size - 1; +} + void efx_init_rx_queue(struct efx_rx_queue *rx_queue) { struct efx_nic *efx = rx_queue->efx; @@ -619,6 +703,13 @@ void efx_init_rx_queue(struct efx_rx_queue *rx_queue) rx_queue->notified_count = 0; rx_queue->removed_count = 0; rx_queue->min_fill = -1U; + efx_init_rx_recycle_ring(efx, rx_queue); + + rx_queue->page_remove = 0; + rx_queue->page_add = rx_queue->page_ptr_mask + 1; + rx_queue->page_recycle_count = 0; + rx_queue->page_recycle_failed = 0; + rx_queue->page_recycle_full = 0; /* Initialise limit fields */ max_fill = efx->rxq_entries - EFX_RXD_HEAD_ROOM; @@ -642,6 +733,7 @@ void efx_init_rx_queue(struct efx_rx_queue *rx_queue) void efx_fini_rx_queue(struct efx_rx_queue *rx_queue) { int i; + struct efx_nic *efx = rx_queue->efx; struct efx_rx_buffer *rx_buf; netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev, @@ -653,13 +745,32 @@ void efx_fini_rx_queue(struct efx_rx_queue *rx_queue) del_timer_sync(&rx_queue->slow_fill); efx_nic_fini_rx(rx_queue); - /* Release RX buffers NB start at index 0 not current HW ptr */ + /* Release RX buffers from the current read ptr to the write ptr */ if (rx_queue->buffer) { - for (i = 0; i <= rx_queue->ptr_mask; i++) { - rx_buf = efx_rx_buffer(rx_queue, i); + for (i = rx_queue->removed_count; i < rx_queue->added_count; + i++) { + unsigned index = i & rx_queue->ptr_mask; + rx_buf = efx_rx_buffer(rx_queue, index); efx_fini_rx_buffer(rx_queue, rx_buf); } } + + /* Unmap and release the pages in the recycle ring. Remove the ring. 
+	 */
+	for (i = 0; i <= rx_queue->page_ptr_mask; i++) {
+		struct page *page = rx_queue->page_ring[i];
+		struct efx_rx_page_state *state;
+
+		if (page == NULL)
+			continue;
+
+		state = page_address(page);
+		dma_unmap_page(&efx->pci_dev->dev, state->dma_addr,
+			       PAGE_SIZE << efx->rx_buffer_order,
+			       DMA_FROM_DEVICE);
+		put_page(page);
+	}
+	kfree(rx_queue->page_ring);
+	rx_queue->page_ring = NULL;
 }
 
 void efx_remove_rx_queue(struct efx_rx_queue *rx_queue)
--
cgit v1.2.3

From 179ea7f039f68ae4247a340bfb59fd861e7def12 Mon Sep 17 00:00:00 2001
From: Ben Hutchings
Date: Thu, 7 Mar 2013 16:31:17 +0000
Subject: sfc: Replace efx_rx_is_last_buffer() with a flag

This condition is brittle and we have lots of flags to spare.

Signed-off-by: Ben Hutchings
---
 drivers/net/ethernet/sfc/rx.c | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

(limited to 'drivers/net/ethernet/sfc/rx.c')

diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c
index eea56f3ec81c..4cc2ba48a912 100644
--- a/drivers/net/ethernet/sfc/rx.c
+++ b/drivers/net/ethernet/sfc/rx.c
@@ -95,14 +95,6 @@ static inline void efx_sync_rx_buffer(struct efx_nic *efx,
 				DMA_FROM_DEVICE);
 }
 
-/* Return true if this is the last RX buffer using a page. */
-static inline bool efx_rx_is_last_buffer(struct efx_nic *efx,
-					 struct efx_rx_buffer *rx_buf)
-{
-	return (rx_buf->page_offset >= (PAGE_SIZE >> 1) ||
-		efx->rx_dma_len > EFX_RX_HALF_PAGE);
-}
-
 /* Check the RX page recycle ring for a page that can be reused. */
 static struct page *efx_reuse_page(struct efx_rx_queue *rx_queue)
 {
@@ -199,11 +191,14 @@ static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue)
 		if ((~count & 1) && (efx->rx_dma_len <= EFX_RX_HALF_PAGE)) {
 			/* Use the second half of the page */
 			get_page(page);
+			rx_buf->flags = 0;
 			dma_addr += (PAGE_SIZE >> 1);
 			page_offset += (PAGE_SIZE >> 1);
 			++count;
 			goto split;
 		}
+
+		rx_buf->flags = EFX_RX_BUF_LAST_IN_PAGE;
 	}
 
 	return 0;
@@ -247,7 +242,7 @@ static void efx_recycle_rx_page(struct efx_channel *channel,
 	unsigned index;
 
 	/* Only recycle the page after processing the final buffer. */
-	if (!efx_rx_is_last_buffer(efx, rx_buf))
+	if (!(rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE))
 		return;
 
 	index = rx_queue->page_add & rx_queue->page_ptr_mask;
@@ -278,7 +273,7 @@ static void efx_fini_rx_buffer(struct efx_rx_queue *rx_queue,
 		put_page(rx_buf->page);
 
 	/* If this is the last buffer in a page, unmap and free it. */
-	if (efx_rx_is_last_buffer(rx_queue->efx, rx_buf)) {
+	if (rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE) {
 		efx_unmap_rx_buffer(rx_queue->efx, rx_buf);
 		efx_free_rx_buffer(rx_buf);
 	}
@@ -507,7 +502,7 @@ void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index,
 	struct efx_rx_buffer *rx_buf;
 
 	rx_buf = efx_rx_buffer(rx_queue, index);
-	rx_buf->flags = flags;
+	rx_buf->flags |= flags;
 
 	/* Validate the number of fragments and completed length */
 	if (n_frags == 1) {
--
cgit v1.2.3

From 1648a23fa159e5c433aac06dc5e0d9db36146016 Mon Sep 17 00:00:00 2001
From: Daniel Pieczko
Date: Wed, 13 Feb 2013 10:54:41 +0000
Subject: sfc: allocate more RX buffers per page

Allocating 2 buffers per page is insanely inefficient when MTU is 1500
and PAGE_SIZE is 64K (as it usually is on POWER).  Allocate as many as
we can fit, and choose the refill batch size at run-time so that we
still always use a whole page at once.
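For illustration (not part of the patch), the buffers-per-page
arithmetic can be sketched in a few lines of standalone C.  It mirrors
the efx_rx_config_page_split() formula in the diff below; the
cache-line, IP-alignment and page-state sizes are assumed round
numbers, not the driver's exact values:

	#include <stdio.h>

	#define ALIGN(x, a)        (((x) + (a) - 1) & ~((a) - 1))
	#define L1_CACHE_BYTES     64u	/* assumed cache line size */
	#define EFX_PAGE_IP_ALIGN  2u	/* assumed IP alignment padding */
	#define STATE_SIZE         16u	/* assumed page-state header size */

	static unsigned int bufs_per_page(unsigned int page_size,
					  unsigned int rx_dma_len)
	{
		unsigned int step = ALIGN(rx_dma_len + EFX_PAGE_IP_ALIGN,
					  L1_CACHE_BYTES);
		return (page_size - STATE_SIZE) / step;
	}

	int main(void)
	{
		/* A 1500-byte MTU needs roughly 1536 bytes of DMA buffer. */
		printf("4K page:  %u buffers\n", bufs_per_page(4096, 1536));
		printf("64K page: %u buffers\n", bufs_per_page(65536, 1536));
		return 0;
	}

This prints 2 buffers for a 4K page but 40 for a 64K page, which the
fixed split-in-two scheme would still have carved into only 2 buffers.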
[bwh: Fix loop condition to allow for compound pages; rebase]
Signed-off-by: Ben Hutchings
---
 drivers/net/ethernet/sfc/rx.c | 80 +++++++++++++++++++++++--------------------
 1 file changed, 42 insertions(+), 38 deletions(-)

(limited to 'drivers/net/ethernet/sfc/rx.c')

diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c
index 4cc2ba48a912..a948b36c1910 100644
--- a/drivers/net/ethernet/sfc/rx.c
+++ b/drivers/net/ethernet/sfc/rx.c
@@ -25,19 +25,15 @@
 #include "selftest.h"
 #include "workarounds.h"
 
-/* Number of RX descriptors pushed at once. */
-#define EFX_RX_BATCH  8
+/* Preferred number of descriptors to fill at once */
+#define EFX_RX_PREFERRED_BATCH 8U
 
 /* Number of RX buffers to recycle pages for.  When creating the RX page recycle
  * ring, this number is divided by the number of buffers per page to calculate
  * the number of pages to store in the RX page recycle ring.
  */
 #define EFX_RECYCLE_RING_SIZE_IOMMU 4096
-#define EFX_RECYCLE_RING_SIZE_NOIOMMU (2 * EFX_RX_BATCH)
-
-/* Maximum length for an RX descriptor sharing a page */
-#define EFX_RX_HALF_PAGE ((PAGE_SIZE >> 1) - sizeof(struct efx_rx_page_state) \
-		      - EFX_PAGE_IP_ALIGN)
+#define EFX_RECYCLE_RING_SIZE_NOIOMMU (2 * EFX_RX_PREFERRED_BATCH)
 
 /* Size of buffer allocated for skb header area. */
 #define EFX_SKB_HEADERS 64u
@@ -95,6 +91,19 @@ static inline void efx_sync_rx_buffer(struct efx_nic *efx,
 				DMA_FROM_DEVICE);
 }
 
+void efx_rx_config_page_split(struct efx_nic *efx)
+{
+	efx->rx_page_buf_step = ALIGN(efx->rx_dma_len + EFX_PAGE_IP_ALIGN,
+				      L1_CACHE_BYTES);
+	efx->rx_bufs_per_page = efx->rx_buffer_order ? 1 :
+		((PAGE_SIZE - sizeof(struct efx_rx_page_state)) /
+		 efx->rx_page_buf_step);
+	efx->rx_buffer_truesize = (PAGE_SIZE << efx->rx_buffer_order) /
+		efx->rx_bufs_per_page;
+	efx->rx_pages_per_batch = DIV_ROUND_UP(EFX_RX_PREFERRED_BATCH,
+					       efx->rx_bufs_per_page);
+}
+
 /* Check the RX page recycle ring for a page that can be reused. */
 static struct page *efx_reuse_page(struct efx_rx_queue *rx_queue)
 {
@@ -134,10 +143,10 @@ static struct page *efx_reuse_page(struct efx_rx_queue *rx_queue)
  *
  * @rx_queue:		Efx RX queue
  *
- * This allocates memory for EFX_RX_BATCH receive buffers, maps them for DMA,
- * and populates struct efx_rx_buffers for each one. Return a negative error
- * code or 0 on success. If a single page can be split between two buffers,
- * then the page will either be inserted fully, or not at at all.
+ * This allocates a batch of pages, maps them for DMA, and populates
+ * struct efx_rx_buffers for each one. Return a negative error code or
+ * 0 on success. If a single page can be used for multiple buffers,
+ * then the page will either be inserted fully, or not at all.
  */
 static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue)
 {
@@ -149,10 +158,8 @@ static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue)
 	dma_addr_t dma_addr;
 	unsigned index, count;
 
-	/* We can split a page between two buffers */
-	BUILD_BUG_ON(EFX_RX_BATCH & 1);
-
-	for (count = 0; count < EFX_RX_BATCH; ++count) {
+	count = 0;
+	do {
 		page = efx_reuse_page(rx_queue);
 		if (page == NULL) {
 			page = alloc_pages(__GFP_COLD | __GFP_COMP | GFP_ATOMIC,
@@ -174,32 +181,26 @@ static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue)
 			state = page_address(page);
 			dma_addr = state->dma_addr;
 		}
-		get_page(page);
 
 		dma_addr += sizeof(struct efx_rx_page_state);
 		page_offset = sizeof(struct efx_rx_page_state);
 
-	split:
-		index = rx_queue->added_count & rx_queue->ptr_mask;
-		rx_buf = efx_rx_buffer(rx_queue, index);
-		rx_buf->dma_addr = dma_addr + EFX_PAGE_IP_ALIGN;
-		rx_buf->page = page;
-		rx_buf->page_offset = page_offset + EFX_PAGE_IP_ALIGN;
-		rx_buf->len = efx->rx_dma_len;
-		++rx_queue->added_count;
-
-		if ((~count & 1) && (efx->rx_dma_len <= EFX_RX_HALF_PAGE)) {
-			/* Use the second half of the page */
-			get_page(page);
+		do {
+			index = rx_queue->added_count & rx_queue->ptr_mask;
+			rx_buf = efx_rx_buffer(rx_queue, index);
+			rx_buf->dma_addr = dma_addr + EFX_PAGE_IP_ALIGN;
+			rx_buf->page = page;
+			rx_buf->page_offset = page_offset + EFX_PAGE_IP_ALIGN;
+			rx_buf->len = efx->rx_dma_len;
 			rx_buf->flags = 0;
-			dma_addr += (PAGE_SIZE >> 1);
-			page_offset += (PAGE_SIZE >> 1);
-			++count;
-			goto split;
-		}
+			++rx_queue->added_count;
+			get_page(page);
+			dma_addr += efx->rx_page_buf_step;
+			page_offset += efx->rx_page_buf_step;
+		} while (page_offset + efx->rx_page_buf_step <= PAGE_SIZE);
 
 		rx_buf->flags = EFX_RX_BUF_LAST_IN_PAGE;
-	}
+	} while (++count < efx->rx_pages_per_batch);
 
 	return 0;
 }
@@ -307,7 +308,8 @@ static void efx_recycle_rx_buffers(struct efx_channel *channel,
  */
 void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue)
 {
-	unsigned fill_level;
+	struct efx_nic *efx = rx_queue->efx;
+	unsigned int fill_level, batch_size;
 	int space, rc = 0;
 
 	/* Calculate current fill level, and exit if we don't need to fill */
@@ -322,8 +324,9 @@ void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue)
 		rx_queue->min_fill = fill_level;
 	}
 
+	batch_size = efx->rx_pages_per_batch * efx->rx_bufs_per_page;
 	space = rx_queue->max_fill - fill_level;
-	EFX_BUG_ON_PARANOID(space < EFX_RX_BATCH);
+	EFX_BUG_ON_PARANOID(space < batch_size);
 
 	netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev,
 		   "RX queue %d fast-filling descriptor ring from"
@@ -340,7 +343,7 @@ void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue)
 			efx_schedule_slow_fill(rx_queue);
 			goto out;
 		}
-	} while ((space -= EFX_RX_BATCH) >= EFX_RX_BATCH);
+	} while ((space -= batch_size) >= batch_size);
 
 	netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev,
 		   "RX queue %d fast-filled descriptor ring "
@@ -708,7 +711,8 @@ void efx_init_rx_queue(struct efx_rx_queue *rx_queue)
 
 	/* Initialise limit fields */
 	max_fill = efx->rxq_entries - EFX_RXD_HEAD_ROOM;
-	max_trigger = max_fill - EFX_RX_BATCH;
+	max_trigger =
+		max_fill - efx->rx_pages_per_batch * efx->rx_bufs_per_page;
 	if (rx_refill_threshold != 0) {
 		trigger = max_fill * min(rx_refill_threshold, 100U) / 100U;
 		if (trigger > max_trigger)
--
cgit v1.2.3

From debd0034de1f68585ab5d1d3d693d38f19d48e5d Mon Sep 17 00:00:00 2001
From: stephen hemminger
Date: Sat, 16 Mar 2013 06:57:51 +0000
Subject: sfc: make local functions static

Trivial: sparse detected functions that should be static.
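For reference (not part of the patch), sparse is typically run over the
build with "make C=1", and the class of warning fixed here looks like
this on a hypothetical file:

	/* foo.c: foo_helper() has no users outside this file, so sparse
	 * warns: symbol 'foo_helper' was not declared. Should it be static?
	 */
	int foo_helper(int x)	/* fix: static int foo_helper(int x) */
	{
		return x * 2;
	}

Marking such functions static also lets the compiler warn if they later
become unused.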
Signed-off-by: Stephen Hemminger
Signed-off-by: David S. Miller
---
 drivers/net/ethernet/sfc/rx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers/net/ethernet/sfc/rx.c')

diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c
index a948b36c1910..e73e30bac10e 100644
--- a/drivers/net/ethernet/sfc/rx.c
+++ b/drivers/net/ethernet/sfc/rx.c
@@ -666,8 +666,8 @@ int efx_probe_rx_queue(struct efx_rx_queue *rx_queue)
 	return rc;
 }
 
-void efx_init_rx_recycle_ring(struct efx_nic *efx,
-			      struct efx_rx_queue *rx_queue)
+static void efx_init_rx_recycle_ring(struct efx_nic *efx,
+				     struct efx_rx_queue *rx_queue)
 {
 	unsigned int bufs_in_recycle_ring, page_ring_size;
 
--
cgit v1.2.3
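Taken together, the series above leaves rx.c recycling whole DMA-mapped
pages through a small per-queue ring.  The standalone sketch below
models that producer/consumer logic; the struct page stand-in with a
bare refcount is a simplification for illustration, and the driver's
efx_recycle_rx_page()/efx_reuse_page() additionally handle DMA state,
read/write pointer collisions and the recycle statistics:

	#include <stdlib.h>

	/* Stand-in for the kernel's struct page: only a refcount is modelled. */
	struct page { int refcount; };

	static void put_page(struct page *p)
	{
		if (--p->refcount == 0)
			free(p);
	}

	/* Per-queue recycle ring, sized as in efx_init_rx_recycle_ring(). */
	struct recycle_ring {
		struct page **ring;
		unsigned int ptr_mask;	/* ring size is a power of two */
		unsigned int add;	/* producer index */
		unsigned int remove;	/* consumer index */
	};

	/* Producer: offer a page back once its last RX buffer is consumed. */
	static void recycle_page(struct recycle_ring *rr, struct page *page)
	{
		unsigned int index = rr->add & rr->ptr_mask;

		if (rr->ring[index] == NULL) {
			rr->ring[index] = page;	/* the ring keeps our reference */
			++rr->add;
		} else {
			put_page(page);		/* ring full: drop the page */
		}
	}

	/* Consumer: try a recycled page before falling back to alloc_pages(). */
	static struct page *reuse_page(struct recycle_ring *rr)
	{
		unsigned int index = rr->remove & rr->ptr_mask;
		struct page *page = rr->ring[index];

		if (page == NULL)
			return NULL;
		rr->ring[index] = NULL;
		++rr->remove;

		if (page->refcount == 1)
			return page;	/* sole reference: safe to reuse */

		put_page(page);		/* still in use elsewhere */
		return NULL;
	}

A page is handed back to the hardware only when the driver holds the
sole remaining reference, which is what the refcount test above stands
in for.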