From 1a37e412a0225fcba5587f24c0dfc7636efc8b69 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 23 May 2013 21:02:51 +0000 Subject: net: Use 16bits for *_headers fields of struct skbuff In order to mitigate ongoing incresase in the size of struct skbuff use 16 bit integer offsets rather than pointers for inner_*_headers. This appears to reduce the size of struct skbuff from 0xd0 to 0xc0 bytes on x86_64 with the following all unset. CONFIG_XFRM CONFIG_NF_CONNTRACK CONFIG_NF_CONNTRACK_MODULE NET_SKBUFF_NF_DEFRAG_NEEDED CONFIG_BRIDGE_NETFILTER CONFIG_NET_SCHED CONFIG_IPV6_NDISC_NODETYPE CONFIG_NET_DMA CONFIG_NETWORK_SECMARK Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- include/linux/skbuff.h | 119 +++---------------------------------------------- 1 file changed, 6 insertions(+), 113 deletions(-) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2e0ced1af3b1..5663e3592784 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -509,12 +509,12 @@ struct sk_buff { __u32 reserved_tailroom; }; - sk_buff_data_t inner_transport_header; - sk_buff_data_t inner_network_header; - sk_buff_data_t inner_mac_header; - sk_buff_data_t transport_header; - sk_buff_data_t network_header; - sk_buff_data_t mac_header; + __u16 inner_transport_header; + __u16 inner_network_header; + __u16 inner_mac_header; + __u16 transport_header; + __u16 network_header; + __u16 mac_header; /* These elements must be at the end, see alloc_skb() for details. */ sk_buff_data_t tail; sk_buff_data_t end; @@ -1527,7 +1527,6 @@ static inline void skb_reset_mac_len(struct sk_buff *skb) skb->mac_len = skb->network_header - skb->mac_header; } -#ifdef NET_SKBUFF_DATA_USES_OFFSET static inline unsigned char *skb_inner_transport_header(const struct sk_buff *skb) { @@ -1638,112 +1637,6 @@ static inline void skb_set_mac_header(struct sk_buff *skb, const int offset) skb->mac_header += offset; } -#else /* NET_SKBUFF_DATA_USES_OFFSET */ -static inline unsigned char *skb_inner_transport_header(const struct sk_buff - *skb) -{ - return skb->inner_transport_header; -} - -static inline void skb_reset_inner_transport_header(struct sk_buff *skb) -{ - skb->inner_transport_header = skb->data; -} - -static inline void skb_set_inner_transport_header(struct sk_buff *skb, - const int offset) -{ - skb->inner_transport_header = skb->data + offset; -} - -static inline unsigned char *skb_inner_network_header(const struct sk_buff *skb) -{ - return skb->inner_network_header; -} - -static inline void skb_reset_inner_network_header(struct sk_buff *skb) -{ - skb->inner_network_header = skb->data; -} - -static inline void skb_set_inner_network_header(struct sk_buff *skb, - const int offset) -{ - skb->inner_network_header = skb->data + offset; -} - -static inline unsigned char *skb_inner_mac_header(const struct sk_buff *skb) -{ - return skb->inner_mac_header; -} - -static inline void skb_reset_inner_mac_header(struct sk_buff *skb) -{ - skb->inner_mac_header = skb->data; -} - -static inline void skb_set_inner_mac_header(struct sk_buff *skb, - const int offset) -{ - skb->inner_mac_header = skb->data + offset; -} -static inline bool skb_transport_header_was_set(const struct sk_buff *skb) -{ - return skb->transport_header != NULL; -} - -static inline unsigned char *skb_transport_header(const struct sk_buff *skb) -{ - return skb->transport_header; -} - -static inline void skb_reset_transport_header(struct sk_buff *skb) -{ - skb->transport_header = skb->data; -} - -static inline void skb_set_transport_header(struct sk_buff *skb, - const int offset) -{ - skb->transport_header = skb->data + offset; -} - -static inline unsigned char *skb_network_header(const struct sk_buff *skb) -{ - return skb->network_header; -} - -static inline void skb_reset_network_header(struct sk_buff *skb) -{ - skb->network_header = skb->data; -} - -static inline void skb_set_network_header(struct sk_buff *skb, const int offset) -{ - skb->network_header = skb->data + offset; -} - -static inline unsigned char *skb_mac_header(const struct sk_buff *skb) -{ - return skb->mac_header; -} - -static inline int skb_mac_header_was_set(const struct sk_buff *skb) -{ - return skb->mac_header != NULL; -} - -static inline void skb_reset_mac_header(struct sk_buff *skb) -{ - skb->mac_header = skb->data; -} - -static inline void skb_set_mac_header(struct sk_buff *skb, const int offset) -{ - skb->mac_header = skb->data + offset; -} -#endif /* NET_SKBUFF_DATA_USES_OFFSET */ - static inline void skb_probe_transport_header(struct sk_buff *skb, const int offset_hint) { -- cgit v1.2.3 From 0d89d2035fe063461a5ddb609b2c12e7fb006e44 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 23 May 2013 21:02:52 +0000 Subject: MPLS: Add limited GSO support In the case where a non-MPLS packet is received and an MPLS stack is added it may well be the case that the original skb is GSO but the NIC used for transmit does not support GSO of MPLS packets. The aim of this code is to provide GSO in software for MPLS packets whose skbs are GSO. SKB Usage: When an implementation adds an MPLS stack to a non-MPLS packet it should do the following to skb metadata: * Set skb->inner_protocol to the old non-MPLS ethertype of the packet. skb->inner_protocol is added by this patch. * Set skb->protocol to the new MPLS ethertype of the packet. * Set skb->network_header to correspond to the end of the L3 header, including the MPLS label stack. I have posted a patch, "[PATCH v3.29] datapath: Add basic MPLS support to kernel" which adds MPLS support to the kernel datapath of Open vSwtich. That patch sets the above requirements in datapath/actions.c:push_mpls() and was used to exercise this code. The datapath patch is against the Open vSwtich tree but it is intended that it be added to the Open vSwtich code present in the mainline Linux kernel at some point. Features: I believe that the approach that I have taken is at least partially consistent with the handling of other protocols. Jesse, I understand that you have some ideas here. I am more than happy to change my implementation. This patch adds dev->mpls_features which may be used by devices to advertise features supported for MPLS packets. A new NETIF_F_MPLS_GSO feature is added for devices which support hardware MPLS GSO offload. Currently no devices support this and MPLS GSO always falls back to software. Alternate Implementation: One possible alternate implementation is to teach netif_skb_features() and skb_network_protocol() about MPLS, in a similar way to their understanding of VLANs. I believe this would avoid the need for net/mpls/mpls_gso.c and in particular the calls to __skb_push() and __skb_push() in mpls_gso_segment(). I have decided on the implementation in this patch as it should not introduce any overhead in the case where mpls_gso is not compiled into the kernel or inserted as a module. MPLS GSO suggested by Jesse Gross. Based in part on "v4 GRE: Add TCP segmentation offload for GRE" by Pravin B Shelar. Cc: Jesse Gross Cc: Pravin B Shelar Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 5663e3592784..8f2b830772a8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -319,6 +319,8 @@ enum { SKB_GSO_GRE = 1 << 6, SKB_GSO_UDP_TUNNEL = 1 << 7, + + SKB_GSO_MPLS = 1 << 8, }; #if BITS_PER_LONG > 32 @@ -389,6 +391,7 @@ typedef unsigned char *sk_buff_data_t; * @dropcount: total number of sk_receive_queue overflows * @vlan_proto: vlan encapsulation protocol * @vlan_tci: vlan tag control information + * @inner_protocol: Protocol (encapsulation) * @inner_transport_header: Inner transport layer header (encapsulation) * @inner_network_header: Network layer header (encapsulation) * @inner_mac_header: Link layer header (encapsulation) @@ -509,6 +512,7 @@ struct sk_buff { __u32 reserved_tailroom; }; + __be16 inner_protocol; __u16 inner_transport_header; __u16 inner_network_header; __u16 inner_mac_header; -- cgit v1.2.3 From 7cc461900549fc480eb133948649a1edb7eaaa6f Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 28 May 2013 20:34:29 +0000 Subject: net, ipv4, ipv6: Correct assignment of skb->network_header to skb->tail This corrects an regression introduced by "net: Use 16bits for *_headers fields of struct skbuff" when NET_SKBUFF_DATA_USES_OFFSET is not set. In that case skb->tail will be a pointer however skb->network_header is now an offset. This patch corrects the problem by adding a wrapper to return skb tail as an offset regardless of the value of NET_SKBUFF_DATA_USES_OFFSET. It seems that skb->tail that this offset may be more than 64k and some care has been taken to treat such cases as an error. Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- include/linux/skbuff.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 8f2b830772a8..5f931191cf57 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1391,6 +1391,11 @@ static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset) skb_reset_tail_pointer(skb); skb->tail += offset; } + +static inline unsigned long skb_tail_offset(const struct sk_buff *skb) +{ + return skb->tail; +} #else /* NET_SKBUFF_DATA_USES_OFFSET */ static inline unsigned char *skb_tail_pointer(const struct sk_buff *skb) { @@ -1407,6 +1412,10 @@ static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset) skb->tail = skb->data + offset; } +static inline unsigned long skb_tail_offset(const struct sk_buff *skb) +{ + return skb->tail - skb->head; +} #endif /* NET_SKBUFF_DATA_USES_OFFSET */ /* -- cgit v1.2.3 From 35d0461061f27eeb62de63174959edbbb9e434de Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 29 May 2013 15:16:05 +0800 Subject: net: clean up skb headers code commit 1a37e412a0225fcba5587 (net: Use 16bits for *_headers fields of struct skbuff) converts skb->*_header to u16, some #if NET_SKBUFF_DATA_USES_OFFSET are now useless, and to be safe, we could just use "X = (typeof(X)) ~0U;" as suggested by David. Cc: David S. Miller Cc: Simon Horman Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 5f931191cf57..b9997907a0f1 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1593,7 +1593,7 @@ static inline void skb_set_inner_mac_header(struct sk_buff *skb, } static inline bool skb_transport_header_was_set(const struct sk_buff *skb) { - return skb->transport_header != ~0U; + return skb->transport_header != (typeof(skb->transport_header))~0U; } static inline unsigned char *skb_transport_header(const struct sk_buff *skb) @@ -1636,7 +1636,7 @@ static inline unsigned char *skb_mac_header(const struct sk_buff *skb) static inline int skb_mac_header_was_set(const struct sk_buff *skb) { - return skb->mac_header != ~0U; + return skb->mac_header != (typeof(skb->mac_header))~0U; } static inline void skb_reset_mac_header(struct sk_buff *skb) -- cgit v1.2.3 From 060212928670593fb89243640bf05cf89560b023 Mon Sep 17 00:00:00 2001 From: Eliezer Tamir Date: Mon, 10 Jun 2013 11:39:50 +0300 Subject: net: add low latency socket poll Adds an ndo_ll_poll method and the code that supports it. This method can be used by low latency applications to busy-poll Ethernet device queues directly from the socket code. sysctl_net_ll_poll controls how many microseconds to poll. Default is zero (disabled). Individual protocol support will be added by subsequent patches. Signed-off-by: Alexander Duyck Signed-off-by: Jesse Brandeburg Signed-off-by: Eliezer Tamir Acked-by: Eric Dumazet Tested-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/skbuff.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9995834d2cb6..400d82ae2b03 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -386,6 +386,7 @@ typedef unsigned char *sk_buff_data_t; * @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS * @dma_cookie: a cookie to one of several possible DMA operations * done by skb DMA functions + * @napi_id: id of the NAPI struct this skb came from * @secmark: security marking * @mark: Generic packet mark * @dropcount: total number of sk_receive_queue overflows @@ -500,8 +501,11 @@ struct sk_buff { /* 7/9 bit hole (depending on ndisc_nodetype presence) */ kmemcheck_bitfield_end(flags2); -#ifdef CONFIG_NET_DMA - dma_cookie_t dma_cookie; +#if defined CONFIG_NET_DMA || defined CONFIG_NET_LL_RX_POLL + union { + unsigned int napi_id; + dma_cookie_t dma_cookie; + }; #endif #ifdef CONFIG_NETWORK_SECMARK __u32 secmark; -- cgit v1.2.3 From 30f3a40f9a2a2869a560a9cb9ef488d10c803e14 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 5 Jun 2013 20:14:10 +0800 Subject: net: remove last caller of skb_tail_offset() and itself Similar to the following commits: commit 00f97da17a0c8d656d0c9 (netpoll: fix position of network header) commit 525cebedb32a87fa48584 (pktgen: Fix position of ip and udp header) using skb_tail_offset() seems not correct since the offset is based on head pointer. With the last caller removed, skb_tail_offset() can be killed finally. Cc: Thomas Graf Cc: Daniel Borkmann Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/linux/skbuff.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 400d82ae2b03..a7393adea0b5 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1396,10 +1396,6 @@ static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset) skb->tail += offset; } -static inline unsigned long skb_tail_offset(const struct sk_buff *skb) -{ - return skb->tail; -} #else /* NET_SKBUFF_DATA_USES_OFFSET */ static inline unsigned char *skb_tail_pointer(const struct sk_buff *skb) { @@ -1416,10 +1412,6 @@ static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset) skb->tail = skb->data + offset; } -static inline unsigned long skb_tail_offset(const struct sk_buff *skb) -{ - return skb->tail - skb->head; -} #endif /* NET_SKBUFF_DATA_USES_OFFSET */ /* -- cgit v1.2.3 From 621e84d6f373dcb273ebfd772638b8e7dc3c2c48 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 26 Jun 2013 16:11:27 +0200 Subject: dev: introduce skb_scrub_packet() The goal of this new function is to perform all needed cleanup before sending an skb into another netns. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a7393adea0b5..6b06023e8a08 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2384,6 +2384,7 @@ extern void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len); extern int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen); +extern void skb_scrub_packet(struct sk_buff *skb); extern struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features); -- cgit v1.2.3 From e0d1095ae3405404d247afb00233ef837d58da83 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 1 Aug 2013 11:10:25 +0800 Subject: net: rename CONFIG_NET_LL_RX_POLL to CONFIG_NET_RX_BUSY_POLL Eliezer renames several *ll_poll to *busy_poll, but forgets CONFIG_NET_LL_RX_POLL, so in case of confusion, rename it too. Cc: Eliezer Tamir Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 5afefa01a13c..3b71a4e83642 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -501,7 +501,7 @@ struct sk_buff { /* 7/9 bit hole (depending on ndisc_nodetype presence) */ kmemcheck_bitfield_end(flags2); -#if defined CONFIG_NET_DMA || defined CONFIG_NET_LL_RX_POLL +#if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL union { unsigned int napi_id; dma_cookie_t dma_cookie; -- cgit v1.2.3