From d826eb14ecef3574b6b3be55e5f4329f4a76fbf3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Nov 2011 07:24:35 +0000 Subject: ipv4: PKTINFO doesnt need dst reference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Le lundi 07 novembre 2011 à 15:33 +0100, Eric Dumazet a écrit : > At least, in recent kernels we dont change dst->refcnt in forwarding > patch (usinf NOREF skb->dst) > > One particular point is the atomic_inc(dst->refcnt) we have to perform > when queuing an UDP packet if socket asked PKTINFO stuff (for example a > typical DNS server has to setup this option) > > I have one patch somewhere that stores the information in skb->cb[] and > avoid the atomic_{inc|dec}(dst->refcnt). > OK I found it, I did some extra tests and believe its ready. [PATCH net-next] ipv4: IP_PKTINFO doesnt need dst reference When a socket uses IP_PKTINFO notifications, we currently force a dst reference for each received skb. Reader has to access dst to get needed information (rt_iif & rt_spec_dst) and must release dst reference. We also forced a dst reference if skb was put in socket backlog, even without IP_PKTINFO handling. This happens under stress/load. We can instead store the needed information in skb->cb[], so that only softirq handler really access dst, improving cache hit ratios. This removes two atomic operations per packet, and false sharing as well. On a benchmark using a mono threaded receiver (doing only recvmsg() calls), I can reach 720.000 pps instead of 570.000 pps. IP_PKTINFO is typically used by DNS servers, and any multihomed aware UDP application. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/raw.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/ipv6/raw.c') diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 331af3b882ac..204f2e833f04 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -383,7 +383,8 @@ static inline int rawv6_rcv_skb(struct sock *sk, struct sk_buff *skb) } /* Charge it to the socket. */ - if (ip_queue_rcv_skb(sk, skb) < 0) { + skb_dst_drop(skb); + if (sock_queue_rcv_skb(sk, skb) < 0) { kfree_skb(skb); return NET_RX_DROP; } -- cgit v1.2.3 From a7ae1992248e5cf9dc5bd35695ab846d27efe15f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 18 Nov 2011 02:20:04 +0000 Subject: ipv6: Remove all uses of LL_ALLOCATED_SPACE ipv6: Remove all uses of LL_ALLOCATED_SPACE The macro LL_ALLOCATED_SPACE was ill-conceived. It applies the alignment to the sum of needed_headroom and needed_tailroom. As the amount that is then reserved for head room is needed_headroom with alignment, this means that the tail room left may be too small. This patch replaces all uses of LL_ALLOCATED_SPACE in net/ipv6 with the macro LL_RESERVED_SPACE and direct reference to needed_tailroom. This also fixes the problem with needed_headroom changing between allocating the skb and reserving the head room. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/raw.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net/ipv6/raw.c') diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 204f2e833f04..a1aa869a9ce7 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -611,6 +611,8 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, struct sk_buff *skb; int err; struct rt6_info *rt = (struct rt6_info *)*dstp; + int hlen = LL_RESERVED_SPACE(rt->dst.dev); + int tlen = rt->dst.dev->needed_tailroom; if (length > rt->dst.dev->mtu) { ipv6_local_error(sk, EMSGSIZE, fl6, rt->dst.dev->mtu); @@ -620,11 +622,11 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, goto out; skb = sock_alloc_send_skb(sk, - length + LL_ALLOCATED_SPACE(rt->dst.dev) + 15, + length + hlen + tlen + 15, flags & MSG_DONTWAIT, &err); if (skb == NULL) goto error; - skb_reserve(skb, LL_RESERVED_SPACE(rt->dst.dev)); + skb_reserve(skb, hlen); skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; -- cgit v1.2.3 From 4e3fd7a06dc20b2d8ec6892233ad2012968fe7b6 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 21 Nov 2011 03:39:03 +0000 Subject: net: remove ipv6_addr_copy() C assignment can handle struct in6_addr copying. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/ipv6/raw.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net/ipv6/raw.c') diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index a1aa869a9ce7..a4894f4f1944 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -299,9 +299,9 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) } inet->inet_rcv_saddr = inet->inet_saddr = v4addr; - ipv6_addr_copy(&np->rcv_saddr, &addr->sin6_addr); + np->rcv_saddr = addr->sin6_addr; if (!(addr_type & IPV6_ADDR_MULTICAST)) - ipv6_addr_copy(&np->saddr, &addr->sin6_addr); + np->saddr = addr->sin6_addr; err = 0; out_unlock: rcu_read_unlock(); @@ -495,7 +495,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, if (sin6) { sin6->sin6_family = AF_INET6; sin6->sin6_port = 0; - ipv6_addr_copy(&sin6->sin6_addr, &ipv6_hdr(skb)->saddr); + sin6->sin6_addr = ipv6_hdr(skb)->saddr; sin6->sin6_flowinfo = 0; sin6->sin6_scope_id = 0; if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) @@ -846,11 +846,11 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, goto out; if (!ipv6_addr_any(daddr)) - ipv6_addr_copy(&fl6.daddr, daddr); + fl6.daddr = *daddr; else fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr)) - ipv6_addr_copy(&fl6.saddr, &np->saddr); + fl6.saddr = np->saddr; final_p = fl6_update_dst(&fl6, opt, &final); -- cgit v1.2.3 From cf778b00e96df6d64f8e21b8395d1f8a859ecdc7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 12 Jan 2012 04:41:32 +0000 Subject: net: reintroduce missing rcu_assign_pointer() calls commit a9b3cd7f32 (rcu: convert uses of rcu_assign_pointer(x, NULL) to RCU_INIT_POINTER) did a lot of incorrect changes, since it did a complete conversion of rcu_assign_pointer(x, y) to RCU_INIT_POINTER(x, y). We miss needed barriers, even on x86, when y is not NULL. Signed-off-by: Eric Dumazet CC: Stephen Hemminger CC: Paul E. McKenney Signed-off-by: David S. Miller --- net/ipv6/raw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6/raw.c') diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index a4894f4f1944..d02f7e4dd611 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -131,7 +131,7 @@ static mh_filter_t __rcu *mh_filter __read_mostly; int rawv6_mh_filter_register(mh_filter_t filter) { - RCU_INIT_POINTER(mh_filter, filter); + rcu_assign_pointer(mh_filter, filter); return 0; } EXPORT_SYMBOL(rawv6_mh_filter_register); -- cgit v1.2.3