From 49bd8fb0b18944fc0b6b11d999619d3687c3914a Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Jan 2013 22:48:55 +0000 Subject: netpoll: remove usage of dev->master Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/netpoll.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'net/core/netpoll.c') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 3151acf5ec13..d2bda8eb08ec 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -210,9 +210,12 @@ static void netpoll_poll_dev(struct net_device *dev) if (dev->flags & IFF_SLAVE) { if (ni) { - struct net_device *bond_dev = dev->master; + struct net_device *bond_dev; struct sk_buff *skb; - struct netpoll_info *bond_ni = rcu_dereference_bh(bond_dev->npinfo); + struct netpoll_info *bond_ni; + + bond_dev = netdev_master_upper_dev_get_rcu(dev); + bond_ni = rcu_dereference_bh(bond_dev->npinfo); while ((skb = skb_dequeue(&ni->arp_tx))) { skb->dev = bond_dev; skb_queue_tail(&bond_ni->arp_tx, skb); @@ -815,7 +818,7 @@ int netpoll_setup(struct netpoll *np) return -ENODEV; } - if (ndev->master) { + if (netdev_master_upper_dev_get(ndev)) { np_err(np, "%s is a slave device, aborting\n", np->dev_name); err = -EBUSY; goto put; -- cgit v1.2.3 From b7394d2429c198b1da3d46ac39192e891029ec0f Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 7 Jan 2013 20:52:39 +0000 Subject: netpoll: prepare for ipv6 This patch adjusts some struct and functions, to prepare for supporting IPv6. Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/core/netpoll.c | 402 +++++++++++++++++++++++++++++------------------------ 1 file changed, 224 insertions(+), 178 deletions(-) (limited to 'net/core/netpoll.c') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index d2bda8eb08ec..6bd073688f68 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -55,7 +55,7 @@ static atomic_t trapped; MAX_UDP_CHUNK) static void zap_completion_queue(void); -static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo); +static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo); static unsigned int carrier_timeout = 4; module_param(carrier_timeout, uint, 0644); @@ -181,13 +181,13 @@ static void poll_napi(struct net_device *dev) } } -static void service_arp_queue(struct netpoll_info *npi) +static void service_neigh_queue(struct netpoll_info *npi) { if (npi) { struct sk_buff *skb; - while ((skb = skb_dequeue(&npi->arp_tx))) - netpoll_arp_reply(skb, npi); + while ((skb = skb_dequeue(&npi->neigh_tx))) + netpoll_neigh_reply(skb, npi); } } @@ -216,14 +216,14 @@ static void netpoll_poll_dev(struct net_device *dev) bond_dev = netdev_master_upper_dev_get_rcu(dev); bond_ni = rcu_dereference_bh(bond_dev->npinfo); - while ((skb = skb_dequeue(&ni->arp_tx))) { + while ((skb = skb_dequeue(&ni->neigh_tx))) { skb->dev = bond_dev; - skb_queue_tail(&bond_ni->arp_tx, skb); + skb_queue_tail(&bond_ni->neigh_tx, skb); } } } - service_arp_queue(ni); + service_neigh_queue(ni); zap_completion_queue(); } @@ -386,7 +386,9 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) static atomic_t ip_ident; udp_len = len + sizeof(*udph); - ip_len = udp_len + sizeof(*iph); + if (!np->ipv6) + ip_len = udp_len + sizeof(*iph); + total_len = ip_len + LL_RESERVED_SPACE(np->dev); skb = find_skb(np, total_len + np->dev->needed_tailroom, @@ -403,34 +405,38 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) udph->source = htons(np->local_port); udph->dest = htons(np->remote_port); udph->len = htons(udp_len); - udph->check = 0; - udph->check = csum_tcpudp_magic(np->local_ip, - np->remote_ip, - udp_len, IPPROTO_UDP, - csum_partial(udph, udp_len, 0)); - if (udph->check == 0) - udph->check = CSUM_MANGLED_0; - - skb_push(skb, sizeof(*iph)); - skb_reset_network_header(skb); - iph = ip_hdr(skb); - - /* iph->version = 4; iph->ihl = 5; */ - put_unaligned(0x45, (unsigned char *)iph); - iph->tos = 0; - put_unaligned(htons(ip_len), &(iph->tot_len)); - iph->id = htons(atomic_inc_return(&ip_ident)); - iph->frag_off = 0; - iph->ttl = 64; - iph->protocol = IPPROTO_UDP; - iph->check = 0; - put_unaligned(np->local_ip, &(iph->saddr)); - put_unaligned(np->remote_ip, &(iph->daddr)); - iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); - - eth = (struct ethhdr *) skb_push(skb, ETH_HLEN); - skb_reset_mac_header(skb); - skb->protocol = eth->h_proto = htons(ETH_P_IP); + + if (!np->ipv6) { + udph->check = 0; + udph->check = csum_tcpudp_magic(np->local_ip.ip, + np->remote_ip.ip, + udp_len, IPPROTO_UDP, + csum_partial(udph, udp_len, 0)); + if (udph->check == 0) + udph->check = CSUM_MANGLED_0; + + skb_push(skb, sizeof(*iph)); + skb_reset_network_header(skb); + iph = ip_hdr(skb); + + /* iph->version = 4; iph->ihl = 5; */ + put_unaligned(0x45, (unsigned char *)iph); + iph->tos = 0; + put_unaligned(htons(ip_len), &(iph->tot_len)); + iph->id = htons(atomic_inc_return(&ip_ident)); + iph->frag_off = 0; + iph->ttl = 64; + iph->protocol = IPPROTO_UDP; + iph->check = 0; + put_unaligned(np->local_ip.ip, &(iph->saddr)); + put_unaligned(np->remote_ip.ip, &(iph->daddr)); + iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); + + eth = (struct ethhdr *) skb_push(skb, ETH_HLEN); + skb_reset_mac_header(skb); + skb->protocol = eth->h_proto = htons(ETH_P_IP); + } + memcpy(eth->h_source, np->dev->dev_addr, ETH_ALEN); memcpy(eth->h_dest, np->remote_mac, ETH_ALEN); @@ -440,7 +446,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) } EXPORT_SYMBOL(netpoll_send_udp); -static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo) +static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo) { struct arphdr *arp; unsigned char *arp_ptr; @@ -451,7 +457,7 @@ static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo) struct netpoll *np, *tmp; unsigned long flags; int hlen, tlen; - int hits = 0; + int hits = 0, proto; if (list_empty(&npinfo->rx_np)) return; @@ -469,94 +475,97 @@ static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo) if (!hits) return; - /* No arp on this interface */ - if (skb->dev->flags & IFF_NOARP) - return; - - if (!pskb_may_pull(skb, arp_hdr_len(skb->dev))) - return; + proto = ntohs(eth_hdr(skb)->h_proto); + if (proto == ETH_P_IP) { + /* No arp on this interface */ + if (skb->dev->flags & IFF_NOARP) + return; - skb_reset_network_header(skb); - skb_reset_transport_header(skb); - arp = arp_hdr(skb); + if (!pskb_may_pull(skb, arp_hdr_len(skb->dev))) + return; - if ((arp->ar_hrd != htons(ARPHRD_ETHER) && - arp->ar_hrd != htons(ARPHRD_IEEE802)) || - arp->ar_pro != htons(ETH_P_IP) || - arp->ar_op != htons(ARPOP_REQUEST)) - return; + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + arp = arp_hdr(skb); - arp_ptr = (unsigned char *)(arp+1); - /* save the location of the src hw addr */ - sha = arp_ptr; - arp_ptr += skb->dev->addr_len; - memcpy(&sip, arp_ptr, 4); - arp_ptr += 4; - /* If we actually cared about dst hw addr, - it would get copied here */ - arp_ptr += skb->dev->addr_len; - memcpy(&tip, arp_ptr, 4); - - /* Should we ignore arp? */ - if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip)) - return; + if ((arp->ar_hrd != htons(ARPHRD_ETHER) && + arp->ar_hrd != htons(ARPHRD_IEEE802)) || + arp->ar_pro != htons(ETH_P_IP) || + arp->ar_op != htons(ARPOP_REQUEST)) + return; - size = arp_hdr_len(skb->dev); + arp_ptr = (unsigned char *)(arp+1); + /* save the location of the src hw addr */ + sha = arp_ptr; + arp_ptr += skb->dev->addr_len; + memcpy(&sip, arp_ptr, 4); + arp_ptr += 4; + /* If we actually cared about dst hw addr, + it would get copied here */ + arp_ptr += skb->dev->addr_len; + memcpy(&tip, arp_ptr, 4); - spin_lock_irqsave(&npinfo->rx_lock, flags); - list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { - if (tip != np->local_ip) - continue; + /* Should we ignore arp? */ + if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip)) + return; - hlen = LL_RESERVED_SPACE(np->dev); - tlen = np->dev->needed_tailroom; - send_skb = find_skb(np, size + hlen + tlen, hlen); - if (!send_skb) - continue; + size = arp_hdr_len(skb->dev); - skb_reset_network_header(send_skb); - arp = (struct arphdr *) skb_put(send_skb, size); - send_skb->dev = skb->dev; - send_skb->protocol = htons(ETH_P_ARP); + spin_lock_irqsave(&npinfo->rx_lock, flags); + list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { + if (tip != np->local_ip.ip) + continue; + + hlen = LL_RESERVED_SPACE(np->dev); + tlen = np->dev->needed_tailroom; + send_skb = find_skb(np, size + hlen + tlen, hlen); + if (!send_skb) + continue; + + skb_reset_network_header(send_skb); + arp = (struct arphdr *) skb_put(send_skb, size); + send_skb->dev = skb->dev; + send_skb->protocol = htons(ETH_P_ARP); + + /* Fill the device header for the ARP frame */ + if (dev_hard_header(send_skb, skb->dev, ptype, + sha, np->dev->dev_addr, + send_skb->len) < 0) { + kfree_skb(send_skb); + continue; + } - /* Fill the device header for the ARP frame */ - if (dev_hard_header(send_skb, skb->dev, ptype, - sha, np->dev->dev_addr, - send_skb->len) < 0) { - kfree_skb(send_skb); - continue; + /* + * Fill out the arp protocol part. + * + * we only support ethernet device type, + * which (according to RFC 1390) should + * always equal 1 (Ethernet). + */ + + arp->ar_hrd = htons(np->dev->type); + arp->ar_pro = htons(ETH_P_IP); + arp->ar_hln = np->dev->addr_len; + arp->ar_pln = 4; + arp->ar_op = htons(type); + + arp_ptr = (unsigned char *)(arp + 1); + memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len); + arp_ptr += np->dev->addr_len; + memcpy(arp_ptr, &tip, 4); + arp_ptr += 4; + memcpy(arp_ptr, sha, np->dev->addr_len); + arp_ptr += np->dev->addr_len; + memcpy(arp_ptr, &sip, 4); + + netpoll_send_skb(np, send_skb); + + /* If there are several rx_hooks for the same address, + we're fine by sending a single reply */ + break; } - - /* - * Fill out the arp protocol part. - * - * we only support ethernet device type, - * which (according to RFC 1390) should - * always equal 1 (Ethernet). - */ - - arp->ar_hrd = htons(np->dev->type); - arp->ar_pro = htons(ETH_P_IP); - arp->ar_hln = np->dev->addr_len; - arp->ar_pln = 4; - arp->ar_op = htons(type); - - arp_ptr = (unsigned char *)(arp + 1); - memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len); - arp_ptr += np->dev->addr_len; - memcpy(arp_ptr, &tip, 4); - arp_ptr += 4; - memcpy(arp_ptr, sha, np->dev->addr_len); - arp_ptr += np->dev->addr_len; - memcpy(arp_ptr, &sip, 4); - - netpoll_send_skb(np, send_skb); - - /* If there are several rx_hooks for the same address, - we're fine by sending a single reply */ - break; + spin_unlock_irqrestore(&npinfo->rx_lock, flags); } - spin_unlock_irqrestore(&npinfo->rx_lock, flags); } int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) @@ -576,7 +585,7 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) /* check if netpoll clients need ARP */ if (skb->protocol == htons(ETH_P_ARP) && atomic_read(&trapped)) { - skb_queue_tail(&npinfo->arp_tx, skb); + skb_queue_tail(&npinfo->neigh_tx, skb); return 1; } @@ -587,60 +596,61 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) } proto = ntohs(eth_hdr(skb)->h_proto); - if (proto != ETH_P_IP) + if (proto != ETH_P_IP && proto != ETH_P_IPV6) goto out; if (skb->pkt_type == PACKET_OTHERHOST) goto out; if (skb_shared(skb)) goto out; - if (!pskb_may_pull(skb, sizeof(struct iphdr))) - goto out; - iph = (struct iphdr *)skb->data; - if (iph->ihl < 5 || iph->version != 4) - goto out; - if (!pskb_may_pull(skb, iph->ihl*4)) - goto out; - iph = (struct iphdr *)skb->data; - if (ip_fast_csum((u8 *)iph, iph->ihl) != 0) - goto out; - - len = ntohs(iph->tot_len); - if (skb->len < len || len < iph->ihl*4) - goto out; - - /* - * Our transport medium may have padded the buffer out. - * Now We trim to the true length of the frame. - */ - if (pskb_trim_rcsum(skb, len)) - goto out; + if (proto == ETH_P_IP) { + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + goto out; + iph = (struct iphdr *)skb->data; + if (iph->ihl < 5 || iph->version != 4) + goto out; + if (!pskb_may_pull(skb, iph->ihl*4)) + goto out; + iph = (struct iphdr *)skb->data; + if (ip_fast_csum((u8 *)iph, iph->ihl) != 0) + goto out; - iph = (struct iphdr *)skb->data; - if (iph->protocol != IPPROTO_UDP) - goto out; + len = ntohs(iph->tot_len); + if (skb->len < len || len < iph->ihl*4) + goto out; - len -= iph->ihl*4; - uh = (struct udphdr *)(((char *)iph) + iph->ihl*4); - ulen = ntohs(uh->len); + /* + * Our transport medium may have padded the buffer out. + * Now We trim to the true length of the frame. + */ + if (pskb_trim_rcsum(skb, len)) + goto out; - if (ulen != len) - goto out; - if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr)) - goto out; + iph = (struct iphdr *)skb->data; + if (iph->protocol != IPPROTO_UDP) + goto out; - list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { - if (np->local_ip && np->local_ip != iph->daddr) - continue; - if (np->remote_ip && np->remote_ip != iph->saddr) - continue; - if (np->local_port && np->local_port != ntohs(uh->dest)) - continue; + len -= iph->ihl*4; + uh = (struct udphdr *)(((char *)iph) + iph->ihl*4); + ulen = ntohs(uh->len); - np->rx_hook(np, ntohs(uh->source), - (char *)(uh+1), - ulen - sizeof(struct udphdr)); - hits++; + if (ulen != len) + goto out; + if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr)) + goto out; + list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { + if (np->local_ip.ip && np->local_ip.ip != iph->daddr) + continue; + if (np->remote_ip.ip && np->remote_ip.ip != iph->saddr) + continue; + if (np->local_port && np->local_port != ntohs(uh->dest)) + continue; + + np->rx_hook(np, ntohs(uh->source), + (char *)(uh+1), + ulen - sizeof(struct udphdr)); + hits++; + } } if (!hits) @@ -661,17 +671,40 @@ out: void netpoll_print_options(struct netpoll *np) { np_info(np, "local port %d\n", np->local_port); - np_info(np, "local IP %pI4\n", &np->local_ip); + if (!np->ipv6) + np_info(np, "local IPv4 address %pI4\n", &np->local_ip.ip); np_info(np, "interface '%s'\n", np->dev_name); np_info(np, "remote port %d\n", np->remote_port); - np_info(np, "remote IP %pI4\n", &np->remote_ip); + if (!np->ipv6) + np_info(np, "remote IPv4 address %pI4\n", &np->remote_ip.ip); np_info(np, "remote ethernet address %pM\n", np->remote_mac); } EXPORT_SYMBOL(netpoll_print_options); +static int netpoll_parse_ip_addr(const char *str, union inet_addr *addr) +{ + const char *end; + + if (!strchr(str, ':') && + in4_pton(str, -1, (void *)addr, -1, &end) > 0) { + if (!*end) + return 0; + } + if (in6_pton(str, -1, addr->in6.s6_addr, -1, &end) > 0) { +#if IS_ENABLED(CONFIG_IPV6) + if (!*end) + return 1; +#else + return -1; +#endif + } + return -1; +} + int netpoll_parse_options(struct netpoll *np, char *opt) { char *cur=opt, *delim; + int ipv6; if (*cur != '@') { if ((delim = strchr(cur, '@')) == NULL) @@ -687,7 +720,11 @@ int netpoll_parse_options(struct netpoll *np, char *opt) if ((delim = strchr(cur, '/')) == NULL) goto parse_failed; *delim = 0; - np->local_ip = in_aton(cur); + ipv6 = netpoll_parse_ip_addr(cur, &np->local_ip); + if (ipv6 < 0) + goto parse_failed; + else + np->ipv6 = (bool)ipv6; cur = delim; } cur++; @@ -719,7 +756,13 @@ int netpoll_parse_options(struct netpoll *np, char *opt) if ((delim = strchr(cur, '/')) == NULL) goto parse_failed; *delim = 0; - np->remote_ip = in_aton(cur); + ipv6 = netpoll_parse_ip_addr(cur, &np->remote_ip); + if (ipv6 < 0) + goto parse_failed; + else if (np->ipv6 != (bool)ipv6) + goto parse_failed; + else + np->ipv6 = (bool)ipv6; cur = delim + 1; if (*cur != 0) { @@ -767,7 +810,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) INIT_LIST_HEAD(&npinfo->rx_np); spin_lock_init(&npinfo->rx_lock); - skb_queue_head_init(&npinfo->arp_tx); + skb_queue_head_init(&npinfo->neigh_tx); skb_queue_head_init(&npinfo->txq); INIT_DELAYED_WORK(&npinfo->tx_work, queue_process); @@ -859,21 +902,24 @@ int netpoll_setup(struct netpoll *np) } } - if (!np->local_ip) { - rcu_read_lock(); - in_dev = __in_dev_get_rcu(ndev); + if (!np->local_ip.ip) { + if (!np->ipv6) { + rcu_read_lock(); + in_dev = __in_dev_get_rcu(ndev); - if (!in_dev || !in_dev->ifa_list) { + + if (!in_dev || !in_dev->ifa_list) { + rcu_read_unlock(); + np_err(np, "no IP address for %s, aborting\n", + np->dev_name); + err = -EDESTADDRREQ; + goto put; + } + + np->local_ip.ip = in_dev->ifa_list->ifa_local; rcu_read_unlock(); - np_err(np, "no IP address for %s, aborting\n", - np->dev_name); - err = -EDESTADDRREQ; - goto put; + np_info(np, "local IP %pI4\n", &np->local_ip.ip); } - - np->local_ip = in_dev->ifa_list->ifa_local; - rcu_read_unlock(); - np_info(np, "local IP %pI4\n", &np->local_ip); } /* fill up the skb queue */ @@ -906,7 +952,7 @@ static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head) struct netpoll_info *npinfo = container_of(rcu_head, struct netpoll_info, rcu); - skb_queue_purge(&npinfo->arp_tx); + skb_queue_purge(&npinfo->neigh_tx); skb_queue_purge(&npinfo->txq); /* we can't call cancel_delayed_work_sync here, as we are in softirq */ -- cgit v1.2.3 From b3d936f3ea1c97c32680e0cd235474cf9dadb762 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 7 Jan 2013 20:52:41 +0000 Subject: netpoll: add IPv6 support Currently, netpoll only supports IPv4. This patch adds IPv6 support to netpoll so that we can run netconsole over IPv6 network. Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/core/netpoll.c | 246 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 236 insertions(+), 10 deletions(-) (limited to 'net/core/netpoll.c') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 6bd073688f68..9f0506726101 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -29,6 +29,9 @@ #include #include #include +#include +#include +#include #include #include @@ -384,9 +387,12 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) struct iphdr *iph; struct ethhdr *eth; static atomic_t ip_ident; + struct ipv6hdr *ip6h; udp_len = len + sizeof(*udph); - if (!np->ipv6) + if (np->ipv6) + ip_len = udp_len + sizeof(*ip6h); + else ip_len = udp_len + sizeof(*iph); total_len = ip_len + LL_RESERVED_SPACE(np->dev); @@ -406,7 +412,35 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) udph->dest = htons(np->remote_port); udph->len = htons(udp_len); - if (!np->ipv6) { + if (np->ipv6) { + udph->check = 0; + udph->check = csum_ipv6_magic(&np->local_ip.in6, + &np->remote_ip.in6, + udp_len, IPPROTO_UDP, + csum_partial(udph, udp_len, 0)); + if (udph->check == 0) + udph->check = CSUM_MANGLED_0; + + skb_push(skb, sizeof(*ip6h)); + skb_reset_network_header(skb); + ip6h = ipv6_hdr(skb); + + /* ip6h->version = 6; ip6h->priority = 0; */ + put_unaligned(0x60, (unsigned char *)ip6h); + ip6h->flow_lbl[0] = 0; + ip6h->flow_lbl[1] = 0; + ip6h->flow_lbl[2] = 0; + + ip6h->payload_len = htons(sizeof(struct udphdr) + len); + ip6h->nexthdr = IPPROTO_UDP; + ip6h->hop_limit = 32; + ip6h->saddr = np->local_ip.in6; + ip6h->daddr = np->remote_ip.in6; + + eth = (struct ethhdr *) skb_push(skb, ETH_HLEN); + skb_reset_mac_header(skb); + skb->protocol = eth->h_proto = htons(ETH_P_IPV6); + } else { udph->check = 0; udph->check = csum_tcpudp_magic(np->local_ip.ip, np->remote_ip.ip, @@ -448,9 +482,7 @@ EXPORT_SYMBOL(netpoll_send_udp); static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo) { - struct arphdr *arp; - unsigned char *arp_ptr; - int size, type = ARPOP_REPLY, ptype = ETH_P_ARP; + int size, type = ARPOP_REPLY; __be32 sip, tip; unsigned char *sha; struct sk_buff *send_skb; @@ -477,6 +509,8 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo proto = ntohs(eth_hdr(skb)->h_proto); if (proto == ETH_P_IP) { + struct arphdr *arp; + unsigned char *arp_ptr; /* No arp on this interface */ if (skb->dev->flags & IFF_NOARP) return; @@ -528,7 +562,7 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo send_skb->protocol = htons(ETH_P_ARP); /* Fill the device header for the ARP frame */ - if (dev_hard_header(send_skb, skb->dev, ptype, + if (dev_hard_header(send_skb, skb->dev, ETH_P_ARP, sha, np->dev->dev_addr, send_skb->len) < 0) { kfree_skb(send_skb); @@ -565,9 +599,124 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo break; } spin_unlock_irqrestore(&npinfo->rx_lock, flags); + } else if( proto == ETH_P_IPV6) { +#if IS_ENABLED(CONFIG_IPV6) + struct nd_msg *msg; + u8 *lladdr = NULL; + struct ipv6hdr *hdr; + struct icmp6hdr *icmp6h; + const struct in6_addr *saddr; + const struct in6_addr *daddr; + struct inet6_dev *in6_dev = NULL; + struct in6_addr *target; + + in6_dev = in6_dev_get(skb->dev); + if (!in6_dev || !in6_dev->cnf.accept_ra) + return; + + if (!pskb_may_pull(skb, skb->len)) + return; + + msg = (struct nd_msg *)skb_transport_header(skb); + + __skb_push(skb, skb->data - skb_transport_header(skb)); + + if (ipv6_hdr(skb)->hop_limit != 255) + return; + if (msg->icmph.icmp6_code != 0) + return; + if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION) + return; + + saddr = &ipv6_hdr(skb)->saddr; + daddr = &ipv6_hdr(skb)->daddr; + + size = sizeof(struct icmp6hdr) + sizeof(struct in6_addr); + + spin_lock_irqsave(&npinfo->rx_lock, flags); + list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { + if (memcmp(daddr, &np->local_ip, sizeof(*daddr))) + continue; + + hlen = LL_RESERVED_SPACE(np->dev); + tlen = np->dev->needed_tailroom; + send_skb = find_skb(np, size + hlen + tlen, hlen); + if (!send_skb) + continue; + + send_skb->protocol = htons(ETH_P_IPV6); + send_skb->dev = skb->dev; + + skb_reset_network_header(send_skb); + skb_put(send_skb, sizeof(struct ipv6hdr)); + hdr = ipv6_hdr(send_skb); + + *(__be32*)hdr = htonl(0x60000000); + + hdr->payload_len = htons(size); + hdr->nexthdr = IPPROTO_ICMPV6; + hdr->hop_limit = 255; + hdr->saddr = *saddr; + hdr->daddr = *daddr; + + send_skb->transport_header = send_skb->tail; + skb_put(send_skb, size); + + icmp6h = (struct icmp6hdr *)skb_transport_header(skb); + icmp6h->icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT; + icmp6h->icmp6_router = 0; + icmp6h->icmp6_solicited = 1; + target = (struct in6_addr *)skb_transport_header(send_skb) + sizeof(struct icmp6hdr); + *target = msg->target; + icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, size, + IPPROTO_ICMPV6, + csum_partial(icmp6h, + size, 0)); + + if (dev_hard_header(send_skb, skb->dev, ETH_P_IPV6, + lladdr, np->dev->dev_addr, + send_skb->len) < 0) { + kfree_skb(send_skb); + continue; + } + + netpoll_send_skb(np, send_skb); + + /* If there are several rx_hooks for the same address, + we're fine by sending a single reply */ + break; + } + spin_unlock_irqrestore(&npinfo->rx_lock, flags); +#endif } } +static bool pkt_is_ns(struct sk_buff *skb) +{ + struct nd_msg *msg; + struct ipv6hdr *hdr; + + if (skb->protocol != htons(ETH_P_ARP)) + return false; + if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + sizeof(struct nd_msg))) + return false; + + msg = (struct nd_msg *)skb_transport_header(skb); + __skb_push(skb, skb->data - skb_transport_header(skb)); + hdr = ipv6_hdr(skb); + + if (hdr->nexthdr != IPPROTO_ICMPV6) + return false; + if (hdr->hop_limit != 255) + return false; + if (msg->icmph.icmp6_code != 0) + return false; + if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION) + return false; + + return true; +} + int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) { int proto, len, ulen; @@ -583,8 +732,10 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) goto out; /* check if netpoll clients need ARP */ - if (skb->protocol == htons(ETH_P_ARP) && - atomic_read(&trapped)) { + if (skb->protocol == htons(ETH_P_ARP) && atomic_read(&trapped)) { + skb_queue_tail(&npinfo->neigh_tx, skb); + return 1; + } else if (pkt_is_ns(skb) && atomic_read(&trapped)) { skb_queue_tail(&npinfo->neigh_tx, skb); return 1; } @@ -651,6 +802,45 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) ulen - sizeof(struct udphdr)); hits++; } + } else { +#if IS_ENABLED(CONFIG_IPV6) + const struct ipv6hdr *ip6h; + + if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) + goto out; + ip6h = (struct ipv6hdr *)skb->data; + if (ip6h->version != 6) + goto out; + len = ntohs(ip6h->payload_len); + if (!len) + goto out; + if (len + sizeof(struct ipv6hdr) > skb->len) + goto out; + if (pskb_trim_rcsum(skb, len + sizeof(struct ipv6hdr))) + goto out; + ip6h = ipv6_hdr(skb); + if (!pskb_may_pull(skb, sizeof(struct udphdr))) + goto out; + uh = udp_hdr(skb); + ulen = ntohs(uh->len); + if (ulen != skb->len) + goto out; + if (udp6_csum_init(skb, uh, IPPROTO_UDP)) + goto out; + list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { + if (memcmp(&np->local_ip.in6, &ip6h->daddr, sizeof(struct in6_addr)) != 0) + continue; + if (memcmp(&np->remote_ip.in6, &ip6h->saddr, sizeof(struct in6_addr)) != 0) + continue; + if (np->local_port && np->local_port != ntohs(uh->dest)) + continue; + + np->rx_hook(np, ntohs(uh->source), + (char *)(uh+1), + ulen - sizeof(struct udphdr)); + hits++; + } +#endif } if (!hits) @@ -671,11 +861,15 @@ out: void netpoll_print_options(struct netpoll *np) { np_info(np, "local port %d\n", np->local_port); - if (!np->ipv6) + if (np->ipv6) + np_info(np, "local IPv6 address %pI6c\n", &np->local_ip.in6); + else np_info(np, "local IPv4 address %pI4\n", &np->local_ip.ip); np_info(np, "interface '%s'\n", np->dev_name); np_info(np, "remote port %d\n", np->remote_port); - if (!np->ipv6) + if (np->ipv6) + np_info(np, "remote IPv6 address %pI6c\n", &np->remote_ip.in6); + else np_info(np, "remote IPv4 address %pI4\n", &np->remote_ip.ip); np_info(np, "remote ethernet address %pM\n", np->remote_mac); } @@ -919,6 +1113,38 @@ int netpoll_setup(struct netpoll *np) np->local_ip.ip = in_dev->ifa_list->ifa_local; rcu_read_unlock(); np_info(np, "local IP %pI4\n", &np->local_ip.ip); + } else { +#if IS_ENABLED(CONFIG_IPV6) + struct inet6_dev *idev; + + err = -EDESTADDRREQ; + rcu_read_lock(); + idev = __in6_dev_get(ndev); + if (idev) { + struct inet6_ifaddr *ifp; + + read_lock_bh(&idev->lock); + list_for_each_entry(ifp, &idev->addr_list, if_list) { + if (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL) + continue; + np->local_ip.in6 = ifp->addr; + err = 0; + break; + } + read_unlock_bh(&idev->lock); + } + rcu_read_unlock(); + if (err) { + np_err(np, "no IPv6 address for %s, aborting\n", + np->dev_name); + goto put; + } else + np_info(np, "local IPv6 %pI6c\n", &np->local_ip.in6); +#else + np_err(np, "IPv6 is not supported %s, aborting\n", + np->dev_name); + goto put; +#endif } } -- cgit v1.2.3 From f92d318023ff70a273690a91568597168ac32f05 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 14 Jan 2013 23:34:06 +0000 Subject: netpoll: fix a rtnl lock assertion failure v4: hold rtnl lock for the whole netpoll_setup() v3: remove the comment v2: use RCU read lock This patch fixes the following warning: [ 72.013864] RTNL: assertion failed at net/core/dev.c (4955) [ 72.017758] Pid: 668, comm: netpoll-prep-v6 Not tainted 3.8.0-rc1+ #474 [ 72.019582] Call Trace: [ 72.020295] [] netdev_master_upper_dev_get+0x35/0x58 [ 72.022545] [] netpoll_setup+0x61/0x340 [ 72.024846] [] store_enabled+0x82/0xc3 [ 72.027466] [] netconsole_target_attr_store+0x35/0x37 [ 72.029348] [] configfs_write_file+0xe2/0x10c [ 72.030959] [] vfs_write+0xaf/0xf6 [ 72.032359] [] ? sysret_check+0x22/0x5d [ 72.033824] [] sys_write+0x5c/0x84 [ 72.035328] [] system_call_fastpath+0x16/0x1b In case of other races, hold rtnl lock for the entire netpoll_setup() function. Cc: Eric Dumazet Cc: Jiri Pirko Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/core/netpoll.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) (limited to 'net/core/netpoll.c') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 9f0506726101..a5ad1c1c4b18 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -1048,11 +1048,13 @@ int netpoll_setup(struct netpoll *np) struct in_device *in_dev; int err; + rtnl_lock(); if (np->dev_name) - ndev = dev_get_by_name(&init_net, np->dev_name); + ndev = __dev_get_by_name(&init_net, np->dev_name); if (!ndev) { np_err(np, "%s doesn't exist, aborting\n", np->dev_name); - return -ENODEV; + err = -ENODEV; + goto unlock; } if (netdev_master_upper_dev_get(ndev)) { @@ -1066,15 +1068,14 @@ int netpoll_setup(struct netpoll *np) np_info(np, "device %s not up yet, forcing it\n", np->dev_name); - rtnl_lock(); err = dev_open(ndev); - rtnl_unlock(); if (err) { np_err(np, "failed to open %s\n", ndev->name); goto put; } + rtnl_unlock(); atleast = jiffies + HZ/10; atmost = jiffies + carrier_timeout * HZ; while (!netif_carrier_ok(ndev)) { @@ -1094,16 +1095,14 @@ int netpoll_setup(struct netpoll *np) np_notice(np, "carrier detect appears untrustworthy, waiting 4 seconds\n"); msleep(4000); } + rtnl_lock(); } if (!np->local_ip.ip) { if (!np->ipv6) { - rcu_read_lock(); - in_dev = __in_dev_get_rcu(ndev); - + in_dev = __in_dev_get_rtnl(ndev); if (!in_dev || !in_dev->ifa_list) { - rcu_read_unlock(); np_err(np, "no IP address for %s, aborting\n", np->dev_name); err = -EDESTADDRREQ; @@ -1111,14 +1110,12 @@ int netpoll_setup(struct netpoll *np) } np->local_ip.ip = in_dev->ifa_list->ifa_local; - rcu_read_unlock(); np_info(np, "local IP %pI4\n", &np->local_ip.ip); } else { #if IS_ENABLED(CONFIG_IPV6) struct inet6_dev *idev; err = -EDESTADDRREQ; - rcu_read_lock(); idev = __in6_dev_get(ndev); if (idev) { struct inet6_ifaddr *ifp; @@ -1133,7 +1130,6 @@ int netpoll_setup(struct netpoll *np) } read_unlock_bh(&idev->lock); } - rcu_read_unlock(); if (err) { np_err(np, "no IPv6 address for %s, aborting\n", np->dev_name); @@ -1151,17 +1147,17 @@ int netpoll_setup(struct netpoll *np) /* fill up the skb queue */ refill_skbs(); - rtnl_lock(); err = __netpoll_setup(np, ndev, GFP_KERNEL); - rtnl_unlock(); - if (err) goto put; + rtnl_unlock(); return 0; put: dev_put(ndev); +unlock: + rtnl_unlock(); return err; } EXPORT_SYMBOL(netpoll_setup); -- cgit v1.2.3 From 5bd30d398792eb6351da2087fe81bbf755900991 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 17 Jan 2013 12:21:08 +0800 Subject: netpoll: fix a missing dev refcounting __dev_get_by_name() doesn't refcount the network device, so we have to do this by ourselves. Noticed by Eric. Cc: Eric Dumazet Cc: Jiri Pirko Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/core/netpoll.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/core/netpoll.c') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index a5ad1c1c4b18..a9b10048388a 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -1056,6 +1056,7 @@ int netpoll_setup(struct netpoll *np) err = -ENODEV; goto unlock; } + dev_hold(ndev); if (netdev_master_upper_dev_get(ndev)) { np_err(np, "%s is a slave device, aborting\n", np->dev_name); -- cgit v1.2.3 From e39363a9def53dd4086be107dc8b3ebca09f045d Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 22 Jan 2013 17:39:11 +0000 Subject: netpoll: fix an uninitialized variable Fengguang reported: net/core/netpoll.c: In function 'netpoll_setup': net/core/netpoll.c:1049:6: warning: 'err' may be used uninitialized in this function [-Wmaybe-uninitialized] in !CONFIG_IPV6 case, we may error out without initializing 'err'. Reported-by: Fengguang Wu Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/core/netpoll.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/core/netpoll.c') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index a9b10048388a..e2f79a14625c 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -1140,6 +1140,7 @@ int netpoll_setup(struct netpoll *np) #else np_err(np, "IPv6 is not supported %s, aborting\n", np->dev_name); + err = -EINVAL; goto put; #endif } -- cgit v1.2.3 From faeed828f9607923e9dc22182e819908e95c8852 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 27 Jan 2013 15:55:20 +0000 Subject: netpoll: use ipv6_addr_equal() to compare ipv6 addr ipv6_addr_equal() is faster. Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/core/netpoll.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/core/netpoll.c') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index e2f79a14625c..a6f39b6aeec0 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -635,7 +635,7 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo spin_lock_irqsave(&npinfo->rx_lock, flags); list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { - if (memcmp(daddr, &np->local_ip, sizeof(*daddr))) + if (!ipv6_addr_equal(daddr, &np->local_ip.in6)) continue; hlen = LL_RESERVED_SPACE(np->dev); @@ -828,9 +828,9 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) if (udp6_csum_init(skb, uh, IPPROTO_UDP)) goto out; list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { - if (memcmp(&np->local_ip.in6, &ip6h->daddr, sizeof(struct in6_addr)) != 0) + if (!ipv6_addr_equal(&np->local_ip.in6, &ip6h->daddr)) continue; - if (memcmp(&np->remote_ip.in6, &ip6h->saddr, sizeof(struct in6_addr)) != 0) + if (!ipv6_addr_equal(&np->remote_ip.in6, &ip6h->saddr)) continue; if (np->local_port && np->local_port != ntohs(uh->dest)) continue; -- cgit v1.2.3 From 556e6256c813063b95a9f4e55109f77c1a7d3fdd Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 27 Jan 2013 15:55:21 +0000 Subject: netpoll: use the net namespace of current process instead of init_net This will allow us to setup netconsole in a different namespace rather than where init_net is. Cc: Eric W. Biederman Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/core/netpoll.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net/core/netpoll.c') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index a6f39b6aeec0..331ccb90f915 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -1049,8 +1049,10 @@ int netpoll_setup(struct netpoll *np) int err; rtnl_lock(); - if (np->dev_name) - ndev = __dev_get_by_name(&init_net, np->dev_name); + if (np->dev_name) { + struct net *net = current->nsproxy->net_ns; + ndev = __dev_get_by_name(net, np->dev_name); + } if (!ndev) { np_err(np, "%s doesn't exist, aborting\n", np->dev_name); err = -ENODEV; -- cgit v1.2.3 From ca99ca14c95ae49fb4c9cd3abf5f84d11a7e8a61 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Tue, 5 Feb 2013 08:05:43 +0000 Subject: netpoll: protect napi_poll and poll_controller during dev_[open|close] Ivan Vercera was recently backporting commit 9c13cb8bb477a83b9a3c9e5a5478a4e21294a760 to a RHEL kernel, and I noticed that, while this patch protects the tg3 driver from having its ndo_poll_controller routine called during device initalization, it does nothing for the driver during shutdown. I.e. it would be entirely possible to have the ndo_poll_controller method (or subsequently the ndo_poll) routine called for a driver in the netpoll path on CPU A while in parallel on CPU B, the ndo_close or ndo_open routine could be called. Given that the two latter routines tend to initizlize and free many data structures that the former two rely on, the result can easily be data corruption or various other crashes. Furthermore, it seems that this is potentially a problem with all net drivers that support netpoll, and so this should ideally be fixed in a common path. As Ben H Pointed out to me, we can't preform dev_open/dev_close in atomic context, so I've come up with this solution. We can use a mutex to sleep in open/close paths and just do a mutex_trylock in the napi poll path and abandon the poll attempt if we're locked, as we'll just retry the poll on the next send anyway. I've tested this here by flooding netconsole with messages on a system whos nic driver I modfied to periodically return NETDEV_TX_BUSY, so that the netpoll tx workqueue would be forced to send frames and poll the device. While this was going on I rapidly ifdown/up'ed the interface and watched for any problems. I've not found any. Signed-off-by: Neil Horman CC: Ivan Vecera CC: "David S. Miller" CC: Ben Hutchings CC: Francois Romieu CC: Eric Dumazet Signed-off-by: David S. Miller --- net/core/netpoll.c | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'net/core/netpoll.c') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 331ccb90f915..edcd9ad95304 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -47,6 +47,8 @@ static struct sk_buff_head skb_pool; static atomic_t trapped; +static struct srcu_struct netpoll_srcu; + #define USEC_PER_POLL 50 #define NETPOLL_RX_ENABLED 1 #define NETPOLL_RX_DROP 2 @@ -199,6 +201,13 @@ static void netpoll_poll_dev(struct net_device *dev) const struct net_device_ops *ops; struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo); + /* Don't do any rx activity if the dev_lock mutex is held + * the dev_open/close paths use this to block netpoll activity + * while changing device state + */ + if (!mutex_trylock(&dev->npinfo->dev_lock)) + return; + if (!dev || !netif_running(dev)) return; @@ -211,6 +220,8 @@ static void netpoll_poll_dev(struct net_device *dev) poll_napi(dev); + mutex_unlock(&dev->npinfo->dev_lock); + if (dev->flags & IFF_SLAVE) { if (ni) { struct net_device *bond_dev; @@ -231,6 +242,31 @@ static void netpoll_poll_dev(struct net_device *dev) zap_completion_queue(); } +int netpoll_rx_disable(struct net_device *dev) +{ + struct netpoll_info *ni; + int idx; + might_sleep(); + idx = srcu_read_lock(&netpoll_srcu); + ni = srcu_dereference(dev->npinfo, &netpoll_srcu); + if (ni) + mutex_lock(&ni->dev_lock); + srcu_read_unlock(&netpoll_srcu, idx); + return 0; +} +EXPORT_SYMBOL(netpoll_rx_disable); + +void netpoll_rx_enable(struct net_device *dev) +{ + struct netpoll_info *ni; + rcu_read_lock(); + ni = rcu_dereference(dev->npinfo); + if (ni) + mutex_unlock(&ni->dev_lock); + rcu_read_unlock(); +} +EXPORT_SYMBOL(netpoll_rx_enable); + static void refill_skbs(void) { struct sk_buff *skb; @@ -1004,6 +1040,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) INIT_LIST_HEAD(&npinfo->rx_np); spin_lock_init(&npinfo->rx_lock); + mutex_init(&npinfo->dev_lock); skb_queue_head_init(&npinfo->neigh_tx); skb_queue_head_init(&npinfo->txq); INIT_DELAYED_WORK(&npinfo->tx_work, queue_process); @@ -1169,6 +1206,7 @@ EXPORT_SYMBOL(netpoll_setup); static int __init netpoll_init(void) { skb_queue_head_init(&skb_pool); + init_srcu_struct(&netpoll_srcu); return 0; } core_initcall(netpoll_init); @@ -1208,6 +1246,8 @@ void __netpoll_cleanup(struct netpoll *np) spin_unlock_irqrestore(&npinfo->rx_lock, flags); } + synchronize_srcu(&netpoll_srcu); + if (atomic_dec_and_test(&npinfo->refcnt)) { const struct net_device_ops *ops; -- cgit v1.2.3 From 2cde6acd49daca58b96f1fbc697492825511ad31 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 11 Feb 2013 10:25:30 +0000 Subject: netpoll: Fix __netpoll_rcu_free so that it can hold the rtnl lock __netpoll_rcu_free is used to free netpoll structures when the rtnl_lock is already held. The mechanism is used to asynchronously call __netpoll_cleanup outside of the holding of the rtnl_lock, so as to avoid deadlock. Unfortunately, __netpoll_cleanup modifies pointers (dev->np), which means the rtnl_lock must be held while calling it. Further, it cannot be held, because rcu callbacks may be issued in softirq contexts, which cannot sleep. Fix this by converting the rcu callback to a work queue that is guaranteed to get scheduled in process context, so that we can hold the rtnl properly while calling __netpoll_cleanup Tested successfully by myself. Signed-off-by: Neil Horman CC: "David S. Miller" CC: Cong Wang CC: Eric Dumazet Signed-off-by: David S. Miller --- net/core/netpoll.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'net/core/netpoll.c') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index edcd9ad95304..c536474e2260 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -61,6 +61,7 @@ static struct srcu_struct netpoll_srcu; static void zap_completion_queue(void); static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo); +static void netpoll_async_cleanup(struct work_struct *work); static unsigned int carrier_timeout = 4; module_param(carrier_timeout, uint, 0644); @@ -1020,6 +1021,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) np->dev = ndev; strlcpy(np->dev_name, ndev->name, IFNAMSIZ); + INIT_WORK(&np->cleanup_work, netpoll_async_cleanup); if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) || !ndev->netdev_ops->ndo_poll_controller) { @@ -1255,25 +1257,27 @@ void __netpoll_cleanup(struct netpoll *np) if (ops->ndo_netpoll_cleanup) ops->ndo_netpoll_cleanup(np->dev); - RCU_INIT_POINTER(np->dev->npinfo, NULL); + rcu_assign_pointer(np->dev->npinfo, NULL); call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info); } } EXPORT_SYMBOL_GPL(__netpoll_cleanup); -static void rcu_cleanup_netpoll(struct rcu_head *rcu_head) +static void netpoll_async_cleanup(struct work_struct *work) { - struct netpoll *np = container_of(rcu_head, struct netpoll, rcu); + struct netpoll *np = container_of(work, struct netpoll, cleanup_work); + rtnl_lock(); __netpoll_cleanup(np); + rtnl_unlock(); kfree(np); } -void __netpoll_free_rcu(struct netpoll *np) +void __netpoll_free_async(struct netpoll *np) { - call_rcu_bh(&np->rcu, rcu_cleanup_netpoll); + schedule_work(&np->cleanup_work); } -EXPORT_SYMBOL_GPL(__netpoll_free_rcu); +EXPORT_SYMBOL_GPL(__netpoll_free_async); void netpoll_cleanup(struct netpoll *np) { -- cgit v1.2.3 From 0790bbb68f9d483348c1d65381f3dd92602bfd05 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 11 Feb 2013 10:25:31 +0000 Subject: netpoll: cleanup sparse warnings With my recent commit I introduced two sparse warnings. Looking closer there were a few more in the same file, so I fixed them all up. Basic rcu pointer dereferencing suff. I've validated these changes using CONFIG_PROVE_RCU while starting and stopping netconsole repeatedly in bonded and non-bonded configurations Signed-off-by: Neil Horman CC: fengguang.wu@intel.com CC: David Miller CC: eric.dumazet@gmail.com Signed-off-by: David S. Miller --- net/core/netpoll.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'net/core/netpoll.c') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index c536474e2260..bcfd4f4599a5 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -206,7 +206,7 @@ static void netpoll_poll_dev(struct net_device *dev) * the dev_open/close paths use this to block netpoll activity * while changing device state */ - if (!mutex_trylock(&dev->npinfo->dev_lock)) + if (!mutex_trylock(&ni->dev_lock)) return; if (!dev || !netif_running(dev)) @@ -221,7 +221,7 @@ static void netpoll_poll_dev(struct net_device *dev) poll_napi(dev); - mutex_unlock(&dev->npinfo->dev_lock); + mutex_unlock(&ni->dev_lock); if (dev->flags & IFF_SLAVE) { if (ni) { @@ -1056,7 +1056,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) goto free_npinfo; } } else { - npinfo = ndev->npinfo; + npinfo = rtnl_dereference(ndev->npinfo); atomic_inc(&npinfo->refcnt); } @@ -1236,7 +1236,11 @@ void __netpoll_cleanup(struct netpoll *np) struct netpoll_info *npinfo; unsigned long flags; - npinfo = np->dev->npinfo; + /* rtnl_dereference would be preferable here but + * rcu_cleanup_netpoll path can put us in here safely without + * holding the rtnl, so plain rcu_dereference it is + */ + npinfo = rtnl_dereference(np->dev->npinfo); if (!npinfo) return; -- cgit v1.2.3 From 959d5fdee7aa32fa04bc2c37e3d3871ad266fe97 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Wed, 13 Feb 2013 11:32:42 -0500 Subject: netpoll: fix smatch warnings in netpoll core code Dan Carpenter contacted me with some notes regarding some smatch warnings in the netpoll code, some of which I introduced with my recent netpoll locking fixes, some which were there prior. Specifically they were: net-next/net/core/netpoll.c:243 netpoll_poll_dev() warn: inconsistent returns mutex:&ni->dev_lock: locked (213,217) unlocked (210,243) net-next/net/core/netpoll.c:706 netpoll_neigh_reply() warn: potential pointer math issue ('skb_transport_header(send_skb)' is a 128 bit pointer) This patch corrects the locking imbalance (the first error), and adds some parenthesis to correct the second error. Tested by myself. Applies to net-next Signed-off-by: Neil Horman CC: Dan Carpenter CC: "David S. Miller" Signed-off-by: David S. Miller --- net/core/netpoll.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'net/core/netpoll.c') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index bcfd4f4599a5..fa32899006a2 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -209,12 +209,16 @@ static void netpoll_poll_dev(struct net_device *dev) if (!mutex_trylock(&ni->dev_lock)) return; - if (!dev || !netif_running(dev)) + if (!netif_running(dev)) { + mutex_unlock(&ni->dev_lock); return; + } ops = dev->netdev_ops; - if (!ops->ndo_poll_controller) + if (!ops->ndo_poll_controller) { + mutex_unlock(&ni->dev_lock); return; + } /* Process pending work on NIC */ ops->ndo_poll_controller(dev); @@ -703,7 +707,7 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo icmp6h->icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT; icmp6h->icmp6_router = 0; icmp6h->icmp6_solicited = 1; - target = (struct in6_addr *)skb_transport_header(send_skb) + sizeof(struct icmp6hdr); + target = (struct in6_addr *)(skb_transport_header(send_skb) + sizeof(struct icmp6hdr)); *target = msg->target; icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, size, IPPROTO_ICMPV6, -- cgit v1.2.3