summaryrefslogtreecommitdiff
path: root/net/ipv6/ip6_tunnel.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-05-17 16:26:30 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2016-05-17 16:26:30 -0700
commita7fd20d1c476af4563e66865213474a2f9f473a4 (patch)
treefb1399e2f82842450245fb058a8fb23c52865f43 /net/ipv6/ip6_tunnel.c
parentb80fed9595513384424cd141923c9161c4b5021b (diff)
parent917fa5353da05e8a0045b8acacba8d50400d5b12 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: "Highlights: 1) Support SPI based w5100 devices, from Akinobu Mita. 2) Partial Segmentation Offload, from Alexander Duyck. 3) Add GMAC4 support to stmmac driver, from Alexandre TORGUE. 4) Allow cls_flower stats offload, from Amir Vadai. 5) Implement bpf blinding, from Daniel Borkmann. 6) Optimize _ASYNC_ bit twiddling on sockets, unless the socket is actually using FASYNC these atomics are superfluous. From Eric Dumazet. 7) Run TCP more preemptibly, also from Eric Dumazet. 8) Support LED blinking, EEPROM dumps, and rxvlan offloading in mlx5e driver, from Gal Pressman. 9) Allow creating ppp devices via rtnetlink, from Guillaume Nault. 10) Improve BPF usage documentation, from Jesper Dangaard Brouer. 11) Support tunneling offloads in qed, from Manish Chopra. 12) aRFS offloading in mlx5e, from Maor Gottlieb. 13) Add RFS and RPS support to SCTP protocol, from Marcelo Ricardo Leitner. 14) Add MSG_EOR support to TCP, this allows controlling packet coalescing on application record boundaries for more accurate socket timestamp sampling. From Martin KaFai Lau. 15) Fix alignment of 64-bit netlink attributes across the board, from Nicolas Dichtel. 16) Per-vlan stats in bridging, from Nikolay Aleksandrov. 17) Several conversions of drivers to ethtool ksettings, from Philippe Reynes. 18) Checksum neutral ILA in ipv6, from Tom Herbert. 19) Factorize all of the various marvell dsa drivers into one, from Vivien Didelot 20) Add VF support to qed driver, from Yuval Mintz" * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1649 commits) Revert "phy dp83867: Fix compilation with CONFIG_OF_MDIO=m" Revert "phy dp83867: Make rgmii parameters optional" r8169: default to 64-bit DMA on recent PCIe chips phy dp83867: Make rgmii parameters optional phy dp83867: Fix compilation with CONFIG_OF_MDIO=m bpf: arm64: remove callee-save registers use for tmp registers asix: Fix offset calculation in asix_rx_fixup() causing slow transmissions switchdev: pass pointer to fib_info instead of copy net_sched: close another race condition in tcf_mirred_release() tipc: fix nametable publication field in nl compat drivers: net: Don't print unpopulated net_device name qed: add support for dcbx. ravb: Add missing free_irq() calls to ravb_close() qed: Remove a stray tab net: ethernet: fec-mpc52xx: use phy_ethtool_{get|set}_link_ksettings net: ethernet: fec-mpc52xx: use phydev from struct net_device bpf, doc: fix typo on bpf_asm descriptions stmmac: hardware TX COE doesn't work when force_thresh_dma_mode is set net: ethernet: fs-enet: use phy_ethtool_{get|set}_link_ksettings net: ethernet: fs-enet: use phydev from struct net_device ...
Diffstat (limited to 'net/ipv6/ip6_tunnel.c')
-rw-r--r--net/ipv6/ip6_tunnel.c268
1 files changed, 177 insertions, 91 deletions
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 1f20345cbc97..e79330f214bd 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -238,6 +238,7 @@ static void ip6_dev_free(struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
+ gro_cells_destroy(&t->gro_cells);
dst_cache_destroy(&t->dst_cache);
free_percpu(dev->tstats);
free_netdev(dev);
@@ -753,97 +754,157 @@ int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
}
EXPORT_SYMBOL_GPL(ip6_tnl_rcv_ctl);
-/**
- * ip6_tnl_rcv - decapsulate IPv6 packet and retransmit it locally
- * @skb: received socket buffer
- * @protocol: ethernet protocol ID
- * @dscp_ecn_decapsulate: the function to decapsulate DSCP code and ECN
- *
- * Return: 0
- **/
-
-static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
- __u8 ipproto,
- int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t,
- const struct ipv6hdr *ipv6h,
- struct sk_buff *skb))
+static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
+ const struct tnl_ptk_info *tpi,
+ struct metadata_dst *tun_dst,
+ int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t,
+ const struct ipv6hdr *ipv6h,
+ struct sk_buff *skb),
+ bool log_ecn_err)
{
- struct ip6_tnl *t;
+ struct pcpu_sw_netstats *tstats;
const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
- u8 tproto;
int err;
- rcu_read_lock();
- t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr);
- if (t) {
- struct pcpu_sw_netstats *tstats;
+ if ((!(tpi->flags & TUNNEL_CSUM) &&
+ (tunnel->parms.i_flags & TUNNEL_CSUM)) ||
+ ((tpi->flags & TUNNEL_CSUM) &&
+ !(tunnel->parms.i_flags & TUNNEL_CSUM))) {
+ tunnel->dev->stats.rx_crc_errors++;
+ tunnel->dev->stats.rx_errors++;
+ goto drop;
+ }
- tproto = ACCESS_ONCE(t->parms.proto);
- if (tproto != ipproto && tproto != 0) {
- rcu_read_unlock();
- goto discard;
+ if (tunnel->parms.i_flags & TUNNEL_SEQ) {
+ if (!(tpi->flags & TUNNEL_SEQ) ||
+ (tunnel->i_seqno &&
+ (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
+ tunnel->dev->stats.rx_fifo_errors++;
+ tunnel->dev->stats.rx_errors++;
+ goto drop;
}
+ tunnel->i_seqno = ntohl(tpi->seq) + 1;
+ }
- if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
- rcu_read_unlock();
- goto discard;
- }
+ skb->protocol = tpi->proto;
- if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr)) {
- t->dev->stats.rx_dropped++;
- rcu_read_unlock();
- goto discard;
+ /* Warning: All skb pointers will be invalidated! */
+ if (tunnel->dev->type == ARPHRD_ETHER) {
+ if (!pskb_may_pull(skb, ETH_HLEN)) {
+ tunnel->dev->stats.rx_length_errors++;
+ tunnel->dev->stats.rx_errors++;
+ goto drop;
}
- skb->mac_header = skb->network_header;
- skb_reset_network_header(skb);
- skb->protocol = htons(protocol);
- memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
-
- __skb_tunnel_rx(skb, t->dev, t->net);
-
- err = dscp_ecn_decapsulate(t, ipv6h, skb);
- if (unlikely(err)) {
- if (log_ecn_error)
- net_info_ratelimited("non-ECT from %pI6 with dsfield=%#x\n",
- &ipv6h->saddr,
- ipv6_get_dsfield(ipv6h));
- if (err > 1) {
- ++t->dev->stats.rx_frame_errors;
- ++t->dev->stats.rx_errors;
- rcu_read_unlock();
- goto discard;
- }
+
+ ipv6h = ipv6_hdr(skb);
+ skb->protocol = eth_type_trans(skb, tunnel->dev);
+ skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
+ } else {
+ skb->dev = tunnel->dev;
+ }
+
+ skb_reset_network_header(skb);
+ memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
+
+ __skb_tunnel_rx(skb, tunnel->dev, tunnel->net);
+
+ err = dscp_ecn_decapsulate(tunnel, ipv6h, skb);
+ if (unlikely(err)) {
+ if (log_ecn_err)
+ net_info_ratelimited("non-ECT from %pI6 with DS=%#x\n",
+ &ipv6h->saddr,
+ ipv6_get_dsfield(ipv6h));
+ if (err > 1) {
+ ++tunnel->dev->stats.rx_frame_errors;
+ ++tunnel->dev->stats.rx_errors;
+ goto drop;
}
+ }
- tstats = this_cpu_ptr(t->dev->tstats);
- u64_stats_update_begin(&tstats->syncp);
- tstats->rx_packets++;
- tstats->rx_bytes += skb->len;
- u64_stats_update_end(&tstats->syncp);
+ tstats = this_cpu_ptr(tunnel->dev->tstats);
+ u64_stats_update_begin(&tstats->syncp);
+ tstats->rx_packets++;
+ tstats->rx_bytes += skb->len;
+ u64_stats_update_end(&tstats->syncp);
- netif_rx(skb);
+ skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
- rcu_read_unlock();
- return 0;
+ gro_cells_receive(&tunnel->gro_cells, skb);
+ return 0;
+
+drop:
+ kfree_skb(skb);
+ return 0;
+}
+
+int ip6_tnl_rcv(struct ip6_tnl *t, struct sk_buff *skb,
+ const struct tnl_ptk_info *tpi,
+ struct metadata_dst *tun_dst,
+ bool log_ecn_err)
+{
+ return __ip6_tnl_rcv(t, skb, tpi, NULL, ip6ip6_dscp_ecn_decapsulate,
+ log_ecn_err);
+}
+EXPORT_SYMBOL(ip6_tnl_rcv);
+
+static const struct tnl_ptk_info tpi_v6 = {
+ /* no tunnel info required for ipxip6. */
+ .proto = htons(ETH_P_IPV6),
+};
+
+static const struct tnl_ptk_info tpi_v4 = {
+ /* no tunnel info required for ipxip6. */
+ .proto = htons(ETH_P_IP),
+};
+
+static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto,
+ const struct tnl_ptk_info *tpi,
+ int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t,
+ const struct ipv6hdr *ipv6h,
+ struct sk_buff *skb))
+{
+ struct ip6_tnl *t;
+ const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ int ret = -1;
+
+ rcu_read_lock();
+ t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr);
+
+ if (t) {
+ u8 tproto = ACCESS_ONCE(t->parms.proto);
+
+ if (tproto != ipproto && tproto != 0)
+ goto drop;
+ if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
+ goto drop;
+ if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr))
+ goto drop;
+ if (iptunnel_pull_header(skb, 0, tpi->proto, false))
+ goto drop;
+ ret = __ip6_tnl_rcv(t, skb, tpi, NULL, dscp_ecn_decapsulate,
+ log_ecn_error);
}
+
rcu_read_unlock();
- return 1;
-discard:
+ return ret;
+
+drop:
+ rcu_read_unlock();
kfree_skb(skb);
return 0;
}
static int ip4ip6_rcv(struct sk_buff *skb)
{
- return ip6_tnl_rcv(skb, ETH_P_IP, IPPROTO_IPIP,
- ip4ip6_dscp_ecn_decapsulate);
+ return ipxip6_rcv(skb, IPPROTO_IPIP, &tpi_v4,
+ ip4ip6_dscp_ecn_decapsulate);
}
static int ip6ip6_rcv(struct sk_buff *skb)
{
- return ip6_tnl_rcv(skb, ETH_P_IPV6, IPPROTO_IPV6,
- ip6ip6_dscp_ecn_decapsulate);
+ return ipxip6_rcv(skb, IPPROTO_IPV6, &tpi_v6,
+ ip6ip6_dscp_ecn_decapsulate);
}
struct ipv6_tel_txoption {
@@ -918,13 +979,14 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t,
EXPORT_SYMBOL_GPL(ip6_tnl_xmit_ctl);
/**
- * ip6_tnl_xmit2 - encapsulate packet and send
+ * ip6_tnl_xmit - encapsulate packet and send
* @skb: the outgoing socket buffer
* @dev: the outgoing tunnel device
* @dsfield: dscp code for outer header
- * @fl: flow of tunneled packet
+ * @fl6: flow of tunneled packet
* @encap_limit: encapsulation limit
* @pmtu: Path MTU is stored if packet is too big
+ * @proto: next header value
*
* Description:
* Build new header and do some sanity checks on the packet before sending
@@ -936,12 +998,9 @@ EXPORT_SYMBOL_GPL(ip6_tnl_xmit_ctl);
* %-EMSGSIZE message too big. return mtu in this case.
**/
-static int ip6_tnl_xmit2(struct sk_buff *skb,
- struct net_device *dev,
- __u8 dsfield,
- struct flowi6 *fl6,
- int encap_limit,
- __u32 *pmtu)
+int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
+ struct flowi6 *fl6, int encap_limit, __u32 *pmtu,
+ __u8 proto)
{
struct ip6_tnl *t = netdev_priv(dev);
struct net *net = t->net;
@@ -952,7 +1011,6 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
struct net_device *tdev;
int mtu;
unsigned int max_headroom = sizeof(struct ipv6hdr);
- u8 proto;
int err = -1;
/* NBMA tunnel */
@@ -1014,12 +1072,23 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
mtu = IPV6_MIN_MTU;
if (skb_dst(skb))
skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
- if (skb->len > mtu) {
+ if (skb->len > mtu && !skb_is_gso(skb)) {
*pmtu = mtu;
err = -EMSGSIZE;
goto tx_err_dst_release;
}
+ if (t->err_count > 0) {
+ if (time_before(jiffies,
+ t->err_time + IP6TUNNEL_ERR_TIMEO)) {
+ t->err_count--;
+
+ dst_link_failure(skb);
+ } else {
+ t->err_count = 0;
+ }
+ }
+
skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev)));
/*
@@ -1045,9 +1114,6 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
dst_cache_set_ip6(&t->dst_cache, ndst, &fl6->saddr);
skb_dst_set(skb, dst);
- skb->transport_header = skb->network_header;
-
- proto = fl6->flowi6_proto;
if (encap_limit >= 0) {
init_tel_txopt(&opt, encap_limit);
ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
@@ -1058,6 +1124,11 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
skb->encapsulation = 1;
}
+ max_headroom = LL_RESERVED_SPACE(dst->dev) + sizeof(struct ipv6hdr)
+ + dst->header_len;
+ if (max_headroom > dev->needed_headroom)
+ dev->needed_headroom = max_headroom;
+
skb_push(skb, sizeof(struct ipv6hdr));
skb_reset_network_header(skb);
ipv6h = ipv6_hdr(skb);
@@ -1076,6 +1147,7 @@ tx_err_dst_release:
dst_release(dst);
return err;
}
+EXPORT_SYMBOL(ip6_tnl_xmit);
static inline int
ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -1099,7 +1171,6 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
encap_limit = t->parms.encap_limit;
memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
- fl6.flowi6_proto = IPPROTO_IPIP;
dsfield = ipv4_get_dsfield(iph);
@@ -1109,7 +1180,8 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
fl6.flowi6_mark = skb->mark;
- err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
+ err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
+ IPPROTO_IPIP);
if (err != 0) {
/* XXX: send ICMP error even if DF is not set. */
if (err == -EMSGSIZE)
@@ -1153,7 +1225,6 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
encap_limit = t->parms.encap_limit;
memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
- fl6.flowi6_proto = IPPROTO_IPV6;
dsfield = ipv6_get_dsfield(ipv6h);
if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
@@ -1163,7 +1234,8 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
fl6.flowi6_mark = skb->mark;
- err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
+ err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
+ IPPROTO_IPV6);
if (err != 0) {
if (err == -EMSGSIZE)
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
@@ -1174,7 +1246,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
}
static netdev_tx_t
-ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
+ip6_tnl_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
struct net_device_stats *stats = &t->dev->stats;
@@ -1370,6 +1442,8 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
struct net *net = t->net;
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
+ memset(&p1, 0, sizeof(p1));
+
switch (cmd) {
case SIOCGETTUNNEL:
if (dev == ip6n->fb_tnl_dev) {
@@ -1464,8 +1538,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
* %-EINVAL if mtu too small
**/
-static int
-ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
+int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
{
struct ip6_tnl *tnl = netdev_priv(dev);
@@ -1481,6 +1554,7 @@ ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
dev->mtu = new_mtu;
return 0;
}
+EXPORT_SYMBOL(ip6_tnl_change_mtu);
int ip6_tnl_get_iflink(const struct net_device *dev)
{
@@ -1493,7 +1567,7 @@ EXPORT_SYMBOL(ip6_tnl_get_iflink);
static const struct net_device_ops ip6_tnl_netdev_ops = {
.ndo_init = ip6_tnl_dev_init,
.ndo_uninit = ip6_tnl_dev_uninit,
- .ndo_start_xmit = ip6_tnl_xmit,
+ .ndo_start_xmit = ip6_tnl_start_xmit,
.ndo_do_ioctl = ip6_tnl_ioctl,
.ndo_change_mtu = ip6_tnl_change_mtu,
.ndo_get_stats = ip6_get_stats,
@@ -1549,13 +1623,25 @@ ip6_tnl_dev_init_gen(struct net_device *dev)
return -ENOMEM;
ret = dst_cache_init(&t->dst_cache, GFP_KERNEL);
- if (ret) {
- free_percpu(dev->tstats);
- dev->tstats = NULL;
- return ret;
- }
+ if (ret)
+ goto free_stats;
+
+ ret = gro_cells_init(&t->gro_cells, dev);
+ if (ret)
+ goto destroy_dst;
+
+ t->hlen = 0;
+ t->tun_hlen = 0;
return 0;
+
+destroy_dst:
+ dst_cache_destroy(&t->dst_cache);
+free_stats:
+ free_percpu(dev->tstats);
+ dev->tstats = NULL;
+
+ return ret;
}
/**