From 018c1dda5ff1e7bd1fe2d9fd1d0f5b82dc6fc0cd Mon Sep 17 00:00:00 2001 From: Eric Garver Date: Wed, 7 Sep 2016 12:56:59 -0400 Subject: openvswitch: 802.1AD Flow handling, actions, vlan parsing, netlink attributes Add support for 802.1ad including the ability to push and pop double tagged vlans. Add support for 802.1ad to netlink parsing and flow conversion. Uses double nested encap attributes to represent double tagged vlan. Inner TPID encoded along with ctci in nested attributes. This is based on Thomas F Herbert's original v20 patch. I made some small clean ups and bug fixes. Signed-off-by: Thomas F Herbert Signed-off-by: Eric Garver Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/flow.c | 65 ++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 47 insertions(+), 18 deletions(-) (limited to 'net/openvswitch/flow.c') diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 0ea128eeeab2..1240ae3b88d2 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -302,24 +302,57 @@ static bool icmp6hdr_ok(struct sk_buff *skb) sizeof(struct icmp6hdr)); } -static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key) +/** + * Parse vlan tag from vlan header. + * Returns ERROR on memory error. + * Returns 0 if it encounters a non-vlan or incomplete packet. + * Returns 1 after successfully parsing vlan tag. + */ +static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh) { - struct qtag_prefix { - __be16 eth_type; /* ETH_P_8021Q */ - __be16 tci; - }; - struct qtag_prefix *qp; + struct vlan_head *vh = (struct vlan_head *)skb->data; - if (unlikely(skb->len < sizeof(struct qtag_prefix) + sizeof(__be16))) + if (likely(!eth_type_vlan(vh->tpid))) return 0; - if (unlikely(!pskb_may_pull(skb, sizeof(struct qtag_prefix) + - sizeof(__be16)))) + if (unlikely(skb->len < sizeof(struct vlan_head) + sizeof(__be16))) + return 0; + + if (unlikely(!pskb_may_pull(skb, sizeof(struct vlan_head) + + sizeof(__be16)))) return -ENOMEM; - qp = (struct qtag_prefix *) skb->data; - key->eth.tci = qp->tci | htons(VLAN_TAG_PRESENT); - __skb_pull(skb, sizeof(struct qtag_prefix)); + vh = (struct vlan_head *)skb->data; + key_vh->tci = vh->tci | htons(VLAN_TAG_PRESENT); + key_vh->tpid = vh->tpid; + + __skb_pull(skb, sizeof(struct vlan_head)); + return 1; +} + +static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key) +{ + int res; + + key->eth.vlan.tci = 0; + key->eth.vlan.tpid = 0; + key->eth.cvlan.tci = 0; + key->eth.cvlan.tpid = 0; + + if (likely(skb_vlan_tag_present(skb))) { + key->eth.vlan.tci = htons(skb->vlan_tci); + key->eth.vlan.tpid = skb->vlan_proto; + } else { + /* Parse outer vlan tag in the non-accelerated case. */ + res = parse_vlan_tag(skb, &key->eth.vlan); + if (res <= 0) + return res; + } + + /* Parse inner vlan tag. */ + res = parse_vlan_tag(skb, &key->eth.cvlan); + if (res <= 0) + return res; return 0; } @@ -480,12 +513,8 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) * update skb->csum here. */ - key->eth.tci = 0; - if (skb_vlan_tag_present(skb)) - key->eth.tci = htons(skb->vlan_tci); - else if (eth->h_proto == htons(ETH_P_8021Q)) - if (unlikely(parse_vlan(skb, key))) - return -ENOMEM; + if (unlikely(parse_vlan(skb, key))) + return -ENOMEM; key->eth.type = parse_ethertype(skb); if (unlikely(key->eth.type == htons(0))) -- cgit v1.2.3 From 40773966ccf1985a1b2bb570a03cbeaf1cbd4e00 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Thu, 15 Sep 2016 19:11:52 -0300 Subject: openvswitch: fix flow stats accounting when node 0 is not possible On a system with only node 1 as possible, all statistics is going to be accounted on node 0 as it will have a single writer. However, when getting and clearing the statistics, node 0 is not going to be considered, as it's not a possible node. Tested that statistics are not zero on a system with only node 1 possible. Also compile-tested with CONFIG_NUMA off. Signed-off-by: Thadeu Lima de Souza Cascardo Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/flow.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net/openvswitch/flow.c') diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 1240ae3b88d2..5b80612df182 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -142,7 +142,8 @@ void ovs_flow_stats_get(const struct sw_flow *flow, *tcp_flags = 0; memset(ovs_stats, 0, sizeof(*ovs_stats)); - for_each_node(node) { + /* We open code this to make sure node 0 is always considered */ + for (node = 0; node < MAX_NUMNODES; node = next_node(node, node_possible_map)) { struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[node]); if (stats) { @@ -165,7 +166,8 @@ void ovs_flow_stats_clear(struct sw_flow *flow) { int node; - for_each_node(node) { + /* We open code this to make sure node 0 is always considered */ + for (node = 0; node < MAX_NUMNODES; node = next_node(node, node_possible_map)) { struct flow_stats *stats = ovsl_dereference(flow->stats[node]); if (stats) { -- cgit v1.2.3 From db74a3335e0f645e3139c80bcfc90feb01d8e304 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Thu, 15 Sep 2016 19:11:53 -0300 Subject: openvswitch: use percpu flow stats Instead of using flow stats per NUMA node, use it per CPU. When using megaflows, the stats lock can be a bottleneck in scalability. On a E5-2690 12-core system, usual throughput went from ~4Mpps to ~15Mpps when forwarding between two 40GbE ports with a single flow configured on the datapath. This has been tested on a system with possible CPUs 0-7,16-23. After module removal, there were no corruption on the slab cache. Signed-off-by: Thadeu Lima de Souza Cascardo Cc: pravin shelar Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/flow.c | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) (limited to 'net/openvswitch/flow.c') diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 5b80612df182..0fa45439def1 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -72,32 +73,33 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags, { struct flow_stats *stats; int node = numa_node_id(); + int cpu = smp_processor_id(); int len = skb->len + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); - stats = rcu_dereference(flow->stats[node]); + stats = rcu_dereference(flow->stats[cpu]); - /* Check if already have node-specific stats. */ + /* Check if already have CPU-specific stats. */ if (likely(stats)) { spin_lock(&stats->lock); /* Mark if we write on the pre-allocated stats. */ - if (node == 0 && unlikely(flow->stats_last_writer != node)) - flow->stats_last_writer = node; + if (cpu == 0 && unlikely(flow->stats_last_writer != cpu)) + flow->stats_last_writer = cpu; } else { stats = rcu_dereference(flow->stats[0]); /* Pre-allocated. */ spin_lock(&stats->lock); - /* If the current NUMA-node is the only writer on the + /* If the current CPU is the only writer on the * pre-allocated stats keep using them. */ - if (unlikely(flow->stats_last_writer != node)) { + if (unlikely(flow->stats_last_writer != cpu)) { /* A previous locker may have already allocated the - * stats, so we need to check again. If node-specific + * stats, so we need to check again. If CPU-specific * stats were already allocated, we update the pre- * allocated stats as we have already locked them. */ - if (likely(flow->stats_last_writer != NUMA_NO_NODE) - && likely(!rcu_access_pointer(flow->stats[node]))) { - /* Try to allocate node-specific stats. */ + if (likely(flow->stats_last_writer != -1) && + likely(!rcu_access_pointer(flow->stats[cpu]))) { + /* Try to allocate CPU-specific stats. */ struct flow_stats *new_stats; new_stats = @@ -114,12 +116,12 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags, new_stats->tcp_flags = tcp_flags; spin_lock_init(&new_stats->lock); - rcu_assign_pointer(flow->stats[node], + rcu_assign_pointer(flow->stats[cpu], new_stats); goto unlock; } } - flow->stats_last_writer = node; + flow->stats_last_writer = cpu; } } @@ -136,15 +138,15 @@ void ovs_flow_stats_get(const struct sw_flow *flow, struct ovs_flow_stats *ovs_stats, unsigned long *used, __be16 *tcp_flags) { - int node; + int cpu; *used = 0; *tcp_flags = 0; memset(ovs_stats, 0, sizeof(*ovs_stats)); - /* We open code this to make sure node 0 is always considered */ - for (node = 0; node < MAX_NUMNODES; node = next_node(node, node_possible_map)) { - struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[node]); + /* We open code this to make sure cpu 0 is always considered */ + for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask)) { + struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]); if (stats) { /* Local CPU may write on non-local stats, so we must @@ -164,11 +166,11 @@ void ovs_flow_stats_get(const struct sw_flow *flow, /* Called with ovs_mutex. */ void ovs_flow_stats_clear(struct sw_flow *flow) { - int node; + int cpu; - /* We open code this to make sure node 0 is always considered */ - for (node = 0; node < MAX_NUMNODES; node = next_node(node, node_possible_map)) { - struct flow_stats *stats = ovsl_dereference(flow->stats[node]); + /* We open code this to make sure cpu 0 is always considered */ + for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask)) { + struct flow_stats *stats = ovsl_dereference(flow->stats[cpu]); if (stats) { spin_lock_bh(&stats->lock); -- cgit v1.2.3 From 2279994d07ab67ff7a1d09bfbd65588332dfb6d8 Mon Sep 17 00:00:00 2001 From: pravin shelar Date: Mon, 19 Sep 2016 13:51:00 -0700 Subject: openvswitch: avoid resetting flow key while installing new flow. since commit commit db74a3335e0f6 ("openvswitch: use percpu flow stats") flow alloc resets flow-key. So there is no need to reset the flow-key again if OVS is using newly allocated flow-key. Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/flow.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net/openvswitch/flow.c') diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 0fa45439def1..634cc10d6dee 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -767,8 +767,6 @@ int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr, { int err; - memset(key, 0, OVS_SW_FLOW_KEY_METADATA_SIZE); - /* Extract metadata from netlink attributes. */ err = ovs_nla_get_flow_metadata(net, attr, key, log); if (err) -- cgit v1.2.3